# Shrimp Weight Odd/Even Classifier
#
# Content is user-generated and unverified.
import cv2
import pytesseract
import numpy as np
import os
from typing import List, Tuple
import re

class ShrimpWeightClassifier:
    def __init__(self):
        """
        Initialize the classifier for determining if shrimp weight is odd or even
        """
        # OCR configuration for better number recognition
        self.ocr_config = '--oem 3 --psm 8 -c tessedit_char_whitelist=0123456789.'
        
    def preprocess_image(self, image: np.ndarray) -> np.ndarray:
        """
        Binarize an image and close small gaps so OCR sees cleaner glyphs.

        Args:
            image: Input image. A 3-dimensional array is converted from BGR
                to grayscale; any other array is used unchanged.

        Returns:
            The thresholded image after a morphological close.
        """
        has_channel_axis = len(image.shape) == 3
        grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if has_channel_axis else image

        # With THRESH_OTSU the threshold is chosen automatically, so the
        # explicit 0 is only a placeholder.
        binarize_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
        binary = cv2.threshold(grayscale, 0, 255, binarize_flags)[1]

        # A 2x2 close fills pinholes and tiny breaks inside the strokes.
        structuring_element = np.ones((2, 2), np.uint8)
        return cv2.morphologyEx(binary, cv2.MORPH_CLOSE, structuring_element)
    
    def extract_weight_region(self, image: np.ndarray, region_coords: Tuple[int, int, int, int] = None) -> np.ndarray:
        """
        Extract the region containing weight text
        If region_coords is None, assumes the weight text is in a consistent position
        """
        if region_coords is None:
            # Since position is consistent, you'll need to determine these coordinates
            # from your sample images (x, y, width, height)
            # Example coordinates - adjust based on your actual images
            h, w = image.shape[:2]
            # Assuming weight text is in bottom portion of image
            x, y, width, height = int(w*0.1), int(h*0.8), int(w*0.8), int(h*0.15)
        else:
            x, y, width, height = region_coords
            
        weight_region = image[y:y+height, x:x+width]
        return weight_region
    
    def read_weight_from_image(self, image: np.ndarray, region_coords: Tuple[int, int, int, int] = None) -> str:
        """
        Run OCR on the weight region and return the number that was read.

        Args:
            image: Full input image.
            region_coords: Optional ``(x, y, width, height)`` crop, forwarded
                to ``extract_weight_region``.

        Returns:
            The first run of digits in the OCR output, optionally followed
            by a decimal point and more digits, as a string.

        Raises:
            ValueError: If the OCR output contains no digits.
        """
        # Crop, clean up, then hand the result to Tesseract.
        crop = self.extract_weight_region(image, region_coords)
        ocr_input = self.preprocess_image(crop)
        raw_text = pytesseract.image_to_string(ocr_input, config=self.ocr_config)

        # Drop surrounding whitespace and any spaces between characters.
        text = raw_text.strip().replace(' ', '')

        weight_match = re.search(r'\d+\.?\d*', text)
        if weight_match is None:
            raise ValueError(f"Could not extract weight from text: {text}")
        return weight_match.group()
    
    def get_last_digit(self, weight_str: str) -> int:
        """
        Extract the last digit from weight string
        """
        # Remove decimal point and get last digit
        digits_only = weight_str.replace('.', '')
        if digits_only:
            return int(digits_only[-1])
        else:
            raise ValueError(f"No digits found in weight: {weight_str}")
    
    def is_odd_or_even(self, weight_str: str) -> str:
        """
        Determine if weight is odd or even based on last digit
        """
        last_digit = self.get_last_digit(weight_str)
        return "คี่" if last_digit % 2 == 1 else "คู่"
    
    def classify_single_image(self, image_path: str, region_coords: Tuple[int, int, int, int] = None) -> dict:
        """
        Classify a single image
        """
        try:
            # Load image
            image = cv2.imread(image_path)
            if image is None:
                raise ValueError(f"Could not load image: {image_path}")
            
            # Read weight
            weight_str = self.read_weight_from_image(image, region_coords)
            
            # Classify odd/even
            classification = self.is_odd_or_even(weight_str)
            
            return {
                'image_path': image_path,
                'weight': weight_str,
                'last_digit': self.get_last_digit(weight_str),
                'classification': classification,
                'success': True
            }
            
        except Exception as e:
            return {
                'image_path': image_path,
                'weight': None,
                'last_digit': None,
                'classification': None,
                'success': False,
                'error': str(e)
            }
    
    def classify_batch(self, image_folder: str, region_coords: Tuple[int, int, int, int] = None) -> List[dict]:
        """
        Classify all images in a folder
        """
        results = []
        
        # Get all image files
        image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']
        image_files = []
        
        for file in os.listdir(image_folder):
            if any(file.lower().endswith(ext) for ext in image_extensions):
                image_files.append(os.path.join(image_folder, file))
        
        print(f"Processing {len(image_files)} images...")
        
        for i, image_path in enumerate(image_files):
            result = self.classify_single_image(image_path, region_coords)
            results.append(result)
            
            if (i + 1) % 100 == 0:
                print(f"Processed {i + 1}/{len(image_files)} images")
        
        return results
    
    def calculate_accuracy(self, results: List[dict], true_labels: List[str] = None) -> dict:
        """
        Calculate accuracy statistics
        """
        successful_results = [r for r in results if r['success']]
        total_processed = len(successful_results)
        
        odd_count = len([r for r in successful_results if r['classification'] == 'คี่'])
        even_count = len([r for r in successful_results if r['classification'] == 'คู่'])
        
        stats = {
            'total_images': len(results),
            'successfully_processed': total_processed,
            'failed_processing': len(results) - total_processed,
            'success_rate': total_processed / len(results) * 100 if results else 0,
            'odd_count': odd_count,
            'even_count': even_count
        }
        
        if true_labels and len(true_labels) == len(successful_results):
            correct_predictions = sum(1 for i, r in enumerate(successful_results) 
                                    if r['classification'] == true_labels[i])
            stats['accuracy'] = correct_predictions / len(successful_results) * 100
        
        return stats

# Example usage
if __name__ == "__main__":
    # Build a classifier; constructing it only stores the OCR options.
    classifier = ShrimpWeightClassifier()

    # Example: Classify single image
    # result = classifier.classify_single_image('path/to/shrimp_image.jpg')
    # print(f"Weight: {result['weight']}, Classification: {result['classification']}")

    # Example: Batch processing
    # results = classifier.classify_batch('path/to/image_folder')
    # stats = classifier.calculate_accuracy(results)
    # print(f"Processing statistics: {stats}")

    # Short usage banner shown when the module is run as a script.
    usage_lines = (
        "Shrimp Weight Classifier initialized successfully!",
        "Usage:",
        "1. classifier.classify_single_image('image_path.jpg')",
        "2. classifier.classify_batch('image_folder_path')",
    )
    for line in usage_lines:
        print(line)
# Content is user-generated and unverified.