Add initial support for object detection evaluation #306

Draft · wants to merge 1 commit into base: master
62 changes: 62 additions & 0 deletions demo_run.py
@@ -0,0 +1,62 @@
import torch
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from PIL import Image
import os
import json
from detectionmetrics.utils.evaluator import Evaluator


class RealModel:
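    """Thin wrapper around a pretrained torchvision Mask R-CNN that exposes a simple predict() method."""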
def __init__(self):
        self.model = maskrcnn_resnet50_fpn(weights="DEFAULT")
self.model.eval()

    def predict(self, image):
        image_tensor = F.to_tensor(image).unsqueeze(0)  # [1, C, H, W]
        with torch.no_grad():
            outputs = self.model(image_tensor)[0]

        # Keep only detections above the confidence threshold
        threshold = 0.5
        keep = outputs['scores'] > threshold
        boxes = outputs['boxes'][keep].tolist()
        labels = outputs['labels'][keep].tolist()
        scores = outputs['scores'][keep].tolist()

return {
"boxes": boxes,
"labels": labels,
"scores": scores
}


class SimpleDataset:
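    """Minimal dataset: loads images from a directory and per-image boxes/labels from annotations.json."""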
def __init__(self, image_dir):
self.image_dir = image_dir
self.images = [f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png'))]
self.annotations_path = os.path.join(image_dir, "annotations.json")
with open(self.annotations_path) as f:
self.annotations = json.load(f)

def __len__(self):
return len(self.images)

def __getitem__(self, idx):
image_name = self.images[idx]
image_path = os.path.join(self.image_dir, image_name)
image = Image.open(image_path).convert("RGB")
gt = self.annotations.get(image_name, {})
return image, {
"boxes": gt.get("boxes", []),
"labels": gt.get("labels", [])
}


model = RealModel()
dataset = SimpleDataset("sample_data")
evaluator = Evaluator(model=model, dataset=dataset)


metrics = evaluator.evaluate()
print(metrics)
49 changes: 49 additions & 0 deletions detectionmetrics/datasets/image_detection_dataset.py
@@ -0,0 +1,49 @@
import os
import json
from PIL import Image
from torch.utils.data import Dataset

class ImageDetectionDataset(Dataset):
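    """Detection dataset backed by a COCO-style annotation file (images, annotations, categories)."""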
def __init__(self, images_dir, annotation_file):
self.images_dir = images_dir

with open(annotation_file, 'r') as f:
data = json.load(f)

self.images = data["images"]
self.annotations = data["annotations"]
self.categories = data["categories"]


self.image_to_annotations = {}
for ann in self.annotations:
image_id = ann["image_id"]
if image_id not in self.image_to_annotations:
self.image_to_annotations[image_id] = []
self.image_to_annotations[image_id].append(ann)

def __len__(self):
return len(self.images)

def __getitem__(self, idx):
image_info = self.images[idx]
image_path = os.path.join(self.images_dir, image_info["file_name"])
image = Image.open(image_path).convert("RGB")

# Get annotations
anns = self.image_to_annotations.get(image_info["id"], [])
boxes = []
labels = []

for ann in anns:
x, y, w, h = ann["bbox"]
boxes.append([x, y, x + w, y + h])
labels.append(ann["category_id"])

ground_truth = {
"boxes": boxes,
"labels": labels
}

return image, ground_truth

19 changes: 19 additions & 0 deletions detectionmetrics/models/image_detection_model.py
@@ -0,0 +1,19 @@
import torchvision
import torch

class TorchvisionModel:
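    """Torchvision Mask R-CNN wrapper returning boxes, labels, and scores as plain Python lists."""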
def __init__(self, device='cpu'):
self.device = torch.device(device)
self.model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")
self.model.to(self.device)
self.model.eval()

def predict(self, image):
with torch.no_grad():
image_tensor = torchvision.transforms.functional.to_tensor(image).to(self.device)
output = self.model([image_tensor])[0]
return {
"boxes": output["boxes"].cpu().tolist(),
"labels": output["labels"].cpu().tolist(),
"scores": output["scores"].cpu().tolist()
}
56 changes: 56 additions & 0 deletions detectionmetrics/utils/evaluator.py
@@ -0,0 +1,56 @@
from detectionmetrics.utils.object_detection_metrics import compute_detection_metrics

class Evaluator:
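    """Runs a detection model over a dataset and aggregates TP/FP/FN into precision, recall, and F1."""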
def __init__(self, model, dataset, iou_threshold=0.3):
self.model = model
self.dataset = dataset
self.iou_threshold = iou_threshold

def evaluate(self):
total_tp, total_fp, total_fn = 0, 0, 0

for i in range(len(self.dataset)):
image, ground_truth = self.dataset[i]
prediction = self.model.predict(image)

print(f"\n=== Sample {i} ===")
print("Predicted:", prediction)
print("Ground Truth:", ground_truth)

            # Skip samples with neither predicted nor ground-truth boxes
            if not prediction.get("boxes") and not ground_truth.get("boxes"):
                continue

# Convert prediction to expected format
pred = [
{'box': box, 'label': label, 'score': score}
for box, label, score in zip(prediction['boxes'], prediction['labels'], prediction['scores'])
]

# Convert ground truth to expected format
gt = [
{'box': box, 'label': label}
for box, label in zip(ground_truth['boxes'], ground_truth['labels'])
]

            # Compute per-image TP/FP/FN at the configured IoU threshold
metrics = compute_detection_metrics(pred, gt, self.iou_threshold)
tp = metrics["true_positives"]
fp = metrics["false_positives"]
fn = metrics["false_negatives"]

total_tp += tp
total_fp += fp
total_fn += fn

precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0.0
recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0.0
f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

return {
'precision': precision,
'recall': recall,
'f1_score': f1,
'true_positives': total_tp,
'false_positives': total_fp,
'false_negatives': total_fn
}
86 changes: 86 additions & 0 deletions detectionmetrics/utils/object_detection_metrics.py
@@ -0,0 +1,86 @@
import torch

def match_predictions_to_ground_truth(pred, gt, iou_threshold=0.5):
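    """Greedily match predictions to ground truth: a prediction counts as a true positive when it
    overlaps an unmatched ground-truth box of the same label with IoU >= iou_threshold."""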
matched_gt = set()
tp = 0 # True Positives
fp = 0 # False Positives

for pred_item in pred:
pred_box = pred_item["box"]
pred_label = pred_item["label"]
match_found = False

for i, gt_item in enumerate(gt):
gt_box = gt_item["box"]
gt_label = gt_item["label"]

if i in matched_gt:
continue

iou = compute_iou(pred_box, gt_box)

if iou >= iou_threshold and pred_label == gt_label:
tp += 1
matched_gt.add(i)
match_found = True
break

if not match_found:
fp += 1

fn = len(gt) - len(matched_gt) # False Negatives

return tp, fp, fn

def compute_detection_metrics(pred, gt, iou_threshold=0.5):
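    """Compute precision, recall, and F1 from the matched TP/FP/FN counts."""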
tp, fp, fn = match_predictions_to_ground_truth(pred, gt, iou_threshold)

precision = tp / (tp + fp) if (tp + fp) else 0.0
recall = tp / (tp + fn) if (tp + fn) else 0.0
f1_score = (
2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
)
return {
"precision": precision,
"recall": recall,
"f1_score": f1_score,
"true_positives": tp,
"false_positives": fp,
"false_negatives": fn,
}

def compute_iou(boxA, boxB):
"""Computes IoU between two boxes."""
xA = max(boxA[0], boxB[0])
yA = max(boxA[1], boxB[1])
xB = min(boxA[2], boxB[2])
yB = min(boxA[3], boxB[3])

interArea = max(0, xB - xA) * max(0, yB - yA)
boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])

iou = interArea / float(boxAArea + boxBArea - interArea + 1e-6)
return iou

def precision_recall(pred_boxes, gt_boxes, iou_threshold=0.5):
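    """Class-agnostic precision/recall over raw box lists (labels are ignored)."""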
tp = 0
fp = 0
matched_gt = set()

for pred_box in pred_boxes:
match_found = False
for i, gt_box in enumerate(gt_boxes):
if i in matched_gt:
continue
iou = compute_iou(pred_box, gt_box)
if iou >= iou_threshold:
tp += 1
matched_gt.add(i)
match_found = True
break
if not match_found:
fp += 1
fn = len(gt_boxes) - tp
precision = tp / (tp + fp + 1e-6)
recall = tp / (tp + fn + 1e-6)
return precision, recall
15 changes: 15 additions & 0 deletions examples/object_detection_eval.py
@@ -0,0 +1,15 @@
from detectionmetrics.models.image_detection_model import TorchvisionModel
from detectionmetrics.datasets.image_detection_dataset import ImageDetectionDataset

dataset = ImageDetectionDataset("sample_data/", "sample_data/annotations.json")
model = TorchvisionModel()

for idx in range(len(dataset)):
    image, ground_truth = dataset[idx]
    prediction = model.predict(image)

    print(f"Results for sample {idx}:")
    print("Ground truth boxes:", ground_truth['boxes'])
    print("Boxes:", prediction['boxes'])
    print("Labels:", prediction['labels'])
    print("Scores:", prediction['scores'])
27 changes: 27 additions & 0 deletions examples/test_maskrcnn_inference.py
@@ -0,0 +1,27 @@
import torchvision
from PIL import Image
from torchvision import transforms
import torch

# Load pretrained model (use recommended weight enum)
weights = torchvision.models.detection.MaskRCNN_ResNet50_FPN_Weights.DEFAULT
model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights=weights)
model.eval()

# Load the image from local file
image_path = "examples/dog.jpg" # Relative path from project root
image = Image.open(image_path).convert("RGB")

# Transform image to tensor
transform = transforms.Compose([transforms.ToTensor()])
img_tensor = transform(image)

# Run inference
with torch.no_grad():
output = model([img_tensor])[0]

# Print output
print("Boxes:", output['boxes'])
print("Labels:", output['labels'])
print("Scores:", output['scores'])

Binary file added repo_structure.txt
Binary file not shown.
6 changes: 6 additions & 0 deletions sample_data/annotations.json
@@ -0,0 +1,6 @@
{
"dog.jpg": {
"boxes": [[100, 100, 400, 400], [500, 300, 700, 800]],
"labels": [18, 17]
}
}
Binary file added sample_data/dog.jpg