diff --git a/demo_run.py b/demo_run.py
new file mode 100644
index 00000000..a6406bce
--- /dev/null
+++ b/demo_run.py
@@ -0,0 +1,59 @@
+import torch
+from torchvision.models.detection import maskrcnn_resnet50_fpn
+from torchvision.transforms import functional as F
+from PIL import Image
+import os
+import json
+from detectionmetrics.utils.evaluator import Evaluator
+
+
+class RealModel:
+    def __init__(self):
+        # The `pretrained` flag is deprecated; use the weights enum instead
+        self.model = maskrcnn_resnet50_fpn(weights="DEFAULT")
+        self.model.eval()
+
+    def predict(self, image):
+        image_tensor = F.to_tensor(image).unsqueeze(0)  # [1, C, H, W]
+        with torch.no_grad():
+            outputs = self.model(image_tensor)[0]
+
+        # Keep only detections above the confidence threshold
+        threshold = 0.5
+        keep = outputs["scores"] > threshold
+        return {
+            "boxes": outputs["boxes"][keep].tolist(),
+            "labels": outputs["labels"][keep].tolist(),
+            "scores": outputs["scores"][keep].tolist(),
+        }
+
+
+class SimpleDataset:
+    def __init__(self, image_dir):
+        self.image_dir = image_dir
+        # Sort for a deterministic iteration order
+        self.images = sorted(f for f in os.listdir(image_dir) if f.endswith((".jpg", ".png")))
+        self.annotations_path = os.path.join(image_dir, "annotations.json")
+        with open(self.annotations_path) as f:
+            self.annotations = json.load(f)
+
+    def __len__(self):
+        return len(self.images)
+
+    def __getitem__(self, idx):
+        image_name = self.images[idx]
+        image_path = os.path.join(self.image_dir, image_name)
+        image = Image.open(image_path).convert("RGB")
+        gt = self.annotations.get(image_name, {})
+        return image, {
+            "boxes": gt.get("boxes", []),
+            "labels": gt.get("labels", []),
+        }
+
+
+model = RealModel()
+dataset = SimpleDataset("sample_data")
+evaluator = Evaluator(model=model, dataset=dataset)
+
+metrics = evaluator.evaluate()
+print(metrics)
diff --git a/detectionmetrics/datasets/image_detection_dataset.py b/detectionmetrics/datasets/image_detection_dataset.py
new file mode 100644
index 00000000..26a01ea3
--- /dev/null
+++ b/detectionmetrics/datasets/image_detection_dataset.py
@@ -0,0 +1,44 @@
+import os
+import json
+
+from PIL import Image
+from torch.utils.data import Dataset
+
+
+class ImageDetectionDataset(Dataset):
+    """COCO-style detection dataset: the annotation file must contain
+    "images", "annotations", and "categories" keys."""
+
+    def __init__(self, images_dir, annotation_file):
+        self.images_dir = images_dir
+
+        with open(annotation_file, "r") as f:
+            data = json.load(f)
+
+        self.images = data["images"]
+        self.annotations = data["annotations"]
+        self.categories = data["categories"]
+
+        # Index annotations by image id for O(1) lookup in __getitem__
+        self.image_to_annotations = {}
+        for ann in self.annotations:
+            self.image_to_annotations.setdefault(ann["image_id"], []).append(ann)
+
+    def __len__(self):
+        return len(self.images)
+
+    def __getitem__(self, idx):
+        image_info = self.images[idx]
+        image_path = os.path.join(self.images_dir, image_info["file_name"])
+        image = Image.open(image_path).convert("RGB")
+
+        # Convert COCO [x, y, w, h] boxes to [x1, y1, x2, y2]
+        anns = self.image_to_annotations.get(image_info["id"], [])
+        boxes = []
+        labels = []
+        for ann in anns:
+            x, y, w, h = ann["bbox"]
+            boxes.append([x, y, x + w, y + h])
+            labels.append(ann["category_id"])
+
+        return image, {"boxes": boxes, "labels": labels}
diff --git a/detectionmetrics/models/image_detection_model.py b/detectionmetrics/models/image_detection_model.py
new file mode 100644
index 00000000..acdf58db
--- /dev/null
+++ b/detectionmetrics/models/image_detection_model.py
@@ -0,0 +1,21 @@
+import torch
+import torchvision
+
+
+class TorchvisionModel:
+    def __init__(self, device="cpu"):
+        self.device = torch.device(device)
+        self.model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")
+        self.model.to(self.device)
+        self.model.eval()
+
+    def predict(self, image):
+        # Detection models take a list of 3D tensors and return one dict per image
+        with torch.no_grad():
+            image_tensor = torchvision.transforms.functional.to_tensor(image).to(self.device)
+            output = self.model([image_tensor])[0]
+        return {
+            "boxes": output["boxes"].cpu().tolist(),
+            "labels": output["labels"].cpu().tolist(),
+            "scores": output["scores"].cpu().tolist(),
+        }
diff --git a/detectionmetrics/utils/evaluator.py b/detectionmetrics/utils/evaluator.py
new file mode 100644
index 00000000..f7590352
--- /dev/null
+++ b/detectionmetrics/utils/evaluator.py
@@ -0,0 +1,53 @@
+from detectionmetrics.utils.object_detection_metrics import compute_detection_metrics
+
+
+class Evaluator:
+    def __init__(self, model, dataset, iou_threshold=0.3):
+        self.model = model
+        self.dataset = dataset
+        self.iou_threshold = iou_threshold
+
+    def evaluate(self):
+        total_tp, total_fp, total_fn = 0, 0, 0
+
+        for i in range(len(self.dataset)):
+            image, ground_truth = self.dataset[i]
+            prediction = self.model.predict(image)
+
+            print(f"\n=== Sample {i} ===")
+            print("Predicted:", prediction)
+            print("Ground Truth:", ground_truth)
+
+            # Convert prediction to the list-of-dicts format the metrics expect
+            pred = [
+                {"box": box, "label": label, "score": score}
+                for box, label, score in zip(
+                    prediction["boxes"], prediction["labels"], prediction["scores"]
+                )
+            ]
+
+            # Convert ground truth to the same format
+            gt = [
+                {"box": box, "label": label}
+                for box, label in zip(ground_truth["boxes"], ground_truth["labels"])
+            ]
+
+            # Empty prediction or ground-truth lists still contribute to FP/FN,
+            # so no samples are skipped here
+            metrics = compute_detection_metrics(pred, gt, self.iou_threshold)
+            total_tp += metrics["true_positives"]
+            total_fp += metrics["false_positives"]
+            total_fn += metrics["false_negatives"]
+
+        precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0.0
+        recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0.0
+        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
+
+        return {
+            "precision": precision,
+            "recall": recall,
+            "f1_score": f1,
+            "true_positives": total_tp,
+            "false_positives": total_fp,
+            "false_negatives": total_fn,
+        }
diff --git a/detectionmetrics/utils/object_detection_metrics.py b/detectionmetrics/utils/object_detection_metrics.py
new file mode 100644
index 00000000..c4cce735
--- /dev/null
+++ b/detectionmetrics/utils/object_detection_metrics.py
@@ -0,0 +1,86 @@
+def match_predictions_to_ground_truth(pred, gt, iou_threshold=0.5):
+    """Greedy one-to-one matching: each ground-truth box is used at most once."""
+    matched_gt = set()
+    tp = 0  # true positives
+    fp = 0  # false positives
+
+    for pred_item in pred:
+        pred_box = pred_item["box"]
+        pred_label = pred_item["label"]
+        match_found = False
+
+        for i, gt_item in enumerate(gt):
+            if i in matched_gt:
+                continue
+
+            iou = compute_iou(pred_box, gt_item["box"])
+            if iou >= iou_threshold and pred_label == gt_item["label"]:
+                tp += 1
+                matched_gt.add(i)
+                match_found = True
+                break
+
+        if not match_found:
+            fp += 1
+
+    fn = len(gt) - len(matched_gt)  # unmatched ground truth = false negatives
+
+    return tp, fp, fn
+
+
+def compute_detection_metrics(pred, gt, iou_threshold=0.5):
+    tp, fp, fn = match_predictions_to_ground_truth(pred, gt, iou_threshold)
+
+    precision = tp / (tp + fp) if (tp + fp) else 0.0
+    recall = tp / (tp + fn) if (tp + fn) else 0.0
+    f1_score = (
+        2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
+    )
+    return {
+        "precision": precision,
+        "recall": recall,
+        "f1_score": f1_score,
+        "true_positives": tp,
+        "false_positives": fp,
+        "false_negatives": fn,
+    }
+
+
+def compute_iou(boxA, boxB):
+    """Computes IoU between two [x1, y1, x2, y2] boxes."""
+    xA = max(boxA[0], boxB[0])
+    yA = max(boxA[1], boxB[1])
+    xB = min(boxA[2], boxB[2])
+    yB = min(boxA[3], boxB[3])
+
+    interArea = max(0, xB - xA) * max(0, yB - yA)
+    boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
+    boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
+
+    # Epsilon guards against division by zero for degenerate boxes
+    return interArea / float(boxAArea + boxBArea - interArea + 1e-6)
+
+
+def precision_recall(pred_boxes, gt_boxes, iou_threshold=0.5):
+    """Class-agnostic precision/recall over raw box lists."""
+    tp = 0
+    fp = 0
+    matched_gt = set()
+
+    for pred_box in pred_boxes:
+        match_found = False
+        for i, gt_box in enumerate(gt_boxes):
+            if i in matched_gt:
+                continue
+            if compute_iou(pred_box, gt_box) >= iou_threshold:
+                tp += 1
+                matched_gt.add(i)
+                match_found = True
+                break
+        if not match_found:
+            fp += 1
+
+    fn = len(gt_boxes) - tp
+    precision = tp / (tp + fp + 1e-6)
+    recall = tp / (tp + fn + 1e-6)
+    return precision, recall
diff --git a/examples/object_detection_eval.py b/examples/object_detection_eval.py
new file mode 100644
index 00000000..91229580
--- /dev/null
+++ b/examples/object_detection_eval.py
@@ -0,0 +1,18 @@
+from detectionmetrics.models.image_detection_model import TorchvisionModel
+from detectionmetrics.datasets.image_detection_dataset import ImageDetectionDataset
+
+# NOTE: ImageDetectionDataset expects a COCO-style annotation file; the bundled
+# sample_data/annotations.json is filename-keyed (used by demo_run.py), so point
+# this at a COCO-style file when running for real.
+dataset = ImageDetectionDataset("sample_data/", "sample_data/annotations.json")
+model = TorchvisionModel()
+
+for idx in range(len(dataset)):
+    image, ground_truth = dataset[idx]
+    prediction = model.predict(image)
+
+    print(f"Results for sample {idx}:")
+    print("Boxes:", prediction["boxes"])
+    print("Labels:", prediction["labels"])
+    print("Scores:", prediction["scores"])
+    print("Ground-truth boxes:", ground_truth["boxes"])
diff --git a/examples/test_maskrcnn_inference.py b/examples/test_maskrcnn_inference.py
new file mode 100644
index 00000000..427934db
--- /dev/null
+++ b/examples/test_maskrcnn_inference.py
@@ -0,0 +1,26 @@
+import torch
+import torchvision
+from PIL import Image
+from torchvision import transforms
+
+# Load pretrained model (use recommended weights enum)
+weights = torchvision.models.detection.MaskRCNN_ResNet50_FPN_Weights.DEFAULT
+model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights=weights)
+model.eval()
+
+# Load the sample image added by this PR (relative path from project root)
+image_path = "sample_data/dog.jpg"
+image = Image.open(image_path).convert("RGB")
+
+# Transform image to tensor
+transform = transforms.Compose([transforms.ToTensor()])
+img_tensor = transform(image)
+
+# Run inference
+with torch.no_grad():
+    output = model([img_tensor])[0]
+
+# Print output
+print("Boxes:", output["boxes"])
+print("Labels:", output["labels"])
+print("Scores:", output["scores"])
diff --git a/repo_structure.txt b/repo_structure.txt
new file mode 100644
index 00000000..fba1441c
Binary files /dev/null and b/repo_structure.txt differ
diff --git a/sample_data/annotations.json b/sample_data/annotations.json
new file mode 100644
index 00000000..829e45da
--- /dev/null
+++ b/sample_data/annotations.json
@@ -0,0 +1,6 @@
+{
+    "dog.jpg": {
+        "boxes": [[100, 100, 400, 400], [500, 300, 700, 800]],
+        "labels": [18, 17]
+    }
+}
diff --git a/sample_data/dog.jpg b/sample_data/dog.jpg
new file mode 100644
index 00000000..12f0e0dd
Binary files /dev/null and b/sample_data/dog.jpg differ
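For reviewers: a quick, hand-checked sanity exercise for the matching and metrics code introduced above. This is a minimal sketch, not part of the diff; it assumes the package layout this PR adds and uses only the public functions defined in `object_detection_metrics.py`, with toy boxes whose expected values are worked out in the comments.

```python
# Sanity check for compute_iou / compute_detection_metrics from this PR.
from detectionmetrics.utils.object_detection_metrics import (
    compute_detection_metrics,
    compute_iou,
)

# Overlap of [0, 0, 10, 10] and [5, 5, 15, 15]: intersection 5 * 5 = 25,
# union 100 + 100 - 25 = 175, so IoU = 25 / 175 ~ 0.143.
assert abs(compute_iou([0, 0, 10, 10], [5, 5, 15, 15]) - 25 / 175) < 1e-3

# One correct detection, one spurious one, one missed ground-truth box:
# expect TP = 1, FP = 1, FN = 1, so precision = recall = 0.5.
pred = [
    {"box": [0, 0, 10, 10], "label": 1, "score": 0.9},    # matches gt[0] (IoU = 1.0)
    {"box": [50, 50, 60, 60], "label": 2, "score": 0.8},  # overlaps nothing -> FP
]
gt = [
    {"box": [0, 0, 10, 10], "label": 1},
    {"box": [100, 100, 120, 120], "label": 3},  # never detected -> FN
]
metrics = compute_detection_metrics(pred, gt, iou_threshold=0.5)
assert metrics["true_positives"] == 1
assert metrics["false_positives"] == 1
assert metrics["false_negatives"] == 1
assert metrics["precision"] == 0.5 and metrics["recall"] == 0.5
```

Note that the matching is greedy in prediction order rather than score order, so with many overlapping detections the counts can depend on list ordering; sorting `pred` by descending score before matching would make the result deterministic.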