Add initial support for object detection evaluation #306

Draft · wants to merge 1 commit into base: master
62 changes: 62 additions & 0 deletions demo_run.py
@@ -0,0 +1,62 @@
import torch
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from PIL import Image
import os
import json
from detectionmetrics.utils.evaluator import Evaluator


class RealModel:
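    """Thin wrapper around a pretrained torchvision Mask R-CNN that exposes a simple predict() method."""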
def __init__(self):
        self.model = maskrcnn_resnet50_fpn(weights="DEFAULT")
self.model.eval()

    def predict(self, image):
        image_tensor = F.to_tensor(image).unsqueeze(0)  # [1, C, H, W]
        with torch.no_grad():
            outputs = self.model(image_tensor)[0]

        # Keep only detections above the confidence threshold
        threshold = 0.5
        keep = outputs['scores'] > threshold
        boxes = outputs['boxes'][keep].tolist()
        labels = outputs['labels'][keep].tolist()
        scores = outputs['scores'][keep].tolist()

return {
"boxes": boxes,
"labels": labels,
"scores": scores
}


class SimpleDataset:
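    """Minimal dataset: loads images from a directory and per-image boxes/labels from annotations.json."""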
def __init__(self, image_dir):
self.image_dir = image_dir
self.images = [f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png'))]
self.annotations_path = os.path.join(image_dir, "annotations.json")
with open(self.annotations_path) as f:
self.annotations = json.load(f)

def __len__(self):
return len(self.images)

def __getitem__(self, idx):
image_name = self.images[idx]
image_path = os.path.join(self.image_dir, image_name)
image = Image.open(image_path).convert("RGB")
gt = self.annotations.get(image_name, {})
return image, {
"boxes": gt.get("boxes", []),
"labels": gt.get("labels", [])
}


model = RealModel()
dataset = SimpleDataset("sample_data")
evaluator = Evaluator(model=model, dataset=dataset)


metrics = evaluator.evaluate()
print(metrics)
49 changes: 49 additions & 0 deletions detectionmetrics/datasets/image_detection_dataset.py
@@ -0,0 +1,49 @@
import os
import json
from PIL import Image
from torch.utils.data import Dataset

class ImageDetectionDataset(Dataset):
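    """Detection dataset backed by a COCO-style annotation file (images, annotations, categories)."""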
def __init__(self, images_dir, annotation_file):
self.images_dir = images_dir

with open(annotation_file, 'r') as f:
data = json.load(f)

self.images = data["images"]
self.annotations = data["annotations"]
self.categories = data["categories"]


self.image_to_annotations = {}
for ann in self.annotations:
image_id = ann["image_id"]
if image_id not in self.image_to_annotations:
self.image_to_annotations[image_id] = []
self.image_to_annotations[image_id].append(ann)

def __len__(self):
return len(self.images)

def __getitem__(self, idx):
image_info = self.images[idx]
image_path = os.path.join(self.images_dir, image_info["file_name"])
image = Image.open(image_path).convert("RGB")

# Get annotations
anns = self.image_to_annotations.get(image_info["id"], [])
boxes = []
labels = []

for ann in anns:
x, y, w, h = ann["bbox"]
boxes.append([x, y, x + w, y + h])
labels.append(ann["category_id"])

ground_truth = {
"boxes": boxes,
"labels": labels
}

return image, ground_truth

19 changes: 19 additions & 0 deletions detectionmetrics/models/image_detection_model.py
@@ -0,0 +1,19 @@
import torchvision
import torch

class TorchvisionModel:
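    """Torchvision Mask R-CNN wrapper returning boxes, labels, and scores as plain Python lists."""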
def __init__(self, device='cpu'):
self.device = torch.device(device)
self.model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")
self.model.to(self.device)
self.model.eval()

def predict(self, image):
with torch.no_grad():
image_tensor = torchvision.transforms.functional.to_tensor(image).to(self.device)
output = self.model([image_tensor])[0]
return {
"boxes": output["boxes"].cpu().tolist(),
"labels": output["labels"].cpu().tolist(),
"scores": output["scores"].cpu().tolist()
}
56 changes: 56 additions & 0 deletions detectionmetrics/utils/evaluator.py
@@ -0,0 +1,56 @@
from detectionmetrics.utils.object_detection_metrics import compute_detection_metrics

class Evaluator:
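    """Runs a detection model over a dataset and aggregates TP/FP/FN into precision, recall, and F1."""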
def __init__(self, model, dataset, iou_threshold=0.3):
self.model = model
self.dataset = dataset
self.iou_threshold = iou_threshold

def evaluate(self):
total_tp, total_fp, total_fn = 0, 0, 0

for i in range(len(self.dataset)):
image, ground_truth = self.dataset[i]
prediction = self.model.predict(image)

print(f"\n=== Sample {i} ===")
print("Predicted:", prediction)
print("Ground Truth:", ground_truth)

            # Skip samples with neither predicted nor ground-truth boxes
            if not prediction.get("boxes") and not ground_truth.get("boxes"):
                continue

# Convert prediction to expected format
pred = [
{'box': box, 'label': label, 'score': score}
for box, label, score in zip(prediction['boxes'], prediction['labels'], prediction['scores'])
]

# Convert ground truth to expected format
gt = [
{'box': box, 'label': label}
for box, label in zip(ground_truth['boxes'], ground_truth['labels'])
]

            # Compute per-image TP/FP/FN at the configured IoU threshold
metrics = compute_detection_metrics(pred, gt, self.iou_threshold)
tp = metrics["true_positives"]
fp = metrics["false_positives"]
fn = metrics["false_negatives"]

total_tp += tp
total_fp += fp
total_fn += fn

precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0.0
recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0.0
f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

return {
'precision': precision,
'recall': recall,
'f1_score': f1,
'true_positives': total_tp,
'false_positives': total_fp,
'false_negatives': total_fn
}
86 changes: 86 additions & 0 deletions detectionmetrics/utils/object_detection_metrics.py
@@ -0,0 +1,86 @@
import torch

def match_predictions_to_ground_truth(pred, gt, iou_threshold=0.5):
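    """Greedily match predictions to ground truth: a prediction counts as a true positive when it
    overlaps an unmatched ground-truth box of the same label with IoU >= iou_threshold."""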
matched_gt = set()
tp = 0 # True Positives
fp = 0 # False Positives

for pred_item in pred:
pred_box = pred_item["box"]
pred_label = pred_item["label"]
match_found = False

for i, gt_item in enumerate(gt):
gt_box = gt_item["box"]
gt_label = gt_item["label"]

if i in matched_gt:
continue

iou = compute_iou(pred_box, gt_box)

if iou >= iou_threshold and pred_label == gt_label:
tp += 1
matched_gt.add(i)
match_found = True
break

if not match_found:
fp += 1

fn = len(gt) - len(matched_gt) # False Negatives

return tp, fp, fn

def compute_detection_metrics(pred, gt, iou_threshold=0.5):
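    """Compute precision, recall, and F1 from the matched TP/FP/FN counts."""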
tp, fp, fn = match_predictions_to_ground_truth(pred, gt, iou_threshold)

precision = tp / (tp + fp) if (tp + fp) else 0.0
recall = tp / (tp + fn) if (tp + fn) else 0.0
f1_score = (
2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
)
return {
"precision": precision,
"recall": recall,
"f1_score": f1_score,
"true_positives": tp,
"false_positives": fp,
"false_negatives": fn,
}

def compute_iou(boxA, boxB):
"""Computes IoU between two boxes."""
xA = max(boxA[0], boxB[0])
yA = max(boxA[1], boxB[1])
xB = min(boxA[2], boxB[2])
yB = min(boxA[3], boxB[3])

interArea = max(0, xB - xA) * max(0, yB - yA)
boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])

iou = interArea / float(boxAArea + boxBArea - interArea + 1e-6)
return iou

def precision_recall(pred_boxes, gt_boxes, iou_threshold=0.5):
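    """Class-agnostic precision/recall over raw box lists (labels are ignored)."""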
tp = 0
fp = 0
matched_gt = set()

for pred_box in pred_boxes:
match_found = False
for i, gt_box in enumerate(gt_boxes):
if i in matched_gt:
continue
iou = compute_iou(pred_box, gt_box)
if iou >= iou_threshold:
tp += 1
matched_gt.add(i)
match_found = True
break
if not match_found:
fp += 1
fn = len(gt_boxes) - tp
precision = tp / (tp + fp + 1e-6)
recall = tp / (tp + fn + 1e-6)
return precision, recall
15 changes: 15 additions & 0 deletions examples/object_detection_eval.py
@@ -0,0 +1,15 @@
from detectionmetrics.models.image_detection_model import TorchvisionModel
from detectionmetrics.datasets.image_detection_dataset import ImageDetectionDataset

dataset = ImageDetectionDataset("sample_data/", "sample_data/annotations.json")
model = TorchvisionModel()

for idx in range(len(dataset)):
    image, ground_truth = dataset[idx]
    prediction = model.predict(image)

    print(f"Results for sample {idx}:")
    print("Ground truth boxes:", ground_truth['boxes'])
    print("Boxes:", prediction['boxes'])
    print("Labels:", prediction['labels'])
    print("Scores:", prediction['scores'])
27 changes: 27 additions & 0 deletions examples/test_maskrcnn_inference.py
@@ -0,0 +1,27 @@
import torchvision
from PIL import Image
from torchvision import transforms
import torch

# Load pretrained model (use recommended weight enum)
weights = torchvision.models.detection.MaskRCNN_ResNet50_FPN_Weights.DEFAULT
model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights=weights)
model.eval()

# Load the image from local file
image_path = "examples/dog.jpg" # Relative path from project root
image = Image.open(image_path).convert("RGB")

# Transform image to tensor
transform = transforms.Compose([transforms.ToTensor()])
img_tensor = transform(image)

# Run inference
with torch.no_grad():
output = model([img_tensor])[0]

# Print output
print("Boxes:", output['boxes'])
print("Labels:", output['labels'])
print("Scores:", output['scores'])

Binary file added repo_structure.txt
Binary file not shown.
6 changes: 6 additions & 0 deletions sample_data/annotations.json
@@ -0,0 +1,6 @@
{
"dog.jpg": {
"boxes": [[100, 100, 400, 400], [500, 300, 700, 800]],
"labels": [18, 17]
}
}
Binary file added sample_data/dog.jpg