Refactoring dataset and model classes #312

Merged · 18 commits · Jul 18, 2025
Binary file added .DS_Store
Binary file not shown.
3 changes: 2 additions & 1 deletion detectionmetrics/datasets/__init__.py
@@ -16,7 +16,7 @@
)
from detectionmetrics.datasets.rugd import RUGDImageSegmentationDataset
from detectionmetrics.datasets.wildscenes import WildscenesImageSegmentationDataset

from detectionmetrics.datasets.coco import CocoDataset

REGISTRY = {
"gaia_image_segmentation": GaiaImageSegmentationDataset,
@@ -29,4 +29,5 @@
"rellis3d_lidar_segmentation": Rellis3DLiDARSegmentationDataset,
"rugd_image_segmentation": RUGDImageSegmentationDataset,
"wildscenes_image_segmentation": WildscenesImageSegmentationDataset,
"coco_image_detection": CocoDataset,
}
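
With the new entry registered, a COCO detection dataset can be constructed by key like the existing ones. A minimal sketch, assuming REGISTRY is imported from detectionmetrics.datasets; the paths below are hypothetical placeholders:

from detectionmetrics.datasets import REGISTRY

# Hypothetical paths; any COCO-format annotation file and image directory work
dataset = REGISTRY["coco_image_detection"](
    annotation_file="annotations/instances_val.json",
    image_dir="images/val",
    split="val",
)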
122 changes: 122 additions & 0 deletions detectionmetrics/datasets/coco.py
@@ -0,0 +1,122 @@
from pycocotools.coco import COCO
import os
import pandas as pd
from typing import Tuple, List, Optional

from detectionmetrics.datasets.detection import ImageDetectionDataset


def build_coco_dataset(
annotation_file: str,
image_dir: str,
coco_obj: Optional[COCO] = None,
split: str = "train",
) -> Tuple[pd.DataFrame, dict]:
"""Build dataset and ontology dictionaries from COCO dataset structure

:param annotation_file: Path to the COCO-format JSON annotation file
:type annotation_file: str
:param image_dir: Path to the directory containing image files
:type image_dir: str
:param coco_obj: Optional pre-loaded COCO object to reuse
:type coco_obj: COCO
:param split: Dataset split name (e.g., "train", "val", "test")
:type split: str
:return: Dataset DataFrame and ontology dictionary
:rtype: Tuple[pd.DataFrame, dict]
"""
# Check that provided paths exist
assert os.path.isfile(
annotation_file
), f"Annotation file not found: {annotation_file}"
assert os.path.isdir(image_dir), f"Image directory not found: {image_dir}"

# Load COCO annotations (reuse if provided)
if coco_obj is None:
coco = COCO(annotation_file)
else:
coco = coco_obj

# Build ontology from COCO categories
ontology = {}
for cat in coco.loadCats(coco.getCatIds()):
ontology[cat["name"]] = {
"idx": cat["id"],
# "name": cat["name"],
"rgb": [0, 0, 0], # Placeholder; COCO doesn't define RGB colors
}

# Build dataset DataFrame from COCO image IDs
rows = []
for img_id in coco.getImgIds():
img_info = coco.loadImgs(img_id)[0]
rows.append(
{
"image": img_info["file_name"],
"annotation": str(img_id),
"split": split, # Use provided split parameter
}
)

dataset = pd.DataFrame(rows)
dataset.attrs = {"ontology": ontology}

return dataset, ontology


class CocoDataset(ImageDetectionDataset):
"""
Specific class for COCO-styled object detection datasets.

:param annotation_file: Path to the COCO-format JSON annotation file
:type annotation_file: str
:param image_dir: Path to the directory containing image files
:type image_dir: str
:param split: Dataset split name (e.g., "train", "val", "test")
:type split: str
"""

def __init__(self, annotation_file: str, image_dir: str, split: str = "train"):
# Load COCO object once
self.coco = COCO(annotation_file)
self.image_dir = image_dir
self.split = split

# Build dataset using the same COCO object and split
dataset, ontology = build_coco_dataset(
annotation_file, image_dir, self.coco, split=split
)

super().__init__(dataset=dataset, dataset_dir=image_dir, ontology=ontology)

def read_annotation(
self, fname: str
) -> Tuple[List[List[float]], List[int], List[int]]:
"""Return bounding boxes, labels, and category_ids for a given image ID.

        :param fname: Image ID in string form (optionally prefixed by a path)
        :type fname: str
        :return: Tuple of (boxes, labels, category_ids)
        :rtype: Tuple[List[List[float]], List[int], List[int]]
"""
# Extract image ID (fname might be a path or ID string)
try:
image_id = int(
os.path.basename(fname)
) # handles both '123' and '/path/to/123'
except ValueError:
raise ValueError(f"Invalid annotation ID: {fname}")

ann_ids = self.coco.getAnnIds(imgIds=image_id)
anns = self.coco.loadAnns(ann_ids)

boxes = []
labels = []
category_ids = []

for ann in anns:
# Convert [x, y, width, height] to [x1, y1, x2, y2]
x, y, w, h = ann["bbox"]
boxes.append([x, y, x + w, y + h])
labels.append(ann["category_id"])
category_ids.append(ann["category_id"])

return boxes, labels, category_ids
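
As a usage sketch (not part of this PR), the class can be driven end to end. Paths are hypothetical, and it assumes the base class exposes the built DataFrame as .dataset, as detection.py does:

from detectionmetrics.datasets.coco import CocoDataset

ds = CocoDataset(
    annotation_file="annotations/instances_val.json",
    image_dir="images/val",
    split="val",
)

# The "annotation" column stores each COCO image ID as a string, which
# read_annotation converts back to an integer ID
first_id = ds.dataset.iloc[0]["annotation"]
boxes, labels, category_ids = ds.read_annotation(first_id)
print(f"{len(boxes)} boxes for image {first_id}")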
110 changes: 110 additions & 0 deletions detectionmetrics/datasets/detection.py
@@ -0,0 +1,110 @@
from abc import abstractmethod
import os
from typing import List, Optional

import numpy as np
import pandas as pd
from tqdm import tqdm

from detectionmetrics.datasets.perception import PerceptionDataset


class DetectionDataset(PerceptionDataset):
"""Abstract perception detection dataset class."""

@abstractmethod
def read_annotation(self, fname: str):
"""Read detection annotation from a file.

:param fname: Annotation file name
"""
raise NotImplementedError

def get_label_count(self, splits: Optional[List[str]] = None):
"""Count detection labels per class for given splits.

:param splits: List of splits to consider
:return: Numpy array of label counts per class
"""
if splits is None:
splits = ["train", "val"]

df = self.dataset[self.dataset["split"].isin(splits)]
n_classes = max(c["idx"] for c in self.ontology.values()) + 1
label_count = np.zeros(n_classes, dtype=np.uint64)

for annotation_file in tqdm(df["annotation"], desc="Counting labels"):
            # read_annotation returns (boxes, labels, category_ids); only the
            # category IDs are needed for counting
            _, _, category_ids = self.read_annotation(annotation_file)
            for class_idx in category_ids:
                label_count[class_idx] += 1

return label_count


class ImageDetectionDataset(DetectionDataset):
"""Image detection dataset class."""

def make_fname_global(self):
"""Convert relative filenames in 'image' and 'annotation' columns to global paths."""
if self.dataset_dir is not None:
self.dataset["image"] = self.dataset["image"].apply(
lambda x: os.path.join(self.dataset_dir, x) if x is not None else None
)
self.dataset["annotation"] = self.dataset["annotation"].apply(
lambda x: os.path.join(self.dataset_dir, x) if x is not None else None
)
self.dataset_dir = None

def read_annotation(self, fname: str):
"""Read detection annotation from a file.

Override this based on annotation format (e.g., COCO JSON, XML, TXT).

:param fname: Annotation filename
:return: Parsed annotations (e.g., list of dicts)
"""
# TODO implement COCO or VOC parsing in their classes separately.
raise NotImplementedError("Implement annotation reading logic")


class LiDARDetectionDataset(DetectionDataset):
"""LiDAR detection dataset class."""

def __init__(
self,
dataset: pd.DataFrame,
dataset_dir: str,
ontology: dict,
is_kitti_format: bool = True,
):
super().__init__(dataset, dataset_dir, ontology)
self.is_kitti_format = is_kitti_format

def make_fname_global(self):
if self.dataset_dir is not None:
self.dataset["points"] = self.dataset["points"].apply(
lambda x: os.path.join(self.dataset_dir, x) if x is not None else None
)
self.dataset["annotation"] = self.dataset["annotation"].apply(
lambda x: os.path.join(self.dataset_dir, x) if x is not None else None
)
self.dataset_dir = None

def read_annotation(self, fname: str):
"""Read LiDAR detection annotation.

For example, read KITTI format label files or custom format.

:param fname: Annotation file path
:return: Parsed annotations (e.g., list of dicts)
"""
# TODO Implement format specific parsing
raise NotImplementedError("Implement LiDAR detection annotation reading")
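
To make the extension point concrete, here is an illustrative subclass (not part of this PR) that parses a simple whitespace-separated TXT format, one object per line as 'class_idx x1 y1 x2 y2', and returns the same (boxes, labels, category_ids) tuple that get_label_count consumes:

class TxtImageDetectionDataset(ImageDetectionDataset):
    """Illustrative subclass for 'class_idx x1 y1 x2 y2' TXT annotations."""

    def read_annotation(self, fname: str):
        boxes, labels, category_ids = [], [], []
        with open(fname) as f:
            for line in f:
                class_idx, x1, y1, x2, y2 = line.split()
                boxes.append([float(x1), float(y1), float(x2), float(y2)])
                labels.append(int(class_idx))
                category_ids.append(int(class_idx))
        return boxes, labels, category_ids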
6 changes: 3 additions & 3 deletions detectionmetrics/datasets/gaia.py
@@ -3,7 +3,7 @@

import pandas as pd

from detectionmetrics.datasets import dataset as dm_dataset
from detectionmetrics.datasets import segmentation as dm_segmentation_dataset
import detectionmetrics.utils.io as uio


@@ -34,7 +34,7 @@ def build_dataset(dataset_fname: str) -> Tuple[pd.DataFrame, str, dict]:
return dataset, dataset_dir, ontology


class GaiaImageSegmentationDataset(dm_dataset.ImageSegmentationDataset):
class GaiaImageSegmentationDataset(dm_segmentation_dataset.ImageSegmentationDataset):
"""Specific class for GAIA-styled image segmentation datasets

:param dataset_fname: Parquet dataset filename
@@ -46,7 +46,7 @@ def __init__(self, dataset_fname: str):
super().__init__(dataset, dataset_dir, ontology)


class GaiaLiDARSegmentationDataset(dm_dataset.LiDARSegmentationDataset):
class GaiaLiDARSegmentationDataset(dm_segmentation_dataset.LiDARSegmentationDataset):
"""Specific class for GAIA-styled LiDAR segmentation datasets

:param dataset_fname: Parquet dataset filename
6 changes: 3 additions & 3 deletions detectionmetrics/datasets/generic.py
@@ -6,7 +6,7 @@

import pandas as pd

from detectionmetrics.datasets import dataset as dm_dataset
from detectionmetrics.datasets import segmentation as dm_segmentation_dataset
import detectionmetrics.utils.io as uio


@@ -111,7 +111,7 @@ def build_dataset(
return dataset, ontology


class GenericImageSegmentationDataset(dm_dataset.ImageSegmentationDataset):
class GenericImageSegmentationDataset(dm_segmentation_dataset.ImageSegmentationDataset):
"""Generic class for image segmentation datasets.

:param data_suffix: File suffix to be used to filter data
@@ -160,7 +160,7 @@ def __init__(
super().__init__(dataset, dataset_dir, ontology)


class GenericLiDARSegmentationDataset(dm_dataset.LiDARSegmentationDataset):
class GenericLiDARSegmentationDataset(dm_segmentation_dataset.LiDARSegmentationDataset):
"""Generic class for LiDAR segmentation datasets.

:param data_suffix: File suffix to be used to filter data
6 changes: 3 additions & 3 deletions detectionmetrics/datasets/goose.py
@@ -5,7 +5,7 @@

import pandas as pd

from detectionmetrics.datasets import dataset as dm_dataset
from detectionmetrics.datasets import segmentation as dm_segmentation_dataset
import detectionmetrics.utils.conversion as uc


@@ -84,7 +84,7 @@ def build_dataset(
return dataset, ontology


class GOOSEImageSegmentationDataset(dm_dataset.ImageSegmentationDataset):
class GOOSEImageSegmentationDataset(dm_segmentation_dataset.ImageSegmentationDataset):
"""Specific class for GOOSE-styled image segmentation datasets. All data can be
downloaded from the official webpage (https://goose-dataset.de):
train -> https://goose-dataset.de/storage/goose_2d_train.zip
@@ -128,7 +128,7 @@ def __init__(
super().__init__(dataset, dataset_dir, ontology)


class GOOSELiDARSegmentationDataset(dm_dataset.LiDARSegmentationDataset):
class GOOSELiDARSegmentationDataset(dm_segmentation_dataset.LiDARSegmentationDataset):
"""Specific class for GOOSE-styled LiDAR segmentation datasets. All data can be
downloaded from the official webpage (https://goose-dataset.de):
train -> https://goose-dataset.de/storage/goose_3d_train.zip
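
The gaia.py, generic.py, and goose.py changes are the same mechanical rename: segmentation base classes moved from the old monolithic dataset module into segmentation. Downstream code would update its imports accordingly (a hedged sketch; the subclass name is illustrative):

# Before this PR:
#   from detectionmetrics.datasets import dataset as dm_dataset
# After this PR:
from detectionmetrics.datasets import segmentation as dm_segmentation_dataset

class MyImageSegmentationDataset(dm_segmentation_dataset.ImageSegmentationDataset):
    pass  # illustrative placeholder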