Merge different COCO files

dev-vyas-2023 2025-05-07 06:05:24 +02:00
parent cd2f79c702
commit 649871d5e6

merge_coco.py (new file, 223 lines)
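"""Merge multiple COCO annotation files into a single COCO JSON, offsetting
image and annotation IDs so they stay unique, and optionally convert the
merged result to YOLO-format labels plus a dataset YAML."""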
from pycocotools.coco import COCO
import json
import shutil
import yaml  # required by yaml.dump in convert_coco_json_to_yaml
from pathlib import Path
from io import BytesIO
from tqdm import tqdm

category_dict = {
    0: "box0_small",
    1: "box0_medium",
    2: "box0_large",
    3: "box1_medium",
    4: "box1_large",
    5: "box2_medium",
    6: "box2_large",
    7: "box3_small",
    8: "box3_medium",
    9: "box3_large",
    10: "cart_0",
    11: "cart_1",
    12: "cone_1",
    13: "traffic cone",
    14: "crate_0_small",
    15: "crate_1_small",
    16: "crate_0_large",
    17: "crate_1_large",
    18: "ram",
    19: "dvere",
    20: "euro_pallet",
    21: "shelf",
    22: "piso mojado",
}

def get_key(dictionary, value):
    """Return the first key in `dictionary` whose value equals `value`."""
    return [key for key, val in dictionary.items() if val == value][0]

def convert_coco_json_to_yaml(json_file, output_dir):
    # Create the output directory if it doesn't exist
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Load the COCO JSON data
    with open(json_file, "r") as f:
        data = json.load(f)

    # Prepare the YOLO dataset YAML data
    yaml_data = {
        "path": str(Path(json_file).parent),
        "train": "images/train",
        "val": "images/val",
        "test": "",
        "names": {
            get_key(category_dict, cat["name"]): cat["name"]
            for cat in data["categories"]
        },
        "nc": len(data["categories"]),
    }

    # Build an image-to-annotations mapping, remapping each category_id to
    # the 0-indexed key used in category_dict
    img_to_anns = {}
    for ann in data["annotations"]:
        ann["category_id"] = get_key(category_dict, ann["category_id"])
        img_id = ann["image_id"]
        if img_id not in img_to_anns:
            img_to_anns[img_id] = []
        img_to_anns[img_id].append(ann)

    # Process annotations
    for img in tqdm(data["images"], desc="Processing annotations"):
        img_id = img["id"]
        filename = img["file_name"]
        width, height = img["width"], img["height"]
        annotations = []
        if img_id in img_to_anns:
            for ann in img_to_anns[img_id]:
                category_id = ann["category_id"]  # YOLO format uses 0-indexed classes
                # COCO bbox is [x_min, y_min, width, height]; YOLO expects
                # normalized [x_center, y_center, width, height]
                bbox = ann["bbox"]
                x_center = (bbox[0] + bbox[2] / 2) / width
                y_center = (bbox[1] + bbox[3] / 2) / height
                bbox_width = bbox[2] / width
                bbox_height = bbox[3] / height
                annotations.append(
                    {
                        "class": category_id,
                        "x_center": x_center,
                        "y_center": y_center,
                        "width": bbox_width,
                        "height": bbox_height,
                    }
                )

        # Write the YOLO format annotation file
        output_file = output_dir / f"{str(img_id) + Path(filename).stem}.txt"
        with open(output_file, "w") as f:
            for ann in annotations:
                f.write(
                    f"{ann['class']} {ann['x_center']:.6f} {ann['y_center']:.6f} {ann['width']:.6f} {ann['height']:.6f}\n"
                )

    # Write the dataset YAML file
    yaml_file = output_dir / "dataset_full.yaml"
    with open(yaml_file, "w") as f:
        yaml.dump(yaml_data, f, sort_keys=False)
    print(f"Conversion complete. YAML file saved to {yaml_file}")

def merge_coco_json(json_files, output_file, target_image_dir):
    merged_annotations = {
        "info": {},
        "licenses": [],
        "images": [],
        "annotations": [],
        "categories": [],
    }

    image_id_offset = 0
    annotation_id_offset = 0
    category_id_offset = 0
    existing_category_ids = set()

    for idx, file in enumerate(json_files):
        # destination_folder =
        coco = COCO(file)

        # Update image IDs to avoid conflicts
        for image in coco.dataset["images"]:
            image["id"] += image_id_offset
            # Build a flat, unique file name from the scene folder stem and
            # the original frame stem (no separator between the two)
            target_image_name = (
                target_image_dir
                + Path(file).parent.stem
                + Path(image["file_name"]).stem
                + ".jpg"
            )
            image["file_name"] = target_image_name
            # shutil.copy2(Path(file).parent / image["file_name"], target_image_name)
            merged_annotations["images"].append(image)

        # Update annotation IDs to avoid conflicts
        for annotation in coco.dataset["annotations"]:
            annotation["id"] += annotation_id_offset
            annotation["image_id"] += image_id_offset
            merged_annotations["annotations"].append(annotation)

        # Update categories and their IDs to avoid conflicts
        for category in coco.dataset["categories"]:
            if category["id"] not in existing_category_ids:
                # category["id"] += category_id_offset
                merged_annotations["categories"].append(category)
                existing_category_ids.add(category["id"])

        # These offsets assume IDs in each source file are contiguous and
        # start from 0, so the running totals keep merged IDs unique
        image_id_offset = len(merged_annotations["images"])
        annotation_id_offset = len(merged_annotations["annotations"])
        category_id_offset = len(merged_annotations["categories"])

    # Save merged annotations to the output file
    # with open(output_file, "w") as f:
    #     json.dump(merged_annotations, f)
    print("Saving merged annotations...")

    # Serialize to bytes first so the progress bar knows the total size
    json_bytes = json.dumps(merged_annotations).encode("utf-8")
    total_size = len(json_bytes)

    # Stream the bytes out in 1 MB chunks with a progress bar; decoding
    # chunk-by-chunk is safe here because json.dumps escapes non-ASCII by
    # default, so no multi-byte character can straddle a chunk boundary
    stream = BytesIO(json_bytes)
    chunk_size_mb = 1  # 1 MB chunks
    chunk_size = chunk_size_mb * 1024 * 1024
    with open(output_file, "w") as f:
        with tqdm(
            total=total_size, unit="B", unit_scale=True, desc="Writing JSON"
        ) as pbar:
            while True:
                chunk = stream.read(chunk_size).decode("utf-8")
                if not chunk:
                    break
                f.write(chunk)
                pbar.update(len(chunk.encode("utf-8")))
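
# Note: the shutil.copy2 call above is left commented out, so this function
# only rewrites the file_name fields in the merged JSON; the image files
# themselves must be copied into target_image_dir separately if needed.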

if __name__ == "__main__":
    # List of paths to COCO JSON files to merge
    # json_files = [
    #     "/home/vyasd/projects/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000000/scene_gt_coco.json",
    #     "/home/vyasd/projects/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000001/scene_gt_coco.json",
    # ]
    json_files = [
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000000/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000001/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000002/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000003/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000004/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000005/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000006/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000007/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000008/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000000/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000001/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000002/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000003/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000004/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000005/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000006/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000007/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000008/scene_gt_coco.json",
    ]
    target_image_dir = "images/val/"

    # Output file path for merged annotations
    output_file = "./merged_coco_version_2_val_corrected.json"

    # Merge COCO JSON files
    merge_coco_json(json_files, output_file, target_image_dir)
    print("Merged COCO JSON files saved to", output_file)
    # end main