mirror of
				https://github.com/THU-MIG/yolov10.git
				synced 2025-11-04 17:05:40 +08:00 
			
		
		
		
	Merge different COCO files
This commit is contained in:
		
							parent
							
								
									cd2f79c702
								
							
						
					
					
						commit
						649871d5e6
					
				
							
								
								
									
										223
									
								
								merge_coco.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										223
									
								
								merge_coco.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,223 @@
 | 
				
			|||||||
 | 
					from pycocotools.coco import COCO
 | 
				
			||||||
 | 
					import json
 | 
				
			||||||
 | 
					import shutil
 | 
				
			||||||
 | 
					from pathlib import Path
 | 
				
			||||||
 | 
					from io import BytesIO
 | 
				
			||||||
 | 
					from tqdm import tqdm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					category_dict = {
 | 
				
			||||||
 | 
					    0: "box0_small",
 | 
				
			||||||
 | 
					    1: "box0_medium",
 | 
				
			||||||
 | 
					    2: "box0_large",
 | 
				
			||||||
 | 
					    3: "box1_medium",
 | 
				
			||||||
 | 
					    4: "box1_large",
 | 
				
			||||||
 | 
					    5: "box2_medium",
 | 
				
			||||||
 | 
					    6: "box2_large",
 | 
				
			||||||
 | 
					    7: "box3_small",
 | 
				
			||||||
 | 
					    8: "box3_medium",
 | 
				
			||||||
 | 
					    9: "box3_large",
 | 
				
			||||||
 | 
					    10: "cart_0",
 | 
				
			||||||
 | 
					    11: "cart_1",
 | 
				
			||||||
 | 
					    12: "cone_1",
 | 
				
			||||||
 | 
					    13: "traffic cone",
 | 
				
			||||||
 | 
					    14: "crate_0_small",
 | 
				
			||||||
 | 
					    15: "crate_1_small",
 | 
				
			||||||
 | 
					    16: "crate_0_large",
 | 
				
			||||||
 | 
					    17: "crate_1_large",
 | 
				
			||||||
 | 
					    18: "ram",
 | 
				
			||||||
 | 
					    19: "dvere",
 | 
				
			||||||
 | 
					    20: "euro_pallet",
 | 
				
			||||||
 | 
					    21: "shelf",
 | 
				
			||||||
 | 
					    22: "piso mojado",
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_key(dictionary, value):
 | 
				
			||||||
 | 
					    return [key for key, val in dictionary.items() if val == value][0]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def convert_coco_json_to_yaml(json_file, output_dir):
 | 
				
			||||||
 | 
					    # Create output directory if it doesn't exist
 | 
				
			||||||
 | 
					    output_dir = Path(output_dir)
 | 
				
			||||||
 | 
					    output_dir.mkdir(parents=True, exist_ok=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Load JSON data
 | 
				
			||||||
 | 
					    with open(json_file, "r") as f:
 | 
				
			||||||
 | 
					        data = json.load(f)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Prepare YAML data
 | 
				
			||||||
 | 
					    yaml_data = {
 | 
				
			||||||
 | 
					        "path": str(Path(json_file).parent),
 | 
				
			||||||
 | 
					        "train": "images/train",
 | 
				
			||||||
 | 
					        "val": "images/val",
 | 
				
			||||||
 | 
					        "test": "",
 | 
				
			||||||
 | 
					        "names": {
 | 
				
			||||||
 | 
					            get_key(category_dict, cat["name"]): cat["name"]
 | 
				
			||||||
 | 
					            for cat in data["categories"]
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        "nc": len(data["categories"]),
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Create image to annotation mapping
 | 
				
			||||||
 | 
					    img_to_anns = {}
 | 
				
			||||||
 | 
					    for ann in data["annotations"]:
 | 
				
			||||||
 | 
					        ann["category_id"] = get_key(category_dict, ann["category_id"])
 | 
				
			||||||
 | 
					        img_id = ann["image_id"]
 | 
				
			||||||
 | 
					        if img_id not in img_to_anns:
 | 
				
			||||||
 | 
					            img_to_anns[img_id] = []
 | 
				
			||||||
 | 
					        img_to_anns[img_id].append(ann)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Process annotations
 | 
				
			||||||
 | 
					    for img in tqdm(data["images"], desc="Processing annotations"):
 | 
				
			||||||
 | 
					        img_id = img["id"]
 | 
				
			||||||
 | 
					        filename = img["file_name"]
 | 
				
			||||||
 | 
					        width, height = img["width"], img["height"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        annotations = []
 | 
				
			||||||
 | 
					        if img_id in img_to_anns:
 | 
				
			||||||
 | 
					            for ann in img_to_anns[img_id]:
 | 
				
			||||||
 | 
					                category_id = ann["category_id"]  # YOLO format uses 0-indexed classes
 | 
				
			||||||
 | 
					                bbox = ann["bbox"]
 | 
				
			||||||
 | 
					                x_center = (bbox[0] + bbox[2] / 2) / width
 | 
				
			||||||
 | 
					                y_center = (bbox[1] + bbox[3] / 2) / height
 | 
				
			||||||
 | 
					                bbox_width = bbox[2] / width
 | 
				
			||||||
 | 
					                bbox_height = bbox[3] / height
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                annotations.append(
 | 
				
			||||||
 | 
					                    {
 | 
				
			||||||
 | 
					                        "class": category_id,
 | 
				
			||||||
 | 
					                        "x_center": x_center,
 | 
				
			||||||
 | 
					                        "y_center": y_center,
 | 
				
			||||||
 | 
					                        "width": bbox_width,
 | 
				
			||||||
 | 
					                        "height": bbox_height,
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Write YOLO format annotation file
 | 
				
			||||||
 | 
					        output_file = output_dir / f"{str(img_id)+Path(filename).stem}.txt"
 | 
				
			||||||
 | 
					        with open(output_file, "w") as f:
 | 
				
			||||||
 | 
					            for ann in annotations:
 | 
				
			||||||
 | 
					                f.write(
 | 
				
			||||||
 | 
					                    f"{ann['class']} {ann['x_center']:.6f} {ann['y_center']:.6f} {ann['width']:.6f} {ann['height']:.6f}\n"
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Write YAML file
 | 
				
			||||||
 | 
					    yaml_file = output_dir / "dataset_full.yaml"
 | 
				
			||||||
 | 
					    with open(yaml_file, "w") as f:
 | 
				
			||||||
 | 
					        yaml.dump(yaml_data, f, sort_keys=False)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    print(f"Conversion complete. YAML file saved to {yaml_file}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def merge_coco_json(json_files, output_file, target_image_dir):
 | 
				
			||||||
 | 
					    merged_annotations = {
 | 
				
			||||||
 | 
					        "info": {},
 | 
				
			||||||
 | 
					        "licenses": [],
 | 
				
			||||||
 | 
					        "images": [],
 | 
				
			||||||
 | 
					        "annotations": [],
 | 
				
			||||||
 | 
					        "categories": [],
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    image_id_offset = 0
 | 
				
			||||||
 | 
					    annotation_id_offset = 0
 | 
				
			||||||
 | 
					    category_id_offset = 0
 | 
				
			||||||
 | 
					    existing_category_ids = set()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for idx, file in enumerate(json_files):
 | 
				
			||||||
 | 
					        # destination_folder =
 | 
				
			||||||
 | 
					        coco = COCO(file)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Update image IDs to avoid conflicts
 | 
				
			||||||
 | 
					        for image in coco.dataset["images"]:
 | 
				
			||||||
 | 
					            image["id"] += image_id_offset
 | 
				
			||||||
 | 
					            target_image_name = (
 | 
				
			||||||
 | 
					                target_image_dir
 | 
				
			||||||
 | 
					                + Path(file).parent.stem
 | 
				
			||||||
 | 
					                + Path(image["file_name"]).stem
 | 
				
			||||||
 | 
					                + ".jpg"
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					            image["file_name"] = target_image_name
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # shutil.copy2(Path(file).parent / image["file_name"], target_image_name)
 | 
				
			||||||
 | 
					            merged_annotations["images"].append(image)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Update annotation IDs to avoid conflicts
 | 
				
			||||||
 | 
					        for annotation in coco.dataset["annotations"]:
 | 
				
			||||||
 | 
					            annotation["id"] += annotation_id_offset
 | 
				
			||||||
 | 
					            annotation["image_id"] += image_id_offset
 | 
				
			||||||
 | 
					            merged_annotations["annotations"].append(annotation)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # # Update categories and their IDs to avoid conflicts
 | 
				
			||||||
 | 
					        for category in coco.dataset["categories"]:
 | 
				
			||||||
 | 
					            if category["id"] not in existing_category_ids:
 | 
				
			||||||
 | 
					                # category["id"] += category_id_offset
 | 
				
			||||||
 | 
					                merged_annotations["categories"].append(category)
 | 
				
			||||||
 | 
					                existing_category_ids.add(category["id"])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        image_id_offset = len(merged_annotations["images"])
 | 
				
			||||||
 | 
					        annotation_id_offset = len(merged_annotations["annotations"])
 | 
				
			||||||
 | 
					        category_id_offset = len(merged_annotations["categories"])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Save merged annotations to output file
 | 
				
			||||||
 | 
					    # with open(output_file, "w") as f:
 | 
				
			||||||
 | 
					    #     json.dump(merged_annotations, f)
 | 
				
			||||||
 | 
					    print("Saving merged annotations...")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Convert to bytes first
 | 
				
			||||||
 | 
					    json_bytes = json.dumps(merged_annotations).encode("utf-8")
 | 
				
			||||||
 | 
					    total_size = len(json_bytes)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Create a byte stream
 | 
				
			||||||
 | 
					    stream = BytesIO(json_bytes)
 | 
				
			||||||
 | 
					    chunk_size_mb = 1  # 1 MB chunks
 | 
				
			||||||
 | 
					    chunk_size = chunk_size_mb * 1024 * 1024
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    with open(output_file, "w") as f:
 | 
				
			||||||
 | 
					        with tqdm(
 | 
				
			||||||
 | 
					            total=total_size, unit="B", unit_scale=True, desc="Writing JSON"
 | 
				
			||||||
 | 
					        ) as pbar:
 | 
				
			||||||
 | 
					            while True:
 | 
				
			||||||
 | 
					                chunk = stream.read(chunk_size).decode("utf-8")
 | 
				
			||||||
 | 
					                if not chunk:
 | 
				
			||||||
 | 
					                    break
 | 
				
			||||||
 | 
					                f.write(chunk)
 | 
				
			||||||
 | 
					                pbar.update(len(chunk.encode("utf-8")))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # List of paths to COCO JSON files to merge
 | 
				
			||||||
 | 
					    # json_files = [
 | 
				
			||||||
 | 
					    #     "/home/vyasd/projects/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000000/scene_gt_coco.json",
 | 
				
			||||||
 | 
					    #     "/home/vyasd/projects/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000001/scene_gt_coco.json",
 | 
				
			||||||
 | 
					    # ]
 | 
				
			||||||
 | 
					    json_files = [
 | 
				
			||||||
 | 
					        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000000/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000001/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000002/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000003/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000004/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000005/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000006/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000007/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000008/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000000/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000001/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000002/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000003/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000004/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000005/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000006/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000007/scene_gt_coco.json",
 | 
				
			||||||
 | 
					        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000008/scene_gt_coco.json",
 | 
				
			||||||
 | 
					    ]
 | 
				
			||||||
 | 
					    target_image_dir = "images/val/"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Output file path for merged annotations
 | 
				
			||||||
 | 
					    output_file = "./merged_coco_version_2_val_corrected.json"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Merge COCO JSON files
 | 
				
			||||||
 | 
					    merge_coco_json(json_files, output_file, target_image_dir)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    print("Merged COCO JSON files saved to", output_file)
 | 
				
			||||||
 | 
					# end main
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user