"""Merge per-scene COCO annotation files and convert COCO JSON to YOLO labels.

The data layout is a BOP-style synthetic dataset: one ``scene_gt_coco.json``
per scene folder. ``merge_coco_json`` concatenates them into a single COCO
file; ``convert_coco_json_to_yaml`` turns a COCO file into YOLO-format label
txt files plus an Ultralytics dataset YAML.
"""

from pycocotools.coco import COCO
import json
import shutil  # kept: used by the (currently disabled) image-copy step below
from pathlib import Path
from io import BytesIO
from tqdm import tqdm

# Fixed class list: YOLO class index -> human-readable category name.
category_dict = {
    0: "box0_small",
    1: "box0_medium",
    2: "box0_large",
    3: "box1_medium",
    4: "box1_large",
    5: "box2_medium",
    6: "box2_large",
    7: "box3_small",
    8: "box3_medium",
    9: "box3_large",
    10: "cart_0",
    11: "cart_1",
    12: "cone_1",
    13: "traffic cone",
    14: "crate_0_small",
    15: "crate_1_small",
    16: "crate_0_large",
    17: "crate_1_large",
    18: "ram",
    19: "dvere",
    20: "euro_pallet",
    21: "shelf",
    22: "piso mojado",
}


def get_key(dictionary, value):
    """Return the first key in *dictionary* whose value equals *value*.

    Raises:
        KeyError: if no entry maps to *value*.  (The previous
            ``[...][0]`` list-indexing raised an opaque ``IndexError``.)
    """
    for key, val in dictionary.items():
        if val == value:
            return key
    raise KeyError(f"value {value!r} not found in dictionary")


def convert_coco_json_to_yaml(json_file, output_dir):
    """Convert a COCO JSON file to YOLO txt labels plus a dataset YAML.

    One ``<image_id><image_stem>.txt`` file is written per image into
    *output_dir*, each line ``class x_center y_center width height`` with
    coordinates normalised to [0, 1].  A ``dataset_full.yaml`` describing
    the dataset is written alongside them.

    Args:
        json_file: path to the COCO annotation JSON.
        output_dir: directory for the YOLO label files (created if missing).
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    with open(json_file, "r") as f:
        data = json.load(f)

    yaml_data = {
        "path": str(Path(json_file).parent),
        "train": "images/train",
        "val": "images/val",
        "test": "",
        "names": {
            get_key(category_dict, cat["name"]): cat["name"]
            for cat in data["categories"]
        },
        "nc": len(data["categories"]),
    }

    # COCO category id -> name, so annotation ids can be remapped to the
    # YOLO indices defined by category_dict.  BUGFIX: the original passed
    # the numeric COCO id straight to get_key, which searches among
    # category_dict's *names* and could never match — the id must be
    # translated to its name first (mirrors the "names" mapping above).
    coco_id_to_name = {cat["id"]: cat["name"] for cat in data["categories"]}

    # Group annotations by image, remapping category ids to YOLO indices.
    img_to_anns = {}
    for ann in data["annotations"]:
        ann["category_id"] = get_key(
            category_dict, coco_id_to_name[ann["category_id"]]
        )
        img_to_anns.setdefault(ann["image_id"], []).append(ann)

    for img in tqdm(data["images"], desc="Processing annotations"):
        img_id = img["id"]
        filename = img["file_name"]
        width, height = img["width"], img["height"]

        annotations = []
        for ann in img_to_anns.get(img_id, []):
            category_id = ann["category_id"]  # YOLO format uses 0-indexed classes
            bbox = ann["bbox"]  # COCO bbox: [x_min, y_min, width, height]
            x_center = (bbox[0] + bbox[2] / 2) / width
            y_center = (bbox[1] + bbox[3] / 2) / height
            annotations.append(
                {
                    "class": category_id,
                    "x_center": x_center,
                    "y_center": y_center,
                    "width": bbox[2] / width,
                    "height": bbox[3] / height,
                }
            )

        # Write the YOLO label file (image id prefixed to keep names unique
        # across merged scene folders).
        output_file = output_dir / f"{str(img_id) + Path(filename).stem}.txt"
        with open(output_file, "w") as f:
            for ann in annotations:
                f.write(
                    f"{ann['class']} {ann['x_center']:.6f} {ann['y_center']:.6f} "
                    f"{ann['width']:.6f} {ann['height']:.6f}\n"
                )

    # Emit the dataset YAML with the standard library only.  BUGFIX: the
    # original called yaml.dump() but never imported yaml (NameError);
    # PyYAML is not a dependency of this file, and this flat structure is
    # trivial to serialise by hand.
    yaml_file = output_dir / "dataset_full.yaml"
    with open(yaml_file, "w") as f:
        f.write(f"path: {yaml_data['path']}\n")
        f.write(f"train: {yaml_data['train']}\n")
        f.write(f"val: {yaml_data['val']}\n")
        # An empty scalar must be quoted to stay an empty string in YAML.
        f.write("test: ''\n" if yaml_data["test"] == "" else f"test: {yaml_data['test']}\n")
        f.write("names:\n")
        for idx in sorted(yaml_data["names"]):
            f.write(f"  {idx}: {yaml_data['names'][idx]}\n")
        f.write(f"nc: {yaml_data['nc']}\n")

    print(f"Conversion complete. YAML file saved to {yaml_file}")


def merge_coco_json(json_files, output_file, target_image_dir):
    """Merge several COCO annotation files into a single COCO JSON.

    Image and annotation ids are offset per input file to avoid collisions;
    categories are deduplicated by id.  Image file names are rewritten to
    ``<target_image_dir><scene_folder><image_stem>.jpg``.

    NOTE(review): the offsets are the running *counts* of merged images /
    annotations, which only avoids collisions if each file's ids start at
    (or near) zero — true for BOP scene_gt_coco.json, but verify for other
    sources.

    Args:
        json_files: iterable of paths to COCO JSON files.
        output_file: path of the merged JSON to write.
        target_image_dir: prefix (ending in "/") for rewritten image paths.
    """
    merged_annotations = {
        "info": {},
        "licenses": [],
        "images": [],
        "annotations": [],
        "categories": [],
    }

    image_id_offset = 0
    annotation_id_offset = 0
    existing_category_ids = set()

    for file in json_files:
        coco = COCO(file)

        # Shift image ids past everything merged so far and rewrite the
        # file name to the flattened target layout.
        for image in coco.dataset["images"]:
            image["id"] += image_id_offset
            image["file_name"] = (
                target_image_dir
                + Path(file).parent.stem
                + Path(image["file_name"]).stem
                + ".jpg"
            )
            # Image copying is intentionally disabled; re-enable if the
            # flattened image tree needs to be (re)built:
            # shutil.copy2(Path(file).parent / image["file_name"], image["file_name"])
            merged_annotations["images"].append(image)

        # Shift annotation ids and re-link them to the shifted image ids.
        for annotation in coco.dataset["annotations"]:
            annotation["id"] += annotation_id_offset
            annotation["image_id"] += image_id_offset
            merged_annotations["annotations"].append(annotation)

        # Categories are shared across files: keep the first occurrence of
        # each id, assuming all files use the same category table.
        for category in coco.dataset["categories"]:
            if category["id"] not in existing_category_ids:
                merged_annotations["categories"].append(category)
                existing_category_ids.add(category["id"])

        image_id_offset = len(merged_annotations["images"])
        annotation_id_offset = len(merged_annotations["annotations"])

    print("Saving merged annotations...")

    # Serialise once, then stream to disk in 1 MB chunks with a progress
    # bar.  BUGFIX: write bytes in binary mode instead of decoding each
    # chunk — decoding at an arbitrary byte offset would corrupt multibyte
    # UTF-8 sequences (the original was only safe because json.dumps
    # defaults to ASCII output).
    json_bytes = json.dumps(merged_annotations).encode("utf-8")
    total_size = len(json_bytes)
    chunk_size = 1 * 1024 * 1024  # 1 MB

    stream = BytesIO(json_bytes)
    with open(output_file, "wb") as f:
        with tqdm(
            total=total_size, unit="B", unit_scale=True, desc="Writing JSON"
        ) as pbar:
            while True:
                chunk = stream.read(chunk_size)
                if not chunk:
                    break
                f.write(chunk)
                pbar.update(len(chunk))


if __name__ == "__main__":
    # Nine validation scene folders, 000000 .. 000008 (previously spelled
    # out as nine near-identical literals).
    base = (
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer"
        "/version_2/warehouse_3002-1280x720/val/train_pbr"
    )
    json_files = [f"{base}/{i:06d}/scene_gt_coco.json" for i in range(9)]

    target_image_dir = "images/val/"
    output_file = "./merged_coco_version_2_val_corrected.json"

    merge_coco_json(json_files, output_file, target_image_dir)
    print("Merged COCO JSON files saved to", output_file)