Merge different COCO files

dev-vyas-2023 2025-05-07 06:05:24 +02:00
parent cd2f79c702
commit 649871d5e6

merge_coco.py (new file, 223 lines)
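"""Merge multiple COCO annotation files into a single COCO JSON, offsetting
image and annotation IDs so they stay unique, and optionally convert the
merged result to YOLO-format labels plus a dataset YAML."""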
from pycocotools.coco import COCO
import json
import shutil
import yaml  # required by yaml.dump in convert_coco_json_to_yaml
from pathlib import Path
from io import BytesIO
from tqdm import tqdm

category_dict = {
    0: "box0_small",
    1: "box0_medium",
    2: "box0_large",
    3: "box1_medium",
    4: "box1_large",
    5: "box2_medium",
    6: "box2_large",
    7: "box3_small",
    8: "box3_medium",
    9: "box3_large",
    10: "cart_0",
    11: "cart_1",
    12: "cone_1",
    13: "traffic cone",
    14: "crate_0_small",
    15: "crate_1_small",
    16: "crate_0_large",
    17: "crate_1_large",
    18: "ram",
    19: "dvere",
    20: "euro_pallet",
    21: "shelf",
    22: "piso mojado",
}

def get_key(dictionary, value):
    """Return the first key in `dictionary` whose value equals `value`."""
    return [key for key, val in dictionary.items() if val == value][0]

def convert_coco_json_to_yaml(json_file, output_dir):
    # Create the output directory if it doesn't exist
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Load the COCO JSON data
    with open(json_file, "r") as f:
        data = json.load(f)

    # Prepare the YOLO dataset YAML data
    yaml_data = {
        "path": str(Path(json_file).parent),
        "train": "images/train",
        "val": "images/val",
        "test": "",
        "names": {
            get_key(category_dict, cat["name"]): cat["name"]
            for cat in data["categories"]
        },
        "nc": len(data["categories"]),
    }

    # Build an image-to-annotations mapping, remapping each category_id to
    # the 0-indexed key used in category_dict
    img_to_anns = {}
    for ann in data["annotations"]:
        ann["category_id"] = get_key(category_dict, ann["category_id"])
        img_id = ann["image_id"]
        if img_id not in img_to_anns:
            img_to_anns[img_id] = []
        img_to_anns[img_id].append(ann)

    # Process annotations
    for img in tqdm(data["images"], desc="Processing annotations"):
        img_id = img["id"]
        filename = img["file_name"]
        width, height = img["width"], img["height"]
        annotations = []
        if img_id in img_to_anns:
            for ann in img_to_anns[img_id]:
                category_id = ann["category_id"]  # YOLO format uses 0-indexed classes
                # COCO bbox is [x_min, y_min, width, height]; YOLO expects
                # normalized [x_center, y_center, width, height]
                bbox = ann["bbox"]
                x_center = (bbox[0] + bbox[2] / 2) / width
                y_center = (bbox[1] + bbox[3] / 2) / height
                bbox_width = bbox[2] / width
                bbox_height = bbox[3] / height
                annotations.append(
                    {
                        "class": category_id,
                        "x_center": x_center,
                        "y_center": y_center,
                        "width": bbox_width,
                        "height": bbox_height,
                    }
                )

        # Write the YOLO format annotation file
        output_file = output_dir / f"{str(img_id) + Path(filename).stem}.txt"
        with open(output_file, "w") as f:
            for ann in annotations:
                f.write(
                    f"{ann['class']} {ann['x_center']:.6f} {ann['y_center']:.6f} {ann['width']:.6f} {ann['height']:.6f}\n"
                )

    # Write the dataset YAML file
    yaml_file = output_dir / "dataset_full.yaml"
    with open(yaml_file, "w") as f:
        yaml.dump(yaml_data, f, sort_keys=False)
    print(f"Conversion complete. YAML file saved to {yaml_file}")

def merge_coco_json(json_files, output_file, target_image_dir):
    merged_annotations = {
        "info": {},
        "licenses": [],
        "images": [],
        "annotations": [],
        "categories": [],
    }

    image_id_offset = 0
    annotation_id_offset = 0
    category_id_offset = 0
    existing_category_ids = set()

    for idx, file in enumerate(json_files):
        # destination_folder =
        coco = COCO(file)

        # Update image IDs to avoid conflicts
        for image in coco.dataset["images"]:
            image["id"] += image_id_offset
            # Build a flat, unique file name from the scene folder stem and
            # the original frame stem (no separator between the two)
            target_image_name = (
                target_image_dir
                + Path(file).parent.stem
                + Path(image["file_name"]).stem
                + ".jpg"
            )
            image["file_name"] = target_image_name
            # shutil.copy2(Path(file).parent / image["file_name"], target_image_name)
            merged_annotations["images"].append(image)

        # Update annotation IDs to avoid conflicts
        for annotation in coco.dataset["annotations"]:
            annotation["id"] += annotation_id_offset
            annotation["image_id"] += image_id_offset
            merged_annotations["annotations"].append(annotation)

        # Update categories and their IDs to avoid conflicts
        for category in coco.dataset["categories"]:
            if category["id"] not in existing_category_ids:
                # category["id"] += category_id_offset
                merged_annotations["categories"].append(category)
                existing_category_ids.add(category["id"])

        # These offsets assume IDs in each source file are contiguous and
        # start from 0, so the running totals keep merged IDs unique
        image_id_offset = len(merged_annotations["images"])
        annotation_id_offset = len(merged_annotations["annotations"])
        category_id_offset = len(merged_annotations["categories"])

    # Save merged annotations to the output file
    # with open(output_file, "w") as f:
    #     json.dump(merged_annotations, f)
    print("Saving merged annotations...")

    # Serialize to bytes first so the progress bar knows the total size
    json_bytes = json.dumps(merged_annotations).encode("utf-8")
    total_size = len(json_bytes)

    # Stream the bytes out in 1 MB chunks with a progress bar; decoding
    # chunk-by-chunk is safe here because json.dumps escapes non-ASCII by
    # default, so no multi-byte character can straddle a chunk boundary
    stream = BytesIO(json_bytes)
    chunk_size_mb = 1  # 1 MB chunks
    chunk_size = chunk_size_mb * 1024 * 1024
    with open(output_file, "w") as f:
        with tqdm(
            total=total_size, unit="B", unit_scale=True, desc="Writing JSON"
        ) as pbar:
            while True:
                chunk = stream.read(chunk_size).decode("utf-8")
                if not chunk:
                    break
                f.write(chunk)
                pbar.update(len(chunk.encode("utf-8")))
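
# Note: the shutil.copy2 call above is left commented out, so this function
# only rewrites the file_name fields in the merged JSON; the image files
# themselves must be copied into target_image_dir separately if needed.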

if __name__ == "__main__":
    # List of paths to COCO JSON files to merge
    # json_files = [
    #     "/home/vyasd/projects/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000000/scene_gt_coco.json",
    #     "/home/vyasd/projects/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000001/scene_gt_coco.json",
    # ]
    json_files = [
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000000/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000001/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000002/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000003/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000004/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000005/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000006/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000007/scene_gt_coco.json",
        # "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/train/train_pbr/000008/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000000/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000001/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000002/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000003/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000004/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000005/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000006/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000007/scene_gt_coco.json",
        "/home/vyasd/projects/ARCHIVED/warehouse-demo/data/data-from-fraunhofer/version_2/warehouse_3002-1280x720/val/train_pbr/000008/scene_gt_coco.json",
    ]
    target_image_dir = "images/val/"

    # Output file path for merged annotations
    output_file = "./merged_coco_version_2_val_corrected.json"

    # Merge COCO JSON files
    merge_coco_json(json_files, output_file, target_image_dir)
    print("Merged COCO JSON files saved to", output_file)
    # end main