Merge 84128a569e3d21b132163ed8ff713262680a0441 into 453c6e38a51e9d1d5a2aa5fb7f1014a711913397

2025-10-26 19:25:39 +08:00 · 2025-03-14 10:54:32 +08:00 · 2025-03-14 10:54:32 +08:00 · ac85783c27
commit ac85783c27
parent 453c6e38a5 84128a569e
52 changed files with 771 additions and 4497 deletions
--- a/ultralytics/init.py
+++ b/ultralytics/init.py
@ -3,7 +3,7 @@
 __version__ = "8.1.34"
 from ultralytics.data.explorer.explorer import Explorer
-from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld, YOLOv10
+from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld, YOLOv10,YOLOv10Seg
 from ultralytics.models.fastsam import FastSAM
 from ultralytics.models.nas import NAS
 from ultralytics.utils import ASSETS, SETTINGS as settings
--- a/ultralytics/assets/bus.jpg
+++ b/ultralytics/assets/bus.jpg
--- a/ultralytics/assets/zidane.jpg
+++ b/ultralytics/assets/zidane.jpg
--- a/ultralytics/cfg/datasets/Argoverse.yaml
+++ b/ultralytics/cfg/datasets/Argoverse.yaml
@ -1,74 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Argoverse-HD dataset (ring-front-center camera) https://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
 # Documentation: https://docs.ultralytics.com/datasets/detect/argoverse/
 # Example usage: yolo train data=Argoverse.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── Argoverse  ← downloads here (31.5 GB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/Argoverse # dataset root dir
 train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
 val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
 test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
 # Classes
 names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: bus
  5: truck
  6: traffic_light
  7: stop_sign
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  import json
  from tqdm import tqdm
  from ultralytics.utils.downloads import download
  from pathlib import Path
  def argoverse2yolo(set):
      """Convert one Argoverse-HD annotation JSON file into per-image YOLO label files.

      Args:
          set: ``pathlib.Path`` of the annotation JSON file. It must be a Path because
              ``set.parents[2]`` is used to locate the dataset root. The name shadows the
              builtin ``set`` but is kept so existing callers are unaffected.

      Each annotation's ``bbox`` is read as four numbers (x, y, width, height); x and y are
      shifted by half the box size to obtain the box center, and all values are divided by
      the fixed frame size 1920 x 1200. One ``.txt`` file per image is written below
      ``<root>/Argoverse-1.1/labels/<sequence dir>/`` with one
      ``cls x_center y_center width height`` line per annotation.
      """
      # Context manager so the JSON handle is closed as soon as parsing is done
      with open(set, "rb") as fh:
          a = json.load(fh)
      labels_root = set.parents[2] / 'Argoverse-1.1' / 'labels'  # loop-invariant, computed once
      labels = {}  # label file path (str) -> list of YOLO-format lines
      for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
          img = a['images'][annot['image_id']]  # single lookup, reused for name and sequence id
          img_label_name = f"{img['name'][:-3]}txt"  # replace the last 3 characters with 'txt'
          cls = annot['category_id']  # instance class id
          x_center, y_center, width, height = annot['bbox']
          x_center = (x_center + width / 2) / 1920.0  # offset and scale
          y_center = (y_center + height / 2) / 1200.0  # offset and scale
          width /= 1920.0  # scale
          height /= 1200.0  # scale
          img_dir = labels_root / a['seq_dirs'][img['sid']]
          img_dir.mkdir(parents=True, exist_ok=True)  # exist_ok already covers the "directory exists" case
          labels.setdefault(str(img_dir / img_label_name), []).append(
              f"{cls} {x_center} {y_center} {width} {height}\n")
      # Write every label file once, after all of its lines have been collected
      for k, rows in labels.items():
          with open(k, "w") as f:
              f.writelines(rows)
  # Download 'https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip' (deprecated S3 link)
  dir = Path(yaml['path'])  # dataset root dir
  urls = ['https://drive.google.com/file/d/1st9qW3BeIwQsnR0t8mRpvbsSWIo16ACi/view?usp=drive_link']
  # Automatic download is disabled below, so point the user at the manual download instead
  print(
      "\n\nWARNING: Argoverse dataset MUST be downloaded manually, autodownload will NOT work.",
      f"WARNING: Manually download Argoverse dataset '{urls[0]}' to '{dir}' and re-run your command.\n\n",
      sep="\n",
  )
  # download(urls, dir=dir)
  # Convert
  annotations_dir = 'Argoverse-HD/annotations/'
  sequences_root = dir / 'Argoverse-1.1'
  sequences_root.joinpath('tracking').rename(sequences_root / 'images')  # rename 'tracking' to 'images'
  for split_json in ("train.json", "val.json"):
      argoverse2yolo(dir / annotations_dir / split_json)  # convert Argoverse annotations to YOLO labels
--- a/ultralytics/cfg/datasets/DOTAv1.5.yaml
+++ b/ultralytics/cfg/datasets/DOTAv1.5.yaml
@ -1,36 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # DOTA 1.5 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
 # Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/
 # Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.5.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── DOTAv1.5  ← downloads here (2GB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/DOTAv1.5 # dataset root dir
 train: images/train # train images (relative to 'path') 1411 images
 val: images/val # val images (relative to 'path') 458 images
 test: images/test # test images (optional) 937 images
 # Classes for DOTA 1.5
 names:
  0: plane
  1: ship
  2: storage tank
  3: baseball diamond
  4: tennis court
  5: basketball court
  6: ground track field
  7: harbor
  8: bridge
  9: large vehicle
  10: small vehicle
  11: helicopter
  12: roundabout
  13: soccer ball field
  14: swimming pool
  15: container crane
 # Download script/URL (optional)
 download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.5.zip
--- a/ultralytics/cfg/datasets/DOTAv1.yaml
+++ b/ultralytics/cfg/datasets/DOTAv1.yaml
@ -1,35 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # DOTA 1.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
 # Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/
 # Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── DOTAv1  ← downloads here (2GB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/DOTAv1 # dataset root dir
 train: images/train # train images (relative to 'path') 1411 images
 val: images/val # val images (relative to 'path') 458 images
 test: images/test # test images (optional) 937 images
 # Classes for DOTA 1.0
 names:
  0: plane
  1: ship
  2: storage tank
  3: baseball diamond
  4: tennis court
  5: basketball court
  6: ground track field
  7: harbor
  8: bridge
  9: large vehicle
  10: small vehicle
  11: helicopter
  12: roundabout
  13: soccer ball field
  14: swimming pool
 # Download script/URL (optional)
 download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.zip
--- a/ultralytics/cfg/datasets/GlobalWheat2020.yaml
+++ b/ultralytics/cfg/datasets/GlobalWheat2020.yaml
@ -1,53 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Global Wheat 2020 dataset https://www.global-wheat.com/ by University of Saskatchewan
 # Documentation: https://docs.ultralytics.com/datasets/detect/globalwheat2020/
 # Example usage: yolo train data=GlobalWheat2020.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── GlobalWheat2020  ← downloads here (7.0 GB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/GlobalWheat2020 # dataset root dir
 train: # train images (relative to 'path') 3422 images
  - images/arvalis_1
  - images/arvalis_2
  - images/arvalis_3
  - images/ethz_1
  - images/rres_1
  - images/inrae_1
  - images/usask_1
 val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
  - images/ethz_1
 test: # test images (optional) 1276 images
  - images/utokyo_1
  - images/utokyo_2
  - images/nau_1
  - images/uq_1
 # Classes
 names:
  0: wheat_head
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  from ultralytics.utils.downloads import download
  from pathlib import Path
  # Download
  dir = Path(yaml['path'])  # dataset root dir
  urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
  download(urls, dir=dir)
  # Make Directories
  for subdir in ('annotations', 'images', 'labels'):
      dir.joinpath(subdir).mkdir(parents=True, exist_ok=True)
  # Move every domain folder (and its JSON file, when one exists) out of the extracted archive folder
  extracted = dir / 'global-wheat-codalab-official'
  domains = (
      'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1',
      'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1',
  )
  for domain in domains:
      source = extracted / domain
      source.rename(dir / 'images' / domain)  # move to /images
      meta = source.with_suffix('.json')  # json file
      if meta.exists():
          meta.rename((dir / 'annotations' / domain).with_suffix('.json'))  # move to /annotations
--- a/ultralytics/cfg/datasets/ImageNet.yaml
+++ b/ultralytics/cfg/datasets/ImageNet.yaml
--- a/ultralytics/cfg/datasets/Objects365.yaml
+++ b/ultralytics/cfg/datasets/Objects365.yaml
@ -1,442 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Objects365 dataset https://www.objects365.org/ by Megvii
 # Documentation: https://docs.ultralytics.com/datasets/detect/objects365/
 # Example usage: yolo train data=Objects365.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── Objects365  ← downloads here (712 GB = 367G data + 345G zips)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/Objects365 # dataset root dir
 train: images/train # train images (relative to 'path') 1742289 images
 val: images/val # val images (relative to 'path') 80000 images
 test: # test images (optional)
 # Classes
 names:
  0: Person
  1: Sneakers
  2: Chair
  3: Other Shoes
  4: Hat
  5: Car
  6: Lamp
  7: Glasses
  8: Bottle
  9: Desk
  10: Cup
  11: Street Lights
  12: Cabinet/shelf
  13: Handbag/Satchel
  14: Bracelet
  15: Plate
  16: Picture/Frame
  17: Helmet
  18: Book
  19: Gloves
  20: Storage box
  21: Boat
  22: Leather Shoes
  23: Flower
  24: Bench
  25: Potted Plant
  26: Bowl/Basin
  27: Flag
  28: Pillow
  29: Boots
  30: Vase
  31: Microphone
  32: Necklace
  33: Ring
  34: SUV
  35: Wine Glass
  36: Belt
  37: Monitor/TV
  38: Backpack
  39: Umbrella
  40: Traffic Light
  41: Speaker
  42: Watch
  43: Tie
  44: Trash bin Can
  45: Slippers
  46: Bicycle
  47: Stool
  48: Barrel/bucket
  49: Van
  50: Couch
  51: Sandals
  52: Basket
  53: Drum
  54: Pen/Pencil
  55: Bus
  56: Wild Bird
  57: High Heels
  58: Motorcycle
  59: Guitar
  60: Carpet
  61: Cell Phone
  62: Bread
  63: Camera
  64: Canned
  65: Truck
  66: Traffic cone
  67: Cymbal
  68: Lifesaver
  69: Towel
  70: Stuffed Toy
  71: Candle
  72: Sailboat
  73: Laptop
  74: Awning
  75: Bed
  76: Faucet
  77: Tent
  78: Horse
  79: Mirror
  80: Power outlet
  81: Sink
  82: Apple
  83: Air Conditioner
  84: Knife
  85: Hockey Stick
  86: Paddle
  87: Pickup Truck
  88: Fork
  89: Traffic Sign
  90: Balloon
  91: Tripod
  92: Dog
  93: Spoon
  94: Clock
  95: Pot
  96: Cow
  97: Cake
  98: Dinning Table
  99: Sheep
  100: Hanger
  101: Blackboard/Whiteboard
  102: Napkin
  103: Other Fish
  104: Orange/Tangerine
  105: Toiletry
  106: Keyboard
  107: Tomato
  108: Lantern
  109: Machinery Vehicle
  110: Fan
  111: Green Vegetables
  112: Banana
  113: Baseball Glove
  114: Airplane
  115: Mouse
  116: Train
  117: Pumpkin
  118: Soccer
  119: Skiboard
  120: Luggage
  121: Nightstand
  122: Tea pot
  123: Telephone
  124: Trolley
  125: Head Phone
  126: Sports Car
  127: Stop Sign
  128: Dessert
  129: Scooter
  130: Stroller
  131: Crane
  132: Remote
  133: Refrigerator
  134: Oven
  135: Lemon
  136: Duck
  137: Baseball Bat
  138: Surveillance Camera
  139: Cat
  140: Jug
  141: Broccoli
  142: Piano
  143: Pizza
  144: Elephant
  145: Skateboard
  146: Surfboard
  147: Gun
  148: Skating and Skiing shoes
  149: Gas stove
  150: Donut
  151: Bow Tie
  152: Carrot
  153: Toilet
  154: Kite
  155: Strawberry
  156: Other Balls
  157: Shovel
  158: Pepper
  159: Computer Box
  160: Toilet Paper
  161: Cleaning Products
  162: Chopsticks
  163: Microwave
  164: Pigeon
  165: Baseball
  166: Cutting/chopping Board
  167: Coffee Table
  168: Side Table
  169: Scissors
  170: Marker
  171: Pie
  172: Ladder
  173: Snowboard
  174: Cookies
  175: Radiator
  176: Fire Hydrant
  177: Basketball
  178: Zebra
  179: Grape
  180: Giraffe
  181: Potato
  182: Sausage
  183: Tricycle
  184: Violin
  185: Egg
  186: Fire Extinguisher
  187: Candy
  188: Fire Truck
  189: Billiards
  190: Converter
  191: Bathtub
  192: Wheelchair
  193: Golf Club
  194: Briefcase
  195: Cucumber
  196: Cigar/Cigarette
  197: Paint Brush
  198: Pear
  199: Heavy Truck
  200: Hamburger
  201: Extractor
  202: Extension Cord
  203: Tong
  204: Tennis Racket
  205: Folder
  206: American Football
  207: earphone
  208: Mask
  209: Kettle
  210: Tennis
  211: Ship
  212: Swing
  213: Coffee Machine
  214: Slide
  215: Carriage
  216: Onion
  217: Green beans
  218: Projector
  219: Frisbee
  220: Washing Machine/Drying Machine
  221: Chicken
  222: Printer
  223: Watermelon
  224: Saxophone
  225: Tissue
  226: Toothbrush
  227: Ice cream
  228: Hot-air balloon
  229: Cello
  230: French Fries
  231: Scale
  232: Trophy
  233: Cabbage
  234: Hot dog
  235: Blender
  236: Peach
  237: Rice
  238: Wallet/Purse
  239: Volleyball
  240: Deer
  241: Goose
  242: Tape
  243: Tablet
  244: Cosmetics
  245: Trumpet
  246: Pineapple
  247: Golf Ball
  248: Ambulance
  249: Parking meter
  250: Mango
  251: Key
  252: Hurdle
  253: Fishing Rod
  254: Medal
  255: Flute
  256: Brush
  257: Penguin
  258: Megaphone
  259: Corn
  260: Lettuce
  261: Garlic
  262: Swan
  263: Helicopter
  264: Green Onion
  265: Sandwich
  266: Nuts
  267: Speed Limit Sign
  268: Induction Cooker
  269: Broom
  270: Trombone
  271: Plum
  272: Rickshaw
  273: Goldfish
  274: Kiwi fruit
  275: Router/modem
  276: Poker Card
  277: Toaster
  278: Shrimp
  279: Sushi
  280: Cheese
  281: Notepaper
  282: Cherry
  283: Pliers
  284: CD
  285: Pasta
  286: Hammer
  287: Cue
  288: Avocado
  289: Hamimelon
  290: Flask
  291: Mushroom
  292: Screwdriver
  293: Soap
  294: Recorder
  295: Bear
  296: Eggplant
  297: Board Eraser
  298: Coconut
  299: Tape Measure/Ruler
  300: Pig
  301: Showerhead
  302: Globe
  303: Chips
  304: Steak
  305: Crosswalk Sign
  306: Stapler
  307: Camel
  308: Formula 1
  309: Pomegranate
  310: Dishwasher
  311: Crab
  312: Hoverboard
  313: Meat ball
  314: Rice Cooker
  315: Tuba
  316: Calculator
  317: Papaya
  318: Antelope
  319: Parrot
  320: Seal
  321: Butterfly
  322: Dumbbell
  323: Donkey
  324: Lion
  325: Urinal
  326: Dolphin
  327: Electric Drill
  328: Hair Dryer
  329: Egg tart
  330: Jellyfish
  331: Treadmill
  332: Lighter
  333: Grapefruit
  334: Game board
  335: Mop
  336: Radish
  337: Baozi
  338: Target
  339: French
  340: Spring Rolls
  341: Monkey
  342: Rabbit
  343: Pencil Case
  344: Yak
  345: Red Cabbage
  346: Binoculars
  347: Asparagus
  348: Barbell
  349: Scallop
  350: Noddles
  351: Comb
  352: Dumpling
  353: Oyster
  354: Table Tennis paddle
  355: Cosmetics Brush/Eyeliner Pencil
  356: Chainsaw
  357: Eraser
  358: Lobster
  359: Durian
  360: Okra
  361: Lipstick
  362: Cosmetics Mirror
  363: Curling
  364: Table Tennis
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  from tqdm import tqdm
  from ultralytics.utils.checks import check_requirements
  from ultralytics.utils.downloads import download
  from ultralytics.utils.ops import xyxy2xywhn
  import numpy as np
  from pathlib import Path
  check_requirements(('pycocotools>=2.0',))
  from pycocotools.coco import COCO
  # Make Directories
  # Creates images/{train,val} and labels/{train,val} under the dataset root.
  dir = Path(yaml['path'])  # dataset root dir
  for p in 'images', 'labels':
      (dir / p).mkdir(parents=True, exist_ok=True)
      for q in 'train', 'val':
          (dir / p / q).mkdir(parents=True, exist_ok=True)
  # Train, Val Splits
  # 'patches' is the number of image archives per split: patch0..patch50 for train, patch0..patch43 for val.
  for split, patches in [('train', 50 + 1), ('val', 43 + 1)]:
      print(f"Processing {split} in {patches} patches ...")
      images, labels = dir / 'images' / split, dir / 'labels' / split
      # Download
      url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/"
      if split == 'train':
          # Train annotations are fetched as a .tar.gz; the Labels step below reads zhiyuan_objv2_train.json
          # from the dataset root (presumably produced by download() extracting the archive - TODO confirm).
          download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dir)  # annotations json
          download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, threads=8)
      elif split == 'val':
          download([f'{url}zhiyuan_objv2_{split}.json'], dir=dir)  # annotations json
          # Val image archives are split across two remote folders: v1 holds patch0..15, v2 holds patch16..43.
          download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, threads=8)
          download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, threads=8)
      # Move
      # Flatten: every .jpg found anywhere below images/{split} is moved directly into images/{split}.
      for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'):
          f.rename(images / f.name)  # move to /images/{split}
      # Labels
      coco = COCO(dir / f'zhiyuan_objv2_{split}.json')
      names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
      # The class id written to the label files is the position of the category in 'names' (cid).
      for cid, cat in enumerate(names):
          catIds = coco.getCatIds(catNms=[cat])
          imgIds = coco.getImgIds(catIds=catIds)
          for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
              width, height = im["width"], im["height"]
              path = Path(im["file_name"])  # image filename
              # Best effort: any failure for one image is printed and the loop moves on to the next image.
              try:
                  # Append mode: the same image is visited once per category it contains, so its
                  # label file accumulates lines across iterations of the outer category loop.
                  with open(labels / path.with_suffix('.txt').name, 'a') as file:
                      annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
                      for a in coco.loadAnns(annIds):
                          x, y, w, h = a['bbox']  # bounding box in xywh (xy top-left corner)
                          xyxy = np.array([x, y, x + w, y + h])[None]  # pixels(1,4)
                          x, y, w, h = xyxy2xywhn(xyxy, w=width, h=height, clip=True)[0]  # normalized and clipped
                          file.write(f"{cid} {x:.5f} {y:.5f} {w:.5f} {h:.5f}\n")
              except Exception as e:
                  print(e)
--- a/ultralytics/cfg/datasets/SKU-110K.yaml
+++ b/ultralytics/cfg/datasets/SKU-110K.yaml
@ -1,57 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
 # Documentation: https://docs.ultralytics.com/datasets/detect/sku-110k/
 # Example usage: yolo train data=SKU-110K.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── SKU-110K  ← downloads here (13.6 GB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/SKU-110K # dataset root dir
 train: train.txt # train images (relative to 'path')  8219 images
 val: val.txt # val images (relative to 'path')  588 images
 test: test.txt # test images (optional)  2936 images
 # Classes
 names:
  0: object
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  import shutil
  from pathlib import Path
  import numpy as np
  import pandas as pd
  from tqdm import tqdm
  from ultralytics.utils.downloads import download
  from ultralytics.utils.ops import xyxy2xywh
  # Download
  dir = Path(yaml['path'])  # dataset root dir
  parent = Path(dir.parent)  # download dir
  urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
  download(urls, dir=parent)
  # Rename directories
  # Any existing dataset directory is removed first so the rename below cannot collide with it
  if dir.exists():
      shutil.rmtree(dir)
  (parent / 'SKU110K_fixed').rename(dir)  # rename dir
  (dir / 'labels').mkdir(parents=True, exist_ok=True)  # create labels dir
  # Convert labels
  names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height'  # column names
  for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
      x = pd.read_csv(dir / 'annotations' / d, names=names).values  # annotations
      images, unique_images = x[:, 0], np.unique(x[:, 0])
      # Build train.txt / val.txt / test.txt from the CSV file name only. Stripping 'annotations_'
      # from the whole path string would also rewrite a parent directory that contains that text.
      list_file = (dir / d.replace('annotations_', '')).with_suffix('.txt')
      with open(list_file, 'w') as f:
          f.writelines(f'./images/{s}\n' for s in unique_images)
      for im in tqdm(unique_images, desc=f'Converting {dir / d}'):
          cls = 0  # single-class dataset
          # Append mode: rows for one image are written as that image's label file is visited
          with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f:
              for r in x[images == im]:
                  w, h = r[6], r[7]  # image width, height
                  xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0]  # instance
                  f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n")  # write label
--- a/ultralytics/cfg/datasets/VOC.yaml
+++ b/ultralytics/cfg/datasets/VOC.yaml
@ -1,99 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
 # Documentation: https://docs.ultralytics.com/datasets/detect/voc/
 # Example usage: yolo train data=VOC.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── VOC  ← downloads here (2.8 GB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/VOC
 train: # train images (relative to 'path')  16551 images
  - images/train2012
  - images/train2007
  - images/val2012
  - images/val2007
 val: # val images (relative to 'path')  4952 images
  - images/test2007
 test: # test images (optional)
  - images/test2007
 # Classes
 names:
  0: aeroplane
  1: bicycle
  2: bird
  3: boat
  4: bottle
  5: bus
  6: car
  7: cat
  8: chair
  9: cow
  10: diningtable
  11: dog
  12: horse
  13: motorbike
  14: person
  15: pottedplant
  16: sheep
  17: sofa
  18: train
  19: tvmonitor
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  import xml.etree.ElementTree as ET
  from tqdm import tqdm
  from ultralytics.utils.downloads import download
  from pathlib import Path
  def convert_label(path, lb_path, year, image_id):
      """Convert one PASCAL VOC XML annotation into a YOLO-format label file.

      Args:
          path: Directory (Path) that contains the ``VOC{year}`` folders; the annotation is read
              from ``path / 'VOC{year}/Annotations/{image_id}.xml'``.
          lb_path: Label file to write; it is opened in 'w' mode and therefore overwritten.
          year: Year string used to build the ``VOC{year}`` folder name.
          image_id: Annotation file stem.

      Objects whose name is not among ``yaml['names']`` values, or whose ``difficult`` flag
      equals 1, are skipped. Each kept object becomes one ``cls_id x y w h`` line.
      """
      def convert_box(size, box):
          """Turn an (xmin, xmax, ymin, ymax) pixel box into (x_center, y_center, w, h) scaled by 1/size."""
          dw, dh = 1. / size[0], 1. / size[1]
          x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
          return x * dw, y * dh, w * dw, h * dh

      # Both handles are managed by 'with' so they are closed, and the label file flushed,
      # when the function returns or raises, instead of waiting for garbage collection.
      with open(path / f'VOC{year}/Annotations/{image_id}.xml') as in_file, open(lb_path, 'w') as out_file:
          tree = ET.parse(in_file)
          root = tree.getroot()
          size = root.find('size')
          w = int(size.find('width').text)
          h = int(size.find('height').text)
          names = list(yaml['names'].values())  # names list
          for obj in root.iter('object'):
              cls = obj.find('name').text
              if cls in names and int(obj.find('difficult').text) != 1:
                  xmlbox = obj.find('bndbox')
                  bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
                  cls_id = names.index(cls)  # class id
                  out_file.write(" ".join(str(a) for a in (cls_id, *bb)) + '\n')
  # Download
  dir = Path(yaml['path'])  # dataset root dir
  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  archives = (
      'VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
      'VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
      'VOCtrainval_11-May-2012.zip',  # 1.95GB, 17126 images
  )
  urls = [url + archive for archive in archives]
  download(urls, dir=dir / 'images', curl=True, threads=3, exist_ok=True)  # download and unzip over existing paths (required)
  # Convert
  path = dir / 'images/VOCdevkit'
  splits = (('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'))
  for year, image_set in splits:
      split_name = f'{image_set}{year}'
      imgs_path = dir / 'images' / split_name
      lbs_path = dir / 'labels' / split_name
      for folder in (imgs_path, lbs_path):
          folder.mkdir(exist_ok=True, parents=True)
      # The image-set file lists the ids belonging to this split, separated by whitespace
      with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as ids_file:
          image_ids = ids_file.read().strip().split()
      for image_id in tqdm(image_ids, desc=split_name):
          src_img = path / f'VOC{year}/JPEGImages/{image_id}.jpg'  # old img path
          lb_path = lbs_path.joinpath(src_img.name).with_suffix('.txt')  # new label path
          src_img.rename(imgs_path / src_img.name)  # move image
          convert_label(path, lb_path, year, image_id)  # convert labels to YOLO format
--- a/ultralytics/cfg/datasets/VisDrone.yaml
+++ b/ultralytics/cfg/datasets/VisDrone.yaml
@ -1,72 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University
 # Documentation: https://docs.ultralytics.com/datasets/detect/visdrone/
 # Example usage: yolo train data=VisDrone.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── VisDrone  ← downloads here (2.3 GB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/VisDrone # dataset root dir
 train: VisDrone2019-DET-train/images # train images (relative to 'path')  6471 images
 val: VisDrone2019-DET-val/images # val images (relative to 'path')  548 images
 test: VisDrone2019-DET-test-dev/images # test images (optional)  1610 images
 # Classes
 names:
  0: pedestrian
  1: people
  2: bicycle
  3: car
  4: van
  5: truck
  6: tricycle
  7: awning-tricycle
  8: bus
  9: motor
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  import os
  from pathlib import Path
  from ultralytics.utils.downloads import download
  def visdrone2yolo(dir):
      """Convert the VisDrone annotation files of one split directory into YOLO label files.

      Args:
          dir: Split directory (Path) containing ``annotations/*.txt`` and ``images/*.jpg``.
              The name shadows the builtin ``dir`` but is kept so existing callers are unaffected.

      For every annotation file, a file of the same name is written to ``dir / 'labels'``.
      Rows whose fifth field is '0' are skipped; the class id is the sixth field minus one.
      A label file is written exactly once per annotation file, and it is also written (empty)
      when every row was skipped.
      """
      from PIL import Image
      from tqdm import tqdm

      def convert_box(size, box):
          # Convert VisDrone box to YOLO xywh box
          dw = 1. / size[0]
          dh = 1. / size[1]
          return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh

      labels_dir = dir / 'labels'
      labels_dir.mkdir(parents=True, exist_ok=True)  # make labels directory
      pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
      for f in pbar:
          # Only the size is needed, so the image handle is closed immediately
          with Image.open((dir / 'images' / f.name).with_suffix('.jpg')) as im:
              img_size = im.size
          lines = []
          with open(f, 'r') as file:  # read annotation.txt
              for row in (x.split(',') for x in file.read().strip().splitlines()):
                  if row[4] == '0':  # VisDrone 'ignored regions' class 0
                      continue
                  cls = int(row[5]) - 1
                  box = convert_box(img_size, tuple(map(int, row[:4])))
                  lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
          # Written once, after all rows are parsed, instead of rewriting the file for every row.
          # The path is built from the file name so other parts of the path are never altered.
          with open(labels_dir / f.name, 'w') as fl:
              fl.writelines(lines)  # write label.txt
  # Download
  dir = Path(yaml['path'])  # dataset root dir
  release = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  converted_splits = ('VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev')
  # All four archives are downloaded, but only the first three splits are converted below
  urls = [f'{release}{archive}.zip' for archive in (*converted_splits, 'VisDrone2019-DET-test-challenge')]
  download(urls, dir=dir, curl=True, threads=4)
  # Convert
  for split_dir in converted_splits:
      visdrone2yolo(dir / split_dir)  # convert VisDrone annotations to YOLO labels
--- a/ultralytics/cfg/datasets/african-wildlife.yaml
+++ b/ultralytics/cfg/datasets/african-wildlife.yaml
@ -1,24 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # African-wildlife dataset by Ultralytics
 # Documentation: https://docs.ultralytics.com/datasets/detect/african-wildlife/
 # Example usage: yolo train data=african-wildlife.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── african-wildlife  ← downloads here (100 MB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/african-wildlife # dataset root dir
 train: train/images # train images (relative to 'path') 1052 images
 val: valid/images # val images (relative to 'path') 225 images
 test: test/images # test images (relative to 'path') 227 images
 # Classes
 names:
  0: buffalo
  1: elephant
  2: rhino
  3: zebra
 # Download script/URL (optional)
 download: https://ultralytics.com/assets/african-wildlife.zip
--- a/ultralytics/cfg/datasets/brain-tumor.yaml
+++ b/ultralytics/cfg/datasets/brain-tumor.yaml
@ -1,22 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Brain-tumor dataset by Ultralytics
 # Documentation: https://docs.ultralytics.com/datasets/detect/brain-tumor/
 # Example usage: yolo train data=brain-tumor.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── brain-tumor  ← downloads here (4.05 MB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/brain-tumor # dataset root dir
 train: train/images # train images (relative to 'path') 893 images
 val: valid/images # val images (relative to 'path') 223 images
 test: # test images (relative to 'path')
 # Classes
 names:
  0: negative
  1: positive
 # Download script/URL (optional)
 download: https://ultralytics.com/assets/brain-tumor.zip
--- a/ultralytics/cfg/datasets/carparts-seg.yaml
+++ b/ultralytics/cfg/datasets/carparts-seg.yaml
@ -1,43 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Carparts-seg dataset by Ultralytics
 # Documentation: https://docs.ultralytics.com/datasets/segment/carparts-seg/
 # Example usage: yolo train data=carparts-seg.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── carparts-seg  ← downloads here (132 MB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/carparts-seg # dataset root dir
 train: train/images # train images (relative to 'path') 3516 images
 val: valid/images # val images (relative to 'path') 276 images
 test: test/images # test images (relative to 'path') 401 images
 # Classes
 names:
  0: back_bumper
  1: back_door
  2: back_glass
  3: back_left_door
  4: back_left_light
  5: back_light
  6: back_right_door
  7: back_right_light
  8: front_bumper
  9: front_door
  10: front_glass
  11: front_left_door
  12: front_left_light
  13: front_light
  14: front_right_door
  15: front_right_light
  16: hood
  17: left_mirror
  18: object
  19: right_mirror
  20: tailgate
  21: trunk
  22: wheel
 # Download script/URL (optional)
 download: https://ultralytics.com/assets/carparts-seg.zip
--- a/ultralytics/cfg/datasets/coco-pose.yaml
+++ b/ultralytics/cfg/datasets/coco-pose.yaml
@ -1,38 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # COCO 2017 dataset https://cocodataset.org by Microsoft
 # Documentation: https://docs.ultralytics.com/datasets/pose/coco/
 # Example usage: yolo train data=coco-pose.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── coco-pose  ← downloads here (20.1 GB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/coco-pose # dataset root dir
 train: train2017.txt # train images (relative to 'path') 118287 images
 val: val2017.txt # val images (relative to 'path') 5000 images
 test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
 # Keypoints
 kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
 flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
 # Classes
 names:
  0: person
 # Download script/URL (optional)
 download: |
  from ultralytics.utils.downloads import download
  from pathlib import Path
  # Dataset root comes from the 'path' entry of the parsed YAML exposed to this script as `yaml`
  dataset_root = Path(yaml['path'])
  # Pose label archive goes into the directory that contains the dataset root
  release_base = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  label_archives = [release_base + 'coco2017labels-pose.zip']
  download(label_archives, dir=dataset_root.parent)
  # Image archives go into <dataset root>/images
  image_archives = [
      'http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
      'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
      'http://images.cocodataset.org/zips/test2017.zip',  # 7G, 41k images (optional)
  ]
  download(image_archives, dir=dataset_root / 'images', threads=3)
--- a/ultralytics/cfg/datasets/coco.yaml
+++ b/ultralytics/cfg/datasets/coco.yaml
@ -1,114 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # COCO 2017 dataset https://cocodataset.org by Microsoft
 # Documentation: https://docs.ultralytics.com/datasets/detect/coco/
 # Example usage: yolo train data=coco.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── coco  ← downloads here (20.1 GB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/coco # dataset root dir
 train: train2017.txt # train images (relative to 'path') 118287 images
 val: val2017.txt # val images (relative to 'path') 5000 images
 test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
 # Classes
 names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush
 # Download script/URL (optional)
 download: |
  from ultralytics.utils.downloads import download
  from pathlib import Path
  # Pick the label archive: segment labels when `segments` is True, box labels otherwise
  segments = True  # segment or box labels
  if segments:
      label_zip = 'coco2017labels-segments.zip'
  else:
      label_zip = 'coco2017labels.zip'
  # Dataset root comes from the 'path' entry of the parsed YAML exposed to this script as `yaml`
  dataset_root = Path(yaml['path'])
  release_base = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  # Label archive goes into the directory that contains the dataset root
  download([release_base + label_zip], dir=dataset_root.parent)
  # Image archives go into <dataset root>/images
  image_archives = [
      'http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
      'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
      'http://images.cocodataset.org/zips/test2017.zip',  # 7G, 41k images (optional)
  ]
  download(image_archives, dir=dataset_root / 'images', threads=3)
--- a/ultralytics/cfg/datasets/coco128-seg.yaml
+++ b/ultralytics/cfg/datasets/coco128-seg.yaml
@ -1,100 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
 # Documentation: https://docs.ultralytics.com/datasets/segment/coco/
 # Example usage: yolo train data=coco128-seg.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── coco128-seg  ← downloads here (7 MB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/coco128-seg # dataset root dir
 train: images/train2017 # train images (relative to 'path') 128 images
 val: images/train2017 # val images (relative to 'path') 128 images
 test: # test images (optional)
 # Classes
 names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush
 # Download script/URL (optional)
 download: https://ultralytics.com/assets/coco128-seg.zip
--- a/ultralytics/cfg/datasets/coco128.yaml
+++ b/ultralytics/cfg/datasets/coco128.yaml
@ -1,100 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
 # Documentation: https://docs.ultralytics.com/datasets/detect/coco/
 # Example usage: yolo train data=coco128.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── coco128  ← downloads here (7 MB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/coco128 # dataset root dir
 train: images/train2017 # train images (relative to 'path') 128 images
 val: images/train2017 # val images (relative to 'path') 128 images
 test: # test images (optional)
 # Classes
 names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush
 # Download script/URL (optional)
 download: https://ultralytics.com/assets/coco128.zip
--- a/ultralytics/cfg/datasets/coco8-pose.yaml
+++ b/ultralytics/cfg/datasets/coco8-pose.yaml
@ -1,25 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics
 # Documentation: https://docs.ultralytics.com/datasets/pose/coco8-pose/
 # Example usage: yolo train data=coco8-pose.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── coco8-pose  ← downloads here (1 MB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/coco8-pose # dataset root dir
 train: images/train # train images (relative to 'path') 4 images
 val: images/val # val images (relative to 'path') 4 images
 test: # test images (optional)
 # Keypoints
 kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
 flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
 # Classes
 names:
  0: person
 # Download script/URL (optional)
 download: https://ultralytics.com/assets/coco8-pose.zip
--- a/ultralytics/cfg/datasets/coco8-seg.yaml
+++ b/ultralytics/cfg/datasets/coco8-seg.yaml
@ -1,100 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # COCO8-seg dataset (first 8 images from COCO train2017) by Ultralytics
 # Documentation: https://docs.ultralytics.com/datasets/segment/coco8-seg/
 # Example usage: yolo train data=coco8-seg.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── coco8-seg  ← downloads here (1 MB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/coco8-seg # dataset root dir
 train: images/train # train images (relative to 'path') 4 images
 val: images/val # val images (relative to 'path') 4 images
 test: # test images (optional)
 # Classes
 names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush
 # Download script/URL (optional)
 download: https://ultralytics.com/assets/coco8-seg.zip
--- a/ultralytics/cfg/datasets/coco8.yaml
+++ b/ultralytics/cfg/datasets/coco8.yaml
@ -1,100 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # COCO8 dataset (first 8 images from COCO train2017) by Ultralytics
 # Documentation: https://docs.ultralytics.com/datasets/detect/coco8/
 # Example usage: yolo train data=coco8.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── coco8  ← downloads here (1 MB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/coco8 # dataset root dir
 train: images/train # train images (relative to 'path') 4 images
 val: images/val # val images (relative to 'path') 4 images
 test: # test images (optional)
 # Classes
 names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush
 # Download script/URL (optional)
 download: https://ultralytics.com/assets/coco8.zip
--- a/ultralytics/cfg/datasets/crack-seg.yaml
+++ b/ultralytics/cfg/datasets/crack-seg.yaml
@ -1,21 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Crack-seg dataset by Ultralytics
 # Documentation: https://docs.ultralytics.com/datasets/segment/crack-seg/
 # Example usage: yolo train data=crack-seg.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── crack-seg  ← downloads here (91.2 MB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/crack-seg # dataset root dir
 train: train/images # train images (relative to 'path') 3717 images
 val: valid/images # val images (relative to 'path') 112 images
 test: test/images # test images (relative to 'path') 200 images
 # Classes
 names:
  0: crack
 # Download script/URL (optional)
 download: https://ultralytics.com/assets/crack-seg.zip
--- a/ultralytics/cfg/datasets/dota8.yaml
+++ b/ultralytics/cfg/datasets/dota8.yaml
@ -1,34 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # DOTA8 dataset 8 images from split DOTAv1 dataset by Ultralytics
 # Documentation: https://docs.ultralytics.com/datasets/obb/dota8/
 # Example usage: yolo train model=yolov8n-obb.pt data=dota8.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── dota8  ← downloads here (1MB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/dota8 # dataset root dir
 train: images/train # train images (relative to 'path') 4 images
 val: images/val # val images (relative to 'path') 4 images
 # Classes for DOTA 1.0
 names:
  0: plane
  1: ship
  2: storage tank
  3: baseball diamond
  4: tennis court
  5: basketball court
  6: ground track field
  7: harbor
  8: bridge
  9: large vehicle
  10: small vehicle
  11: helicopter
  12: roundabout
  13: soccer ball field
  14: swimming pool
 # Download script/URL (optional)
 download: https://github.com/ultralytics/yolov5/releases/download/v1.0/dota8.zip
--- a/ultralytics/cfg/datasets/open-images-v7.yaml
+++ b/ultralytics/cfg/datasets/open-images-v7.yaml
@ -1,660 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Open Images v7 dataset https://storage.googleapis.com/openimages/web/index.html by Google
 # Documentation: https://docs.ultralytics.com/datasets/detect/open-images-v7/
 # Example usage: yolo train data=open-images-v7.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── open-images-v7  ← downloads here (561 GB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/open-images-v7 # dataset root dir
 train: images/train # train images (relative to 'path') 1743042 images
 val: images/val # val images (relative to 'path') 41620 images
 test: # test images (optional)
 # Classes
 names:
  0: Accordion
  1: Adhesive tape
  2: Aircraft
  3: Airplane
  4: Alarm clock
  5: Alpaca
  6: Ambulance
  7: Animal
  8: Ant
  9: Antelope
  10: Apple
  11: Armadillo
  12: Artichoke
  13: Auto part
  14: Axe
  15: Backpack
  16: Bagel
  17: Baked goods
  18: Balance beam
  19: Ball
  20: Balloon
  21: Banana
  22: Band-aid
  23: Banjo
  24: Barge
  25: Barrel
  26: Baseball bat
  27: Baseball glove
  28: Bat (Animal)
  29: Bathroom accessory
  30: Bathroom cabinet
  31: Bathtub
  32: Beaker
  33: Bear
  34: Bed
  35: Bee
  36: Beehive
  37: Beer
  38: Beetle
  39: Bell pepper
  40: Belt
  41: Bench
  42: Bicycle
  43: Bicycle helmet
  44: Bicycle wheel
  45: Bidet
  46: Billboard
  47: Billiard table
  48: Binoculars
  49: Bird
  50: Blender
  51: Blue jay
  52: Boat
  53: Bomb
  54: Book
  55: Bookcase
  56: Boot
  57: Bottle
  58: Bottle opener
  59: Bow and arrow
  60: Bowl
  61: Bowling equipment
  62: Box
  63: Boy
  64: Brassiere
  65: Bread
  66: Briefcase
  67: Broccoli
  68: Bronze sculpture
  69: Brown bear
  70: Building
  71: Bull
  72: Burrito
  73: Bus
  74: Bust
  75: Butterfly
  76: Cabbage
  77: Cabinetry
  78: Cake
  79: Cake stand
  80: Calculator
  81: Camel
  82: Camera
  83: Can opener
  84: Canary
  85: Candle
  86: Candy
  87: Cannon
  88: Canoe
  89: Cantaloupe
  90: Car
  91: Carnivore
  92: Carrot
  93: Cart
  94: Cassette deck
  95: Castle
  96: Cat
  97: Cat furniture
  98: Caterpillar
  99: Cattle
  100: Ceiling fan
  101: Cello
  102: Centipede
  103: Chainsaw
  104: Chair
  105: Cheese
  106: Cheetah
  107: Chest of drawers
  108: Chicken
  109: Chime
  110: Chisel
  111: Chopsticks
  112: Christmas tree
  113: Clock
  114: Closet
  115: Clothing
  116: Coat
  117: Cocktail
  118: Cocktail shaker
  119: Coconut
  120: Coffee
  121: Coffee cup
  122: Coffee table
  123: Coffeemaker
  124: Coin
  125: Common fig
  126: Common sunflower
  127: Computer keyboard
  128: Computer monitor
  129: Computer mouse
  130: Container
  131: Convenience store
  132: Cookie
  133: Cooking spray
  134: Corded phone
  135: Cosmetics
  136: Couch
  137: Countertop
  138: Cowboy hat
  139: Crab
  140: Cream
  141: Cricket ball
  142: Crocodile
  143: Croissant
  144: Crown
  145: Crutch
  146: Cucumber
  147: Cupboard
  148: Curtain
  149: Cutting board
  150: Dagger
  151: Dairy Product
  152: Deer
  153: Desk
  154: Dessert
  155: Diaper
  156: Dice
  157: Digital clock
  158: Dinosaur
  159: Dishwasher
  160: Dog
  161: Dog bed
  162: Doll
  163: Dolphin
  164: Door
  165: Door handle
  166: Doughnut
  167: Dragonfly
  168: Drawer
  169: Dress
  170: Drill (Tool)
  171: Drink
  172: Drinking straw
  173: Drum
  174: Duck
  175: Dumbbell
  176: Eagle
  177: Earrings
  178: Egg (Food)
  179: Elephant
  180: Envelope
  181: Eraser
  182: Face powder
  183: Facial tissue holder
  184: Falcon
  185: Fashion accessory
  186: Fast food
  187: Fax
  188: Fedora
  189: Filing cabinet
  190: Fire hydrant
  191: Fireplace
  192: Fish
  193: Flag
  194: Flashlight
  195: Flower
  196: Flowerpot
  197: Flute
  198: Flying disc
  199: Food
  200: Food processor
  201: Football
  202: Football helmet
  203: Footwear
  204: Fork
  205: Fountain
  206: Fox
  207: French fries
  208: French horn
  209: Frog
  210: Fruit
  211: Frying pan
  212: Furniture
  213: Garden Asparagus
  214: Gas stove
  215: Giraffe
  216: Girl
  217: Glasses
  218: Glove
  219: Goat
  220: Goggles
  221: Goldfish
  222: Golf ball
  223: Golf cart
  224: Gondola
  225: Goose
  226: Grape
  227: Grapefruit
  228: Grinder
  229: Guacamole
  230: Guitar
  231: Hair dryer
  232: Hair spray
  233: Hamburger
  234: Hammer
  235: Hamster
  236: Hand dryer
  237: Handbag
  238: Handgun
  239: Harbor seal
  240: Harmonica
  241: Harp
  242: Harpsichord
  243: Hat
  244: Headphones
  245: Heater
  246: Hedgehog
  247: Helicopter
  248: Helmet
  249: High heels
  250: Hiking equipment
  251: Hippopotamus
  252: Home appliance
  253: Honeycomb
  254: Horizontal bar
  255: Horse
  256: Hot dog
  257: House
  258: Houseplant
  259: Human arm
  260: Human beard
  261: Human body
  262: Human ear
  263: Human eye
  264: Human face
  265: Human foot
  266: Human hair
  267: Human hand
  268: Human head
  269: Human leg
  270: Human mouth
  271: Human nose
  272: Humidifier
  273: Ice cream
  274: Indoor rower
  275: Infant bed
  276: Insect
  277: Invertebrate
  278: Ipod
  279: Isopod
  280: Jacket
  281: Jacuzzi
  282: Jaguar (Animal)
  283: Jeans
  284: Jellyfish
  285: Jet ski
  286: Jug
  287: Juice
  288: Kangaroo
  289: Kettle
  290: Kitchen & dining room table
  291: Kitchen appliance
  292: Kitchen knife
  293: Kitchen utensil
  294: Kitchenware
  295: Kite
  296: Knife
  297: Koala
  298: Ladder
  299: Ladle
  300: Ladybug
  301: Lamp
  302: Land vehicle
  303: Lantern
  304: Laptop
  305: Lavender (Plant)
  306: Lemon
  307: Leopard
  308: Light bulb
  309: Light switch
  310: Lighthouse
  311: Lily
  312: Limousine
  313: Lion
  314: Lipstick
  315: Lizard
  316: Lobster
  317: Loveseat
  318: Luggage and bags
  319: Lynx
  320: Magpie
  321: Mammal
  322: Man
  323: Mango
  324: Maple
  325: Maracas
  326: Marine invertebrates
  327: Marine mammal
  328: Measuring cup
  329: Mechanical fan
  330: Medical equipment
  331: Microphone
  332: Microwave oven
  333: Milk
  334: Miniskirt
  335: Mirror
  336: Missile
  337: Mixer
  338: Mixing bowl
  339: Mobile phone
  340: Monkey
  341: Moths and butterflies
  342: Motorcycle
  343: Mouse
  344: Muffin
  345: Mug
  346: Mule
  347: Mushroom
  348: Musical instrument
  349: Musical keyboard
  350: Nail (Construction)
  351: Necklace
  352: Nightstand
  353: Oboe
  354: Office building
  355: Office supplies
  356: Orange
  357: Organ (Musical Instrument)
  358: Ostrich
  359: Otter
  360: Oven
  361: Owl
  362: Oyster
  363: Paddle
  364: Palm tree
  365: Pancake
  366: Panda
  367: Paper cutter
  368: Paper towel
  369: Parachute
  370: Parking meter
  371: Parrot
  372: Pasta
  373: Pastry
  374: Peach
  375: Pear
  376: Pen
  377: Pencil case
  378: Pencil sharpener
  379: Penguin
  380: Perfume
  381: Person
  382: Personal care
  383: Personal flotation device
  384: Piano
  385: Picnic basket
  386: Picture frame
  387: Pig
  388: Pillow
  389: Pineapple
  390: Pitcher (Container)
  391: Pizza
  392: Pizza cutter
  393: Plant
  394: Plastic bag
  395: Plate
  396: Platter
  397: Plumbing fixture
  398: Polar bear
  399: Pomegranate
  400: Popcorn
  401: Porch
  402: Porcupine
  403: Poster
  404: Potato
  405: Power plugs and sockets
  406: Pressure cooker
  407: Pretzel
  408: Printer
  409: Pumpkin
  410: Punching bag
  411: Rabbit
  412: Raccoon
  413: Racket
  414: Radish
  415: Ratchet (Device)
  416: Raven
  417: Rays and skates
  418: Red panda
  419: Refrigerator
  420: Remote control
  421: Reptile
  422: Rhinoceros
  423: Rifle
  424: Ring binder
  425: Rocket
  426: Roller skates
  427: Rose
  428: Rugby ball
  429: Ruler
  430: Salad
  431: Salt and pepper shakers
  432: Sandal
  433: Sandwich
  434: Saucer
  435: Saxophone
  436: Scale
  437: Scarf
  438: Scissors
  439: Scoreboard
  440: Scorpion
  441: Screwdriver
  442: Sculpture
  443: Sea lion
  444: Sea turtle
  445: Seafood
  446: Seahorse
  447: Seat belt
  448: Segway
  449: Serving tray
  450: Sewing machine
  451: Shark
  452: Sheep
  453: Shelf
  454: Shellfish
  455: Shirt
  456: Shorts
  457: Shotgun
  458: Shower
  459: Shrimp
  460: Sink
  461: Skateboard
  462: Ski
  463: Skirt
  464: Skull
  465: Skunk
  466: Skyscraper
  467: Slow cooker
  468: Snack
  469: Snail
  470: Snake
  471: Snowboard
  472: Snowman
  473: Snowmobile
  474: Snowplow
  475: Soap dispenser
  476: Sock
  477: Sofa bed
  478: Sombrero
  479: Sparrow
  480: Spatula
  481: Spice rack
  482: Spider
  483: Spoon
  484: Sports equipment
  485: Sports uniform
  486: Squash (Plant)
  487: Squid
  488: Squirrel
  489: Stairs
  490: Stapler
  491: Starfish
  492: Stationary bicycle
  493: Stethoscope
  494: Stool
  495: Stop sign
  496: Strawberry
  497: Street light
  498: Stretcher
  499: Studio couch
  500: Submarine
  501: Submarine sandwich
  502: Suit
  503: Suitcase
  504: Sun hat
  505: Sunglasses
  506: Surfboard
  507: Sushi
  508: Swan
  509: Swim cap
  510: Swimming pool
  511: Swimwear
  512: Sword
  513: Syringe
  514: Table
  515: Table tennis racket
  516: Tablet computer
  517: Tableware
  518: Taco
  519: Tank
  520: Tap
  521: Tart
  522: Taxi
  523: Tea
  524: Teapot
  525: Teddy bear
  526: Telephone
  527: Television
  528: Tennis ball
  529: Tennis racket
  530: Tent
  531: Tiara
  532: Tick
  533: Tie
  534: Tiger
  535: Tin can
  536: Tire
  537: Toaster
  538: Toilet
  539: Toilet paper
  540: Tomato
  541: Tool
  542: Toothbrush
  543: Torch
  544: Tortoise
  545: Towel
  546: Tower
  547: Toy
  548: Traffic light
  549: Traffic sign
  550: Train
  551: Training bench
  552: Treadmill
  553: Tree
  554: Tree house
  555: Tripod
  556: Trombone
  557: Trousers
  558: Truck
  559: Trumpet
  560: Turkey
  561: Turtle
  562: Umbrella
  563: Unicycle
  564: Van
  565: Vase
  566: Vegetable
  567: Vehicle
  568: Vehicle registration plate
  569: Violin
  570: Volleyball (Ball)
  571: Waffle
  572: Waffle iron
  573: Wall clock
  574: Wardrobe
  575: Washing machine
  576: Waste container
  577: Watch
  578: Watercraft
  579: Watermelon
  580: Weapon
  581: Whale
  582: Wheel
  583: Wheelchair
  584: Whisk
  585: Whiteboard
  586: Willow
  587: Window
  588: Window blind
  589: Wine
  590: Wine glass
  591: Wine rack
  592: Winter melon
  593: Wok
  594: Woman
  595: Wood-burning stove
  596: Woodpecker
  597: Worm
  598: Wrench
  599: Zebra
  600: Zucchini
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  from ultralytics.utils import LOGGER, SETTINGS, Path, is_ubuntu, get_ubuntu_version
  from ultralytics.utils.checks import check_requirements, check_version
  check_requirements('fiftyone')
  if is_ubuntu() and check_version(get_ubuntu_version(), '>=22.04'):
      # Ubuntu>=22.04 patch https://github.com/voxel51/fiftyone/issues/2961#issuecomment-1666519347
      check_requirements('fiftyone-db-ubuntu2204')
  import fiftyone as fo
  import fiftyone.zoo as foz
  import warnings
  # Download both Open Images V7 splits through the FiftyOne zoo and export each one in YOLOv5 layout
  # under SETTINGS['datasets_dir'] / name. The raw FiftyOne download lives in a sibling 'fiftyone' folder.
  name = 'open-images-v7'
  fraction = 1.0  # fraction of full dataset to use
  LOGGER.warning('WARNING ⚠️ Open Images V7 dataset requires at least **561 GB of free space. Starting download...')
  # 'train' must be processed first: `classes` is only assigned on the train pass and reused for validation
  for split in 'train', 'validation':  # 1743042 train, 41620 val images
      train = split == 'train'
      # Load Open Images dataset (max_samples is the split's full image count scaled by `fraction`)
      dataset = foz.load_zoo_dataset(name,
                                     split=split,
                                     label_types=['detections'],
                                     dataset_dir=Path(SETTINGS['datasets_dir']) / 'fiftyone' / name,
                                     max_samples=round((1743042 if train else 41620) * fraction))
      # Define classes
      if train:
          classes = dataset.default_classes  # all classes
          # classes = dataset.distinct('ground_truth.detections.label')  # only observed classes
      # Export to YOLO format
      with warnings.catch_warnings():
          # Silence UserWarnings raised from fiftyone.utils.yolo for the duration of the export only
          warnings.filterwarnings("ignore", category=UserWarning, module="fiftyone.utils.yolo")
          # The 'validation' split is exported under the name 'val'; overwrite is True only on the train pass
          dataset.export(export_dir=str(Path(SETTINGS['datasets_dir']) / name),
                         dataset_type=fo.types.YOLOv5Dataset,
                         label_field='ground_truth',
                         split='val' if split == 'validation' else split,
                         classes=classes,
                         overwrite=train)
--- a/ultralytics/cfg/datasets/package-seg.yaml
+++ b/ultralytics/cfg/datasets/package-seg.yaml
@ -1,21 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Package-seg dataset by Ultralytics
 # Documentation: https://docs.ultralytics.com/datasets/segment/package-seg/
 # Example usage: yolo train data=package-seg.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── package-seg  ← downloads here (102 MB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/package-seg # dataset root dir
 train: images/train # train images (relative to 'path') 1920 images
 val: images/val # val images (relative to 'path') 89 images
 test: test/images # test images (relative to 'path') 188 images
 # Classes
 names:
  0: package
 # Download script/URL (optional)
 download: https://ultralytics.com/assets/package-seg.zip
--- a/ultralytics/cfg/datasets/tiger-pose.yaml
+++ b/ultralytics/cfg/datasets/tiger-pose.yaml
@ -1,24 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Tiger Pose dataset by Ultralytics
 # Documentation: https://docs.ultralytics.com/datasets/pose/tiger-pose/
 # Example usage: yolo train data=tiger-pose.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── tiger-pose  ← downloads here (75.3 MB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/tiger-pose # dataset root dir
 train: train # train images (relative to 'path') 210 images
 val: val # val images (relative to 'path') 53 images
 # Keypoints
 kpt_shape: [12, 2] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
 flip_idx: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
 # Classes
 names:
  0: tiger
 # Download script/URL (optional)
 download: https://ultralytics.com/assets/tiger-pose.zip
--- a/ultralytics/cfg/datasets/xView.yaml
+++ b/ultralytics/cfg/datasets/xView.yaml
@ -1,152 +0,0 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
 # --------  DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command!  --------
 # Documentation: https://docs.ultralytics.com/datasets/detect/xview/
 # Example usage: yolo train data=xView.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── xView  ← downloads here (20.7 GB)
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/xView # dataset root dir
 train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
 val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images
 # Classes
 names:
  0: Fixed-wing Aircraft
  1: Small Aircraft
  2: Cargo Plane
  3: Helicopter
  4: Passenger Vehicle
  5: Small Car
  6: Bus
  7: Pickup Truck
  8: Utility Truck
  9: Truck
  10: Cargo Truck
  11: Truck w/Box
  12: Truck Tractor
  13: Trailer
  14: Truck w/Flatbed
  15: Truck w/Liquid
  16: Crane Truck
  17: Railway Vehicle
  18: Passenger Car
  19: Cargo Car
  20: Flat Car
  21: Tank car
  22: Locomotive
  23: Maritime Vessel
  24: Motorboat
  25: Sailboat
  26: Tugboat
  27: Barge
  28: Fishing Vessel
  29: Ferry
  30: Yacht
  31: Container Ship
  32: Oil Tanker
  33: Engineering Vehicle
  34: Tower crane
  35: Container Crane
  36: Reach Stacker
  37: Straddle Carrier
  38: Mobile Crane
  39: Dump Truck
  40: Haul Truck
  41: Scraper/Tractor
  42: Front loader/Bulldozer
  43: Excavator
  44: Cement Mixer
  45: Ground Grader
  46: Hut/Tent
  47: Shed
  48: Building
  49: Aircraft Hangar
  50: Damaged Building
  51: Facility
  52: Construction Site
  53: Vehicle Lot
  54: Helipad
  55: Storage Tank
  56: Shipping container lot
  57: Shipping Container
  58: Pylon
  59: Tower
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  import json
  import os
  from pathlib import Path
  import numpy as np
  from PIL import Image
  from tqdm import tqdm
  from ultralytics.data.utils import autosplit
  from ultralytics.utils.ops import xyxy2xywhn
  def convert_labels(fname=Path('xView/xView_train.geojson')):
      """Convert xView geoJSON labels to YOLO-format label files.

      Reads the geoJSON at ``fname`` and, for every feature that has image-coordinate bounds and whose image
      exists under ``<fname.parent>/train_images``, appends one ``<class> <box values>`` line to
      ``<fname.parent>/labels/train/<image name with its suffix replaced by .txt>``. Any existing
      ``labels/train`` directory is removed first. A feature that fails to convert is reported with a
      printed warning and skipped; it does not abort the run.

      Args:
          fname (Path): Path to the xView training geoJSON file.
      """
      import shutil  # local import: only needed here to clear the old label directory

      path = fname.parent
      with open(fname) as f:
          print(f'Loading {fname}...')
          data = json.load(f)

      # Make dirs; start from an empty directory because labels are appended below
      labels = Path(path / 'labels' / 'train')
      shutil.rmtree(labels, ignore_errors=True)  # shell-free, portable equivalent of `rm -rf`
      labels.mkdir(parents=True, exist_ok=True)

      # xView classes 11-94 to 0-59 (-1 marks a type id with no mapped class)
      xview_class2index = [
          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
          12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
          29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
          47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]

      shapes = {}  # image id -> (width, height), cached so each image is opened at most once
      for feature in tqdm(data['features'], desc=f'Converting {fname}'):
          p = feature['properties']
          if not p['bounds_imcoords']:
              continue
          image_id = p['image_id']
          file = path / 'train_images' / image_id
          if not file.exists():  # 1395.tif missing
              continue
          try:
              box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
              if box.shape[0] != 4:
                  raise ValueError(f'incorrect box shape {box.shape[0]}')
              cls = xview_class2index[int(p['type_id'])]  # xView class to 0-59
              if not 0 <= cls <= 59:
                  raise ValueError(f'incorrect class index {cls}')

              # Write YOLO label
              if image_id not in shapes:
                  with Image.open(file) as im:  # close the file handle once the size is read
                      shapes[image_id] = im.size
              w, h = shapes[image_id]
              # np.float was removed in NumPy 1.24; np.float64 is the dtype it used to alias
              box = xyxy2xywhn(box[None].astype(np.float64), w=w, h=h, clip=True)
              with open((labels / image_id).with_suffix('.txt'), 'a') as lf:
                  lf.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n")  # write label.txt
          except Exception as e:  # deliberate best effort: report the bad label and carry on
              print(f'WARNING: skipping one label for {file}: {e}')
  # The archives must be fetched by hand from https://challenge.xviewdataset.org and unpacked into the
  # dataset root before this script runs. Former direct links, kept for reference:
  #   https://d307kc0mrhucc3.cloudfront.net/train_labels.zip   train labels
  #   https://d307kc0mrhucc3.cloudfront.net/train_images.zip   15G, 847 train images
  #   https://d307kc0mrhucc3.cloudfront.net/val_images.zip     5G, 282 val images (no labels)
  dataset_root = Path(yaml['path'])  # dataset root dir
  # Convert the geoJSON annotations into label files
  convert_labels(dataset_root / 'xView_train.geojson')
  # Relocate <root>/train_images and <root>/val_images to <root>/images/train and <root>/images/val
  image_root = dataset_root / 'images'
  image_root.mkdir(parents=True, exist_ok=True)
  for split in ('train', 'val'):
      (dataset_root / f'{split}_images').rename(image_root / split)
  # Run autosplit on the relocated train images
  autosplit(image_root / 'train')
--- a/ultralytics/cfg/default.yaml
+++ b/ultralytics/cfg/default.yaml
@ -10,14 +10,14 @@ data: # (str, optional) path to data file, i.e. coco128.yaml
 epochs: 100 # (int) number of epochs to train for
 time: # (float, optional) number of hours to train for, overrides epochs if supplied
 patience: 100 # (int) epochs to wait for no observable improvement for early stopping of training
-batch: 16 # (int) number of images per batch (-1 for AutoBatch)
+batch: 8 # (int) number of images per batch (-1 for AutoBatch)
 imgsz: 640 # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
 save: True # (bool) save train checkpoints and predict results
 save_period: -1 # (int) Save checkpoint every x epochs (disabled if < 1)
 val_period: 1 # (int) Validation every x epochs
 cache: False # (bool) True/ram, disk or False. Use cache for data loading
 device: # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
-workers: 8 # (int) number of worker threads for data loading (per RANK if DDP)
+workers: 2 # (int) number of worker threads for data loading (per RANK if DDP)
 project: # (str, optional) project name
 name: # (str, optional) experiment name, results saved to 'project/name' directory
 exist_ok: False # (bool) whether to overwrite existing experiment
--- a/ultralytics/cfg/models/v10/yolov10b-seg.yaml
+++ b/ultralytics/cfg/models/v10/yolov10b-seg.yaml
@ -0,0 +1,40 @@
 # Parameters
 nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  b: [0.67, 1.00, 512] 
 # YOLOv10b-seg backbone
 backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2fCIB, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10
 # YOLOv10b-seg head
 head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2fCIB, [512, True]] # 13
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
  - [[16, 19, 22], 1, v10Segment, [nc, 32, 256]] # segment(P3, P4, P5)
--- a/ultralytics/cfg/models/v10/yolov10l-seg.yaml
+++ b/ultralytics/cfg/models/v10/yolov10l-seg.yaml
@ -0,0 +1,40 @@
 # Parameters
 nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  l: [1.00, 1.00, 512] # YOLOv8l reference figures, not this model's: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
 # YOLOv10l-seg backbone
 backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2fCIB, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10
 # YOLOv10l-seg head
 head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2fCIB, [512, True]] # 13
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
  - [[16, 19, 22], 1, v10Segment, [nc, 32, 256]] # segment(P3, P4, P5)
--- a/ultralytics/cfg/models/v10/yolov10m-seg.yaml
+++ b/ultralytics/cfg/models/v10/yolov10m-seg.yaml
@ -0,0 +1,43 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # YOLOv10 instance segmentation model with P3-P5 outputs. For usage examples see https://docs.ultralytics.com/tasks/segment
 # Parameters
 nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  m: [0.67, 0.75, 768] # YOLOv8m reference figures, not this model's: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
 # YOLOv10m-seg backbone
 backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2fCIB, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10
 # YOLOv10m-seg head
 head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 13
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
  - [[16, 19, 22], 1, v10Segment, [nc, 32, 256]] # segment(P3, P4, P5)
--- a/ultralytics/cfg/models/v10/yolov10n-seg.yaml
+++ b/ultralytics/cfg/models/v10/yolov10n-seg.yaml
@ -0,0 +1,40 @@
 # Parameters
 nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] 
 # YOLOv10n-seg backbone
 backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10
 # YOLOv10n-seg head
 head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 13
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 19 (P4/16-medium)
  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)
  - [[16, 19, 22], 1, v10Segment, [nc, 32, 256]] # segment(P3, P4, P5)
--- a/ultralytics/cfg/models/v10/yolov10s-seg.yaml
+++ b/ultralytics/cfg/models/v10/yolov10s-seg.yaml
@ -0,0 +1,39 @@
 # Parameters
 nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  s: [0.33, 0.50, 1024]
 backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2fCIB, [1024, True, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10
 # YOLOv10s-seg head
 head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 13
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 19 (P4/16-medium)
  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)
  - [[16, 19, 22], 1, v10Segment, [nc, 32, 256]] # segment(P3, P4, P5)
--- a/ultralytics/cfg/models/v10/yolov10x-seg.yaml
+++ b/ultralytics/cfg/models/v10/yolov10x-seg.yaml
@ -0,0 +1,40 @@
 # Parameters
 nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  x: [1.00, 1.25, 512]
 # YOLOv10x-seg backbone
 backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2fCIB, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2fCIB, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10
 # YOLOv10x-seg head
 head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2fCIB, [512, True]] # 13
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
  - [[16, 19, 22], 1, v10Segment, [nc, 32, 256]] # segment(P3, P4, P5)
--- a/ultralytics/data/base.py
+++ b/ultralytics/data/base.py
@ -267,7 +267,7 @@ class BaseDataset(Dataset):
    def __len__(self):
        """Returns the length of the labels list for the dataset."""
-        return len(self.labels)
+        return int(0.1*len(self.labels))
    def update_labels_info(self, label):
        """Custom your label format here."""
--- a/ultralytics/engine/trainer.py
+++ b/ultralytics/engine/trainer.py
@ -268,7 +268,7 @@ class BaseTrainer:
        self.amp = torch.tensor(self.args.amp).to(self.device)  # True or False
        if self.amp and RANK in (-1, 0):  # Single-GPU and DDP
            callbacks_backup = callbacks.default_callbacks.copy()  # backup callbacks as check_amp() resets them
-            self.amp = torch.tensor(check_amp(self.model), device=self.device)
+            self.amp = True# torch.tensor(check_amp(self.model), device=self.device)
            callbacks.default_callbacks = callbacks_backup  # restore callbacks
        if RANK > -1 and world_size > 1:  # DDP
            dist.broadcast(self.amp, src=0)  # broadcast the tensor from rank 0 to all other ranks (returns None)
@ -420,6 +420,7 @@ class BaseTrainer:
            self.lr = {f"lr/pg{ir}": x["lr"] for ir, x in enumerate(self.optimizer.param_groups)}  # for loggers
            self.run_callbacks("on_train_epoch_end")
            # validate
            if RANK in (-1, 0):
                final_epoch = epoch + 1 == self.epochs
                self.ema.update_attr(self.model, include=["yaml", "nc", "args", "names", "stride", "class_weights"])
--- a/ultralytics/engine/validator.py
+++ b/ultralytics/engine/validator.py
@ -187,7 +187,7 @@ class BaseValidator:
                preds = self.postprocess(preds)
            self.update_metrics(preds, batch)
-            if self.args.plots and batch_i < 3:
+            if False:#self.args.plots and batch_i < 3:
                self.plot_val_samples(batch, batch_i)
                self.plot_predictions(batch, preds, batch_i)
--- a/ultralytics/models/init.py
+++ b/ultralytics/models/init.py
@ -4,5 +4,5 @@ from .rtdetr import RTDETR
 from .sam import SAM
 from .yolo import YOLO, YOLOWorld
 from .yolov10 import YOLOv10
-
+from .yolov10seg import YOLOv10Seg
-__all__ = "YOLO", "RTDETR", "SAM", "YOLOWorld", "YOLOv10"  # allow simpler import
+__all__ = "YOLO", "RTDETR", "SAM", "YOLOWorld", "YOLOv10","YOLOv10Seg"  # allow simpler import
--- a/ultralytics/models/yolov10/model.py
+++ b/ultralytics/models/yolov10/model.py
@ -7,7 +7,7 @@ from .train import YOLOv10DetectionTrainer
 from huggingface_hub import PyTorchModelHubMixin
 from .card import card_template_text
-class YOLOv10(Model, PyTorchModelHubMixin, model_card_template=card_template_text):
+class YOLOv10(Model):# , PyTorchModelHubMixin, model_card_template=card_template_text):
    def __init__(self, model="yolov10n.pt", task=None, verbose=False, 
                 names=None):
@ -33,4 +33,6 @@ class YOLOv10(Model, PyTorchModelHubMixin, model_card_template=card_template_tex
                "validator": YOLOv10DetectionValidator,
                "predictor": YOLOv10DetectionPredictor,
            },
-        }
+        }
--- a/ultralytics/models/yolov10seg/init.py
+++ b/ultralytics/models/yolov10seg/init.py
@ -0,0 +1,5 @@
 from .model import YOLOv10Seg
 from .predict import YOLOv10SegPredictor
 from .val import YOLOv10SegValidator
 # Public API of the yolov10seg package. Every entry must name something this module imports;
 # the previous "YOLOv10Detection*" entries did not exist here and broke `from ... import *`.
 __all__ = "YOLOv10SegPredictor", "YOLOv10SegValidator", "YOLOv10Seg"
--- a/ultralytics/models/yolov10seg/card.py
+++ b/ultralytics/models/yolov10seg/card.py
@ -0,0 +1,64 @@
 card_template_text = """
 ---
 license: agpl-3.0
 library_name: ultralytics
 repo_url: https://github.com/THU-MIG/yolov10
 tags:
 - object-detection
 - computer-vision
 - yolov10
 datasets:
 - detection-datasets/coco
 inference: false
 ---
 ### Model Description
 [YOLOv10: Real-Time End-to-End Object Detection](https://arxiv.org/abs/2405.14458v1)
 - arXiv: https://arxiv.org/abs/2405.14458v1
 - github: https://github.com/THU-MIG/yolov10
 ### Installation
 ```
 pip install git+https://github.com/THU-MIG/yolov10.git
 ```
 ### Training and validation
 ```python
 from ultralytics import YOLOv10
 model = YOLOv10.from_pretrained('jameslahm/yolov10n')
 # Training
 model.train(...)
 # after training, one can push to the hub
 model.push_to_hub("your-hf-username/yolov10-finetuned")
 # Validation
 model.val(...)
 ```
 ### Inference
 Here's an end-to-end example showcasing inference on a cats image:
 ```python
 from ultralytics import YOLOv10
 model = YOLOv10.from_pretrained('jameslahm/yolov10n')
 source = 'http://images.cocodataset.org/val2017/000000039769.jpg'
 model.predict(source=source, save=True)
 ```
 which shows:
 ![image/png](https://cdn-uploads.huggingface.co/production/uploads/628ece6054698ce61d1e7be3/tBwAsKcQA_96HCYQp7BRr.png)
 ### BibTeX Entry and Citation Info
 ```
@article{wang2024yolov10,
  title={YOLOv10: Real-Time End-to-End Object Detection},
  author={Wang, Ao and Chen, Hui and Liu, Lihao and Chen, Kai and Lin, Zijia and Han, Jungong and Ding, Guiguang},
  journal={arXiv preprint arXiv:2405.14458},
  year={2024}
 }
 ```
 """.strip()
--- a/ultralytics/models/yolov10seg/model.py
+++ b/ultralytics/models/yolov10seg/model.py
@ -0,0 +1,36 @@
 from ultralytics.engine.model import Model
 from ultralytics.nn.tasks import YOLOv10SegmentationModel
 from .val import YOLOv10SegValidator
 from .predict import YOLOv10SegPredictor
 from .train import YOLOv10SegTrainer
 from huggingface_hub import PyTorchModelHubMixin
 from .card import card_template_text
 class YOLOv10Seg(Model):
    """
    Model wrapper that wires the "segment" task to the YOLOv10 segmentation classes.

    NOTE(review): the ``PyTorchModelHubMixin`` base (and its ``model_card_template``) is currently
    disabled, so ``push_to_hub`` only works if ``Model`` or another base provides ``push_to_hub`` - confirm.
    """

    def __init__(self, model="yolov10n.pt", task=None, verbose=False, names=None):
        """
        Build the underlying model and optionally override its class names.

        Args:
            model: Weights or config forwarded to ``Model.__init__``.
            task: Task name forwarded to ``Model.__init__``.
            verbose: Verbosity flag forwarded to ``Model.__init__``.
            names: When not None, stored as the ``names`` attribute of the loaded model.
        """
        super().__init__(model=model, task=task, verbose=verbose)
        if names is None:
            return
        setattr(self.model, 'names', names)

    def push_to_hub(self, repo_name, **kwargs):
        """
        Add names, model yaml file and task to the hub config, then delegate to the parent ``push_to_hub``.

        A ``config`` dict passed by the caller is updated in place; otherwise a new one is created.
        """
        hub_config = kwargs.get('config', {})
        hub_config['names'] = self.names
        hub_config['model'] = self.model.yaml['yaml_file']
        hub_config['task'] = self.task
        kwargs['config'] = hub_config
        super().push_to_hub(repo_name, **kwargs)

    @property
    def task_map(self):
        """Return the mapping from task name to its model, trainer, validator and predictor classes."""
        segment_classes = {
            "model": YOLOv10SegmentationModel,
            "trainer": YOLOv10SegTrainer,
            "validator": YOLOv10SegValidator,
            "predictor": YOLOv10SegPredictor,
        }
        return {"segment": segment_classes}
--- a/ultralytics/models/yolov10seg/predict.py
+++ b/ultralytics/models/yolov10seg/predict.py
@ -0,0 +1,45 @@
 from ultralytics.models.yolo.detect import DetectionPredictor
 import torch
 from ultralytics.utils import ops
 from ultralytics.engine.results import Results
 import torch.nn.functional as F
 class YOLOv10SegPredictor(DetectionPredictor):
    """Predictor that converts raw YOLOv10 segmentation outputs into ``Results`` with boxes and masks."""

    def postprocess(self, preds, img, orig_imgs):
        """
        Convert raw model output into one ``Results`` object per image.

        Args:
            preds: Model output. Either a dict with "one2one", "coef" and "proto" entries, a list/tuple
                whose first element is the detection tensor, or the detection tensor itself.
            img: Preprocessed input batch; only ``img.shape[2:]`` is read, to rescale the boxes.
            orig_imgs: Original images as a list, or a batched tensor that is converted with
                ``ops.convert_torch2numpy_batch``.

        Returns:
            list: One ``Results`` per image. ``masks`` is None when the detections already arrive with
                6 values per row, because no mask data is decoded on that path.
        """
        coef, proto = None, None
        # Stays None when detections are already final (last dim == 6). Previously the name was left
        # unassigned on that path and the function failed with UnboundLocalError further down.
        segmask = None
        if isinstance(preds, dict):
            coef = preds['coef']
            proto = preds['proto']
            preds = preds["one2one"]
        if isinstance(preds, (list, tuple)):
            preds = preds[0]  # e.g. [1,5,6006]  coef[1,32,6006]  proto[1,32,104,176]
        if preds.shape[-1] != 6:
            preds = preds.transpose(-1, -2)  # e.g. [1,6006,5]
            coef = coef.transpose(-1, -2)
            bboxes, scores, labels, segmask = ops.v10segpostprocess(
                [preds, coef, proto], self.args.max_det, preds.shape[-1] - 4
            )
            bboxes = ops.xywh2xyxy(bboxes)
            preds = torch.cat([bboxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)

        # Keep detections above the confidence threshold and, if requested, in the selected classes.
        mask = preds[..., 4] > self.args.conf
        if self.args.classes is not None:
            mask = mask & (preds[..., 5:6] == torch.tensor(self.args.classes, device=preds.device).unsqueeze(0)).any(2)
        preds = [p[mask[idx]] for idx, p in enumerate(preds)]
        if segmask is not None:
            segmask = [p[mask[idx]] for idx, p in enumerate(segmask)]

        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

        results = []
        for i, pred in enumerate(preds):
            orig_img = orig_imgs[i]
            seg = None
            if segmask is not None:
                seg = segmask[i]
                cc, hh, ww = seg.shape
                # Upsample the masks 4x and binarise at 0.
                # NOTE(review): the factor 4 presumably matches the prototype stride - confirm.
                seg = F.interpolate(seg[None], (hh * 4, ww * 4), mode="bilinear", align_corners=False)[0].gt_(0)
            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
            img_path = self.batch[0][i]
            results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred, masks=seg))
        return results
--- a/ultralytics/models/yolov10seg/train.py
+++ b/ultralytics/models/yolov10seg/train.py
@ -0,0 +1,20 @@
 from ultralytics.models.yolo.detect import DetectionTrainer
 from .val import YOLOv10SegValidator
 from .model import YOLOv10SegmentationModel
 from copy import copy
 from ultralytics.utils import RANK
 class YOLOv10SegTrainer(DetectionTrainer):
    """Trainer that builds YOLOv10 segmentation models and validates them with YOLOv10SegValidator."""

    def get_validator(self):
        """Set the loss names used for logging and return a YOLOv10SegValidator over the test loader."""
        self.loss_names = ("box_om", "cls_om", "dfl_om", "box_oo", "cls_oo", "dfl_oo")
        args_snapshot = copy(self.args)  # the validator gets its own copy of the training args
        return YOLOv10SegValidator(
            self.test_loader,
            save_dir=self.save_dir,
            args=args_snapshot,
            _callbacks=self.callbacks,
        )

    def get_model(self, cfg=None, weights=None, verbose=True):
        """
        Create a YOLOv10SegmentationModel and optionally load weights into it.

        Args:
            cfg: Model configuration passed to the model constructor.
            weights: If truthy, passed to ``model.load``.
            verbose: Model construction is verbose only when this is truthy and ``RANK == -1``.

        Returns:
            The constructed YOLOv10SegmentationModel.
        """
        show_summary = verbose and RANK == -1
        net = YOLOv10SegmentationModel(cfg, nc=self.data["nc"], verbose=show_summary)
        if weights:
            net.load(weights)
        return net
--- a/ultralytics/models/yolov10seg/val.py
+++ b/ultralytics/models/yolov10seg/val.py
@ -0,0 +1,226 @@
 from ultralytics.models.yolo.detect import DetectionValidator
 from ultralytics.utils import ops
 import torch
 from ultralytics.utils.metrics import SegmentMetrics, box_iou, mask_iou
 import torch.nn.functional as F
 class YOLOv10SegValidator(DetectionValidator):
    """
    Validator for YOLOv10 segmentation models.

    Extends DetectionValidator so that each prediction is scored twice: once on box IoU ("tp") and
    once on mask IoU ("tp_m"). Results are collected in a SegmentMetrics instance.
    """

    def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
        """Initialise the detection validator, then switch the task and metrics to segmentation."""
        super().__init__(dataloader, save_dir, pbar, args, _callbacks)
        self.args.save_json |= self.is_coco
        self.plot_masks = None  # replaced by a list in init_metrics
        self.process = None  # mask processing function, selected in init_metrics
        self.args.task = "segment"
        self.metrics = SegmentMetrics(save_dir=self.save_dir, on_plot=self.on_plot)

    def init_metrics(self, model):
        """Initialize metrics and select mask processing function based on save_json flag."""
        super().init_metrics(model)
        self.plot_masks = []
        if self.args.save_json:
            # check_requirements("pycocotools>=2.0.6")
            self.process = ops.process_mask_upsample  # more accurate
        else:
            self.process = ops.process_mask  # faster
        # Per-image statistics; "tp_m" holds the mask matches, "tp" the box matches.
        self.stats = dict(tp_m=[], tp=[], conf=[], pred_cls=[], target_cls=[])

    def get_desc(self):
        """Return the formatted header row: class, counts, then box and mask P/R/mAP columns."""
        return ("%22s" + "%11s" * 10) % (
            "Class",
            "Images",
            "Instances",
            "Box(P",
            "R",
            "mAP50",
            "mAP50-95)",
            "Mask(P",
            "R",
            "mAP50",
            "mAP50-95)",
        )

    def _prepare_batch(self, si, batch):
        """Extend the parent's per-image batch dict with the ground-truth masks of image ``si``."""
        prepared_batch = super()._prepare_batch(si, batch)
        # With overlap_mask the masks are indexed per image; otherwise they are selected via batch_idx.
        midx = [si] if self.args.overlap_mask else batch["batch_idx"] == si
        prepared_batch["masks"] = batch["masks"][midx]
        return prepared_batch

    def finalize_metrics(self, *args, **kwargs):
        """Sets speed and confusion matrix for evaluation metrics."""
        self.metrics.speed = self.speed
        self.metrics.confusion_matrix = self.confusion_matrix

    def preprocess(self, batch):
        """Preprocesses batch by converting masks to float and sending to device."""
        batch = super().preprocess(batch)
        batch["masks"] = batch["masks"].to(self.device).float()
        return batch

    def postprocess(self, preds):
        """
        Decode raw model output into boxes and masks.

        Args:
            preds: Model output. Either a dict with "one2one", "coef" and "proto" entries, a list/tuple
                whose first element is the detection tensor, or the detection tensor itself.

        Returns:
            ``[preds, segmask]`` where ``preds`` has 6 values per row (xyxy box, score, label), or the
            bare detection tensor when it already arrives with 6 values per row.
        """
        coef, proto = None, None
        if isinstance(preds, dict):
            coef = preds["coef"]  # e.g. [1,32,5294]
            proto = preds["proto"]  # e.g. [1,32,92,168]
            preds = preds["one2one"]
        if isinstance(preds, (list, tuple)):
            preds = preds[0]
        # Acknowledgement: Thanks to sanha9999 in #190 and #181!
        if preds.shape[-1] == 6:
            # NOTE(review): update_metrics indexes preds[0] and preds[1] as (boxes, masks); this bare
            # tensor carries no masks, so this path looks unsupported for validation - confirm.
            return preds
        else:
            preds = preds.transpose(-1, -2)  # e.g. [1,6006,5]
            coef = coef.transpose(-1, -2)
            # bboxes, scores, labels = ops.v10postprocess(preds, self.args.max_det, preds.shape[-1]-4)
            bboxes, scores, labels, segmask = ops.v10segpostprocess(
                [preds, coef, proto], self.args.max_det, preds.shape[-1] - 4
            )
            bboxes = ops.xywh2xyxy(bboxes)
            preds = torch.cat([bboxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
            return [preds, segmask]

    def process_mask(self, segmask, bboxes, shape, upsample=False):
        """
        Crop predicted masks to their boxes and binarise them.

        Args:
            segmask: Mask tensor unpacked as (count, mask_height, mask_width).
            bboxes: Boxes in image coordinates, columns 0-3 being x1, y1, x2, y2. Not modified.
            shape: (height, width) of the image the boxes refer to.
            upsample: When True, masks are bilinearly resized to ``shape`` before thresholding.

        Returns:
            The masks thresholded in place at 0.
        """
        c, mh, mw = segmask.shape  # CHW
        ih, iw = shape
        width_ratio = mw / iw
        height_ratio = mh / ih
        # Scale a copy of the boxes from image resolution down to mask resolution.
        downsampled_bboxes = bboxes.clone()
        downsampled_bboxes[:, 0] *= width_ratio
        downsampled_bboxes[:, 2] *= width_ratio
        downsampled_bboxes[:, 3] *= height_ratio
        downsampled_bboxes[:, 1] *= height_ratio
        segmask = ops.crop_mask(segmask, downsampled_bboxes)  # CHW
        if upsample:
            segmask = F.interpolate(segmask[None], shape, mode="bilinear", align_corners=False)[0]  # CHW
        return segmask.gt_(0)

    def _prepare_pred(self, inputx, pbatch):
        """
        Rescale predicted boxes to the original image and crop the predicted masks.

        Args:
            inputx: A ``[pred, pred_masks]`` pair (list or tuple) for one image.
            pbatch: Per-image batch dict; "imgsz", "ori_shape" and "ratio_pad" are read.

        Returns:
            (predn, pred_masks): boxes in native image space, and masks cropped with the
            network-space boxes.

        Raises:
            TypeError: If ``inputx`` is not a list or tuple.
        """
        if not isinstance(inputx, (list, tuple)):
            # Previously this printed "error!!!!!!!" and then failed on None.clone().
            raise TypeError(
                f"_prepare_pred expects a [pred, pred_masks] pair, got {type(inputx).__name__}"
            )
        pred, pred_masks = inputx
        predn = pred.clone()
        predn[:, :4] = ops.scale_boxes(
            pbatch["imgsz"], predn[:, :4], pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"]
        )  # native-space pred
        # Masks are cropped with the un-rescaled boxes, which are relative to pbatch["imgsz"].
        pred_masks = self.process_mask(pred_masks, pred[:, :4], pbatch["imgsz"])
        return predn, pred_masks

    def _process_batch(self, detections, gt_bboxes, gt_cls, pred_masks=None, gt_masks=None, overlap=False, masks=False):
        """
        Return the correct-prediction matrix using box IoU or, when ``masks`` is True, mask IoU.

        Args:
            detections: Predictions with columns x1, y1, x2, y2, conf, class.
            gt_bboxes: Ground-truth boxes, compared against ``detections[:, :4]``.
            gt_cls: Ground-truth class per target.
            pred_masks: Predicted masks; used only when ``masks`` is True.
            gt_masks: Ground-truth masks; used only when ``masks`` is True.
            overlap: When True, ``gt_masks`` is a single index-encoded mask that is expanded to one
                binary mask per target (pixel value i + 1 marks target i).
            masks: Selects mask IoU instead of box IoU.

        Returns:
            The result of ``match_predictions`` for the predicted classes, ``gt_cls`` and the IoU matrix.
        """
        if masks:
            if overlap:
                nl = len(gt_cls)
                index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
                gt_masks = gt_masks.repeat(nl, 1, 1)  # shape(1,640,640) -> (n,640,640)
                gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
            if gt_masks.shape[1:] != pred_masks.shape[1:]:
                # Bring ground-truth masks to the prediction resolution before comparing.
                gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0]
                gt_masks = gt_masks.gt_(0.5)
            iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
        else:  # boxes
            iou = box_iou(gt_bboxes, detections[:, :4])
        return self.match_predictions(detections[:, 5], gt_cls, iou)

    def update_metrics(self, preds, batch):
        """
        Accumulate box and mask statistics for every image of the batch.

        Args:
            preds: The ``[boxes, masks]`` pair returned by ``postprocess``.
            batch: The preprocessed batch dict.
        """
        # box + score + label
        # preds, e.g. [[1,300,6],[1,300,96,168]]
        for si, (pred, pred_masks) in enumerate(zip(preds[0], preds[1])):
            self.seen += 1
            npr = len(pred)  # number of predictions for this image, e.g. 300
            stat = dict(
                conf=torch.zeros(0, device=self.device),
                pred_cls=torch.zeros(0, device=self.device),
                tp=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device),
                tp_m=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device),
            )
            pbatch = self._prepare_batch(si, batch)
            cls, bbox = pbatch.pop("cls"), pbatch.pop("bbox")
            nl = len(cls)
            stat["target_cls"] = cls
            if npr == 0:
                # No predictions: record the empty stats only if the image has targets.
                if nl:
                    for k in self.stats.keys():
                        self.stats[k].append(stat[k])
                    if self.args.plots:
                        self.confusion_matrix.process_batch(detections=None, gt_bboxes=bbox, gt_cls=cls)
                continue
            # Masks
            gt_masks = pbatch.pop("masks")
            # Predictions
            if self.args.single_cls:
                pred[:, 5] = 0
            predn, pred_masks = self._prepare_pred([pred, pred_masks], pbatch)
            stat["conf"] = predn[:, 4]
            stat["pred_cls"] = predn[:, 5]
            # Evaluate
            if nl:
                stat["tp"] = self._process_batch(predn, bbox, cls)
                stat["tp_m"] = self._process_batch(
                    predn, bbox, cls, pred_masks, gt_masks, self.args.overlap_mask, masks=True
                )
                if self.args.plots:
                    self.confusion_matrix.process_batch(predn, bbox, cls)
            for k in self.stats.keys():
                self.stats[k].append(stat[k])
            pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
            if self.args.plots and self.batch_i < 3:
                self.plot_masks.append(pred_masks[:15].cpu())  # filter top 15 to plot
            # Save
            # JSON export is switched off here regardless of args.save_json.
            if False:# self.args.save_json:
                pred_masks = ops.scale_image(
                    pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(),
                    pbatch["ori_shape"],
                    ratio_pad=batch["ratio_pad"][si],
                )
                self.pred_to_json(predn, batch["im_file"][si], pred_masks)
 """class YOLOv10SegValidator(DetectionValidator):
    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        if overrides is None:
            overrides = {}
        overrides["task"] = "segment"
        super().__init__(cfg, overrides, _callbacks)
    def get_model(self, cfg=None, weights=None, verbose=True):
        model = SegmentationModel(cfg, ch=3, nc=self.data["nc"], verbose=verbose and RANK == -1)
        if weights:
            model.load(weights)
        return model
    def get_validator(self):
        self.loss_names = "box_loss", "seg_loss", "cls_loss", "dfl_loss"
        return yolo.segment.SegmentationValidator(
            self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
        )
    def plot_training_samples(self, batch, ni):
        plot_images(
            batch["img"],
            batch["batch_idx"],
            batch["cls"].squeeze(-1),
            batch["bboxes"],
            masks=batch["masks"],
            paths=batch["im_file"],
            fname=self.save_dir / f"train_batch{ni}.jpg",
            on_plot=self.on_plot,
        )
    def plot_metrics(self):
        plot_results(file=self.csv, segment=True, on_plot=self.on_plot)  # save results.png"""
--- a/ultralytics/nn/modules/init.py
+++ b/ultralytics/nn/modules/init.py
@ -66,7 +66,7 @@ from .conv import (
    RepConv,
    SpatialAttention,
 )
-from .head import OBB, Classify, Detect, Pose, RTDETRDecoder, Segment, WorldDetect, v10Detect
+from .head import OBB, Classify, Detect, Pose, RTDETRDecoder, Segment, WorldDetect, v10Detect,v10Segment
 from .transformer import (
    AIFI,
    MLP,
@ -143,5 +143,6 @@ __all__ = (
    "C2fCIB",
    "SCDown",
    "RepVGGDW",
-    "v10Detect"
+    "v10Detect",
    "v10Segment"
 )
--- a/ultralytics/nn/modules/head.py
+++ b/ultralytics/nn/modules/head.py
@ -494,8 +494,8 @@ class RTDETRDecoder(nn.Module):
        for layer in self.input_proj:
            xavier_uniform_(layer[0].weight)
 class v10Detect(Detect):
 class v10Detect(Detect):
    max_det = 300
    def __init__(self, nc=80, ch=()):
@ -515,13 +515,13 @@ class v10Detect(Detect):
        if not self.training:
            one2one = self.inference(one2one)
-            if not self.export:
+            if not self.export:  
                return {"one2many": one2many, "one2one": one2one}
-            else:
+            else: 
                assert(self.max_det != -1)
                boxes, scores, labels = ops.v10postprocess(one2one.permute(0, 2, 1), self.max_det, self.nc)
                return torch.cat([boxes, scores.unsqueeze(-1), labels.unsqueeze(-1).to(boxes.dtype)], dim=-1)
-        else:
+        else: 
            return {"one2many": one2many, "one2one": one2one}
    def bias_init(self):
@ -533,3 +533,39 @@ class v10Detect(Detect):
        for a, b, s in zip(m.one2one_cv2, m.one2one_cv3, m.stride):  # from
            a[-1].bias.data[:] = 1.0  # box
            b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
 class v10Segment(v10Detect):
    """v10Detect head extended with a mask prototype branch and per-level mask coefficient branches."""

    def __init__(self, nc=80, nm=32, npr=256, ch=()):
        """
        Build the detection head plus the segmentation branches.

        Args:
            nc: Number of classes, forwarded to v10Detect.
            nm: Number of mask coefficients produced per location.
            npr: Channel width passed to the Proto module.
            ch: Input channels of each feature level; ``ch[0]`` feeds the prototype branch.
        """
        super().__init__(nc, ch)
        self.nm = nm
        self.npr = npr
        self.proto = Proto(ch[0], self.npr, self.nm)  # mask prototypes
        # Unbound parent forward, called as self.detect(self, x) from forward().
        self.detect = v10Detect.forward
        hidden = max(ch[0] // 4, self.nm)
        branches = []
        for c_in in ch:
            branches.append(nn.Sequential(Conv(c_in, hidden, 3), Conv(hidden, hidden, 3), nn.Conv2d(hidden, self.nm, 1)))
        self.cv4 = nn.ModuleList(branches)

    def forward(self, x):
        """
        Run the detection head and attach mask coefficients and prototypes.

        Args:
            x: Sequence of feature maps; indices ``0 .. self.nl - 1`` are read.

        Returns:
            During training and validation: the v10Detect output dict with "coef" and "proto" added.
            During export (eval mode with ``self.export`` set): the tuple (detections, coefficients,
            prototypes).
        """
        protos = self.proto(x[0])  # mask prototypes from the first feature level
        batch = protos.shape[0]
        # Mask coefficients: one (batch, nm, -1) tensor per level, concatenated along the last axis.
        # Computed before the detection forward, which receives the same x afterwards.
        per_level = [self.cv4[i](x[i]).view(batch, self.nm, -1) for i in range(self.nl)]
        coefs = torch.cat(per_level, 2)
        out = self.detect(self, x)
        if self.export and not self.training:  # export (e.g. ONNX)
            return out, coefs, protos
        # Training and validation share the same dict output.
        out["coef"] = coefs
        out["proto"] = protos
        return out
--- a/ultralytics/nn/tasks.py
+++ b/ultralytics/nn/tasks.py
@ -53,11 +53,12 @@ from ultralytics.nn.modules import (
    PSA,
    SCDown,
    RepVGGDW,
-    v10Detect
+    v10Detect,
    v10Segment
 )
 from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
 from ultralytics.utils.checks import check_requirements, check_suffix, check_yaml
-from ultralytics.utils.loss import v8ClassificationLoss, v8DetectionLoss, v8OBBLoss, v8PoseLoss, v8SegmentationLoss, v10DetectLoss
+from ultralytics.utils.loss import v8ClassificationLoss, v8DetectionLoss, v8OBBLoss, v8PoseLoss, v8SegmentationLoss, v10DetectLoss,v10SegmentationLoss
 from ultralytics.utils.plotting import feature_visualization
 from ultralytics.utils.torch_utils import (
    fuse_conv_and_bn,
@ -303,7 +304,9 @@ class DetectionModel(BaseModel):
            m.inplace = self.inplace
            forward = lambda x: self.forward(x)[0] if isinstance(m, (Segment, Pose, OBB)) else self.forward(x)
            if isinstance(m, v10Detect):
-                forward = lambda x: self.forward(x)["one2many"]
+                forward = lambda x: self.forward(x)["one2many"] if isinstance(m, (v10Segment)) else self.forward(x)["one2many"]
                # forward = lambda x: self.forward(x)["one2many"]
            m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
            self.stride = m.stride
            m.bias_init()  # only run once
@ -645,6 +648,16 @@ class YOLOv10DetectionModel(DetectionModel):
    def init_criterion(self):
        """Return the v10DetectLoss criterion constructed for this model."""
        criterion = v10DetectLoss(self)
        return criterion
 class YOLOv10SegmentationModel(DetectionModel):
    """DetectionModel variant that defaults to the YOLOv10 segmentation config and uses v10SegmentationLoss."""

    def __init__(self, cfg='yolov10n-seg.yaml', ch=3, nc=None, verbose=True):
        """Initialize the YOLOv10 segmentation model by forwarding every argument to DetectionModel."""
        options = dict(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
        super().__init__(**options)

    def init_criterion(self):
        """Return the v10SegmentationLoss criterion constructed for this model."""
        criterion = v10SegmentationLoss(self)
        return criterion
 class Ensemble(nn.ModuleList):
    """Ensemble of models."""
@ -917,10 +930,11 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
-        elif m in {Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn, v10Detect}:
+        elif m in {Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn, v10Detect,v10Segment}:
            args.append([ch[x] for x in f])
-            if m is Segment:
+            if m is Segment or m is v10Segment:
                args[2] = make_divisible(min(args[2], max_channels) * width, 8)
        elif m is RTDETRDecoder:  # special case, channels arg must be passed in index 1
            args.insert(1, [ch[x] for x in f])
        elif m is CBLinear:
@ -1007,7 +1021,7 @@ def guess_model_task(model):
            return "classify"
        if m == "detect" or m == "v10detect":
            return "detect"
-        if m == "segment":
+        if m == "segment" or m == "v10segment":
            return "segment"
        if m == "pose":
            return "pose"
--- a/ultralytics/utils/loss.py
+++ b/ultralytics/utils/loss.py
@ -191,6 +191,7 @@ class v8DetectionLoss:
            pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
            # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype))
            # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2)
        return dist2bbox(pred_dist, anchor_points, xywh=False)
    def __call__(self, preds, batch):
@ -250,15 +251,21 @@ class v8DetectionLoss:
 class v8SegmentationLoss(v8DetectionLoss):
    """Criterion class for computing training losses."""
-    def __init__(self, model):  # model must be de-paralleled
+    def __init__(self, model,tal_topk=10):  # model must be de-paralleled
        """Initializes the v8SegmentationLoss class, taking a de-paralleled model as argument."""
-        super().__init__(model)
+        super().__init__(model,tal_topk)
        self.overlap = model.args.overlap_mask
    def __call__(self, preds, batch):
        """Calculate and return the loss for the YOLO model."""
        loss = torch.zeros(4, device=self.device)  # box, cls, dfl
        feats, pred_masks, proto = preds if len(preds) == 3 else preds[1]
        feats = feats[1] if isinstance(feats, tuple) else feats
        # [4, 65, 80, 80] [4, 65, 40, 40] [4, 65, 20, 20]
        # [4, 32, 8400] [4, 32, 160, 160]
        batch_size, _, mask_h, mask_w = proto.shape  # batch size, number of masks, mask height, mask width
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1
@ -288,7 +295,6 @@ class v8SegmentationLoss(v8DetectionLoss):
                "correctly formatted 'segment' dataset using 'data=coco8-seg.yaml' "
                "as an example.\nSee https://docs.ultralytics.com/datasets/segment/ for help."
            ) from e
        # Pboxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)
@ -725,3 +731,26 @@ class v10DetectLoss:
        one2one = preds["one2one"]
        loss_one2one = self.one2one(one2one, batch)
        return loss_one2many[0] + loss_one2one[0], torch.cat((loss_one2many[1], loss_one2one[1]))
 class v10SegmentationLoss:
    """Training criterion that adds a one-to-many and a one-to-one v8SegmentationLoss term."""

    def __init__(self, model):  # model must be de-paralleled
        """Create the two branch criteria: tal_topk=10 for the one2many branch, tal_topk=1 for one2one."""
        self.overlap = model.args.overlap_mask
        self.one2many = v8SegmentationLoss(model, tal_topk=10)
        self.one2one = v8SegmentationLoss(model, tal_topk=1)

    def __call__(self, preds, batch):
        """
        Compute the combined loss.

        Args:
            preds: Mapping with keys "one2many", "one2one", "coef" and "proto"; both branches receive the
                same "coef" and "proto" entries.
            batch: Passed through unchanged to both branch criteria.

        Returns:
            A pair: the sum of element 0 of both branch results, and the concatenation of element 1 of
            both branch results (one2many first).
        """
        branches = (("one2many", self.one2many), ("one2one", self.one2one))
        # Evaluated in order, so the one2many criterion always runs before the one2one criterion.
        results = [criterion([preds[key], preds["coef"], preds["proto"]], batch) for key, criterion in branches]
        many, one = results
        total = many[0] + one[0]
        items = torch.cat((many[1], one[1]))
        return total, items
--- a/ultralytics/utils/ops.py
+++ b/ultralytics/utils/ops.py
@ -851,7 +851,7 @@ def clean_str(s):
 def v10postprocess(preds, max_det, nc=80):
    assert(4 + nc == preds.shape[-1])
    boxes, scores = preds.split([4, nc], dim=-1)
-    max_scores = scores.amax(dim=-1)
+    max_scores = scores.amax(dim=-1)  
    max_scores, index = torch.topk(max_scores, max_det, dim=-1)
    index = index.unsqueeze(-1)
    boxes = torch.gather(boxes, dim=1, index=index.repeat(1, 1, boxes.shape[-1]))
@ -861,4 +861,26 @@ def v10postprocess(preds, max_det, nc=80):
    labels = index % nc
    index = index // nc
    boxes = boxes.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes.shape[-1]))
-    return boxes, scores, labels
+    return boxes, scores, labels
 def v10segpostprocess(preds, max_det, nc=80):
    """
    Select the top `max_det` (box, class) pairs and, when mask inputs are supplied, their masks.

    Args:
        preds: Either a tensor of shape (batch, anchors, 4 + nc), or a list/tuple
            `(preds, coef, proto)`. `coef` is indexed by anchor along dim 1 and multiplied with the
            flattened prototypes, so it must be laid out (batch, anchors, channels) with `channels`
            equal to `proto.shape[1]`; `proto` is (batch, channels, proto_h, proto_w).
        max_det: Number of detections kept per image.
        nc: Number of class scores in the last dimension of `preds`.

    Returns:
        (boxes, scores, labels, segmask): boxes (batch, max_det, 4), scores (batch, max_det),
        labels (batch, max_det) and segmask (batch, max_det, proto_h, proto_w). segmask is the plain
        coefficient-prototype product (no further processing is applied here) and is None when no
        coef/proto were passed in.
    """
    coef, proto = None, None
    if isinstance(preds, (list, tuple)):
        preds, coef, proto = preds
    assert 4 + nc == preds.shape[-1]
    boxes, scores = preds.split([4, nc], dim=-1)

    # Stage 1: keep the max_det anchors with the highest best-class score.
    _, anchor_idx = torch.topk(scores.amax(dim=-1), max_det, dim=-1)
    anchor_idx = anchor_idx.unsqueeze(-1)
    boxes = torch.gather(boxes, dim=1, index=anchor_idx.repeat(1, 1, boxes.shape[-1]))
    scores = torch.gather(scores, dim=1, index=anchor_idx.repeat(1, 1, scores.shape[-1]))
    if coef is not None:
        coef = torch.gather(coef, dim=1, index=anchor_idx.repeat(1, 1, coef.shape[-1]))

    # Stage 2: rank every (candidate, class) pair; one candidate may be selected for several classes.
    scores, flat_idx = torch.topk(scores.flatten(1), max_det, dim=-1)
    labels = flat_idx % nc
    cand_idx = (flat_idx // nc).unsqueeze(-1)
    boxes = boxes.gather(dim=1, index=cand_idx.repeat(1, 1, boxes.shape[-1]))

    segmask = None
    if coef is not None and proto is not None:
        # Re-order the coefficients with the same stage-2 index as the boxes so that mask k
        # belongs to detection k; building masks from the stage-1 order misaligns them.
        coef = coef.gather(dim=1, index=cand_idx.repeat(1, 1, coef.shape[-1]))
        batch, cc, proto_h, proto_w = proto.shape
        segmask = torch.matmul(coef, proto.reshape(batch, cc, -1)).reshape(batch, max_det, proto_h, proto_w)
    return boxes, scores, labels, segmask
--- a/ultralytics/utils/tal.py
+++ b/ultralytics/utils/tal.py
@ -301,6 +301,7 @@ def make_anchors(feats, strides, grid_cell_offset=0.5):
        sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset  # shift x
        sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset  # shift y
        sy, sx = torch.meshgrid(sy, sx, indexing="ij") if TORCH_1_10 else torch.meshgrid(sy, sx)
        # print("torch.stack((sx, sy), -1).view(-1, 2) = ",torch.stack((sx, sy), -1).view(-1, 2).shape)
        anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2))
        stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device))
    return torch.cat(anchor_points), torch.cat(stride_tensor)