From 40a349bceb25ea7da19e734d369d1769288d5e22 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Fri, 24 Nov 2023 15:41:10 +0100 Subject: [PATCH] `ultralytics 8.0.217` HUB YAML `path` improvements (#6556) Signed-off-by: Glenn Jocher --- docs/en/guides/kfold-cross-validation.md | 23 +++++++------ docs/hi/models/sam.md | 2 +- ultralytics/data/utils.py | 41 ++++++++++++------------ 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/docs/en/guides/kfold-cross-validation.md b/docs/en/guides/kfold-cross-validation.md index 1510e975..400c420e 100644 --- a/docs/en/guides/kfold-cross-validation.md +++ b/docs/en/guides/kfold-cross-validation.md @@ -24,19 +24,18 @@ Without further ado, let's dive in! - This guide assumes that annotation files are locally available. - - For our demonstration, we use the [Fruit Detection](https://www.kaggle.com/datasets/lakshaytyagi01/fruit-detection/code) dataset. +- For our demonstration, we use the [Fruit Detection](https://www.kaggle.com/datasets/lakshaytyagi01/fruit-detection/code) dataset. + - This dataset contains a total of 8479 images. + - It includes 6 class labels, each with its total instance counts listed below. - - This dataset contains a total of 8479 images. - - It includes 6 class labels, each with its total instance counts listed below. - - | Class Label | Instance Count | - |:------------|:--------------:| - | Apple | 7049 | - | Grapes | 7202 | - | Pineapple | 1613 | - | Orange | 15549 | - | Banana | 3536 | - | Watermelon | 1976 | +| Class Label | Instance Count | +|:------------|:--------------:| +| Apple | 7049 | +| Grapes | 7202 | +| Pineapple | 1613 | +| Orange | 15549 | +| Banana | 3536 | +| Watermelon | 1976 | - Necessary Python packages include: diff --git a/docs/hi/models/sam.md b/docs/hi/models/sam.md index ceecf286..80571787 100644 --- a/docs/hi/models/sam.md +++ b/docs/hi/models/sam.md @@ -140,7 +140,7 @@ Segment Anything Model का उपयोग उपस्थित डेटा | SAM का सबसे छोटा, SAM-b | 358 MB | 94.7 M | 51096 ms/im | | [मोबाइल SAM](mobile-sam.md) | 40.7 MB | 10.1 M | 46122 ms/im | | [अग्री सेगमेंटेशन वाली FastSAM-s, YOLOv8 बैकबोन सहित](fast-sam.md) | 23.7 MB | 11.8 M | 115 ms/im | -| Ultralytics [योलोवी8न-seg](../टास्क/सेगमेंट.md) | **6.7 MB** (53.4 गुना छोटा) | **3.4 M** (27.9x कम) | **59 ms/im** (866x तेज) | +| Ultralytics [योलोवी8न-seg](yolov8.md) | **6.7 MB** (53.4 गुना छोटा) | **3.4 M** (27.9x कम) | **59 ms/im** (866x तेज) | यह तुलना मॉडल के आकार और गति में दस्तावेजीय अंतर दिखाती है। जहां SAM स्वचालित सेगमेंटेशन के लिए अद्वितीय क्षमताओं को प्रस्तुत करता है, वहीं Ultralytics विद्यमान सेगमेंटेशन मानदंडों के तुलनात्मक आकार, गति और संचालन क्षमता में समर्थन प्रदान करती है। diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py index aa0e14e5..a6295c71 100644 --- a/ultralytics/data/utils.py +++ b/ultralytics/data/utils.py @@ -18,7 +18,7 @@ from PIL import Image, ImageOps from ultralytics.nn.autobackend import check_class_names from ultralytics.utils import (DATASETS_DIR, LOGGER, NUM_THREADS, ROOT, SETTINGS_YAML, TQDM, clean_url, colorstr, - emojis, yaml_load) + emojis, yaml_load, yaml_save) from ultralytics.utils.checks import check_file, check_font, is_ascii from ultralytics.utils.downloads import download, safe_download, unzip_file from ultralytics.utils.ops import segments2boxes @@ -250,28 +250,26 @@ def check_det_dataset(dataset, autodownload=True): (dict): Parsed dataset information and paths. """ - data = check_file(dataset) + file = check_file(dataset) # Download (optional) extract_dir = '' - if isinstance(data, (str, Path)) and (zipfile.is_zipfile(data) or is_tarfile(data)): - new_dir = safe_download(data, dir=DATASETS_DIR, unzip=True, delete=False) - data = find_dataset_yaml(DATASETS_DIR / new_dir) - extract_dir, autodownload = data.parent, False + if zipfile.is_zipfile(file) or is_tarfile(file): + new_dir = safe_download(file, dir=DATASETS_DIR, unzip=True, delete=False) + file = find_dataset_yaml(DATASETS_DIR / new_dir) + extract_dir, autodownload = file.parent, False - # Read YAML (optional) - if isinstance(data, (str, Path)): - data = yaml_load(data, append_filename=True) # dictionary + # Read YAML + data = yaml_load(file, append_filename=True) # dictionary # Checks for k in 'train', 'val': if k not in data: - if k == 'val' and 'validation' in data: - LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.") - data['val'] = data.pop('validation') # replace 'validation' key with 'val' key - else: + if k != 'val' or 'validation' not in data: raise SyntaxError( emojis(f"{dataset} '{k}:' key missing ❌.\n'train' and 'val' are required in all data YAMLs.")) + LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.") + data['val'] = data.pop('validation') # replace 'validation' key with 'val' key if 'names' not in data and 'nc' not in data: raise SyntaxError(emojis(f"{dataset} key missing ❌.\n either 'names' or 'nc' are required in all data YAMLs.")) if 'names' in data and 'nc' in data and len(data['names']) != data['nc']: @@ -285,9 +283,10 @@ def check_det_dataset(dataset, autodownload=True): # Resolve paths path = Path(extract_dir or data.get('path') or Path(data.get('yaml_file', '')).parent) # dataset root - if not path.is_absolute(): path = (DATASETS_DIR / path).resolve() + + # Set paths data['path'] = path # download scripts for k in 'train', 'val', 'test': if data.get(k): # prepend path @@ -404,7 +403,7 @@ class HUBDatasetStats: A class for generating HUB dataset JSON and `-hub` dataset directory. Args: - path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco128.yaml'. + path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco8.yaml'. task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'. autodownload (bool): Attempt to download dataset if not found locally. Default is False. @@ -424,7 +423,7 @@ class HUBDatasetStats: ``` """ - def __init__(self, path='coco128.yaml', task='detect', autodownload=False): + def __init__(self, path='coco8.yaml', task='detect', autodownload=False): """Initialize class.""" path = Path(path).resolve() LOGGER.info(f'Starting HUB dataset checks for {path}....') @@ -437,10 +436,12 @@ class HUBDatasetStats: else: # detect, segment, pose zipped, data_dir, yaml_path = self._unzip(Path(path)) try: - # data = yaml_load(check_yaml(yaml_path)) # data dict - data = check_det_dataset(yaml_path, autodownload) # data dict - if zipped: - data['path'] = data_dir + # Load YAML with checks + data = yaml_load(yaml_path) + data['path'] = '' # strip path since YAML should be in dataset root for all HUB datasets + yaml_save(yaml_path, data) + data = check_det_dataset(yaml_path, autodownload) # dict + data['path'] = data_dir # YAML path should be set to '' (relative) or parent (absolute) except Exception as e: raise Exception('error/HUB/dataset_stats/init') from e