ultralytics 8.0.217 HUB YAML path improvements (#6556)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
2026-01-01 11:46:11 +08:00 · 2023-11-24 15:41:10 +01:00 · 2023-11-24 15:41:10 +01:00 · 40a349bceb
commit 40a349bceb
parent 8f1c3f3d1e
3 changed files with 33 additions and 33 deletions
--- a/docs/en/guides/kfold-cross-validation.md
+++ b/docs/en/guides/kfold-cross-validation.md
@ -25,7 +25,6 @@ Without further ado, let's dive in!
 - This guide assumes that annotation files are locally available.
 - For our demonstration, we use the [Fruit Detection](https://www.kaggle.com/datasets/lakshaytyagi01/fruit-detection/code) dataset.
    - This dataset contains a total of 8479 images.
    - It includes 6 class labels, each with its total instance counts listed below.
--- a/docs/hi/models/sam.md
+++ b/docs/hi/models/sam.md
@ -140,7 +140,7 @@ Segment Anything Model का उपयोग उपस्थित डेटा
 | SAM का सबसे छोटा, SAM-b                                            | 358 MB                      | 94.7 M               | 51096 ms/im             |
 | [मोबाइल SAM](mobile-sam.md)                                        | 40.7 MB                     | 10.1 M               | 46122 ms/im             |
 | [अग्री सेगमेंटेशन वाली FastSAM-s, YOLOv8 बैकबोन सहित](fast-sam.md) | 23.7 MB                     | 11.8 M               | 115 ms/im               |
-| Ultralytics [योलोवी8न-seg](../टास्क/सेगमेंट.md)                    | **6.7 MB** (53.4 गुना छोटा) | **3.4 M** (27.9x कम) | **59 ms/im** (866x तेज) |
+| Ultralytics [योलोवी8न-seg](yolov8.md)                              | **6.7 MB** (53.4 गुना छोटा) | **3.4 M** (27.9x कम) | **59 ms/im** (866x तेज) |
 यह तुलना मॉडल के आकार और गति में दस्तावेजीय अंतर दिखाती है। जहां SAM स्वचालित सेगमेंटेशन के लिए अद्वितीय क्षमताओं को प्रस्तुत करता है, वहीं Ultralytics विद्यमान सेगमेंटेशन मानदंडों के तुलनात्मक आकार, गति और संचालन क्षमता में समर्थन प्रदान करती है।
--- a/ultralytics/data/utils.py
+++ b/ultralytics/data/utils.py
@ -18,7 +18,7 @@ from PIL import Image, ImageOps
 from ultralytics.nn.autobackend import check_class_names
 from ultralytics.utils import (DATASETS_DIR, LOGGER, NUM_THREADS, ROOT, SETTINGS_YAML, TQDM, clean_url, colorstr,
-                               emojis, yaml_load)
+                               emojis, yaml_load, yaml_save)
 from ultralytics.utils.checks import check_file, check_font, is_ascii
 from ultralytics.utils.downloads import download, safe_download, unzip_file
 from ultralytics.utils.ops import segments2boxes
@ -250,28 +250,26 @@ def check_det_dataset(dataset, autodownload=True):
        (dict): Parsed dataset information and paths.
    """
-    data = check_file(dataset)
+    file = check_file(dataset)
    # Download (optional)
    extract_dir = ''
-    if isinstance(data, (str, Path)) and (zipfile.is_zipfile(data) or is_tarfile(data)):
+    if zipfile.is_zipfile(file) or is_tarfile(file):
-        new_dir = safe_download(data, dir=DATASETS_DIR, unzip=True, delete=False)
+        new_dir = safe_download(file, dir=DATASETS_DIR, unzip=True, delete=False)
-        data = find_dataset_yaml(DATASETS_DIR / new_dir)
+        file = find_dataset_yaml(DATASETS_DIR / new_dir)
-        extract_dir, autodownload = data.parent, False
+        extract_dir, autodownload = file.parent, False
-    # Read YAML (optional)
+    # Read YAML
-    if isinstance(data, (str, Path)):
+    data = yaml_load(file, append_filename=True)  # dictionary
        data = yaml_load(data, append_filename=True)  # dictionary
    # Checks
    for k in 'train', 'val':
        if k not in data:
-            if k == 'val' and 'validation' in data:
+            if k != 'val' or 'validation' not in data:
                LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.")
                data['val'] = data.pop('validation')  # replace 'validation' key with 'val' key
            else:
                raise SyntaxError(
                    emojis(f"{dataset} '{k}:' key missing ❌.\n'train' and 'val' are required in all data YAMLs."))
            LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.")
            data['val'] = data.pop('validation')  # replace 'validation' key with 'val' key
    if 'names' not in data and 'nc' not in data:
        raise SyntaxError(emojis(f"{dataset} key missing ❌.\n either 'names' or 'nc' are required in all data YAMLs."))
    if 'names' in data and 'nc' in data and len(data['names']) != data['nc']:
@ -285,9 +283,10 @@ def check_det_dataset(dataset, autodownload=True):
    # Resolve paths
    path = Path(extract_dir or data.get('path') or Path(data.get('yaml_file', '')).parent)  # dataset root
    if not path.is_absolute():
        path = (DATASETS_DIR / path).resolve()
    # Set paths
    data['path'] = path  # download scripts
    for k in 'train', 'val', 'test':
        if data.get(k):  # prepend path
@ -404,7 +403,7 @@ class HUBDatasetStats:
    A class for generating HUB dataset JSON and `-hub` dataset directory.
    Args:
-        path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco128.yaml'.
+        path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco8.yaml'.
        task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'.
        autodownload (bool): Attempt to download dataset if not found locally. Default is False.
@ -424,7 +423,7 @@ class HUBDatasetStats:
        ```
    """
-    def __init__(self, path='coco128.yaml', task='detect', autodownload=False):
+    def __init__(self, path='coco8.yaml', task='detect', autodownload=False):
        """Initialize class."""
        path = Path(path).resolve()
        LOGGER.info(f'Starting HUB dataset checks for {path}....')
@ -437,10 +436,12 @@ class HUBDatasetStats:
        else:  # detect, segment, pose
            zipped, data_dir, yaml_path = self._unzip(Path(path))
            try:
-                # data = yaml_load(check_yaml(yaml_path))  # data dict
+                # Load YAML with checks
-                data = check_det_dataset(yaml_path, autodownload)  # data dict
+                data = yaml_load(yaml_path)
-                if zipped:
+                data['path'] = ''  # strip path since YAML should be in dataset root for all HUB datasets
-                    data['path'] = data_dir
+                yaml_save(yaml_path, data)
                data = check_det_dataset(yaml_path, autodownload)  # dict
                data['path'] = data_dir  # YAML path should be set to '' (relative) or parent (absolute)
            except Exception as e:
                raise Exception('error/HUB/dataset_stats/init') from e