ultralytics 8.0.217 HUB YAML path improvements (#6556)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
Glenn Jocher 2023-11-24 15:41:10 +01:00 committed by GitHub
parent 8f1c3f3d1e
commit 40a349bceb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 33 additions and 33 deletions

View File

@ -25,7 +25,6 @@ Without further ado, let's dive in!
- This guide assumes that annotation files are locally available. - This guide assumes that annotation files are locally available.
- For our demonstration, we use the [Fruit Detection](https://www.kaggle.com/datasets/lakshaytyagi01/fruit-detection/code) dataset. - For our demonstration, we use the [Fruit Detection](https://www.kaggle.com/datasets/lakshaytyagi01/fruit-detection/code) dataset.
- This dataset contains a total of 8479 images. - This dataset contains a total of 8479 images.
- It includes 6 class labels, each with its total instance counts listed below. - It includes 6 class labels, each with its total instance counts listed below.

View File

@ -140,7 +140,7 @@ Segment Anything Model का उपयोग उपस्थित डेटा
| SAM का सबसे छोटा, SAM-b | 358 MB | 94.7 M | 51096 ms/im | | SAM का सबसे छोटा, SAM-b | 358 MB | 94.7 M | 51096 ms/im |
| [मोबाइल SAM](mobile-sam.md) | 40.7 MB | 10.1 M | 46122 ms/im | | [मोबाइल SAM](mobile-sam.md) | 40.7 MB | 10.1 M | 46122 ms/im |
| [अग्री सेगमेंटेशन वाली FastSAM-s, YOLOv8 बैकबोन सहित](fast-sam.md) | 23.7 MB | 11.8 M | 115 ms/im | | [अग्री सेगमेंटेशन वाली FastSAM-s, YOLOv8 बैकबोन सहित](fast-sam.md) | 23.7 MB | 11.8 M | 115 ms/im |
| Ultralytics [योलोवी8न-seg](../टास्क/सेगमेंट.md) | **6.7 MB** (53.4 गुना छोटा) | **3.4 M** (27.9x कम) | **59 ms/im** (866x तेज) | | Ultralytics [योलोवी8न-seg](yolov8.md) | **6.7 MB** (53.4 गुना छोटा) | **3.4 M** (27.9x कम) | **59 ms/im** (866x तेज) |
यह तुलना मॉडल के आकार और गति में उल्लेखनीय अंतर दिखाती है। जहां SAM स्वचालित सेगमेंटेशन के लिए अद्वितीय क्षमताएं प्रस्तुत करता है, वहीं Ultralytics के सेगमेंटेशन मॉडल आकार, गति और संचालन क्षमता में कहीं अधिक कुशल हैं। यह तुलना मॉडल के आकार और गति में उल्लेखनीय अंतर दिखाती है। जहां SAM स्वचालित सेगमेंटेशन के लिए अद्वितीय क्षमताएं प्रस्तुत करता है, वहीं Ultralytics के सेगमेंटेशन मॉडल आकार, गति और संचालन क्षमता में कहीं अधिक कुशल हैं।

View File

@ -18,7 +18,7 @@ from PIL import Image, ImageOps
from ultralytics.nn.autobackend import check_class_names from ultralytics.nn.autobackend import check_class_names
from ultralytics.utils import (DATASETS_DIR, LOGGER, NUM_THREADS, ROOT, SETTINGS_YAML, TQDM, clean_url, colorstr, from ultralytics.utils import (DATASETS_DIR, LOGGER, NUM_THREADS, ROOT, SETTINGS_YAML, TQDM, clean_url, colorstr,
emojis, yaml_load) emojis, yaml_load, yaml_save)
from ultralytics.utils.checks import check_file, check_font, is_ascii from ultralytics.utils.checks import check_file, check_font, is_ascii
from ultralytics.utils.downloads import download, safe_download, unzip_file from ultralytics.utils.downloads import download, safe_download, unzip_file
from ultralytics.utils.ops import segments2boxes from ultralytics.utils.ops import segments2boxes
@ -250,28 +250,26 @@ def check_det_dataset(dataset, autodownload=True):
(dict): Parsed dataset information and paths. (dict): Parsed dataset information and paths.
""" """
data = check_file(dataset) file = check_file(dataset)
# Download (optional) # Download (optional)
extract_dir = '' extract_dir = ''
if isinstance(data, (str, Path)) and (zipfile.is_zipfile(data) or is_tarfile(data)): if zipfile.is_zipfile(file) or is_tarfile(file):
new_dir = safe_download(data, dir=DATASETS_DIR, unzip=True, delete=False) new_dir = safe_download(file, dir=DATASETS_DIR, unzip=True, delete=False)
data = find_dataset_yaml(DATASETS_DIR / new_dir) file = find_dataset_yaml(DATASETS_DIR / new_dir)
extract_dir, autodownload = data.parent, False extract_dir, autodownload = file.parent, False
# Read YAML (optional) # Read YAML
if isinstance(data, (str, Path)): data = yaml_load(file, append_filename=True) # dictionary
data = yaml_load(data, append_filename=True) # dictionary
# Checks # Checks
for k in 'train', 'val': for k in 'train', 'val':
if k not in data: if k not in data:
if k == 'val' and 'validation' in data: if k != 'val' or 'validation' not in data:
LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.")
data['val'] = data.pop('validation') # replace 'validation' key with 'val' key
else:
raise SyntaxError( raise SyntaxError(
emojis(f"{dataset} '{k}:' key missing ❌.\n'train' and 'val' are required in all data YAMLs.")) emojis(f"{dataset} '{k}:' key missing ❌.\n'train' and 'val' are required in all data YAMLs."))
LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.")
data['val'] = data.pop('validation') # replace 'validation' key with 'val' key
if 'names' not in data and 'nc' not in data: if 'names' not in data and 'nc' not in data:
raise SyntaxError(emojis(f"{dataset} key missing ❌.\n either 'names' or 'nc' are required in all data YAMLs.")) raise SyntaxError(emojis(f"{dataset} key missing ❌.\n either 'names' or 'nc' are required in all data YAMLs."))
if 'names' in data and 'nc' in data and len(data['names']) != data['nc']: if 'names' in data and 'nc' in data and len(data['names']) != data['nc']:
@ -285,9 +283,10 @@ def check_det_dataset(dataset, autodownload=True):
# Resolve paths # Resolve paths
path = Path(extract_dir or data.get('path') or Path(data.get('yaml_file', '')).parent) # dataset root path = Path(extract_dir or data.get('path') or Path(data.get('yaml_file', '')).parent) # dataset root
if not path.is_absolute(): if not path.is_absolute():
path = (DATASETS_DIR / path).resolve() path = (DATASETS_DIR / path).resolve()
# Set paths
data['path'] = path # download scripts data['path'] = path # download scripts
for k in 'train', 'val', 'test': for k in 'train', 'val', 'test':
if data.get(k): # prepend path if data.get(k): # prepend path
@ -404,7 +403,7 @@ class HUBDatasetStats:
A class for generating HUB dataset JSON and `-hub` dataset directory. A class for generating HUB dataset JSON and `-hub` dataset directory.
Args: Args:
path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco128.yaml'. path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco8.yaml'.
task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'. task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'.
autodownload (bool): Attempt to download dataset if not found locally. Default is False. autodownload (bool): Attempt to download dataset if not found locally. Default is False.
@ -424,7 +423,7 @@ class HUBDatasetStats:
``` ```
""" """
def __init__(self, path='coco128.yaml', task='detect', autodownload=False): def __init__(self, path='coco8.yaml', task='detect', autodownload=False):
"""Initialize class.""" """Initialize class."""
path = Path(path).resolve() path = Path(path).resolve()
LOGGER.info(f'Starting HUB dataset checks for {path}....') LOGGER.info(f'Starting HUB dataset checks for {path}....')
@ -437,10 +436,12 @@ class HUBDatasetStats:
else: # detect, segment, pose else: # detect, segment, pose
zipped, data_dir, yaml_path = self._unzip(Path(path)) zipped, data_dir, yaml_path = self._unzip(Path(path))
try: try:
# data = yaml_load(check_yaml(yaml_path)) # data dict # Load YAML with checks
data = check_det_dataset(yaml_path, autodownload) # data dict data = yaml_load(yaml_path)
if zipped: data['path'] = '' # strip path since YAML should be in dataset root for all HUB datasets
data['path'] = data_dir yaml_save(yaml_path, data)
data = check_det_dataset(yaml_path, autodownload) # dict
data['path'] = data_dir # YAML path should be set to '' (relative) or parent (absolute)
except Exception as e: except Exception as e:
raise Exception('error/HUB/dataset_stats/init') from e raise Exception('error/HUB/dataset_stats/init') from e