ultralytics 8.0.217 HUB YAML path improvements (#6556)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
Glenn Jocher 2023-11-24 15:41:10 +01:00 committed by GitHub
parent 8f1c3f3d1e
commit 40a349bceb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 33 additions and 33 deletions

View File

@ -25,7 +25,6 @@ Without further ado, let's dive in!
- This guide assumes that annotation files are locally available.
- For our demonstration, we use the [Fruit Detection](https://www.kaggle.com/datasets/lakshaytyagi01/fruit-detection/code) dataset.
- This dataset contains a total of 8479 images.
- It includes 6 class labels, each with its total instance count listed below.

View File

@ -140,7 +140,7 @@ Using the Segment Anything Model with existing data
| SAM's smallest, SAM-b | 358 MB | 94.7 M | 51096 ms/im |
| [MobileSAM](mobile-sam.md) | 40.7 MB | 10.1 M | 46122 ms/im |
| [FastSAM-s with YOLOv8 backbone](fast-sam.md) | 23.7 MB | 11.8 M | 115 ms/im |
| Ultralytics [YOLOv8n-seg](../टास्क/सेगमेंट.md) | **6.7 MB** (53.4x smaller) | **3.4 M** (27.9x fewer) | **59 ms/im** (866x faster) |
| Ultralytics [YOLOv8n-seg](yolov8.md) | **6.7 MB** (53.4x smaller) | **3.4 M** (27.9x fewer) | **59 ms/im** (866x faster) |
This comparison shows the substantial differences in model size and speed. While SAM brings unique capabilities for automatic segmentation, Ultralytics segmentation models are far smaller, faster, and more efficient to run than the existing segmentation benchmarks.
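A rough sketch of how the speed column of the comparison above could be reproduced with the ultralytics Python API. The weight names ('sam_b.pt', 'mobile_sam.pt', 'FastSAM-s.pt', 'yolov8n-seg.pt') and the sample image path are assumptions rather than values taken from this commit; the size and parameter figures come from the published checkpoints.

```python
# Sketch: time one inference per model; weight names and image path are placeholders.
from ultralytics import FastSAM, SAM, YOLO

models = {
    'SAM-b': SAM('sam_b.pt'),
    'MobileSAM': SAM('mobile_sam.pt'),
    'FastSAM-s': FastSAM('FastSAM-s.pt'),
    'YOLOv8n-seg': YOLO('yolov8n-seg.pt'),
}

for name, model in models.items():
    results = model('path/to/image.jpg')  # run segmentation on one sample image
    print(f"{name}: {results[0].speed['inference']:.1f} ms/im inference")
```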

View File

@ -18,7 +18,7 @@ from PIL import Image, ImageOps
from ultralytics.nn.autobackend import check_class_names
from ultralytics.utils import (DATASETS_DIR, LOGGER, NUM_THREADS, ROOT, SETTINGS_YAML, TQDM, clean_url, colorstr,
emojis, yaml_load)
emojis, yaml_load, yaml_save)
from ultralytics.utils.checks import check_file, check_font, is_ascii
from ultralytics.utils.downloads import download, safe_download, unzip_file
from ultralytics.utils.ops import segments2boxes
@ -250,28 +250,26 @@ def check_det_dataset(dataset, autodownload=True):
(dict): Parsed dataset information and paths.
"""
data = check_file(dataset)
file = check_file(dataset)
# Download (optional)
extract_dir = ''
if isinstance(data, (str, Path)) and (zipfile.is_zipfile(data) or is_tarfile(data)):
new_dir = safe_download(data, dir=DATASETS_DIR, unzip=True, delete=False)
data = find_dataset_yaml(DATASETS_DIR / new_dir)
extract_dir, autodownload = data.parent, False
if zipfile.is_zipfile(file) or is_tarfile(file):
new_dir = safe_download(file, dir=DATASETS_DIR, unzip=True, delete=False)
file = find_dataset_yaml(DATASETS_DIR / new_dir)
extract_dir, autodownload = file.parent, False
# Read YAML (optional)
if isinstance(data, (str, Path)):
data = yaml_load(data, append_filename=True) # dictionary
# Read YAML
data = yaml_load(file, append_filename=True) # dictionary
# Checks
for k in 'train', 'val':
if k not in data:
if k == 'val' and 'validation' in data:
LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.")
data['val'] = data.pop('validation') # replace 'validation' key with 'val' key
else:
if k != 'val' or 'validation' not in data:
raise SyntaxError(
emojis(f"{dataset} '{k}:' key missing ❌.\n'train' and 'val' are required in all data YAMLs."))
LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.")
data['val'] = data.pop('validation') # replace 'validation' key with 'val' key
if 'names' not in data and 'nc' not in data:
raise SyntaxError(emojis(f"{dataset} key missing ❌.\n either 'names' or 'nc' are required in all data YAMLs."))
if 'names' in data and 'nc' in data and len(data['names']) != data['nc']:
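For context on the hunk above, here is a standalone, simplified sketch of the flow the new `check_det_dataset()` front-end follows: resolve the input file, extract zip/tar archives into the datasets directory, load the YAML, accept a 'validation' key as an alias for 'val', and require 'train'/'val' plus either 'names' or 'nc'. It uses only the standard library and PyYAML and is not the library implementation.

```python
# Simplified, standalone sketch of the check_det_dataset() front-end logic (not the library code).
import tarfile
import zipfile
from pathlib import Path

import yaml  # PyYAML stands in for ultralytics' yaml_load here

DATASETS_DIR = Path('datasets')  # placeholder for the configured datasets directory


def check_det_dataset_sketch(dataset: str) -> dict:
    file = Path(dataset)

    # Download/extract (optional): archives are unpacked and the YAML inside is used
    if zipfile.is_zipfile(file) or tarfile.is_tarfile(file):
        out_dir = DATASETS_DIR / file.stem
        if zipfile.is_zipfile(file):
            zipfile.ZipFile(file).extractall(out_dir)
        else:
            tarfile.open(file).extractall(out_dir)
        candidates = sorted(out_dir.rglob('*.yaml')) or sorted(out_dir.rglob('*.yml'))
        file = candidates[0]  # take the first dataset YAML found in the extracted tree

    # Read YAML
    data = yaml.safe_load(file.read_text())

    # Checks: 'train' and 'val' are required; 'validation' is accepted and renamed to 'val'
    for k in ('train', 'val'):
        if k not in data:
            if k != 'val' or 'validation' not in data:
                raise SyntaxError(f"{dataset} '{k}:' key missing; 'train' and 'val' are required")
            data['val'] = data.pop('validation')  # rename 'validation' -> 'val'
    if 'names' not in data and 'nc' not in data:
        raise SyntaxError(f"{dataset} needs either 'names' or 'nc'")
    return data
```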
@ -285,9 +283,10 @@ def check_det_dataset(dataset, autodownload=True):
# Resolve paths
path = Path(extract_dir or data.get('path') or Path(data.get('yaml_file', '')).parent) # dataset root
if not path.is_absolute():
path = (DATASETS_DIR / path).resolve()
# Set paths
data['path'] = path # download scripts
for k in 'train', 'val', 'test':
if data.get(k): # prepend path
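A minimal standalone sketch of the path-resolution logic shown above, assuming a placeholder `DATASETS_DIR` (simplified, not the library code): the dataset root is taken from the extract directory, the YAML's 'path' key, or the YAML's parent folder, anchored under `DATASETS_DIR` when relative, and then prepended to the split entries.

```python
# Sketch of dataset-root resolution and split-path prefixing (not the library code).
from pathlib import Path

DATASETS_DIR = Path.home() / 'datasets'  # placeholder for the configured datasets directory


def resolve_paths(data: dict, extract_dir: str = '') -> dict:
    root = Path(extract_dir or data.get('path') or Path(data.get('yaml_file', '')).parent)
    if not root.is_absolute():
        root = (DATASETS_DIR / root).resolve()

    data['path'] = root  # dataset root used by download scripts and split resolution
    for k in ('train', 'val', 'test'):
        if data.get(k):  # prepend the root to each declared split
            if isinstance(data[k], str):
                data[k] = str(root / data[k])
            else:  # a list of split directories is also allowed
                data[k] = [str(root / x) for x in data[k]]
    return data
```

For example, with `extract_dir=''` and `data = {'path': 'coco8', 'train': 'images/train'}`, the train split resolves to an absolute path under `DATASETS_DIR/coco8`.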
@ -404,7 +403,7 @@ class HUBDatasetStats:
A class for generating HUB dataset JSON and `-hub` dataset directory.
Args:
path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco128.yaml'.
path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco8.yaml'.
task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'.
autodownload (bool): Attempt to download dataset if not found locally. Default is False.
@ -424,7 +423,7 @@ class HUBDatasetStats:
```
"""
def __init__(self, path='coco128.yaml', task='detect', autodownload=False):
def __init__(self, path='coco8.yaml', task='detect', autodownload=False):
"""Initialize class."""
path = Path(path).resolve()
LOGGER.info(f'Starting HUB dataset checks for {path}....')
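A short usage sketch for `HUBDatasetStats`, following its docstring above; the zip path is a placeholder, and `get_json()`/`process_images()` are the class's existing entry points (not shown in this hunk).

```python
# Usage sketch for HUBDatasetStats; the zip path is a placeholder.
from ultralytics.data.utils import HUBDatasetStats

stats = HUBDatasetStats('path/to/coco8.zip', task='detect')  # detection dataset packaged as a zip
stats.get_json(save=True)   # compute dataset statistics and write them as JSON
stats.process_images()      # generate compressed copies of the images for HUB
```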
@ -437,10 +436,12 @@ class HUBDatasetStats:
else: # detect, segment, pose
zipped, data_dir, yaml_path = self._unzip(Path(path))
try:
# data = yaml_load(check_yaml(yaml_path)) # data dict
data = check_det_dataset(yaml_path, autodownload) # data dict
if zipped:
data['path'] = data_dir
# Load YAML with checks
data = yaml_load(yaml_path)
data['path'] = '' # strip path since YAML should be in dataset root for all HUB datasets
yaml_save(yaml_path, data)
data = check_det_dataset(yaml_path, autodownload) # dict
data['path'] = data_dir # YAML path should be set to '' (relative) or parent (absolute)
except Exception as e:
raise Exception('error/HUB/dataset_stats/init') from e
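To illustrate the new path-stripping step above with example values (assumed, not from the commit): the YAML inside a HUB dataset zip sits at the dataset root, so any pre-existing 'path' value is cleared and the file re-saved before `check_det_dataset()` runs, and `data['path']` is then pointed at the unzipped directory.

```python
# Example values (assumed, not from the commit) showing the effect of the
# path-stripping step in HUBDatasetStats.__init__ above.
before = {'path': '../datasets/coco8', 'train': 'images/train', 'val': 'images/val', 'names': {0: 'person'}}

stripped = {**before, 'path': ''}  # what yaml_save(yaml_path, data) writes back to the YAML
# check_det_dataset() then resolves 'train'/'val' against the extracted dataset root,
# and data['path'] is finally set to data_dir so downstream code sees the real location.
print(stripped['path'])  # ''
```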