ultralytics 8.0.166 expand HUBDatasetStats to Classify tasks (#4635)

This commit is contained in:
Glenn Jocher 2023-08-29 16:17:59 +02:00 committed by GitHub
parent 896da0c0a0
commit 52afadcb97
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 67 additions and 37 deletions

View File

@ -133,7 +133,7 @@ jobs:
run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-seg.pt' imgsz=160 verbose=0.30 run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-seg.pt' imgsz=160 verbose=0.30
- name: Benchmark ClassificationModel - name: Benchmark ClassificationModel
shell: bash shell: bash
run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-cls.pt' imgsz=160 verbose=0.36 run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-cls.pt' imgsz=160 verbose=0.16
- name: Benchmark PoseModel - name: Benchmark PoseModel
shell: bash shell: bash
run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-pose.pt' imgsz=160 verbose=0.17 run: coverage run -a --source=ultralytics -m ultralytics.cfg.__init__ benchmark model='path with spaces/${{ matrix.model }}-pose.pt' imgsz=160 verbose=0.17

View File

@ -277,7 +277,7 @@ def test_data_utils():
# from ultralytics.utils.files import WorkingDirectory # from ultralytics.utils.files import WorkingDirectory
# with WorkingDirectory(ROOT.parent / 'tests'): # with WorkingDirectory(ROOT.parent / 'tests'):
for task in 'detect', 'segment', 'pose': for task in 'detect', 'segment', 'pose', 'classify':
file = Path(TASK2DATA[task]).with_suffix('.zip') # i.e. coco8.zip file = Path(TASK2DATA[task]).with_suffix('.zip') # i.e. coco8.zip
download(f'https://github.com/ultralytics/hub/raw/main/example_datasets/{file}', unzip=False, dir=TMP) download(f'https://github.com/ultralytics/hub/raw/main/example_datasets/{file}', unzip=False, dir=TMP)
stats = HUBDatasetStats(TMP / file, task=task) stats = HUBDatasetStats(TMP / file, task=task)

View File

@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license # Ultralytics YOLO 🚀, AGPL-3.0 license
__version__ = '8.0.165' __version__ = '8.0.166'
from ultralytics.models import RTDETR, SAM, YOLO from ultralytics.models import RTDETR, SAM, YOLO
from ultralytics.models.fastsam import FastSAM from ultralytics.models.fastsam import FastSAM

View File

@ -15,7 +15,7 @@ from ultralytics.utils import (ASSETS, DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CF
# Define valid tasks and modes # Define valid tasks and modes
MODES = 'train', 'val', 'predict', 'export', 'track', 'benchmark' MODES = 'train', 'val', 'predict', 'export', 'track', 'benchmark'
TASKS = 'detect', 'segment', 'classify', 'pose' TASKS = 'detect', 'segment', 'classify', 'pose'
TASK2DATA = {'detect': 'coco8.yaml', 'segment': 'coco8-seg.yaml', 'classify': 'imagenet100', 'pose': 'coco8-pose.yaml'} TASK2DATA = {'detect': 'coco8.yaml', 'segment': 'coco8-seg.yaml', 'classify': 'imagenet10', 'pose': 'coco8-pose.yaml'}
TASK2MODEL = { TASK2MODEL = {
'detect': 'yolov8n.pt', 'detect': 'yolov8n.pt',
'segment': 'yolov8n-seg.pt', 'segment': 'yolov8n-seg.pt',

View File

@ -296,7 +296,7 @@ def check_det_dataset(dataset, autodownload=True):
return data # dictionary return data # dictionary
def check_cls_dataset(dataset: str, split=''): def check_cls_dataset(dataset, split=''):
""" """
Checks a classification dataset such as Imagenet. Checks a classification dataset such as Imagenet.
@ -304,7 +304,7 @@ def check_cls_dataset(dataset: str, split=''):
If the dataset is not found locally, it attempts to download the dataset from the internet and save it locally. If the dataset is not found locally, it attempts to download the dataset from the internet and save it locally.
Args: Args:
dataset (str): The name of the dataset. dataset (str | Path): The name of the dataset.
split (str, optional): The split of the dataset. Either 'val', 'test', or ''. Defaults to ''. split (str, optional): The split of the dataset. Either 'val', 'test', or ''. Defaults to ''.
Returns: Returns:
@ -360,7 +360,7 @@ def check_cls_dataset(dataset: str, split=''):
else: else:
LOGGER.info(f'{prefix} found {nf} images in {nd} classes ✅ ') LOGGER.info(f'{prefix} found {nf} images in {nd} classes ✅ ')
return {'train': train_set, 'val': val_set or test_set, 'test': test_set or val_set, 'nc': nc, 'names': names} return {'train': train_set, 'val': val_set, 'test': test_set, 'nc': nc, 'names': names}
class HUBDatasetStats: class HUBDatasetStats:
@ -373,14 +373,17 @@ class HUBDatasetStats:
autodownload (bool): Attempt to download dataset if not found locally. Default is False. autodownload (bool): Attempt to download dataset if not found locally. Default is False.
Example: Example:
Download *.zip files from i.e. https://github.com/ultralytics/hub/raw/main/example_datasets/coco8.zip. Download *.zip files from https://github.com/ultralytics/hub/tree/main/example_datasets
e.g. https://github.com/ultralytics/hub/raw/main/example_datasets/coco8.zip for coco8.zip.
```python ```python
from ultralytics.data.utils import HUBDatasetStats from ultralytics.data.utils import HUBDatasetStats
stats = HUBDatasetStats('path/to/coco8.zip', task='detect') # detect dataset stats = HUBDatasetStats('path/to/coco8.zip', task='detect') # detect dataset
stats = HUBDatasetStats('path/to/coco8-seg.zip', task='segment') # segment dataset stats = HUBDatasetStats('path/to/coco8-seg.zip', task='segment') # segment dataset
stats = HUBDatasetStats('path/to/coco8-pose.zip', task='pose') # pose dataset stats = HUBDatasetStats('path/to/coco8-pose.zip', task='pose') # pose dataset
stats.get_json(save=False) stats = HUBDatasetStats('path/to/imagenet10.zip', task='classify') # classification dataset
stats.get_json(save=True)
stats.process_images() stats.process_images()
``` ```
""" """
@ -389,21 +392,27 @@ class HUBDatasetStats:
"""Initialize class.""" """Initialize class."""
path = Path(path).resolve() path = Path(path).resolve()
LOGGER.info(f'Starting HUB dataset checks for {path}....') LOGGER.info(f'Starting HUB dataset checks for {path}....')
zipped, data_dir, yaml_path = self._unzip(path)
self.task = task # detect, segment, pose, classify
if self.task == 'classify':
unzip_dir = unzip_file(path)
data = check_cls_dataset(unzip_dir)
data['path'] = unzip_dir
else: # detect, segment, pose
zipped, data_dir, yaml_path = self._unzip(Path(path))
try: try:
# data = yaml_load(check_yaml(yaml_path)) # data dict # data = yaml_load(check_yaml(yaml_path)) # data dict
data = check_det_dataset(yaml_path, autodownload) # data dict data = check_det_dataset(yaml_path, autodownload) # data dict
if zipped: if zipped:
data['path'] = data_dir data['path'] = data_dir
except Exception as e: except Exception as e:
raise Exception('error/HUB/dataset_stats/yaml_load') from e raise Exception('error/HUB/dataset_stats/init') from e
self.hub_dir = Path(str(data['path']) + '-hub') self.hub_dir = Path(f'{data["path"]}-hub')
self.im_dir = self.hub_dir / 'images' self.im_dir = self.hub_dir / 'images'
self.im_dir.mkdir(parents=True, exist_ok=True) # makes /images self.im_dir.mkdir(parents=True, exist_ok=True) # makes /images
self.stats = {'nc': len(data['names']), 'names': list(data['names'].values())} # statistics dictionary self.stats = {'nc': len(data['names']), 'names': list(data['names'].values())} # statistics dictionary
self.data = data self.data = data
self.task = task # detect, segment, pose, classify
@staticmethod @staticmethod
def _find_yaml(dir): def _find_yaml(dir):
@ -430,7 +439,6 @@ class HUBDatasetStats:
def get_json(self, save=False, verbose=False): def get_json(self, save=False, verbose=False):
"""Return dataset JSON for Ultralytics HUB.""" """Return dataset JSON for Ultralytics HUB."""
from ultralytics.data import YOLODataset # ClassificationDataset
def _round(labels): def _round(labels):
"""Update labels to integer class and 4 decimal place floats.""" """Update labels to integer class and 4 decimal place floats."""
@ -458,6 +466,28 @@ class HUBDatasetStats:
continue continue
# Get dataset statistics # Get dataset statistics
if self.task == 'classify':
from torchvision.datasets import ImageFolder
dataset = ImageFolder(self.data[split])
x = np.zeros(len(dataset.classes)).astype(int)
for im in dataset.imgs:
x[im[1]] += 1
self.stats[split] = {
'instance_stats': {
'total': len(dataset),
'per_class': x.tolist()},
'image_stats': {
'total': len(dataset),
'unlabelled': 0,
'per_class': x.tolist()},
'labels': [{
Path(k).name: v} for k, v in dataset.imgs]}
else:
from ultralytics.data import YOLODataset
dataset = YOLODataset(img_path=self.data[split], dataset = YOLODataset(img_path=self.data[split],
data=self.data, data=self.data,
use_segments=self.task == 'segment', use_segments=self.task == 'segment',