mirror of https://github.com/THU-MIG/yolov10.git
synced 2025-05-23 05:24:22 +08:00

Compare commits: ac85783c27...1bae90c82a

3 Commits:
- 1bae90c82a
- 1385285579
- 84128a569e
ultralytics/__init__.py (deleted)
@@ -1,27 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

__version__ = "8.1.34"

from ultralytics.data.explorer.explorer import Explorer
from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld, YOLOv10
from ultralytics.models.fastsam import FastSAM
from ultralytics.models.nas import NAS
from ultralytics.utils import ASSETS, SETTINGS as settings
from ultralytics.utils.checks import check_yolo as checks
from ultralytics.utils.downloads import download

__all__ = (
    "__version__",
    "ASSETS",
    "YOLO",
    "YOLOWorld",
    "NAS",
    "SAM",
    "FastSAM",
    "RTDETR",
    "checks",
    "download",
    "settings",
    "Explorer",
    "YOLOv10"
)
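For orientation, the deleted `__init__.py` above defined the package's public API. A minimal usage sketch of those exports, assuming the yolov10 fork is installed as `ultralytics` (the weights filename is illustrative, not taken from this diff):

```python
from ultralytics import YOLOv10, checks, settings

checks()                         # environment diagnostics via the check_yolo alias
print(settings)                  # SETTINGS re-exported as 'settings'
model = YOLOv10("yolov10n.pt")   # illustrative weights file, fetched separately
model.predict(source="image.jpg", imgsz=640)
```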
Binary file not shown. (Before: 134 KiB)
Binary file not shown. (Before: 49 KiB)
ultralytics/cfg/__init__.py (deleted)
@@ -1,613 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import contextlib
import os
import shutil
import subprocess
import sys
from pathlib import Path
from types import SimpleNamespace
from typing import Dict, List, Union
import re

from ultralytics.utils import (
    ASSETS,
    DEFAULT_CFG,
    DEFAULT_CFG_DICT,
    DEFAULT_CFG_PATH,
    LOGGER,
    RANK,
    ROOT,
    RUNS_DIR,
    SETTINGS,
    SETTINGS_YAML,
    TESTS_RUNNING,
    IterableSimpleNamespace,
    __version__,
    checks,
    colorstr,
    deprecation_warn,
    yaml_load,
    yaml_print,
)

# Define valid tasks and modes
MODES = {"train", "val", "predict", "export", "track", "benchmark"}
TASKS = {"detect", "segment", "classify", "pose", "obb"}
TASK2DATA = {
    "detect": "coco8.yaml",
    "segment": "coco8-seg.yaml",
    "classify": "imagenet10",
    "pose": "coco8-pose.yaml",
    "obb": "dota8.yaml",
}
TASK2MODEL = {
    "detect": "yolov8n.pt",
    "segment": "yolov8n-seg.pt",
    "classify": "yolov8n-cls.pt",
    "pose": "yolov8n-pose.pt",
    "obb": "yolov8n-obb.pt",
}
TASK2METRIC = {
    "detect": "metrics/mAP50-95(B)",
    "segment": "metrics/mAP50-95(M)",
    "classify": "metrics/accuracy_top1",
    "pose": "metrics/mAP50-95(P)",
    "obb": "metrics/mAP50-95(B)",
}

CLI_HELP_MSG = f"""
|
||||
Arguments received: {str(['yolo'] + sys.argv[1:])}. Ultralytics 'yolo' commands use the following syntax:
|
||||
|
||||
yolo TASK MODE ARGS
|
||||
|
||||
Where TASK (optional) is one of {TASKS}
|
||||
MODE (required) is one of {MODES}
|
||||
ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults.
|
||||
See all ARGS at https://docs.ultralytics.com/usage/cfg or with 'yolo cfg'
|
||||
|
||||
1. Train a detection model for 10 epochs with an initial learning_rate of 0.01
|
||||
yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01
|
||||
|
||||
2. Predict a YouTube video using a pretrained segmentation model at image size 320:
|
||||
yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320
|
||||
|
||||
3. Val a pretrained detection model at batch-size 1 and image size 640:
|
||||
yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640
|
||||
|
||||
4. Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required)
|
||||
yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128
|
||||
|
||||
6. Explore your datasets using semantic search and SQL with a simple GUI powered by Ultralytics Explorer API
|
||||
yolo explorer
|
||||
|
||||
5. Run special commands:
|
||||
yolo help
|
||||
yolo checks
|
||||
yolo version
|
||||
yolo settings
|
||||
yolo copy-cfg
|
||||
yolo cfg
|
||||
|
||||
Docs: https://docs.ultralytics.com
|
||||
Community: https://community.ultralytics.com
|
||||
GitHub: https://github.com/ultralytics/ultralytics
|
||||
"""
|
||||
|
||||
# Define keys for arg type checks
CFG_FLOAT_KEYS = {"warmup_epochs", "box", "cls", "dfl", "degrees", "shear", "time"}
CFG_FRACTION_KEYS = {
    "dropout",
    "iou",
    "lr0",
    "lrf",
    "momentum",
    "weight_decay",
    "warmup_momentum",
    "warmup_bias_lr",
    "label_smoothing",
    "hsv_h",
    "hsv_s",
    "hsv_v",
    "translate",
    "scale",
    "perspective",
    "flipud",
    "fliplr",
    "bgr",
    "mosaic",
    "mixup",
    "copy_paste",
    "conf",
    "iou",
    "fraction",
}  # fraction floats 0.0 - 1.0
CFG_INT_KEYS = {
    "epochs",
    "patience",
    "batch",
    "workers",
    "seed",
    "close_mosaic",
    "mask_ratio",
    "max_det",
    "vid_stride",
    "line_width",
    "workspace",
    "nbs",
    "save_period",
}
CFG_BOOL_KEYS = {
    "save",
    "exist_ok",
    "verbose",
    "deterministic",
    "single_cls",
    "rect",
    "cos_lr",
    "overlap_mask",
    "val",
    "save_json",
    "save_hybrid",
    "half",
    "dnn",
    "plots",
    "show",
    "save_txt",
    "save_conf",
    "save_crop",
    "save_frames",
    "show_labels",
    "show_conf",
    "visualize",
    "augment",
    "agnostic_nms",
    "retina_masks",
    "show_boxes",
    "keras",
    "optimize",
    "int8",
    "dynamic",
    "simplify",
    "nms",
    "profile",
    "multi_scale",
}


def cfg2dict(cfg):
    """
    Convert a configuration object to a dictionary, whether it is a file path, a string, or a SimpleNamespace object.

    Args:
        cfg (str | Path | dict | SimpleNamespace): Configuration object to be converted to a dictionary.

    Returns:
        cfg (dict): Configuration object in dictionary format.
    """
    if isinstance(cfg, (str, Path)):
        cfg = yaml_load(cfg)  # load dict
    elif isinstance(cfg, SimpleNamespace):
        cfg = vars(cfg)  # convert to dict
    return cfg


def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, overrides: Dict = None):
    """
    Load and merge configuration data from a file or dictionary.

    Args:
        cfg (str | Path | Dict | SimpleNamespace): Configuration data.
        overrides (str | Dict, optional): Overrides in the form of a file name or a dictionary. Default is None.

    Returns:
        (SimpleNamespace): Training arguments namespace.
    """
    cfg = cfg2dict(cfg)

    # Merge overrides
    if overrides:
        overrides = cfg2dict(overrides)
        if "save_dir" not in cfg:
            overrides.pop("save_dir", None)  # special override keys to ignore
        check_dict_alignment(cfg, overrides)
        cfg = {**cfg, **overrides}  # merge cfg and overrides dicts (prefer overrides)

    # Special handling for numeric project/name
    for k in "project", "name":
        if k in cfg and isinstance(cfg[k], (int, float)):
            cfg[k] = str(cfg[k])
    if cfg.get("name") == "model":  # assign model to 'name' arg
        cfg["name"] = cfg.get("model", "").split(".")[0]
        LOGGER.warning(f"WARNING ⚠️ 'name=model' automatically updated to 'name={cfg['name']}'.")

    # Type and Value checks
    check_cfg(cfg)

    # Return instance
    return IterableSimpleNamespace(**cfg)


def check_cfg(cfg, hard=True):
    """Check Ultralytics configuration argument types and values."""
    for k, v in cfg.items():
        if v is not None:  # None values may be from optional args
            if k in CFG_FLOAT_KEYS and not isinstance(v, (int, float)):
                if hard:
                    raise TypeError(
                        f"'{k}={v}' is of invalid type {type(v).__name__}. "
                        f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')"
                    )
                cfg[k] = float(v)
            elif k in CFG_FRACTION_KEYS:
                if not isinstance(v, (int, float)):
                    if hard:
                        raise TypeError(
                            f"'{k}={v}' is of invalid type {type(v).__name__}. "
                            f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')"
                        )
                    cfg[k] = v = float(v)
                if not (0.0 <= v <= 1.0):
                    raise ValueError(f"'{k}={v}' is an invalid value. " f"Valid '{k}' values are between 0.0 and 1.0.")
            elif k in CFG_INT_KEYS and not isinstance(v, int):
                if hard:
                    raise TypeError(
                        f"'{k}={v}' is of invalid type {type(v).__name__}. " f"'{k}' must be an int (i.e. '{k}=8')"
                    )
                cfg[k] = int(v)
            elif k in CFG_BOOL_KEYS and not isinstance(v, bool):
                if hard:
                    raise TypeError(
                        f"'{k}={v}' is of invalid type {type(v).__name__}. "
                        f"'{k}' must be a bool (i.e. '{k}=True' or '{k}=False')"
                    )
                cfg[k] = bool(v)


def get_save_dir(args, name=None):
    """Return save_dir as created from train/val/predict arguments."""

    if getattr(args, "save_dir", None):
        save_dir = args.save_dir
    else:
        from ultralytics.utils.files import increment_path

        project = args.project or (ROOT.parent / "tests/tmp/runs" if TESTS_RUNNING else RUNS_DIR) / args.task
        name = name or args.name or f"{args.mode}"
        save_dir = increment_path(Path(project) / name, exist_ok=args.exist_ok if RANK in (-1, 0) else True)

    return Path(save_dir)


def _handle_deprecation(custom):
    """Hardcoded function to handle deprecated config keys."""

    for key in custom.copy().keys():
        if key == "boxes":
            deprecation_warn(key, "show_boxes")
            custom["show_boxes"] = custom.pop("boxes")
        if key == "hide_labels":
            deprecation_warn(key, "show_labels")
            custom["show_labels"] = custom.pop("hide_labels") == "False"
        if key == "hide_conf":
            deprecation_warn(key, "show_conf")
            custom["show_conf"] = custom.pop("hide_conf") == "False"
        if key == "line_thickness":
            deprecation_warn(key, "line_width")
            custom["line_width"] = custom.pop("line_thickness")

    return custom


def check_dict_alignment(base: Dict, custom: Dict, e=None):
    """
    This function checks for any mismatched keys between a custom configuration list and a base configuration list. If
    any mismatched keys are found, the function prints out similar keys from the base list and exits the program.

    Args:
        custom (dict): a dictionary of custom configuration options
        base (dict): a dictionary of base configuration options
        e (Error, optional): An optional error that is passed by the calling function.
    """
    custom = _handle_deprecation(custom)
    base_keys, custom_keys = (set(x.keys()) for x in (base, custom))
    mismatched = [k for k in custom_keys if k not in base_keys]
    if mismatched:
        from difflib import get_close_matches

        string = ""
        for x in mismatched:
            matches = get_close_matches(x, base_keys)  # key list
            matches = [f"{k}={base[k]}" if base.get(k) is not None else k for k in matches]
            match_str = f"Similar arguments are i.e. {matches}." if matches else ""
            string += f"'{colorstr('red', 'bold', x)}' is not a valid YOLO argument. {match_str}\n"
        raise SyntaxError(string + CLI_HELP_MSG) from e


def merge_equals_args(args: List[str]) -> List[str]:
    """
    Merges arguments around isolated '=' args in a list of strings. The function considers cases where the first
    argument ends with '=' or the second starts with '=', as well as when the middle one is an equals sign.

    Args:
        args (List[str]): A list of strings where each element is an argument.

    Returns:
        (List[str]): A list of strings where the arguments around isolated '=' are merged.
    """
    new_args = []
    for i, arg in enumerate(args):
        if arg == "=" and 0 < i < len(args) - 1:  # merge ['arg', '=', 'val']
            new_args[-1] += f"={args[i + 1]}"
            del args[i + 1]
        elif arg.endswith("=") and i < len(args) - 1 and "=" not in args[i + 1]:  # merge ['arg=', 'val']
            new_args.append(f"{arg}{args[i + 1]}")
            del args[i + 1]
        elif arg.startswith("=") and i > 0:  # merge ['arg', '=val']
            new_args[-1] += arg
        else:
            new_args.append(arg)
    return new_args


def handle_yolo_hub(args: List[str]) -> None:
    """
    Handle Ultralytics HUB command-line interface (CLI) commands.

    This function processes Ultralytics HUB CLI commands such as login and logout.
    It should be called when executing a script with arguments related to HUB authentication.

    Args:
        args (List[str]): A list of command line arguments.

    Example:
        ```bash
        python my_script.py hub login your_api_key
        ```
    """
    from ultralytics import hub

    if args[0] == "login":
        key = args[1] if len(args) > 1 else ""
        # Log in to Ultralytics HUB using the provided API key
        hub.login(key)
    elif args[0] == "logout":
        # Log out from Ultralytics HUB
        hub.logout()


def handle_yolo_settings(args: List[str]) -> None:
    """
    Handle YOLO settings command-line interface (CLI) commands.

    This function processes YOLO settings CLI commands such as reset.
    It should be called when executing a script with arguments related to YOLO settings management.

    Args:
        args (List[str]): A list of command line arguments for YOLO settings management.

    Example:
        ```bash
        python my_script.py yolo settings reset
        ```
    """
    url = "https://docs.ultralytics.com/quickstart/#ultralytics-settings"  # help URL
    try:
        if any(args):
            if args[0] == "reset":
                SETTINGS_YAML.unlink()  # delete the settings file
                SETTINGS.reset()  # create new settings
                LOGGER.info("Settings reset successfully")  # inform the user that settings have been reset
            else:  # save a new setting
                new = dict(parse_key_value_pair(a) for a in args)
                check_dict_alignment(SETTINGS, new)
                SETTINGS.update(new)

        LOGGER.info(f"💡 Learn about settings at {url}")
        yaml_print(SETTINGS_YAML)  # print the current settings
    except Exception as e:
        LOGGER.warning(f"WARNING ⚠️ settings error: '{e}'. Please see {url} for help.")


def handle_explorer():
    """Open the Ultralytics Explorer GUI."""
    checks.check_requirements("streamlit")
    LOGGER.info("💡 Loading Explorer dashboard...")
    subprocess.run(["streamlit", "run", ROOT / "data/explorer/gui/dash.py", "--server.maxMessageSize", "2048"])


def parse_key_value_pair(pair):
    """Parse one 'key=value' pair and return key and value."""
    k, v = pair.split("=", 1)  # split on first '=' sign
    k, v = k.strip(), v.strip()  # remove spaces
    assert v, f"missing '{k}' value"
    return k, smart_value(v)


def smart_value(v):
    """Convert a string to an underlying type such as int, float, bool, etc."""
    v_lower = v.lower()
    if v_lower == "none":
        return None
    elif v_lower == "true":
        return True
    elif v_lower == "false":
        return False
    else:
        with contextlib.suppress(Exception):
            return eval(v)
        return v


def entrypoint(debug=""):
    """
    This function is the ultralytics package entrypoint, it's responsible for parsing the command line arguments passed
    to the package.

    This function allows for:
    - passing mandatory YOLO args as a list of strings
    - specifying the task to be performed, either 'detect', 'segment' or 'classify'
    - specifying the mode, either 'train', 'val', 'test', or 'predict'
    - running special modes like 'checks'
    - passing overrides to the package's configuration

    It uses the package's default cfg and initializes it using the passed overrides.
    Then it calls the CLI function with the composed cfg.
    """
    args = (debug.split(" ") if debug else sys.argv)[1:]
    if not args:  # no arguments passed
        LOGGER.info(CLI_HELP_MSG)
        return

    special = {
        "help": lambda: LOGGER.info(CLI_HELP_MSG),
        "checks": checks.collect_system_info,
        "version": lambda: LOGGER.info(__version__),
        "settings": lambda: handle_yolo_settings(args[1:]),
        "cfg": lambda: yaml_print(DEFAULT_CFG_PATH),
        "hub": lambda: handle_yolo_hub(args[1:]),
        "login": lambda: handle_yolo_hub(args),
        "copy-cfg": copy_default_cfg,
        "explorer": lambda: handle_explorer(),
    }
    full_args_dict = {**DEFAULT_CFG_DICT, **{k: None for k in TASKS}, **{k: None for k in MODES}, **special}

    # Define common misuses of special commands, i.e. -h, -help, --help
    special.update({k[0]: v for k, v in special.items()})  # singular
    special.update({k[:-1]: v for k, v in special.items() if len(k) > 1 and k.endswith("s")})  # singular
    special = {**special, **{f"-{k}": v for k, v in special.items()}, **{f"--{k}": v for k, v in special.items()}}

    overrides = {}  # basic overrides, i.e. imgsz=320
    for a in merge_equals_args(args):  # merge spaces around '=' sign
        if a.startswith("--"):
            LOGGER.warning(f"WARNING ⚠️ argument '{a}' does not require leading dashes '--', updating to '{a[2:]}'.")
            a = a[2:]
        if a.endswith(","):
            LOGGER.warning(f"WARNING ⚠️ argument '{a}' does not require trailing comma ',', updating to '{a[:-1]}'.")
            a = a[:-1]
        if "=" in a:
            try:
                k, v = parse_key_value_pair(a)
                if k == "cfg" and v is not None:  # custom.yaml passed
                    LOGGER.info(f"Overriding {DEFAULT_CFG_PATH} with {v}")
                    overrides = {k: val for k, val in yaml_load(checks.check_yaml(v)).items() if k != "cfg"}
                else:
                    overrides[k] = v
            except (NameError, SyntaxError, ValueError, AssertionError) as e:
                check_dict_alignment(full_args_dict, {a: ""}, e)

        elif a in TASKS:
            overrides["task"] = a
        elif a in MODES:
            overrides["mode"] = a
        elif a.lower() in special:
            special[a.lower()]()
            return
        elif a in DEFAULT_CFG_DICT and isinstance(DEFAULT_CFG_DICT[a], bool):
            overrides[a] = True  # auto-True for default bool args, i.e. 'yolo show' sets show=True
        elif a in DEFAULT_CFG_DICT:
            raise SyntaxError(
                f"'{colorstr('red', 'bold', a)}' is a valid YOLO argument but is missing an '=' sign "
                f"to set its value, i.e. try '{a}={DEFAULT_CFG_DICT[a]}'\n{CLI_HELP_MSG}"
            )
        else:
            check_dict_alignment(full_args_dict, {a: ""})

    # Check keys
    check_dict_alignment(full_args_dict, overrides)

    # Mode
    mode = overrides.get("mode")
    if mode is None:
        mode = DEFAULT_CFG.mode or "predict"
        LOGGER.warning(f"WARNING ⚠️ 'mode' argument is missing. Valid modes are {MODES}. Using default 'mode={mode}'.")
    elif mode not in MODES:
        raise ValueError(f"Invalid 'mode={mode}'. Valid modes are {MODES}.\n{CLI_HELP_MSG}")

    # Task
    task = overrides.pop("task", None)
    if task:
        if task not in TASKS:
            raise ValueError(f"Invalid 'task={task}'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}")
        if "model" not in overrides:
            overrides["model"] = TASK2MODEL[task]

    # Model
    model = overrides.pop("model", DEFAULT_CFG.model)
    if model is None:
        model = "yolov8n.pt"
        LOGGER.warning(f"WARNING ⚠️ 'model' argument is missing. Using default 'model={model}'.")
    overrides["model"] = model
    # stem = Path(model).stem.lower()
    stem = model.lower()
    if "rtdetr" in stem:  # guess architecture
        from ultralytics import RTDETR

        model = RTDETR(model)  # no task argument
    elif "fastsam" in stem:
        from ultralytics import FastSAM

        model = FastSAM(model)
    elif "sam" in stem:
        from ultralytics import SAM

        model = SAM(model)
    elif re.search("v3|v5|v6|v8|v9", stem):
        from ultralytics import YOLO

        model = YOLO(model, task=task)
    else:
        from ultralytics import YOLOv10

        # Special case for the HuggingFace Hub
        split_path = model.split('/')
        if len(split_path) == 2 and (not os.path.exists(model)):
            model = YOLOv10.from_pretrained(model)
        else:
            model = YOLOv10(model)
    if isinstance(overrides.get("pretrained"), str):
        model.load(overrides["pretrained"])

    # Task Update
    if task != model.task:
        if task:
            LOGGER.warning(
                f"WARNING ⚠️ conflicting 'task={task}' passed with 'task={model.task}' model. "
                f"Ignoring 'task={task}' and updating to 'task={model.task}' to match model."
            )
        task = model.task

    # Mode
    if mode in ("predict", "track") and "source" not in overrides:
        overrides["source"] = DEFAULT_CFG.source or ASSETS
        LOGGER.warning(f"WARNING ⚠️ 'source' argument is missing. Using default 'source={overrides['source']}'.")
    elif mode in ("train", "val"):
        if "data" not in overrides and "resume" not in overrides:
            overrides["data"] = DEFAULT_CFG.data or TASK2DATA.get(task or DEFAULT_CFG.task, DEFAULT_CFG.data)
            LOGGER.warning(f"WARNING ⚠️ 'data' argument is missing. Using default 'data={overrides['data']}'.")
    elif mode == "export":
        if "format" not in overrides:
            overrides["format"] = DEFAULT_CFG.format or "torchscript"
            LOGGER.warning(f"WARNING ⚠️ 'format' argument is missing. Using default 'format={overrides['format']}'.")

    # Run command in python
    getattr(model, mode)(**overrides)  # default args from model

    # Show help
    LOGGER.info(f"💡 Learn more at https://docs.ultralytics.com/modes/{mode}")


# Special modes --------------------------------------------------------------------------------------------------------
def copy_default_cfg():
    """Copy and create a new default configuration file with '_copy' appended to its name."""
    new_file = Path.cwd() / DEFAULT_CFG_PATH.name.replace(".yaml", "_copy.yaml")
    shutil.copy2(DEFAULT_CFG_PATH, new_file)
    LOGGER.info(
        f"{DEFAULT_CFG_PATH} copied to {new_file}\n"
        f"Example YOLO command with this new custom cfg:\n    yolo cfg='{new_file}' imgsz=320 batch=8"
    )


if __name__ == "__main__":
    # Example: entrypoint(debug='yolo predict model=yolov8n.pt')
    entrypoint(debug="")
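A short sketch of exercising the CLI plumbing above programmatically; `yolov8n.pt` mirrors the default used in the file, and the override values are arbitrary examples:

```python
from ultralytics.cfg import entrypoint, get_cfg

# Merge the package defaults with overrides; returns an IterableSimpleNamespace
cfg = get_cfg(overrides={"imgsz": 320, "conf": 0.25})
print(cfg.imgsz, cfg.conf)

# Same parsing path as the 'yolo' console script; the debug string stands in for
# sys.argv and will actually run a prediction (downloading weights if needed)
entrypoint(debug="yolo predict model=yolov8n.pt imgsz=320")
```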
ultralytics/cfg/datasets/Argoverse.yaml (deleted)
@@ -1,74 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Argoverse-HD dataset (ring-front-center camera) https://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
# Documentation: https://docs.ultralytics.com/datasets/detect/argoverse/
# Example usage: yolo train data=Argoverse.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── Argoverse ← downloads here (31.5 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Argoverse # dataset root dir
train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview

# Classes
names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: bus
  5: truck
  6: traffic_light
  7: stop_sign

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import json
  from tqdm import tqdm
  from ultralytics.utils.downloads import download
  from pathlib import Path

  def argoverse2yolo(set):
      labels = {}
      a = json.load(open(set, "rb"))
      for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
          img_id = annot['image_id']
          img_name = a['images'][img_id]['name']
          img_label_name = f'{img_name[:-3]}txt'

          cls = annot['category_id']  # instance class id
          x_center, y_center, width, height = annot['bbox']
          x_center = (x_center + width / 2) / 1920.0  # offset and scale
          y_center = (y_center + height / 2) / 1200.0  # offset and scale
          width /= 1920.0  # scale
          height /= 1200.0  # scale

          img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
          if not img_dir.exists():
              img_dir.mkdir(parents=True, exist_ok=True)

          k = str(img_dir / img_label_name)
          if k not in labels:
              labels[k] = []
          labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")

      for k in labels:
          with open(k, "w") as f:
              f.writelines(labels[k])


  # Download 'https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip' (deprecated S3 link)
  dir = Path(yaml['path'])  # dataset root dir
  urls = ['https://drive.google.com/file/d/1st9qW3BeIwQsnR0t8mRpvbsSWIo16ACi/view?usp=drive_link']
  print("\n\nWARNING: Argoverse dataset MUST be downloaded manually, autodownload will NOT work.")
  print(f"WARNING: Manually download Argoverse dataset '{urls[0]}' to '{dir}' and re-run your command.\n\n")
  # download(urls, dir=dir)

  # Convert
  annotations_dir = 'Argoverse-HD/annotations/'
  (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images')  # rename 'tracking' to 'images'
  for d in "train.json", "val.json":
      argoverse2yolo(dir / annotations_dir / d)  # convert Argoverse annotations to YOLO labels
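The `argoverse2yolo` helper above normalizes COCO-style top-left `[x, y, w, h]` boxes by the fixed 1920×1200 sensor size; a standalone sanity check of that arithmetic with made-up values:

```python
x, y, w, h = 100.0, 200.0, 50.0, 40.0  # top-left corner and size in pixels (made-up)
img_w, img_h = 1920.0, 1200.0          # ring-front-center resolution used in the script

xc = (x + w / 2) / img_w               # normalized box center x
yc = (y + h / 2) / img_h               # normalized box center y
wn, hn = w / img_w, h / img_h          # normalized width and height
print(f"0 {xc:.6f} {yc:.6f} {wn:.6f} {hn:.6f}")  # one YOLO label line, class 0
```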
ultralytics/cfg/datasets/DOTAv1.5.yaml (deleted)
@@ -1,36 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DOTA 1.5 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
# Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/
# Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.5.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── dota1.5 ← downloads here (2GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/DOTAv1.5 # dataset root dir
train: images/train # train images (relative to 'path') 1411 images
val: images/val # val images (relative to 'path') 458 images
test: images/test # test images (optional) 937 images

# Classes for DOTA 1.5
names:
  0: plane
  1: ship
  2: storage tank
  3: baseball diamond
  4: tennis court
  5: basketball court
  6: ground track field
  7: harbor
  8: bridge
  9: large vehicle
  10: small vehicle
  11: helicopter
  12: roundabout
  13: soccer ball field
  14: swimming pool
  15: container crane

# Download script/URL (optional)
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.5.zip
ultralytics/cfg/datasets/DOTAv1.yaml (deleted)
@@ -1,35 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DOTA 1.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
# Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/
# Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── dota1 ← downloads here (2GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/DOTAv1 # dataset root dir
train: images/train # train images (relative to 'path') 1411 images
val: images/val # val images (relative to 'path') 458 images
test: images/test # test images (optional) 937 images

# Classes for DOTA 1.0
names:
  0: plane
  1: ship
  2: storage tank
  3: baseball diamond
  4: tennis court
  5: basketball court
  6: ground track field
  7: harbor
  8: bridge
  9: large vehicle
  10: small vehicle
  11: helicopter
  12: roundabout
  13: soccer ball field
  14: swimming pool

# Download script/URL (optional)
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.zip
ultralytics/cfg/datasets/GlobalWheat2020.yaml (deleted)
@@ -1,53 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Global Wheat 2020 dataset https://www.global-wheat.com/ by University of Saskatchewan
# Documentation: https://docs.ultralytics.com/datasets/detect/globalwheat2020/
# Example usage: yolo train data=GlobalWheat2020.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── GlobalWheat2020 ← downloads here (7.0 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/GlobalWheat2020 # dataset root dir
train: # train images (relative to 'path') 3422 images
  - images/arvalis_1
  - images/arvalis_2
  - images/arvalis_3
  - images/ethz_1
  - images/rres_1
  - images/inrae_1
  - images/usask_1
val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
  - images/ethz_1
test: # test images (optional) 1276 images
  - images/utokyo_1
  - images/utokyo_2
  - images/nau_1
  - images/uq_1

# Classes
names:
  0: wheat_head

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  from ultralytics.utils.downloads import download
  from pathlib import Path

  # Download
  dir = Path(yaml['path'])  # dataset root dir
  urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
  download(urls, dir=dir)

  # Make Directories
  for p in 'annotations', 'images', 'labels':
      (dir / p).mkdir(parents=True, exist_ok=True)

  # Move
  for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \
           'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1':
      (dir / 'global-wheat-codalab-official' / p).rename(dir / 'images' / p)  # move to /images
      f = (dir / 'global-wheat-codalab-official' / p).with_suffix('.json')  # json file
      if f.exists():
          f.rename((dir / 'annotations' / p).with_suffix('.json'))  # move to /annotations
File diff suppressed because it is too large.
ultralytics/cfg/datasets/Objects365.yaml (deleted)
@@ -1,442 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Objects365 dataset https://www.objects365.org/ by Megvii
# Documentation: https://docs.ultralytics.com/datasets/detect/objects365/
# Example usage: yolo train data=Objects365.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── Objects365 ← downloads here (712 GB = 367G data + 345G zips)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Objects365 # dataset root dir
train: images/train # train images (relative to 'path') 1742289 images
val: images/val # val images (relative to 'path') 80000 images
test: # test images (optional)

# Classes
names:
  0: Person
  1: Sneakers
  2: Chair
  3: Other Shoes
  4: Hat
  5: Car
  6: Lamp
  7: Glasses
  8: Bottle
  9: Desk
  10: Cup
  11: Street Lights
  12: Cabinet/shelf
  13: Handbag/Satchel
  14: Bracelet
  15: Plate
  16: Picture/Frame
  17: Helmet
  18: Book
  19: Gloves
  20: Storage box
  21: Boat
  22: Leather Shoes
  23: Flower
  24: Bench
  25: Potted Plant
  26: Bowl/Basin
  27: Flag
  28: Pillow
  29: Boots
  30: Vase
  31: Microphone
  32: Necklace
  33: Ring
  34: SUV
  35: Wine Glass
  36: Belt
  37: Monitor/TV
  38: Backpack
  39: Umbrella
  40: Traffic Light
  41: Speaker
  42: Watch
  43: Tie
  44: Trash bin Can
  45: Slippers
  46: Bicycle
  47: Stool
  48: Barrel/bucket
  49: Van
  50: Couch
  51: Sandals
  52: Basket
  53: Drum
  54: Pen/Pencil
  55: Bus
  56: Wild Bird
  57: High Heels
  58: Motorcycle
  59: Guitar
  60: Carpet
  61: Cell Phone
  62: Bread
  63: Camera
  64: Canned
  65: Truck
  66: Traffic cone
  67: Cymbal
  68: Lifesaver
  69: Towel
  70: Stuffed Toy
  71: Candle
  72: Sailboat
  73: Laptop
  74: Awning
  75: Bed
  76: Faucet
  77: Tent
  78: Horse
  79: Mirror
  80: Power outlet
  81: Sink
  82: Apple
  83: Air Conditioner
  84: Knife
  85: Hockey Stick
  86: Paddle
  87: Pickup Truck
  88: Fork
  89: Traffic Sign
  90: Balloon
  91: Tripod
  92: Dog
  93: Spoon
  94: Clock
  95: Pot
  96: Cow
  97: Cake
  98: Dinning Table
  99: Sheep
  100: Hanger
  101: Blackboard/Whiteboard
  102: Napkin
  103: Other Fish
  104: Orange/Tangerine
  105: Toiletry
  106: Keyboard
  107: Tomato
  108: Lantern
  109: Machinery Vehicle
  110: Fan
  111: Green Vegetables
  112: Banana
  113: Baseball Glove
  114: Airplane
  115: Mouse
  116: Train
  117: Pumpkin
  118: Soccer
  119: Skiboard
  120: Luggage
  121: Nightstand
  122: Tea pot
  123: Telephone
  124: Trolley
  125: Head Phone
  126: Sports Car
  127: Stop Sign
  128: Dessert
  129: Scooter
  130: Stroller
  131: Crane
  132: Remote
  133: Refrigerator
  134: Oven
  135: Lemon
  136: Duck
  137: Baseball Bat
  138: Surveillance Camera
  139: Cat
  140: Jug
  141: Broccoli
  142: Piano
  143: Pizza
  144: Elephant
  145: Skateboard
  146: Surfboard
  147: Gun
  148: Skating and Skiing shoes
  149: Gas stove
  150: Donut
  151: Bow Tie
  152: Carrot
  153: Toilet
  154: Kite
  155: Strawberry
  156: Other Balls
  157: Shovel
  158: Pepper
  159: Computer Box
  160: Toilet Paper
  161: Cleaning Products
  162: Chopsticks
  163: Microwave
  164: Pigeon
  165: Baseball
  166: Cutting/chopping Board
  167: Coffee Table
  168: Side Table
  169: Scissors
  170: Marker
  171: Pie
  172: Ladder
  173: Snowboard
  174: Cookies
  175: Radiator
  176: Fire Hydrant
  177: Basketball
  178: Zebra
  179: Grape
  180: Giraffe
  181: Potato
  182: Sausage
  183: Tricycle
  184: Violin
  185: Egg
  186: Fire Extinguisher
  187: Candy
  188: Fire Truck
  189: Billiards
  190: Converter
  191: Bathtub
  192: Wheelchair
  193: Golf Club
  194: Briefcase
  195: Cucumber
  196: Cigar/Cigarette
  197: Paint Brush
  198: Pear
  199: Heavy Truck
  200: Hamburger
  201: Extractor
  202: Extension Cord
  203: Tong
  204: Tennis Racket
  205: Folder
  206: American Football
  207: earphone
  208: Mask
  209: Kettle
  210: Tennis
  211: Ship
  212: Swing
  213: Coffee Machine
  214: Slide
  215: Carriage
  216: Onion
  217: Green beans
  218: Projector
  219: Frisbee
  220: Washing Machine/Drying Machine
  221: Chicken
  222: Printer
  223: Watermelon
  224: Saxophone
  225: Tissue
  226: Toothbrush
  227: Ice cream
  228: Hot-air balloon
  229: Cello
  230: French Fries
  231: Scale
  232: Trophy
  233: Cabbage
  234: Hot dog
  235: Blender
  236: Peach
  237: Rice
  238: Wallet/Purse
  239: Volleyball
  240: Deer
  241: Goose
  242: Tape
  243: Tablet
  244: Cosmetics
  245: Trumpet
  246: Pineapple
  247: Golf Ball
  248: Ambulance
  249: Parking meter
  250: Mango
  251: Key
  252: Hurdle
  253: Fishing Rod
  254: Medal
  255: Flute
  256: Brush
  257: Penguin
  258: Megaphone
  259: Corn
  260: Lettuce
  261: Garlic
  262: Swan
  263: Helicopter
  264: Green Onion
  265: Sandwich
  266: Nuts
  267: Speed Limit Sign
  268: Induction Cooker
  269: Broom
  270: Trombone
  271: Plum
  272: Rickshaw
  273: Goldfish
  274: Kiwi fruit
  275: Router/modem
  276: Poker Card
  277: Toaster
  278: Shrimp
  279: Sushi
  280: Cheese
  281: Notepaper
  282: Cherry
  283: Pliers
  284: CD
  285: Pasta
  286: Hammer
  287: Cue
  288: Avocado
  289: Hamimelon
  290: Flask
  291: Mushroom
  292: Screwdriver
  293: Soap
  294: Recorder
  295: Bear
  296: Eggplant
  297: Board Eraser
  298: Coconut
  299: Tape Measure/Ruler
  300: Pig
  301: Showerhead
  302: Globe
  303: Chips
  304: Steak
  305: Crosswalk Sign
  306: Stapler
  307: Camel
  308: Formula 1
  309: Pomegranate
  310: Dishwasher
  311: Crab
  312: Hoverboard
  313: Meat ball
  314: Rice Cooker
  315: Tuba
  316: Calculator
  317: Papaya
  318: Antelope
  319: Parrot
  320: Seal
  321: Butterfly
  322: Dumbbell
  323: Donkey
  324: Lion
  325: Urinal
  326: Dolphin
  327: Electric Drill
  328: Hair Dryer
  329: Egg tart
  330: Jellyfish
  331: Treadmill
  332: Lighter
  333: Grapefruit
  334: Game board
  335: Mop
  336: Radish
  337: Baozi
  338: Target
  339: French
  340: Spring Rolls
  341: Monkey
  342: Rabbit
  343: Pencil Case
  344: Yak
  345: Red Cabbage
  346: Binoculars
  347: Asparagus
  348: Barbell
  349: Scallop
  350: Noddles
  351: Comb
  352: Dumpling
  353: Oyster
  354: Table Tennis paddle
  355: Cosmetics Brush/Eyeliner Pencil
  356: Chainsaw
  357: Eraser
  358: Lobster
  359: Durian
  360: Okra
  361: Lipstick
  362: Cosmetics Mirror
  363: Curling
  364: Table Tennis

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  from tqdm import tqdm

  from ultralytics.utils.checks import check_requirements
  from ultralytics.utils.downloads import download
  from ultralytics.utils.ops import xyxy2xywhn

  import numpy as np
  from pathlib import Path

  check_requirements(('pycocotools>=2.0',))
  from pycocotools.coco import COCO

  # Make Directories
  dir = Path(yaml['path'])  # dataset root dir
  for p in 'images', 'labels':
      (dir / p).mkdir(parents=True, exist_ok=True)
      for q in 'train', 'val':
          (dir / p / q).mkdir(parents=True, exist_ok=True)

  # Train, Val Splits
  for split, patches in [('train', 50 + 1), ('val', 43 + 1)]:
      print(f"Processing {split} in {patches} patches ...")
      images, labels = dir / 'images' / split, dir / 'labels' / split

      # Download
      url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/"
      if split == 'train':
          download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dir)  # annotations json
          download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, threads=8)
      elif split == 'val':
          download([f'{url}zhiyuan_objv2_{split}.json'], dir=dir)  # annotations json
          download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, threads=8)
          download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, threads=8)

      # Move
      for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'):
          f.rename(images / f.name)  # move to /images/{split}

      # Labels
      coco = COCO(dir / f'zhiyuan_objv2_{split}.json')
      names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
      for cid, cat in enumerate(names):
          catIds = coco.getCatIds(catNms=[cat])
          imgIds = coco.getImgIds(catIds=catIds)
          for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
              width, height = im["width"], im["height"]
              path = Path(im["file_name"])  # image filename
              try:
                  with open(labels / path.with_suffix('.txt').name, 'a') as file:
                      annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
                      for a in coco.loadAnns(annIds):
                          x, y, w, h = a['bbox']  # bounding box in xywh (xy top-left corner)
                          xyxy = np.array([x, y, x + w, y + h])[None]  # pixels(1,4)
                          x, y, w, h = xyxy2xywhn(xyxy, w=width, h=height, clip=True)[0]  # normalized and clipped
                          file.write(f"{cid} {x:.5f} {y:.5f} {w:.5f} {h:.5f}\n")
              except Exception as e:
                  print(e)
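The label conversion above leans on `xyxy2xywhn` from `ultralytics.utils.ops`; in isolation it maps pixel corner boxes to the normalized center format written to the label files (the values below are arbitrary):

```python
import numpy as np
from ultralytics.utils.ops import xyxy2xywhn

xyxy = np.array([[10.0, 20.0, 110.0, 220.0]])  # x1, y1, x2, y2 in pixels (arbitrary)
x, y, w, h = xyxy2xywhn(xyxy, w=640, h=480, clip=True)[0]  # normalized xc, yc, w, h in [0, 1]
print(f"{x:.5f} {y:.5f} {w:.5f} {h:.5f}")
```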
ultralytics/cfg/datasets/SKU-110K.yaml (deleted)
@@ -1,57 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
# Documentation: https://docs.ultralytics.com/datasets/detect/sku-110k/
# Example usage: yolo train data=SKU-110K.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── SKU-110K ← downloads here (13.6 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/SKU-110K # dataset root dir
train: train.txt # train images (relative to 'path') 8219 images
val: val.txt # val images (relative to 'path') 588 images
test: test.txt # test images (optional) 2936 images

# Classes
names:
  0: object

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import shutil
  from pathlib import Path

  import numpy as np
  import pandas as pd
  from tqdm import tqdm

  from ultralytics.utils.downloads import download
  from ultralytics.utils.ops import xyxy2xywh

  # Download
  dir = Path(yaml['path'])  # dataset root dir
  parent = Path(dir.parent)  # download dir
  urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
  download(urls, dir=parent)

  # Rename directories
  if dir.exists():
      shutil.rmtree(dir)
  (parent / 'SKU110K_fixed').rename(dir)  # rename dir
  (dir / 'labels').mkdir(parents=True, exist_ok=True)  # create labels dir

  # Convert labels
  names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height'  # column names
  for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
      x = pd.read_csv(dir / 'annotations' / d, names=names).values  # annotations
      images, unique_images = x[:, 0], np.unique(x[:, 0])
      with open((dir / d).with_suffix('.txt').__str__().replace('annotations_', ''), 'w') as f:
          f.writelines(f'./images/{s}\n' for s in unique_images)
      for im in tqdm(unique_images, desc=f'Converting {dir / d}'):
          cls = 0  # single-class dataset
          with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f:
              for r in x[images == im]:
                  w, h = r[6], r[7]  # image width, height
                  xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0]  # instance
                  f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n")  # write label
ultralytics/cfg/datasets/VOC.yaml (deleted)
@@ -1,99 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Documentation: https://docs.ultralytics.com/datasets/detect/voc/
# Example usage: yolo train data=VOC.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── VOC ← downloads here (2.8 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VOC
train: # train images (relative to 'path') 16551 images
  - images/train2012
  - images/train2007
  - images/val2012
  - images/val2007
val: # val images (relative to 'path') 4952 images
  - images/test2007
test: # test images (optional)
  - images/test2007

# Classes
names:
  0: aeroplane
  1: bicycle
  2: bird
  3: boat
  4: bottle
  5: bus
  6: car
  7: cat
  8: chair
  9: cow
  10: diningtable
  11: dog
  12: horse
  13: motorbike
  14: person
  15: pottedplant
  16: sheep
  17: sofa
  18: train
  19: tvmonitor

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import xml.etree.ElementTree as ET

  from tqdm import tqdm
  from ultralytics.utils.downloads import download
  from pathlib import Path

  def convert_label(path, lb_path, year, image_id):
      def convert_box(size, box):
          dw, dh = 1. / size[0], 1. / size[1]
          x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
          return x * dw, y * dh, w * dw, h * dh

      in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml')
      out_file = open(lb_path, 'w')
      tree = ET.parse(in_file)
      root = tree.getroot()
      size = root.find('size')
      w = int(size.find('width').text)
      h = int(size.find('height').text)

      names = list(yaml['names'].values())  # names list
      for obj in root.iter('object'):
          cls = obj.find('name').text
          if cls in names and int(obj.find('difficult').text) != 1:
              xmlbox = obj.find('bndbox')
              bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
              cls_id = names.index(cls)  # class id
              out_file.write(" ".join(str(a) for a in (cls_id, *bb)) + '\n')


  # Download
  dir = Path(yaml['path'])  # dataset root dir
  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  urls = [f'{url}VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
          f'{url}VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
          f'{url}VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
  download(urls, dir=dir / 'images', curl=True, threads=3, exist_ok=True)  # download and unzip over existing paths (required)

  # Convert
  path = dir / 'images/VOCdevkit'
  for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
      imgs_path = dir / 'images' / f'{image_set}{year}'
      lbs_path = dir / 'labels' / f'{image_set}{year}'
      imgs_path.mkdir(exist_ok=True, parents=True)
      lbs_path.mkdir(exist_ok=True, parents=True)

      with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f:
          image_ids = f.read().strip().split()
      for id in tqdm(image_ids, desc=f'{image_set}{year}'):
          f = path / f'VOC{year}/JPEGImages/{id}.jpg'  # old img path
          lb_path = (lbs_path / f.name).with_suffix('.txt')  # new label path
          f.rename(imgs_path / f.name)  # move image
          convert_label(path, lb_path, year, id)  # convert labels to YOLO format
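Worth noting in the VOC script above: `convert_box` receives the box as `(xmin, xmax, ymin, ymax)`, built in that order from the XML, not the more usual `(xmin, ymin, xmax, ymax)`. A quick numeric check of the formula with made-up values:

```python
def convert_box(size, box):  # copied from the script above
    dw, dh = 1. / size[0], 1. / size[1]
    x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
    return x * dw, y * dh, w * dw, h * dh

# (xmin, xmax, ymin, ymax) for a 100x100 box centered at (150, 100) in a 500x375 image
print(convert_box((500, 375), (100.0, 200.0, 50.0, 150.0)))
# -> (0.298, 0.264, 0.2, 0.2667): normalized center x/y, width, height
```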
ultralytics/cfg/datasets/VisDrone.yaml (deleted)
@@ -1,72 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University
# Documentation: https://docs.ultralytics.com/datasets/detect/visdrone/
# Example usage: yolo train data=VisDrone.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── VisDrone ← downloads here (2.3 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VisDrone # dataset root dir
train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images

# Classes
names:
  0: pedestrian
  1: people
  2: bicycle
  3: car
  4: van
  5: truck
  6: tricycle
  7: awning-tricycle
  8: bus
  9: motor

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import os
  from pathlib import Path

  from ultralytics.utils.downloads import download

  def visdrone2yolo(dir):
      from PIL import Image
      from tqdm import tqdm

      def convert_box(size, box):
          # Convert VisDrone box to YOLO xywh box
          dw = 1. / size[0]
          dh = 1. / size[1]
          return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh

      (dir / 'labels').mkdir(parents=True, exist_ok=True)  # make labels directory
      pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
      for f in pbar:
          img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size
          lines = []
          with open(f, 'r') as file:  # read annotation.txt
              for row in [x.split(',') for x in file.read().strip().splitlines()]:
                  if row[4] == '0':  # VisDrone 'ignored regions' class 0
                      continue
                  cls = int(row[5]) - 1
                  box = convert_box(img_size, tuple(map(int, row[:4])))
                  lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
              with open(str(f).replace(f'{os.sep}annotations{os.sep}', f'{os.sep}labels{os.sep}'), 'w') as fl:
                  fl.writelines(lines)  # write label.txt


  # Download
  dir = Path(yaml['path'])  # dataset root dir
  urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
  download(urls, dir=dir, curl=True, threads=4)

  # Convert
  for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
      visdrone2yolo(dir / d)  # convert VisDrone annotations to YOLO labels
ultralytics/cfg/datasets/african-wildlife.yaml (deleted)
@@ -1,24 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# African-wildlife dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/african-wildlife/
# Example usage: yolo train data=african-wildlife.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── african-wildlife ← downloads here (100 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/african-wildlife # dataset root dir
train: train/images # train images (relative to 'path') 1052 images
val: valid/images # val images (relative to 'path') 225 images
test: test/images # test images (relative to 'path') 227 images

# Classes
names:
  0: buffalo
  1: elephant
  2: rhino
  3: zebra

# Download script/URL (optional)
download: https://ultralytics.com/assets/african-wildlife.zip
ultralytics/cfg/datasets/brain-tumor.yaml (deleted)
@@ -1,22 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Brain-tumor dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/brain-tumor/
# Example usage: yolo train data=brain-tumor.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── brain-tumor ← downloads here (4.05 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/brain-tumor # dataset root dir
train: train/images # train images (relative to 'path') 893 images
val: valid/images # val images (relative to 'path') 223 images
test: # test images (relative to 'path')

# Classes
names:
  0: negative
  1: positive

# Download script/URL (optional)
download: https://ultralytics.com/assets/brain-tumor.zip
ultralytics/cfg/datasets/carparts-seg.yaml (deleted)
@@ -1,43 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Carparts-seg dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/carparts-seg/
# Example usage: yolo train data=carparts-seg.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── carparts-seg ← downloads here (132 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/carparts-seg # dataset root dir
train: train/images # train images (relative to 'path') 3516 images
val: valid/images # val images (relative to 'path') 276 images
test: test/images # test images (relative to 'path') 401 images

# Classes
names:
  0: back_bumper
  1: back_door
  2: back_glass
  3: back_left_door
  4: back_left_light
  5: back_light
  6: back_right_door
  7: back_right_light
  8: front_bumper
  9: front_door
  10: front_glass
  11: front_left_door
  12: front_left_light
  13: front_light
  14: front_right_door
  15: front_right_light
  16: hood
  17: left_mirror
  18: object
  19: right_mirror
  20: tailgate
  21: trunk
  22: wheel

# Download script/URL (optional)
download: https://ultralytics.com/assets/carparts-seg.zip
@ -1,38 +0,0 @@
|
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
||||
# COCO 2017 dataset https://cocodataset.org by Microsoft
|
||||
# Documentation: https://docs.ultralytics.com/datasets/pose/coco/
|
||||
# Example usage: yolo train data=coco-pose.yaml
|
||||
# parent
|
||||
# ├── ultralytics
|
||||
# └── datasets
|
||||
# └── coco-pose ← downloads here (20.1 GB)
|
||||
|
||||
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||
path: ../datasets/coco-pose # dataset root dir
|
||||
train: train2017.txt # train images (relative to 'path') 118287 images
|
||||
val: val2017.txt # val images (relative to 'path') 5000 images
|
||||
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
|
||||
|
||||
# Keypoints
|
||||
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
|
||||
flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
|
||||
|
||||
# Classes
|
||||
names:
|
||||
0: person
|
||||
|
||||
# Download script/URL (optional)
|
||||
download: |
|
||||
from ultralytics.utils.downloads import download
|
||||
from pathlib import Path
|
||||
|
||||
# Download labels
|
||||
dir = Path(yaml['path']) # dataset root dir
|
||||
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
|
||||
urls = [url + 'coco2017labels-pose.zip'] # labels
|
||||
download(urls, dir=dir.parent)
|
||||
# Download data
|
||||
urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images
|
||||
'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images
|
||||
'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional)
|
||||
download(urls, dir=dir / 'images', threads=3)
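
The `kpt_shape`/`flip_idx` pair above is what makes horizontal-flip augmentation safe for pose data: when an image is mirrored, left/right keypoints must trade places. An illustrative sketch of the idea (not the library's implementation):

```python
import numpy as np

# Illustrative sketch (not the library implementation) of how flip_idx is used.
# COCO keypoint 1 is the left eye and 2 the right eye, so flip_idx swaps them,
# while self-symmetric points like 0 (nose) map to themselves.
flip_idx = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]

kpts = np.random.rand(17, 3)         # x, y, visibility in normalized coords
flipped = kpts[flip_idx].copy()      # keypoint i takes the value of its mirror
flipped[:, 0] = 1.0 - flipped[:, 0]  # mirror the normalized x-coordinates
```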
@ -1,114 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO 2017 dataset https://cocodataset.org by Microsoft
# Documentation: https://docs.ultralytics.com/datasets/detect/coco/
# Example usage: yolo train data=coco.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco ← downloads here (20.1 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco # dataset root dir
train: train2017.txt # train images (relative to 'path') 118287 images
val: val2017.txt # val images (relative to 'path') 5000 images
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794

# Classes
names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush

# Download script/URL (optional)
download: |
  from ultralytics.utils.downloads import download
  from pathlib import Path

  # Download labels
  segments = True  # segment or box labels
  dir = Path(yaml['path'])  # dataset root dir
  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # labels
  download(urls, dir=dir.parent)
  # Download data
  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
  download(urls, dir=dir / 'images', threads=3)
@ -1,100 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/coco/
# Example usage: yolo train data=coco128-seg.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco128-seg ← downloads here (7 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco128-seg # dataset root dir
train: images/train2017 # train images (relative to 'path') 128 images
val: images/train2017 # val images (relative to 'path') 128 images
test: # test images (optional)

# Classes
names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush

# Download script/URL (optional)
download: https://ultralytics.com/assets/coco128-seg.zip
@ -1,100 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/coco/
# Example usage: yolo train data=coco128.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco128 ← downloads here (7 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco128 # dataset root dir
train: images/train2017 # train images (relative to 'path') 128 images
val: images/train2017 # val images (relative to 'path') 128 images
test: # test images (optional)

# Classes
names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush

# Download script/URL (optional)
download: https://ultralytics.com/assets/coco128.zip
@ -1,25 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/pose/coco8-pose/
# Example usage: yolo train data=coco8-pose.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco8-pose ← downloads here (1 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco8-pose # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)

# Keypoints
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]

# Classes
names:
  0: person

# Download script/URL (optional)
download: https://ultralytics.com/assets/coco8-pose.zip
@ -1,100 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO8-seg dataset (first 8 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/coco8-seg/
# Example usage: yolo train data=coco8-seg.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco8-seg ← downloads here (1 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco8-seg # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)

# Classes
names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush

# Download script/URL (optional)
download: https://ultralytics.com/assets/coco8-seg.zip
@ -1,100 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO8 dataset (first 8 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/coco8/
# Example usage: yolo train data=coco8.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco8 ← downloads here (1 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco8 # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)

# Classes
names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush

# Download script/URL (optional)
download: https://ultralytics.com/assets/coco8.zip
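
The 8-image sets above (coco8, coco8-seg, coco8-pose) are meant for pipeline smoke tests rather than real training. A hedged sketch of such a check, assuming the `ultralytics` package:

```python
from ultralytics import YOLO

# Smoke test: coco8 has only 4 train / 4 val images, so a 1-epoch run
# exercises download, training, and validation end to end in seconds.
model = YOLO("yolov8n.pt")
model.train(data="coco8.yaml", epochs=1, imgsz=640)
metrics = model.val()   # evaluate on the 4 val images
print(metrics.box.map)  # mAP50-95 on the tiny split; not a meaningful benchmark
```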
@ -1,21 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Crack-seg dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/crack-seg/
# Example usage: yolo train data=crack-seg.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── crack-seg ← downloads here (91.2 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/crack-seg # dataset root dir
train: train/images # train images (relative to 'path') 3717 images
val: valid/images # val images (relative to 'path') 112 images
test: test/images # test images (relative to 'path') 200 images

# Classes
names:
  0: crack

# Download script/URL (optional)
download: https://ultralytics.com/assets/crack-seg.zip
@ -1,34 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DOTA8 dataset: 8 images from the split DOTAv1 dataset, by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/obb/dota8/
# Example usage: yolo train model=yolov8n-obb.pt data=dota8.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── dota8 ← downloads here (1 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/dota8 # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images

# Classes for DOTA 1.0
names:
  0: plane
  1: ship
  2: storage tank
  3: baseball diamond
  4: tennis court
  5: basketball court
  6: ground track field
  7: harbor
  8: bridge
  9: large vehicle
  10: small vehicle
  11: helicopter
  12: roundabout
  13: soccer ball field
  14: swimming pool

# Download script/URL (optional)
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/dota8.zip
@ -1,660 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Open Images v7 dataset https://storage.googleapis.com/openimages/web/index.html by Google
# Documentation: https://docs.ultralytics.com/datasets/detect/open-images-v7/
# Example usage: yolo train data=open-images-v7.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── open-images-v7 ← downloads here (561 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/open-images-v7 # dataset root dir
train: images/train # train images (relative to 'path') 1743042 images
val: images/val # val images (relative to 'path') 41620 images
test: # test images (optional)

# Classes
names:
  0: Accordion
  1: Adhesive tape
  2: Aircraft
  3: Airplane
  4: Alarm clock
  5: Alpaca
  6: Ambulance
  7: Animal
  8: Ant
  9: Antelope
  10: Apple
  11: Armadillo
  12: Artichoke
  13: Auto part
  14: Axe
  15: Backpack
  16: Bagel
  17: Baked goods
  18: Balance beam
  19: Ball
  20: Balloon
  21: Banana
  22: Band-aid
  23: Banjo
  24: Barge
  25: Barrel
  26: Baseball bat
  27: Baseball glove
  28: Bat (Animal)
  29: Bathroom accessory
  30: Bathroom cabinet
  31: Bathtub
  32: Beaker
  33: Bear
  34: Bed
  35: Bee
  36: Beehive
  37: Beer
  38: Beetle
  39: Bell pepper
  40: Belt
  41: Bench
  42: Bicycle
  43: Bicycle helmet
  44: Bicycle wheel
  45: Bidet
  46: Billboard
  47: Billiard table
  48: Binoculars
  49: Bird
  50: Blender
  51: Blue jay
  52: Boat
  53: Bomb
  54: Book
  55: Bookcase
  56: Boot
  57: Bottle
  58: Bottle opener
  59: Bow and arrow
  60: Bowl
  61: Bowling equipment
  62: Box
  63: Boy
  64: Brassiere
  65: Bread
  66: Briefcase
  67: Broccoli
  68: Bronze sculpture
  69: Brown bear
  70: Building
  71: Bull
  72: Burrito
  73: Bus
  74: Bust
  75: Butterfly
  76: Cabbage
  77: Cabinetry
  78: Cake
  79: Cake stand
  80: Calculator
  81: Camel
  82: Camera
  83: Can opener
  84: Canary
  85: Candle
  86: Candy
  87: Cannon
  88: Canoe
  89: Cantaloupe
  90: Car
  91: Carnivore
  92: Carrot
  93: Cart
  94: Cassette deck
  95: Castle
  96: Cat
  97: Cat furniture
  98: Caterpillar
  99: Cattle
  100: Ceiling fan
  101: Cello
  102: Centipede
  103: Chainsaw
  104: Chair
  105: Cheese
  106: Cheetah
  107: Chest of drawers
  108: Chicken
  109: Chime
  110: Chisel
  111: Chopsticks
  112: Christmas tree
  113: Clock
  114: Closet
  115: Clothing
  116: Coat
  117: Cocktail
  118: Cocktail shaker
  119: Coconut
  120: Coffee
  121: Coffee cup
  122: Coffee table
  123: Coffeemaker
  124: Coin
  125: Common fig
  126: Common sunflower
  127: Computer keyboard
  128: Computer monitor
  129: Computer mouse
  130: Container
  131: Convenience store
  132: Cookie
  133: Cooking spray
  134: Corded phone
  135: Cosmetics
  136: Couch
  137: Countertop
  138: Cowboy hat
  139: Crab
  140: Cream
  141: Cricket ball
  142: Crocodile
  143: Croissant
  144: Crown
  145: Crutch
  146: Cucumber
  147: Cupboard
  148: Curtain
  149: Cutting board
  150: Dagger
  151: Dairy Product
  152: Deer
  153: Desk
  154: Dessert
  155: Diaper
  156: Dice
  157: Digital clock
  158: Dinosaur
  159: Dishwasher
  160: Dog
  161: Dog bed
  162: Doll
  163: Dolphin
  164: Door
  165: Door handle
  166: Doughnut
  167: Dragonfly
  168: Drawer
  169: Dress
  170: Drill (Tool)
  171: Drink
  172: Drinking straw
  173: Drum
  174: Duck
  175: Dumbbell
  176: Eagle
  177: Earrings
  178: Egg (Food)
  179: Elephant
  180: Envelope
  181: Eraser
  182: Face powder
  183: Facial tissue holder
  184: Falcon
  185: Fashion accessory
  186: Fast food
  187: Fax
  188: Fedora
  189: Filing cabinet
  190: Fire hydrant
  191: Fireplace
  192: Fish
  193: Flag
  194: Flashlight
  195: Flower
  196: Flowerpot
  197: Flute
  198: Flying disc
  199: Food
  200: Food processor
  201: Football
  202: Football helmet
  203: Footwear
  204: Fork
  205: Fountain
  206: Fox
  207: French fries
  208: French horn
  209: Frog
  210: Fruit
  211: Frying pan
  212: Furniture
  213: Garden Asparagus
  214: Gas stove
  215: Giraffe
  216: Girl
  217: Glasses
  218: Glove
  219: Goat
  220: Goggles
  221: Goldfish
  222: Golf ball
  223: Golf cart
  224: Gondola
  225: Goose
  226: Grape
  227: Grapefruit
  228: Grinder
  229: Guacamole
  230: Guitar
  231: Hair dryer
  232: Hair spray
  233: Hamburger
  234: Hammer
  235: Hamster
  236: Hand dryer
  237: Handbag
  238: Handgun
  239: Harbor seal
  240: Harmonica
  241: Harp
  242: Harpsichord
  243: Hat
  244: Headphones
  245: Heater
  246: Hedgehog
  247: Helicopter
  248: Helmet
  249: High heels
  250: Hiking equipment
  251: Hippopotamus
  252: Home appliance
  253: Honeycomb
  254: Horizontal bar
  255: Horse
  256: Hot dog
  257: House
  258: Houseplant
  259: Human arm
  260: Human beard
  261: Human body
  262: Human ear
  263: Human eye
  264: Human face
  265: Human foot
  266: Human hair
  267: Human hand
  268: Human head
  269: Human leg
  270: Human mouth
  271: Human nose
  272: Humidifier
  273: Ice cream
  274: Indoor rower
  275: Infant bed
  276: Insect
  277: Invertebrate
  278: Ipod
  279: Isopod
  280: Jacket
  281: Jacuzzi
  282: Jaguar (Animal)
  283: Jeans
  284: Jellyfish
  285: Jet ski
  286: Jug
  287: Juice
  288: Kangaroo
  289: Kettle
  290: Kitchen & dining room table
  291: Kitchen appliance
  292: Kitchen knife
  293: Kitchen utensil
  294: Kitchenware
  295: Kite
  296: Knife
  297: Koala
  298: Ladder
  299: Ladle
  300: Ladybug
  301: Lamp
  302: Land vehicle
  303: Lantern
  304: Laptop
  305: Lavender (Plant)
  306: Lemon
  307: Leopard
  308: Light bulb
  309: Light switch
  310: Lighthouse
  311: Lily
  312: Limousine
  313: Lion
  314: Lipstick
  315: Lizard
  316: Lobster
  317: Loveseat
  318: Luggage and bags
  319: Lynx
  320: Magpie
  321: Mammal
  322: Man
  323: Mango
  324: Maple
  325: Maracas
  326: Marine invertebrates
  327: Marine mammal
  328: Measuring cup
  329: Mechanical fan
  330: Medical equipment
  331: Microphone
  332: Microwave oven
  333: Milk
  334: Miniskirt
  335: Mirror
  336: Missile
  337: Mixer
  338: Mixing bowl
  339: Mobile phone
  340: Monkey
  341: Moths and butterflies
  342: Motorcycle
  343: Mouse
  344: Muffin
  345: Mug
  346: Mule
  347: Mushroom
  348: Musical instrument
  349: Musical keyboard
  350: Nail (Construction)
  351: Necklace
  352: Nightstand
  353: Oboe
  354: Office building
  355: Office supplies
  356: Orange
  357: Organ (Musical Instrument)
  358: Ostrich
  359: Otter
  360: Oven
  361: Owl
  362: Oyster
  363: Paddle
  364: Palm tree
  365: Pancake
  366: Panda
  367: Paper cutter
  368: Paper towel
  369: Parachute
  370: Parking meter
  371: Parrot
  372: Pasta
  373: Pastry
  374: Peach
  375: Pear
  376: Pen
  377: Pencil case
  378: Pencil sharpener
  379: Penguin
  380: Perfume
  381: Person
  382: Personal care
  383: Personal flotation device
  384: Piano
  385: Picnic basket
  386: Picture frame
  387: Pig
  388: Pillow
  389: Pineapple
  390: Pitcher (Container)
  391: Pizza
  392: Pizza cutter
  393: Plant
  394: Plastic bag
  395: Plate
  396: Platter
  397: Plumbing fixture
  398: Polar bear
  399: Pomegranate
  400: Popcorn
  401: Porch
  402: Porcupine
  403: Poster
  404: Potato
  405: Power plugs and sockets
  406: Pressure cooker
  407: Pretzel
  408: Printer
  409: Pumpkin
  410: Punching bag
  411: Rabbit
  412: Raccoon
  413: Racket
  414: Radish
  415: Ratchet (Device)
  416: Raven
  417: Rays and skates
  418: Red panda
  419: Refrigerator
  420: Remote control
  421: Reptile
  422: Rhinoceros
  423: Rifle
  424: Ring binder
  425: Rocket
  426: Roller skates
  427: Rose
  428: Rugby ball
  429: Ruler
  430: Salad
  431: Salt and pepper shakers
  432: Sandal
  433: Sandwich
  434: Saucer
  435: Saxophone
  436: Scale
  437: Scarf
  438: Scissors
  439: Scoreboard
  440: Scorpion
  441: Screwdriver
  442: Sculpture
  443: Sea lion
  444: Sea turtle
  445: Seafood
  446: Seahorse
  447: Seat belt
  448: Segway
  449: Serving tray
  450: Sewing machine
  451: Shark
  452: Sheep
  453: Shelf
  454: Shellfish
  455: Shirt
  456: Shorts
  457: Shotgun
  458: Shower
  459: Shrimp
  460: Sink
  461: Skateboard
  462: Ski
  463: Skirt
  464: Skull
  465: Skunk
  466: Skyscraper
  467: Slow cooker
  468: Snack
  469: Snail
  470: Snake
  471: Snowboard
  472: Snowman
  473: Snowmobile
  474: Snowplow
  475: Soap dispenser
  476: Sock
  477: Sofa bed
  478: Sombrero
  479: Sparrow
  480: Spatula
  481: Spice rack
  482: Spider
  483: Spoon
  484: Sports equipment
  485: Sports uniform
  486: Squash (Plant)
  487: Squid
  488: Squirrel
  489: Stairs
  490: Stapler
  491: Starfish
  492: Stationary bicycle
  493: Stethoscope
  494: Stool
  495: Stop sign
  496: Strawberry
  497: Street light
  498: Stretcher
  499: Studio couch
  500: Submarine
  501: Submarine sandwich
  502: Suit
  503: Suitcase
  504: Sun hat
  505: Sunglasses
  506: Surfboard
  507: Sushi
  508: Swan
  509: Swim cap
  510: Swimming pool
  511: Swimwear
  512: Sword
  513: Syringe
  514: Table
  515: Table tennis racket
  516: Tablet computer
  517: Tableware
  518: Taco
  519: Tank
  520: Tap
  521: Tart
  522: Taxi
  523: Tea
  524: Teapot
  525: Teddy bear
  526: Telephone
  527: Television
  528: Tennis ball
  529: Tennis racket
  530: Tent
  531: Tiara
  532: Tick
  533: Tie
  534: Tiger
  535: Tin can
  536: Tire
  537: Toaster
  538: Toilet
  539: Toilet paper
  540: Tomato
  541: Tool
  542: Toothbrush
  543: Torch
  544: Tortoise
  545: Towel
  546: Tower
  547: Toy
  548: Traffic light
  549: Traffic sign
  550: Train
  551: Training bench
  552: Treadmill
  553: Tree
  554: Tree house
  555: Tripod
  556: Trombone
  557: Trousers
  558: Truck
  559: Trumpet
  560: Turkey
  561: Turtle
  562: Umbrella
  563: Unicycle
  564: Van
  565: Vase
  566: Vegetable
  567: Vehicle
  568: Vehicle registration plate
  569: Violin
  570: Volleyball (Ball)
  571: Waffle
  572: Waffle iron
  573: Wall clock
  574: Wardrobe
  575: Washing machine
  576: Waste container
  577: Watch
  578: Watercraft
  579: Watermelon
  580: Weapon
  581: Whale
  582: Wheel
  583: Wheelchair
  584: Whisk
  585: Whiteboard
  586: Willow
  587: Window
  588: Window blind
  589: Wine
  590: Wine glass
  591: Wine rack
  592: Winter melon
  593: Wok
  594: Woman
  595: Wood-burning stove
  596: Woodpecker
  597: Worm
  598: Wrench
  599: Zebra
  600: Zucchini

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  from ultralytics.utils import LOGGER, SETTINGS, Path, is_ubuntu, get_ubuntu_version
  from ultralytics.utils.checks import check_requirements, check_version

  check_requirements('fiftyone')
  if is_ubuntu() and check_version(get_ubuntu_version(), '>=22.04'):
      # Ubuntu>=22.04 patch https://github.com/voxel51/fiftyone/issues/2961#issuecomment-1666519347
      check_requirements('fiftyone-db-ubuntu2204')

  import fiftyone as fo
  import fiftyone.zoo as foz
  import warnings

  name = 'open-images-v7'
  fraction = 1.0  # fraction of full dataset to use
  LOGGER.warning('WARNING ⚠️ Open Images V7 dataset requires at least 561 GB of free space. Starting download...')
  for split in 'train', 'validation':  # 1743042 train, 41620 val images
      train = split == 'train'

      # Load Open Images dataset
      dataset = foz.load_zoo_dataset(name,
                                     split=split,
                                     label_types=['detections'],
                                     dataset_dir=Path(SETTINGS['datasets_dir']) / 'fiftyone' / name,
                                     max_samples=round((1743042 if train else 41620) * fraction))

      # Define classes
      if train:
          classes = dataset.default_classes  # all classes
          # classes = dataset.distinct('ground_truth.detections.label')  # only observed classes

      # Export to YOLO format
      with warnings.catch_warnings():
          warnings.filterwarnings("ignore", category=UserWarning, module="fiftyone.utils.yolo")
          dataset.export(export_dir=str(Path(SETTINGS['datasets_dir']) / name),
                         dataset_type=fo.types.YOLOv5Dataset,
                         label_field='ground_truth',
                         split='val' if split == 'validation' else split,
                         classes=classes,
                         overwrite=train)
@ -1,21 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Package-seg dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/package-seg/
# Example usage: yolo train data=package-seg.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── package-seg ← downloads here (102 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/package-seg # dataset root dir
train: images/train # train images (relative to 'path') 1920 images
val: images/val # val images (relative to 'path') 89 images
test: test/images # test images (relative to 'path') 188 images

# Classes
names:
  0: package

# Download script/URL (optional)
download: https://ultralytics.com/assets/package-seg.zip
@ -1,24 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Tiger Pose dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/pose/tiger-pose/
# Example usage: yolo train data=tiger-pose.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── tiger-pose ← downloads here (75.3 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/tiger-pose # dataset root dir
train: train # train images (relative to 'path') 210 images
val: val # val images (relative to 'path') 53 images

# Keypoints
kpt_shape: [12, 2] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
flip_idx: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]

# Classes
names:
  0: tiger

# Download script/URL (optional)
download: https://ultralytics.com/assets/tiger-pose.zip
@ -1,152 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
# -------- DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command! --------
# Documentation: https://docs.ultralytics.com/datasets/detect/xview/
# Example usage: yolo train data=xView.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── xView ← downloads here (20.7 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/xView # dataset root dir
train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images

# Classes
names:
  0: Fixed-wing Aircraft
  1: Small Aircraft
  2: Cargo Plane
  3: Helicopter
  4: Passenger Vehicle
  5: Small Car
  6: Bus
  7: Pickup Truck
  8: Utility Truck
  9: Truck
  10: Cargo Truck
  11: Truck w/Box
  12: Truck Tractor
  13: Trailer
  14: Truck w/Flatbed
  15: Truck w/Liquid
  16: Crane Truck
  17: Railway Vehicle
  18: Passenger Car
  19: Cargo Car
  20: Flat Car
  21: Tank car
  22: Locomotive
  23: Maritime Vessel
  24: Motorboat
  25: Sailboat
  26: Tugboat
  27: Barge
  28: Fishing Vessel
  29: Ferry
  30: Yacht
  31: Container Ship
  32: Oil Tanker
  33: Engineering Vehicle
  34: Tower crane
  35: Container Crane
  36: Reach Stacker
  37: Straddle Carrier
  38: Mobile Crane
  39: Dump Truck
  40: Haul Truck
  41: Scraper/Tractor
  42: Front loader/Bulldozer
  43: Excavator
  44: Cement Mixer
  45: Ground Grader
  46: Hut/Tent
  47: Shed
  48: Building
  49: Aircraft Hangar
  50: Damaged Building
  51: Facility
  52: Construction Site
  53: Vehicle Lot
  54: Helipad
  55: Storage Tank
  56: Shipping container lot
  57: Shipping Container
  58: Pylon
  59: Tower

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import json
  import os
  from pathlib import Path

  import numpy as np
  from PIL import Image
  from tqdm import tqdm

  from ultralytics.data.utils import autosplit
  from ultralytics.utils.ops import xyxy2xywhn


  def convert_labels(fname=Path('xView/xView_train.geojson')):
      # Convert xView geoJSON labels to YOLO format
      path = fname.parent
      with open(fname) as f:
          print(f'Loading {fname}...')
          data = json.load(f)

      # Make dirs
      labels = Path(path / 'labels' / 'train')
      os.system(f'rm -rf {labels}')
      labels.mkdir(parents=True, exist_ok=True)

      # xView classes 11-94 to 0-59
      xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
                           12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
                           29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
                           47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]

      shapes = {}
      for feature in tqdm(data['features'], desc=f'Converting {fname}'):
          p = feature['properties']
          if p['bounds_imcoords']:
              id = p['image_id']
              file = path / 'train_images' / id
              if file.exists():  # 1395.tif missing
                  try:
                      box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
                      assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}'
                      cls = p['type_id']
                      cls = xview_class2index[int(cls)]  # xView class to 0-59
                      assert 59 >= cls >= 0, f'incorrect class index {cls}'

                      # Write YOLO label
                      if id not in shapes:
                          shapes[id] = Image.open(file).size
                      box = xyxy2xywhn(box[None].astype(np.float64), w=shapes[id][0], h=shapes[id][1], clip=True)  # np.float64 (np.float was removed in NumPy 1.24)
                      with open((labels / id).with_suffix('.txt'), 'a') as f:
                          f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n")  # write label.txt
                  except Exception as e:
                      print(f'WARNING: skipping one label for {file}: {e}')


  # Download manually from https://challenge.xviewdataset.org
  dir = Path(yaml['path'])  # dataset root dir
  # urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip',  # train labels
  #         'https://d307kc0mrhucc3.cloudfront.net/train_images.zip',  # 15G, 847 train images
  #         'https://d307kc0mrhucc3.cloudfront.net/val_images.zip']  # 5G, 282 val images (no labels)
  # download(urls, dir=dir)

  # Convert labels
  convert_labels(dir / 'xView_train.geojson')

  # Move images
  images = Path(dir / 'images')
  images.mkdir(parents=True, exist_ok=True)
  Path(dir / 'train_images').rename(dir / 'images' / 'train')
  Path(dir / 'val_images').rename(dir / 'images' / 'val')

  # Split
  autosplit(dir / 'images' / 'train')
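
The label line written by `convert_labels` above relies on `xyxy2xywhn` turning pixel corner boxes into YOLO's normalized center format. A standalone worked sketch of that arithmetic, using made-up example numbers:

```python
# Standalone arithmetic mirroring ultralytics.utils.ops.xyxy2xywhn, shown
# with example values so the contents of a label line are concrete.
x1, y1, x2, y2 = 100, 200, 300, 400  # pixel corner coordinates
w_img, h_img = 1000, 800             # image width and height

xc = ((x1 + x2) / 2) / w_img  # 0.200, normalized box-center x
yc = ((y1 + y2) / 2) / h_img  # 0.375, normalized box-center y
w = (x2 - x1) / w_img         # 0.200, normalized box width
h = (y2 - y1) / h_img         # 0.250, normalized box height
print(f"0 {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}")  # one YOLO label line (class 0)
```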
@ -1,127 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default training settings and hyperparameters for medium-augmentation COCO training

task: detect # (str) YOLO task, i.e. detect, segment, classify, pose
mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark

# Train settings -------------------------------------------------------------------------------------------------------
model: # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
data: # (str, optional) path to data file, i.e. coco128.yaml
epochs: 100 # (int) number of epochs to train for
time: # (float, optional) number of hours to train for, overrides epochs if supplied
patience: 100 # (int) epochs to wait for no observable improvement for early stopping of training
batch: 16 # (int) number of images per batch (-1 for AutoBatch)
imgsz: 640 # (int | list) input image size as int for train and val modes, or list[w,h] for predict and export modes
save: True # (bool) save train checkpoints and predict results
save_period: -1 # (int) Save checkpoint every x epochs (disabled if < 1)
val_period: 1 # (int) Validation every x epochs
cache: False # (bool) True/ram, disk or False. Use cache for data loading
device: # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
workers: 8 # (int) number of worker threads for data loading (per RANK if DDP)
project: # (str, optional) project name
name: # (str, optional) experiment name, results saved to 'project/name' directory
exist_ok: False # (bool) whether to overwrite existing experiment
pretrained: True # (bool | str) whether to use a pretrained model (bool) or a model to load weights from (str)
optimizer: auto # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
verbose: True # (bool) whether to print verbose output
seed: 0 # (int) random seed for reproducibility
deterministic: True # (bool) whether to enable deterministic mode
single_cls: False # (bool) train multi-class data as single-class
rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
cos_lr: False # (bool) use cosine learning rate scheduler
close_mosaic: 10 # (int) disable mosaic augmentation for final epochs (0 to disable)
resume: False # (bool) resume training from last checkpoint
amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set)
profile: False # (bool) profile ONNX and TensorRT speeds during training for loggers
freeze: None # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
multi_scale: False # (bool) Whether to use multiscale during training
# Segmentation
overlap_mask: True # (bool) masks should overlap during training (segment train only)
mask_ratio: 4 # (int) mask downsample ratio (segment train only)
# Classification
dropout: 0.0 # (float) use dropout regularization (classify train only)

# Val/Test settings ----------------------------------------------------------------------------------------------------
val: True # (bool) validate/test during training
split: val # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
save_json: False # (bool) save results to JSON file
save_hybrid: False # (bool) save hybrid version of labels (labels + additional predictions)
conf: # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
iou: 0.7 # (float) intersection over union (IoU) threshold for NMS
max_det: 300 # (int) maximum number of detections per image
half: False # (bool) use half precision (FP16)
dnn: False # (bool) use OpenCV DNN for ONNX inference
plots: True # (bool) save plots and images during train/val

# Predict settings -----------------------------------------------------------------------------------------------------
source: # (str, optional) source directory for images or videos
vid_stride: 1 # (int) video frame-rate stride
stream_buffer: False # (bool) buffer all streaming frames (True) or return the most recent frame (False)
visualize: False # (bool) visualize model features
augment: False # (bool) apply image augmentation to prediction sources
agnostic_nms: False # (bool) class-agnostic NMS
classes: # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
retina_masks: False # (bool) use high-resolution segmentation masks
embed: # (list[int], optional) return feature vectors/embeddings from given layers

# Visualize settings ---------------------------------------------------------------------------------------------------
show: False # (bool) show predicted images and videos if environment allows
save_frames: False # (bool) save predicted individual video frames
save_txt: False # (bool) save results as .txt file
save_conf: False # (bool) save results with confidence scores
save_crop: False # (bool) save cropped images with results
show_labels: True # (bool) show prediction labels, i.e. 'person'
show_conf: True # (bool) show prediction confidence, i.e. '0.99'
show_boxes: True # (bool) show prediction boxes
line_width: # (int, optional) line width of the bounding boxes. Scaled to image size if None.

# Export settings ------------------------------------------------------------------------------------------------------
format: torchscript # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
keras: False # (bool) use Keras
optimize: False # (bool) TorchScript: optimize for mobile
int8: False # (bool) CoreML/TF INT8 quantization
dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
simplify: False # (bool) ONNX: simplify model using `onnxslim`
opset: # (int, optional) ONNX: opset version
workspace: 4 # (int) TensorRT: workspace size (GB)
nms: False # (bool) CoreML: add NMS

# Hyperparameters ------------------------------------------------------------------------------------------------------
lr0: 0.01 # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
lrf: 0.01 # (float) final learning rate (lr0 * lrf)
momentum: 0.937 # (float) SGD momentum/Adam beta1
weight_decay: 0.0005 # (float) optimizer weight decay 5e-4
warmup_epochs: 3.0 # (float) warmup epochs (fractions ok)
warmup_momentum: 0.8 # (float) warmup initial momentum
warmup_bias_lr: 0.1 # (float) warmup initial bias lr
box: 7.5 # (float) box loss gain
cls: 0.5 # (float) cls loss gain (scale with pixels)
dfl: 1.5 # (float) dfl loss gain
pose: 12.0 # (float) pose loss gain
kobj: 1.0 # (float) keypoint obj loss gain
label_smoothing: 0.0 # (float) label smoothing (fraction)
nbs: 64 # (int) nominal batch size
hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
hsv_v: 0.4 # (float) image HSV-Value augmentation (fraction)
degrees: 0.0 # (float) image rotation (+/- deg)
translate: 0.1 # (float) image translation (+/- fraction)
scale: 0.5 # (float) image scale (+/- gain)
shear: 0.0 # (float) image shear (+/- deg)
perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # (float) image flip up-down (probability)
fliplr: 0.5 # (float) image flip left-right (probability)
bgr: 0.0 # (float) image channel BGR (probability)
mosaic: 1.0 # (float) image mosaic (probability)
mixup: 0.0 # (float) image mixup (probability)
copy_paste: 0.0 # (float) segment copy-paste (probability)
auto_augment: randaugment # (str) auto augmentation policy for classification (randaugment, autoaugment, augmix)
erasing: 0.4 # (float) probability of random erasing during classification training (0-1)
crop_fraction: 1.0 # (float) image crop fraction for classification evaluation/inference (0-1)

# Custom config.yaml ---------------------------------------------------------------------------------------------------
cfg: # (str, optional) for overriding defaults.yaml

# Tracker settings ------------------------------------------------------------------------------------------------------
tracker: botsort.yaml # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
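
Every key above is a per-run override target: pass it as a keyword argument in Python or as a `key=value` pair on the CLI, and all remaining keys keep their defaults. A minimal sketch:

```python
from ultralytics import YOLO

# Minimal sketch: override a few of the defaults above for a single run;
# everything not passed here keeps the value from this file.
model = YOLO("yolov8n.pt")
model.train(
    data="coco8.yaml",
    epochs=50,    # overrides epochs: 100
    lr0=0.001,    # overrides lr0: 0.01
    cos_lr=True,  # overrides cos_lr: False
    mosaic=0.5,   # overrides mosaic: 1.0
)
```

The CLI equivalent would be `yolo train data=coco8.yaml epochs=50 lr0=0.001 cos_lr=True mosaic=0.5`.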
@ -1,40 +0,0 @@
## Models

Welcome to the Ultralytics Models directory! Here you will find a wide variety of pre-configured model configuration files (`*.yaml`s) that can be used to create custom YOLO models. The models in this directory have been expertly crafted and fine-tuned by the Ultralytics team to provide the best performance for a wide range of object detection and image segmentation tasks.

These model configurations cover a wide range of scenarios, from simple object detection to more complex tasks like instance segmentation and object tracking. They are also designed to run efficiently on a variety of hardware platforms, from CPUs to GPUs. Whether you are a seasoned machine learning practitioner or just getting started with YOLO, this directory provides a great starting point for your custom model development needs.

To get started, simply browse through the models in this directory and find one that best suits your needs. Once you've selected a model, you can use the provided `*.yaml` file to train and deploy your custom YOLO model with ease. See full details at the Ultralytics [Docs](https://docs.ultralytics.com/models), and if you need help or have any questions, feel free to reach out to the Ultralytics team for support. So, don't wait, start creating your custom YOLO model now!

### Usage

Model `*.yaml` files may be used directly in the Command Line Interface (CLI) with a `yolo` command:

```bash
yolo task=detect mode=train model=yolov8n.yaml data=coco128.yaml epochs=100
```

They may also be used directly in a Python environment, and accept the same [arguments](https://docs.ultralytics.com/usage/cfg/) as in the CLI example above:

```python
from ultralytics import YOLO

model = YOLO("model.yaml")  # build a YOLOv8n model from scratch
# YOLO("model.pt")  use pre-trained model if available
model.info()  # display model information
model.train(data="coco128.yaml", epochs=100)  # train the model
```

## Pre-trained Model Architectures

Ultralytics supports many model architectures. Visit https://docs.ultralytics.com/models to view detailed information and usage. Any of these models can be used by loading their configs or pretrained checkpoints if available.

## Contribute New Models

Have you trained a new YOLO variant or achieved state-of-the-art performance with specific tuning? We'd love to showcase your work in our Models section! Contributions from the community in the form of new models, architectures, or optimizations are highly valued and can significantly enrich our repository.

By contributing to this section, you're helping us offer a wider array of model choices and configurations to the community. It's a fantastic way to share your knowledge and expertise while making the Ultralytics YOLO ecosystem even more versatile.

To get started, please consult our [Contributing Guide](https://docs.ultralytics.com/help/contributing) for step-by-step instructions on how to submit a Pull Request (PR) 🛠️. Your contributions are eagerly awaited!

Let's join hands to extend the range and capabilities of the Ultralytics YOLO models 🙏!
@ -1,50 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# RT-DETR-l object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  l: [1.00, 1.00, 1024]

backbone:
  # [from, repeats, module, args]
  - [-1, 1, HGStem, [32, 48]] # 0-P2/4
  - [-1, 6, HGBlock, [48, 128, 3]] # stage 1

  - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
  - [-1, 6, HGBlock, [96, 512, 3]] # stage 2

  - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 4-P3/16
  - [-1, 6, HGBlock, [192, 1024, 5, True, False]] # cm, c2, k, light, shortcut
  - [-1, 6, HGBlock, [192, 1024, 5, True, True]]
  - [-1, 6, HGBlock, [192, 1024, 5, True, True]] # stage 3

  - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 8-P4/32
  - [-1, 6, HGBlock, [384, 2048, 5, True, False]] # stage 4

head:
  - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 10 input_proj.2
  - [-1, 1, AIFI, [1024, 8]]
  - [-1, 1, Conv, [256, 1, 1]] # 12, Y5, lateral_convs.0

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [7, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 input_proj.1
  - [[-2, -1], 1, Concat, [1]]
  - [-1, 3, RepC3, [256]] # 16, fpn_blocks.0
  - [-1, 1, Conv, [256, 1, 1]] # 17, Y4, lateral_convs.1

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 19 input_proj.0
  - [[-2, -1], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, RepC3, [256]] # X3 (21), fpn_blocks.1

  - [-1, 1, Conv, [256, 3, 2]] # 22, downsample_convs.0
  - [[-1, 17], 1, Concat, [1]] # cat Y4
  - [-1, 3, RepC3, [256]] # F4 (24), pan_blocks.0

  - [-1, 1, Conv, [256, 3, 2]] # 25, downsample_convs.1
  - [[-1, 12], 1, Concat, [1]] # cat Y5
  - [-1, 3, RepC3, [256]] # F5 (27), pan_blocks.1

  - [[21, 24, 27], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
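
Each backbone/head row above follows the `[from, repeats, module, args]` convention. An illustrative sketch of how one row reads (not the actual model parser):

```python
# Illustrative sketch (not the actual parser) of one model-spec row.
# 'from' names the layer whose output feeds this one: -1 means the previous
# layer, and a list like [-2, -1] means several outputs are concatenated.
# 'repeats' stacks the module, and 'args' are its constructor arguments.
row = [-1, 3, "RepC3", [256]]
frm, repeats, module, args = row
print(f"stack {repeats}x {module}{tuple(args)} on the output of layer {frm}")
```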
|
@ -1,42 +0,0 @@
|
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
||||
# RT-DETR-ResNet101 object detection model with P3-P5 outputs.
|
||||
|
||||
# Parameters
|
||||
nc: 80 # number of classes
|
||||
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
|
||||
# [depth, width, max_channels]
|
||||
l: [1.00, 1.00, 1024]
|
||||
|
||||
backbone:
|
||||
# [from, repeats, module, args]
|
||||
- [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
|
||||
- [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
|
||||
- [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
|
||||
- [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3
|
||||
- [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4
|
||||
|
||||
head:
|
||||
- [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
|
||||
- [-1, 1, AIFI, [1024, 8]]
|
||||
- [-1, 1, Conv, [256, 1, 1]] # 7
|
||||
|
||||
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
|
||||
- [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
|
||||
- [[-2, -1], 1, Concat, [1]]
|
||||
- [-1, 3, RepC3, [256]] # 11
|
||||
- [-1, 1, Conv, [256, 1, 1]] # 12
|
||||
|
||||
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
|
||||
- [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
|
||||
- [[-2, -1], 1, Concat, [1]] # cat backbone P4
|
||||
- [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1
|
||||
|
||||
- [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
|
||||
- [[-1, 12], 1, Concat, [1]] # cat Y4
|
||||
- [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0
|
||||
|
||||
- [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
|
||||
- [[-1, 7], 1, Concat, [1]] # cat Y5
|
||||
- [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1
|
||||
|
||||
- [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
|
@ -1,42 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# RT-DETR-ResNet50 object detection model with P3-P5 outputs.

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  l: [1.00, 1.00, 1024]

backbone:
  # [from, repeats, module, args]
  - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
  - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
  - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
  - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3
  - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4

head:
  - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
  - [-1, 1, AIFI, [1024, 8]]
  - [-1, 1, Conv, [256, 1, 1]] # 7

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
  - [[-2, -1], 1, Concat, [1]]
  - [-1, 3, RepC3, [256]] # 11
  - [-1, 1, Conv, [256, 1, 1]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
  - [[-2, -1], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1

  - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
  - [[-1, 12], 1, Concat, [1]] # cat Y4
  - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0

  - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
  - [[-1, 7], 1, Concat, [1]] # cat Y5
  - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1

  - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
@ -1,54 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# RT-DETR-x object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  x: [1.00, 1.00, 2048]

backbone:
  # [from, repeats, module, args]
  - [-1, 1, HGStem, [32, 64]] # 0-P2/4
  - [-1, 6, HGBlock, [64, 128, 3]] # stage 1

  - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
  - [-1, 6, HGBlock, [128, 512, 3]]
  - [-1, 6, HGBlock, [128, 512, 3, False, True]] # 4-stage 2

  - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 5-P3/16
  - [-1, 6, HGBlock, [256, 1024, 5, True, False]] # cm, c2, k, light, shortcut
  - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
  - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
  - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
  - [-1, 6, HGBlock, [256, 1024, 5, True, True]] # 10-stage 3

  - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 11-P4/32
  - [-1, 6, HGBlock, [512, 2048, 5, True, False]]
  - [-1, 6, HGBlock, [512, 2048, 5, True, True]] # 13-stage 4

head:
  - [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 14 input_proj.2
  - [-1, 1, AIFI, [2048, 8]]
  - [-1, 1, Conv, [384, 1, 1]] # 16, Y5, lateral_convs.0

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [10, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 18 input_proj.1
  - [[-2, -1], 1, Concat, [1]]
  - [-1, 3, RepC3, [384]] # 20, fpn_blocks.0
  - [-1, 1, Conv, [384, 1, 1]] # 21, Y4, lateral_convs.1

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [4, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 23 input_proj.0
  - [[-2, -1], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, RepC3, [384]] # X3 (25), fpn_blocks.1

  - [-1, 1, Conv, [384, 3, 2]] # 26, downsample_convs.0
  - [[-1, 21], 1, Concat, [1]] # cat Y4
  - [-1, 3, RepC3, [384]] # F4 (28), pan_blocks.0

  - [-1, 1, Conv, [384, 3, 2]] # 29, downsample_convs.1
  - [[-1, 16], 1, Concat, [1]] # cat Y5
  - [-1, 3, RepC3, [384]] # F5 (31), pan_blocks.1

  - [[25, 28, 31], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
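In every config here, the scales entry [depth, width, max_channels] rescales layers before the model is built: repeats are multiplied by depth, channel widths by width and capped at max_channels. A hedged sketch of that arithmetic (it mirrors the gist of the YAML parser's scaling, not its exact rounding rules), using a C2f row from the YOLOv8/YOLOv10 configs below:

import math

def scale_layer(repeats, channels, depth, width, max_channels):
    # Approximate how [depth, width, max_channels] rescales one layer row.
    r = max(round(repeats * depth), 1) if repeats > 1 else repeats
    c = int(math.ceil(min(channels, max_channels) * width / 8) * 8)  # keep divisible by 8
    return r, c

# Example: "[-1, 6, C2f, [256, True]]" under scale n [0.33, 0.25, 1024]
print(scale_layer(6, 256, 0.33, 0.25, 1024))  # -> (2, 64)
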
@ -1,40 +0,0 @@
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  b: [0.67, 1.00, 512]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2fCIB, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2fCIB, [512, True]] # 13

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)

  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)

  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
@ -1,40 +0,0 @@
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2fCIB, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2fCIB, [512, True]] # 13

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)

  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)

  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
@ -1,43 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2fCIB, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 13

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)

  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)

  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
@ -1,40 +0,0 @@
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 13

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 19 (P4/16-medium)

  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)

  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
@ -1,39 +0,0 @@
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  s: [0.33, 0.50, 1024]

backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2fCIB, [1024, True, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 13

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 19 (P4/16-medium)

  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)

  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
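These YOLOv10 configs reuse the YOLOv8 layout but swap in SCDown downsampling, C2fCIB blocks and a PSA layer, and end in a v10Detect head. A minimal sketch of building one from YAML with this fork's YOLOv10 class, assuming a local "yolov10s.yaml" copy of the config above:

# Sketch: build a YOLOv10 model from one of the configs above. Weights start
# random, so train before expecting useful predictions.
from ultralytics import YOLOv10

model = YOLOv10("yolov10s.yaml")
model.info()  # layer / parameter / GFLOPs summary
# model.train(data="coco8.yaml", epochs=3, imgsz=640)  # tiny smoke-test run
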
@ -1,40 +0,0 @@
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  x: [1.00, 1.25, 512]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2fCIB, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2fCIB, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2fCIB, [512, True]] # 13

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)

  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)

  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
@ -1,46 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv3-SPP object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple

# darknet53 backbone
backbone:
  # [from, number, module, args]
  - [-1, 1, Conv, [32, 3, 1]] # 0
  - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
  - [-1, 1, Bottleneck, [64]]
  - [-1, 1, Conv, [128, 3, 2]] # 3-P2/4
  - [-1, 2, Bottleneck, [128]]
  - [-1, 1, Conv, [256, 3, 2]] # 5-P3/8
  - [-1, 8, Bottleneck, [256]]
  - [-1, 1, Conv, [512, 3, 2]] # 7-P4/16
  - [-1, 8, Bottleneck, [512]]
  - [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32
  - [-1, 4, Bottleneck, [1024]] # 10

# YOLOv3-SPP head
head:
  - [-1, 1, Bottleneck, [1024, False]]
  - [-1, 1, SPP, [512, [5, 9, 13]]]
  - [-1, 1, Conv, [1024, 3, 1]]
  - [-1, 1, Conv, [512, 1, 1]]
  - [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large)

  - [-2, 1, Conv, [256, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P4
  - [-1, 1, Bottleneck, [512, False]]
  - [-1, 1, Bottleneck, [512, False]]
  - [-1, 1, Conv, [256, 1, 1]]
  - [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium)

  - [-2, 1, Conv, [128, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P3
  - [-1, 1, Bottleneck, [256, False]]
  - [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small)

  - [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5)
@ -1,37 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv3-tiny object detection model with P4-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple

# YOLOv3-tiny backbone
backbone:
  # [from, number, module, args]
  - [-1, 1, Conv, [16, 3, 1]] # 0
  - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 1-P1/2
  - [-1, 1, Conv, [32, 3, 1]]
  - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 3-P2/4
  - [-1, 1, Conv, [64, 3, 1]]
  - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 5-P3/8
  - [-1, 1, Conv, [128, 3, 1]]
  - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 7-P4/16
  - [-1, 1, Conv, [256, 3, 1]]
  - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 9-P5/32
  - [-1, 1, Conv, [512, 3, 1]]
  - [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]] # 11
  - [-1, 1, nn.MaxPool2d, [2, 1, 0]] # 12

# YOLOv3-tiny head
head:
  - [-1, 1, Conv, [1024, 3, 1]]
  - [-1, 1, Conv, [256, 1, 1]]
  - [-1, 1, Conv, [512, 3, 1]] # 15 (P5/32-large)

  - [-2, 1, Conv, [128, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P4
  - [-1, 1, Conv, [256, 3, 1]] # 19 (P4/16-medium)

  - [[19, 15], 1, Detect, [nc]] # Detect(P4, P5)
@ -1,46 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv3 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple

# darknet53 backbone
backbone:
  # [from, number, module, args]
  - [-1, 1, Conv, [32, 3, 1]] # 0
  - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
  - [-1, 1, Bottleneck, [64]]
  - [-1, 1, Conv, [128, 3, 2]] # 3-P2/4
  - [-1, 2, Bottleneck, [128]]
  - [-1, 1, Conv, [256, 3, 2]] # 5-P3/8
  - [-1, 8, Bottleneck, [256]]
  - [-1, 1, Conv, [512, 3, 2]] # 7-P4/16
  - [-1, 8, Bottleneck, [512]]
  - [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32
  - [-1, 4, Bottleneck, [1024]] # 10

# YOLOv3 head
head:
  - [-1, 1, Bottleneck, [1024, False]]
  - [-1, 1, Conv, [512, 1, 1]]
  - [-1, 1, Conv, [1024, 3, 1]]
  - [-1, 1, Conv, [512, 1, 1]]
  - [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large)

  - [-2, 1, Conv, [256, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P4
  - [-1, 1, Bottleneck, [512, False]]
  - [-1, 1, Bottleneck, [512, False]]
  - [-1, 1, Conv, [256, 1, 1]]
  - [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium)

  - [-2, 1, Conv, [128, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P3
  - [-1, 1, Bottleneck, [256, False]]
  - [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small)

  - [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5)
@ -1,59 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv5 object detection model with P3-P6 outputs. For details see https://docs.ultralytics.com/models/yolov5

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov5n-p6.yaml' will call yolov5-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 1024]
  l: [1.00, 1.00, 1024]
  x: [1.33, 1.25, 1024]

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  - [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C3, [128]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C3, [256]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 9, C3, [512]]
  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
  - [-1, 3, C3, [768]]
  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
  - [-1, 3, C3, [1024]]
  - [-1, 1, SPPF, [1024, 5]] # 11

# YOLOv5 v6.0 head
head:
  - [-1, 1, Conv, [768, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
  - [-1, 3, C3, [768, False]] # 15

  - [-1, 1, Conv, [512, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C3, [512, False]] # 19

  - [-1, 1, Conv, [256, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C3, [256, False]] # 23 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 20], 1, Concat, [1]] # cat head P4
  - [-1, 3, C3, [512, False]] # 26 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 16], 1, Concat, [1]] # cat head P5
  - [-1, 3, C3, [768, False]] # 29 (P5/32-large)

  - [-1, 1, Conv, [768, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P6
  - [-1, 3, C3, [1024, False]] # 32 (P6/64-xlarge)

  - [[23, 26, 29, 32], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
@ -1,48 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv5 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov5

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov5n.yaml' will call yolov5.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 1024]
  l: [1.00, 1.00, 1024]
  x: [1.33, 1.25, 1024]

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  - [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C3, [128]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C3, [256]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 9, C3, [512]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C3, [1024]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv5 v6.0 head
head:
  - [-1, 1, Conv, [512, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C3, [512, False]] # 13

  - [-1, 1, Conv, [256, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C3, [256, False]] # 17 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 14], 1, Concat, [1]] # cat head P4
  - [-1, 3, C3, [512, False]] # 20 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C3, [1024, False]] # 23 (P5/32-large)

  - [[17, 20, 23], 1, Detect, [nc]] # Detect(P3, P4, P5)
@ -1,53 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/models/yolov6

# Parameters
nc: 80 # number of classes
activation: nn.ReLU() # (optional) model default activation function
scales: # model compound scaling constants, i.e. 'model=yolov6n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# YOLOv6-3.0s backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 6, Conv, [128, 3, 1]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 12, Conv, [256, 3, 1]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 18, Conv, [512, 3, 1]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 6, Conv, [1024, 3, 1]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv6-3.0s head
head:
  - [-1, 1, Conv, [256, 1, 1]]
  - [-1, 1, nn.ConvTranspose2d, [256, 2, 2, 0]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 1, Conv, [256, 3, 1]]
  - [-1, 9, Conv, [256, 3, 1]] # 14

  - [-1, 1, Conv, [128, 1, 1]]
  - [-1, 1, nn.ConvTranspose2d, [128, 2, 2, 0]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 1, Conv, [128, 3, 1]]
  - [-1, 9, Conv, [128, 3, 1]] # 19

  - [-1, 1, Conv, [128, 3, 2]]
  - [[-1, 15], 1, Concat, [1]] # cat head P4
  - [-1, 1, Conv, [256, 3, 1]]
  - [-1, 9, Conv, [256, 3, 1]] # 23

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 1, Conv, [512, 3, 1]]
  - [-1, 9, Conv, [512, 3, 1]] # 27

  - [[19, 23, 27], 1, Detect, [nc]] # Detect(P3, P4, P5)
@ -1,25 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify

# Parameters
nc: 1000 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 1024]
  l: [1.00, 1.00, 1024]
  x: [1.00, 1.25, 1024]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0-P1/2
  - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1-P2/4
  - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2-P3/8
  - [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3-P4/16
  - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4-P5/32

# YOLOv8.0n head
head:
  - [-1, 1, Classify, [nc]] # Classify
@ -1,25 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify

# Parameters
nc: 1000 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 1024]
  l: [1.00, 1.00, 1024]
  x: [1.00, 1.25, 1024]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0-P1/2
  - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1-P2/4
  - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2-P3/8
  - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3-P4/16
  - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4-P5/32

# YOLOv8.0n head
head:
  - [-1, 1, Classify, [nc]] # Classify
@ -1,29 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify

# Parameters
nc: 1000 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 1024]
  l: [1.00, 1.00, 1024]
  x: [1.00, 1.25, 1024]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]

# YOLOv8.0n head
head:
  - [-1, 1, Classify, [nc]] # Classify
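The classification variants keep only a backbone plus a single Classify layer, so the whole model reduces to feature extraction followed by an nc-way classifier. A small sketch of inspecting such a config without building the model, assuming "yolov8-cls.yaml" is a local copy of the file above (PyYAML required):

# Sketch: read a classification config and inspect its structure.
import yaml

with open("yolov8-cls.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

print(cfg["nc"])             # 1000 classes
print(len(cfg["backbone"]))  # number of backbone rows
print(cfg["head"][-1])       # [-1, 1, 'Classify', ['nc']] -> the single head layer
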
@ -1,54 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p2 summary: 491 layers, 2033944 parameters, 2033928 gradients, 13.8 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p2 summary: 491 layers, 5562080 parameters, 5562064 gradients, 25.1 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m-ghost-p2 summary: 731 layers, 9031728 parameters, 9031712 gradients, 42.8 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l-ghost-p2 summary: 971 layers, 12214448 parameters, 12214432 gradients, 69.1 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x-ghost-p2 summary: 971 layers, 18664776 parameters, 18664760 gradients, 103.3 GFLOPs

# YOLOv8.0-ghost backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C3Ghost, [128, True]]
  - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C3Ghost, [256, True]]
  - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C3Ghost, [512, True]]
  - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C3Ghost, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0-ghost-p2 head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C3Ghost, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 2], 1, Concat, [1]] # cat backbone P2
  - [-1, 3, C3Ghost, [128]] # 18 (P2/4-xsmall)

  - [-1, 1, GhostConv, [128, 3, 2]]
  - [[-1, 15], 1, Concat, [1]] # cat head P3
  - [-1, 3, C3Ghost, [256]] # 21 (P3/8-small)

  - [-1, 1, GhostConv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C3Ghost, [512]] # 24 (P4/16-medium)

  - [-1, 1, GhostConv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C3Ghost, [1024]] # 27 (P5/32-large)

  - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)
@ -1,56 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p6 summary: 529 layers, 2901100 parameters, 2901084 gradients, 5.8 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p6 summary: 529 layers, 9520008 parameters, 9519992 gradients, 16.4 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m-ghost-p6 summary: 789 layers, 18002904 parameters, 18002888 gradients, 34.4 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l-ghost-p6 summary: 1049 layers, 21227584 parameters, 21227568 gradients, 55.3 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x-ghost-p6 summary: 1049 layers, 33057852 parameters, 33057836 gradients, 85.7 GFLOPs

# YOLOv8.0-ghost backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C3Ghost, [128, True]]
  - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C3Ghost, [256, True]]
  - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C3Ghost, [512, True]]
  - [-1, 1, GhostConv, [768, 3, 2]] # 7-P5/32
  - [-1, 3, C3Ghost, [768, True]]
  - [-1, 1, GhostConv, [1024, 3, 2]] # 9-P6/64
  - [-1, 3, C3Ghost, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 11

# YOLOv8.0-ghost-p6 head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
  - [-1, 3, C3Ghost, [768]] # 14

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C3Ghost, [512]] # 17

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C3Ghost, [256]] # 20 (P3/8-small)

  - [-1, 1, GhostConv, [256, 3, 2]]
  - [[-1, 17], 1, Concat, [1]] # cat head P4
  - [-1, 3, C3Ghost, [512]] # 23 (P4/16-medium)

  - [-1, 1, GhostConv, [512, 3, 2]]
  - [[-1, 14], 1, Concat, [1]] # cat head P5
  - [-1, 3, C3Ghost, [768]] # 26 (P5/32-large)

  - [-1, 1, GhostConv, [768, 3, 2]]
  - [[-1, 11], 1, Concat, [1]] # cat head P6
  - [-1, 3, C3Ghost, [1024]] # 29 (P6/64-xlarge)

  - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
@ -1,47 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n-ghost summary: 403 layers, 1865316 parameters, 1865300 gradients, 5.8 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s-ghost summary: 403 layers, 5960072 parameters, 5960056 gradients, 16.4 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m-ghost summary: 603 layers, 10336312 parameters, 10336296 gradients, 32.7 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l-ghost summary: 803 layers, 14277872 parameters, 14277856 gradients, 53.7 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x-ghost summary: 803 layers, 22229308 parameters, 22229292 gradients, 83.3 GFLOPs

# YOLOv8.0n-ghost backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C3Ghost, [128, True]]
  - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C3Ghost, [256, True]]
  - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C3Ghost, [512, True]]
  - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C3Ghost, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C3Ghost, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)

  - [-1, 1, GhostConv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C3Ghost, [512]] # 18 (P4/16-medium)

  - [-1, 1, GhostConv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C3Ghost, [1024]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
@ -1,46 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 Oriented Bounding Boxes (OBB) model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, OBB, [nc, 1]] # OBB(P3, P4, P5)
@ -1,54 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# YOLOv8.0 backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0-p2 head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 2], 1, Concat, [1]] # cat backbone P2
  - [-1, 3, C2f, [128]] # 18 (P2/4-xsmall)

  - [-1, 1, Conv, [128, 3, 2]]
  - [[-1, 15], 1, Concat, [1]] # cat head P3
  - [-1, 3, C2f, [256]] # 21 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 24 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2f, [1024]] # 27 (P5/32-large)

  - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)
@ -1,56 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# YOLOv8.0x6 backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [768, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 11

# YOLOv8.0x6 head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
  - [-1, 3, C2, [768, False]] # 14

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2, [512, False]] # 17

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2, [256, False]] # 20 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 17], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 14], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2, [768, False]] # 26 (P5/32-large)

  - [-1, 1, Conv, [768, 3, 2]]
  - [[-1, 11], 1, Concat, [1]] # cat head P6
  - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)

  - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
@ -1,57 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-pose-p6 keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose

# Parameters
nc: 1 # number of classes
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# YOLOv8.0x6 backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [768, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 11

# YOLOv8.0x6 head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
  - [-1, 3, C2, [768, False]] # 14

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2, [512, False]] # 17

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2, [256, False]] # 20 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 17], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 14], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2, [768, False]] # 26 (P5/32-large)

  - [-1, 1, Conv, [768, 3, 2]]
  - [[-1, 11], 1, Concat, [1]] # cat head P6
  - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)

  - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5, P6)
@ -1,47 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose

# Parameters
nc: 1 # number of classes
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5)
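In the pose configs, kpt_shape: [17, 3] means 17 keypoints with (x, y, visibility) each, so the Pose head regresses 17 x 3 = 51 keypoint values per detection on top of the box outputs. A small hedged sketch of that bookkeeping (illustrative shapes only; the real head also emits boxes and class scores):

import torch

num_kpts, kpt_dims = 17, 3                           # from kpt_shape: [17, 3]
num_dets = 8                                         # hypothetical detections

kpts = torch.zeros(num_dets, num_kpts * kpt_dims)    # raw 51-value vector per detection
kpts = kpts.view(num_dets, num_kpts, kpt_dims)       # -> (8, 17, 3): x, y, visibility
print(kpts.shape)
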
@ -1,46 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
@ -1,56 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-seg-p6 instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-seg-p6.yaml' will call yolov8-seg-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# YOLOv8.0x6 backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [768, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 11

# YOLOv8.0x6 head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
  - [-1, 3, C2, [768, False]] # 14

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2, [512, False]] # 17

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2, [256, False]] # 20 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 17], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 14], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2, [768, False]] # 26 (P5/32-large)

  - [-1, 1, Conv, [768, 3, 2]]
  - [[-1, 11], 1, Concat, [1]] # cat head P6
  - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)

  - [[20, 23, 26, 29], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5, P6)
@ -1,46 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5)
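In Segment, [nc, 32, 256], the 32 is the number of mask coefficients predicted per detection and 256 the channel width of the prototype branch; final instance masks come from linearly combining the prototype maps with each detection's coefficient vector. A hedged sketch of that assembly (random tensors stand in for real network outputs):

import torch

num_dets, num_coeffs = 5, 32
protos = torch.randn(num_coeffs, 160, 160)  # prototype masks (e.g. 1/4 scale)
coeffs = torch.randn(num_dets, num_coeffs)  # one 32-vector per detection

# Linear combination of prototypes per detection, then sigmoid -> soft masks
masks = torch.einsum("nc,chw->nhw", coeffs, protos).sigmoid()  # (5, 160, 160)
print(masks.shape)
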
@ -1,48 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-World object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2fAttn, [512, 256, 8]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2fAttn, [256, 128, 4]] # 15 (P3/8-small)

  - [[15, 12, 9], 1, ImagePoolingAttn, [256]] # 16 (P3/8-small)

  - [15, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fAttn, [512, 256, 8]] # 19 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fAttn, [1024, 512, 16]] # 22 (P5/32-large)

  - [[15, 19, 22], 1, WorldDetect, [nc, 512, False]] # Detect(P3, P4, P5)
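The World variants replace the C2f head blocks with text-conditioned C2fAttn blocks and end in a WorldDetect head, so the class vocabulary can be redefined at runtime from text prompts. A hedged sketch using the package's YOLOWorld class (availability of the "yolov8s-world.pt" checkpoint is an assumption):

# Sketch: open-vocabulary detection with a YOLO-World model.
from ultralytics import YOLOWorld

model = YOLOWorld("yolov8s-world.pt")
model.set_classes(["person", "bus", "backpack"])  # redefine classes from text
results = model.predict("https://ultralytics.com/images/bus.jpg")
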
@ -1,46 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-World-v2 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2fAttn, [512, 256, 8]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2fAttn, [256, 128, 4]] # 15 (P3/8-small)

  - [15, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fAttn, [512, 256, 8]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fAttn, [1024, 512, 16]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, WorldDetect, [nc, 512, True]] # Detect(P3, P4, P5)
@@ -1,46 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
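For reference, an architecture YAML like the one deleted above can be built directly by name through the `YOLO` API; this is a minimal sketch, assuming the config is still on the package's search path (the trailing `n` in the stem selects the n-scale from the `scales` block):

```python
from ultralytics import YOLO

# Build an untrained model from the YAML architecture definition;
# 'yolov8n.yaml' resolves yolov8.yaml with scale 'n' ([0.33, 0.25, 1024])
model = YOLO("yolov8n.yaml")
model.info()  # prints a layer/parameter summary, e.g. "225 layers, 3157200 parameters"
```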
@@ -1,36 +0,0 @@
# YOLOv9

# parameters
nc: 80 # number of classes

# gelan backbone
backbone:
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]] # 2
  - [-1, 1, ADown, [256]] # 3-P3/8
  - [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]] # 4
  - [-1, 1, ADown, [512]] # 5-P4/16
  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 6
  - [-1, 1, ADown, [512]] # 7-P5/32
  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 8
  - [-1, 1, SPPELAN, [512, 256]] # 9

head:
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 12

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]] # 15 (P3/8-small)

  - [-1, 1, ADown, [256]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 18 (P4/16-medium)

  - [-1, 1, ADown, [512]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
@@ -1,60 +0,0 @@
# YOLOv9

# parameters
nc: 80 # number of classes

# gelan backbone
backbone:
  - [-1, 1, Silence, []]
  - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 2-P2/4
  - [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]] # 3
  - [-1, 1, ADown, [256]] # 4-P3/8
  - [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]] # 5
  - [-1, 1, ADown, [512]] # 6-P4/16
  - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 7
  - [-1, 1, ADown, [1024]] # 8-P5/32
  - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 9

  - [1, 1, CBLinear, [[64]]] # 10
  - [3, 1, CBLinear, [[64, 128]]] # 11
  - [5, 1, CBLinear, [[64, 128, 256]]] # 12
  - [7, 1, CBLinear, [[64, 128, 256, 512]]] # 13
  - [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]] # 14

  - [0, 1, Conv, [64, 3, 2]] # 15-P1/2
  - [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]] # 16
  - [-1, 1, Conv, [128, 3, 2]] # 17-P2/4
  - [[11, 12, 13, 14, -1], 1, CBFuse, [[1, 1, 1, 1]]] # 18
  - [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]] # 19
  - [-1, 1, ADown, [256]] # 20-P3/8
  - [[12, 13, 14, -1], 1, CBFuse, [[2, 2, 2]]] # 21
  - [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]] # 22
  - [-1, 1, ADown, [512]] # 23-P4/16
  - [[13, 14, -1], 1, CBFuse, [[3, 3]]] # 24
  - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 25
  - [-1, 1, ADown, [1024]] # 26-P5/32
  - [[14, -1], 1, CBFuse, [[4]]] # 27
  - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 28
  - [-1, 1, SPPELAN, [512, 256]] # 29

# gelan head
head:
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 25], 1, Concat, [1]] # cat backbone P4
  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]] # 32

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 22], 1, Concat, [1]] # cat backbone P3
  - [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]] # 35 (P3/8-small)

  - [-1, 1, ADown, [256]]
  - [[-1, 32], 1, Concat, [1]] # cat head P4
  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]] # 38 (P4/16-medium)

  - [-1, 1, ADown, [512]]
  - [[-1, 29], 1, Concat, [1]] # cat head P5
  - [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]] # 41 (P5/32-large)

  # detect
  - [[35, 38, 41], 1, Detect, [nc]] # Detect(P3, P4, P5)
@@ -1,18 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT

tracker_type: botsort # tracker type, ['botsort', 'bytetrack']
track_high_thresh: 0.5 # threshold for the first association
track_low_thresh: 0.1 # threshold for the second association
new_track_thresh: 0.6 # threshold for initializing a new track if the detection does not match any existing tracks
track_buffer: 30 # buffer to calculate the time when to remove tracks
match_thresh: 0.8 # threshold for matching tracks
# min_box_area: 10 # threshold for min box area (for tracker evaluation, not used for now)
# mot20: False # for tracker evaluation (not used for now)

# BoT-SORT settings
gmc_method: sparseOptFlow # method of global motion compensation
# ReID model related thresh (not supported yet)
proximity_thresh: 0.5
appearance_thresh: 0.25
with_reid: False
@@ -1,11 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack

tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack']
track_high_thresh: 0.5 # threshold for the first association
track_low_thresh: 0.1 # threshold for the second association
new_track_thresh: 0.6 # threshold for initializing a new track if the detection does not match any existing tracks
track_buffer: 30 # buffer to calculate the time when to remove tracks
match_thresh: 0.8 # threshold for matching tracks
# min_box_area: 10 # threshold for min box area (for tracker evaluation, not used for now)
# mot20: False # for tracker evaluation (not used for now)
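Both tracker YAMLs above are consumed by the `track` mode; a minimal sketch of selecting one (the video path is illustrative):

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
# tracker= accepts either config file; 'botsort.yaml' is the default
results = model.track(source="video.mp4", tracker="bytetrack.yaml", conf=0.5)
```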
@@ -1,15 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from .base import BaseDataset
from .build import build_dataloader, build_yolo_dataset, load_inference_source
from .dataset import ClassificationDataset, SemanticDataset, YOLODataset

__all__ = (
    "BaseDataset",
    "ClassificationDataset",
    "SemanticDataset",
    "YOLODataset",
    "build_yolo_dataset",
    "build_dataloader",
    "load_inference_source",
)
@@ -1,50 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from pathlib import Path

from ultralytics import SAM, YOLO


def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="", output_dir=None):
    """
    Automatically annotates images using a YOLO object detection model and a SAM segmentation model.

    Args:
        data (str): Path to a folder containing images to be annotated.
        det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'.
        sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'.
        device (str, optional): Device to run the models on. Defaults to an empty string (CPU or GPU, if available).
        output_dir (str | None, optional): Directory to save the annotated results.
            Defaults to a 'labels' folder in the same directory as 'data'.

    Example:
        ```python
        from ultralytics.data.annotator import auto_annotate

        auto_annotate(data='ultralytics/assets', det_model='yolov8n.pt', sam_model='mobile_sam.pt')
        ```
    """
    det_model = YOLO(det_model)
    sam_model = SAM(sam_model)

    data = Path(data)
    if not output_dir:
        output_dir = data.parent / f"{data.stem}_auto_annotate_labels"
    Path(output_dir).mkdir(exist_ok=True, parents=True)

    det_results = det_model(data, stream=True, device=device)

    for result in det_results:
        class_ids = result.boxes.cls.int().tolist()  # noqa
        if len(class_ids):
            boxes = result.boxes.xyxy  # Boxes object for bbox outputs
            sam_results = sam_model(result.orig_img, bboxes=boxes, verbose=False, save=False, device=device)
            segments = sam_results[0].masks.xyn  # noqa

            with open(f"{Path(output_dir) / Path(result.path).stem}.txt", "w") as f:
                for i in range(len(segments)):
                    s = segments[i]
                    if len(s) == 0:
                        continue
                    segment = map(str, segments[i].reshape(-1).tolist())
                    f.write(f"{class_ids[i]} " + " ".join(segment) + "\n")
File diff suppressed because it is too large
@@ -1,311 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import glob
import math
import os
import random
from copy import deepcopy
from multiprocessing.pool import ThreadPool
from pathlib import Path
from typing import Optional

import cv2
import numpy as np
import psutil
from torch.utils.data import Dataset

from ultralytics.utils import DEFAULT_CFG, LOCAL_RANK, LOGGER, NUM_THREADS, TQDM
from .utils import HELP_URL, IMG_FORMATS


class BaseDataset(Dataset):
    """
    Base dataset class for loading and processing image data.

    Args:
        img_path (str): Path to the folder containing images.
        imgsz (int, optional): Image size. Defaults to 640.
        cache (bool, optional): Cache images to RAM or disk during training. Defaults to False.
        augment (bool, optional): If True, data augmentation is applied. Defaults to True.
        hyp (dict, optional): Hyperparameters to apply data augmentation. Defaults to None.
        prefix (str, optional): Prefix to print in log messages. Defaults to ''.
        rect (bool, optional): If True, rectangular training is used. Defaults to False.
        batch_size (int, optional): Size of batches. Defaults to None.
        stride (int, optional): Stride. Defaults to 32.
        pad (float, optional): Padding. Defaults to 0.0.
        single_cls (bool, optional): If True, single class training is used. Defaults to False.
        classes (list): List of included classes. Default is None.
        fraction (float): Fraction of dataset to utilize. Default is 1.0 (use all data).

    Attributes:
        im_files (list): List of image file paths.
        labels (list): List of label data dictionaries.
        ni (int): Number of images in the dataset.
        ims (list): List of loaded images.
        npy_files (list): List of numpy file paths.
        transforms (callable): Image transformation function.
    """

    def __init__(
        self,
        img_path,
        imgsz=640,
        cache=False,
        augment=True,
        hyp=DEFAULT_CFG,
        prefix="",
        rect=False,
        batch_size=16,
        stride=32,
        pad=0.5,
        single_cls=False,
        classes=None,
        fraction=1.0,
    ):
        """Initialize BaseDataset with given configuration and options."""
        super().__init__()
        self.img_path = img_path
        self.imgsz = imgsz
        self.augment = augment
        self.single_cls = single_cls
        self.prefix = prefix
        self.fraction = fraction
        self.im_files = self.get_img_files(self.img_path)
        self.labels = self.get_labels()
        self.update_labels(include_class=classes)  # single_cls and include_class
        self.ni = len(self.labels)  # number of images
        self.rect = rect
        self.batch_size = batch_size
        self.stride = stride
        self.pad = pad
        if self.rect:
            assert self.batch_size is not None
            self.set_rectangle()

        # Buffer thread for mosaic images
        self.buffer = []  # buffer size = batch size
        self.max_buffer_length = min((self.ni, self.batch_size * 8, 1000)) if self.augment else 0

        # Cache images
        if cache == "ram" and not self.check_cache_ram():
            cache = False
        self.ims, self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni, [None] * self.ni
        self.npy_files = [Path(f).with_suffix(".npy") for f in self.im_files]
        if cache:
            self.cache_images(cache)

        # Transforms
        self.transforms = self.build_transforms(hyp=hyp)

    def get_img_files(self, img_path):
        """Read image files."""
        try:
            f = []  # image files
            for p in img_path if isinstance(img_path, list) else [img_path]:
                p = Path(p)  # os-agnostic
                if p.is_dir():  # dir
                    f += glob.glob(str(p / "**" / "*.*"), recursive=True)
                    # F = list(p.rglob('*.*'))  # pathlib
                elif p.is_file():  # file
                    with open(p) as t:
                        t = t.read().strip().splitlines()
                        parent = str(p.parent) + os.sep
                        f += [x.replace("./", parent) if x.startswith("./") else x for x in t]  # local to global path
                        # F += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
                else:
                    raise FileNotFoundError(f"{self.prefix}{p} does not exist")
            im_files = sorted(x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in IMG_FORMATS)
            # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS])  # pathlib
            assert im_files, f"{self.prefix}No images found in {img_path}"
        except Exception as e:
            raise FileNotFoundError(f"{self.prefix}Error loading data from {img_path}\n{HELP_URL}") from e
        if self.fraction < 1:
            # im_files = im_files[: round(len(im_files) * self.fraction)]
            num_elements_to_select = round(len(im_files) * self.fraction)
            im_files = random.sample(im_files, num_elements_to_select)
        return im_files

    def update_labels(self, include_class: Optional[list]):
        """Update labels to include only these classes (optional)."""
        include_class_array = np.array(include_class).reshape(1, -1)
        for i in range(len(self.labels)):
            if include_class is not None:
                cls = self.labels[i]["cls"]
                bboxes = self.labels[i]["bboxes"]
                segments = self.labels[i]["segments"]
                keypoints = self.labels[i]["keypoints"]
                j = (cls == include_class_array).any(1)
                self.labels[i]["cls"] = cls[j]
                self.labels[i]["bboxes"] = bboxes[j]
                if segments:
                    self.labels[i]["segments"] = [segments[si] for si, idx in enumerate(j) if idx]
                if keypoints is not None:
                    self.labels[i]["keypoints"] = keypoints[j]
            if self.single_cls:
                self.labels[i]["cls"][:, 0] = 0

    def load_image(self, i, rect_mode=True):
        """Loads 1 image from dataset index 'i', returns (im, original hw, resized hw)."""
        im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
        if im is None:  # not cached in RAM
            if fn.exists():  # load npy
                try:
                    im = np.load(fn)
                except Exception as e:
                    LOGGER.warning(f"{self.prefix}WARNING ⚠️ Removing corrupt *.npy image file {fn} due to: {e}")
                    Path(fn).unlink(missing_ok=True)
                    im = cv2.imread(f)  # BGR
            else:  # read image
                im = cv2.imread(f)  # BGR
            if im is None:
                raise FileNotFoundError(f"Image Not Found {f}")

            h0, w0 = im.shape[:2]  # orig hw
            if rect_mode:  # resize long side to imgsz while maintaining aspect ratio
                r = self.imgsz / max(h0, w0)  # ratio
                if r != 1:  # if sizes are not equal
                    w, h = (min(math.ceil(w0 * r), self.imgsz), min(math.ceil(h0 * r), self.imgsz))
                    im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
            elif not (h0 == w0 == self.imgsz):  # resize by stretching image to square imgsz
                im = cv2.resize(im, (self.imgsz, self.imgsz), interpolation=cv2.INTER_LINEAR)

            # Add to buffer if training with augmentations
            if self.augment:
                self.ims[i], self.im_hw0[i], self.im_hw[i] = im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
                self.buffer.append(i)
                if len(self.buffer) >= self.max_buffer_length:
                    j = self.buffer.pop(0)
                    self.ims[j], self.im_hw0[j], self.im_hw[j] = None, None, None

            return im, (h0, w0), im.shape[:2]

        return self.ims[i], self.im_hw0[i], self.im_hw[i]

    def cache_images(self, cache):
        """Cache images to memory or disk."""
        b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabyte
        fcn = self.cache_images_to_disk if cache == "disk" else self.load_image
        with ThreadPool(NUM_THREADS) as pool:
            results = pool.imap(fcn, range(self.ni))
            pbar = TQDM(enumerate(results), total=self.ni, disable=LOCAL_RANK > 0)
            for i, x in pbar:
                if cache == "disk":
                    b += self.npy_files[i].stat().st_size
                else:  # 'ram'
                    self.ims[i], self.im_hw0[i], self.im_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
                    b += self.ims[i].nbytes
                pbar.desc = f"{self.prefix}Caching images ({b / gb:.1f}GB {cache})"
            pbar.close()

    def cache_images_to_disk(self, i):
        """Saves an image as an *.npy file for faster loading."""
        f = self.npy_files[i]
        if not f.exists():
            np.save(f.as_posix(), cv2.imread(self.im_files[i]), allow_pickle=False)

    def check_cache_ram(self, safety_margin=0.5):
        """Check image caching requirements vs available memory."""
        b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabyte
        n = min(self.ni, 30)  # extrapolate from 30 random images
        for _ in range(n):
            im = cv2.imread(random.choice(self.im_files))  # sample image
            ratio = self.imgsz / max(im.shape[0], im.shape[1])  # max(h, w)  # ratio
            b += im.nbytes * ratio**2
        mem_required = b * self.ni / n * (1 + safety_margin)  # bytes required to cache dataset into RAM
        mem = psutil.virtual_memory()
        cache = mem_required < mem.available  # to cache or not to cache, that is the question
        if not cache:
            LOGGER.info(
                f'{self.prefix}{mem_required / gb:.1f}GB RAM required to cache images '
                f'with {int(safety_margin * 100)}% safety margin but only '
                f'{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, '
                f"{'caching images ✅' if cache else 'not caching images ⚠️'}"
            )
        return cache

    def set_rectangle(self):
        """Sets the shape of bounding boxes for YOLO detections as rectangles."""
        bi = np.floor(np.arange(self.ni) / self.batch_size).astype(int)  # batch index
        nb = bi[-1] + 1  # number of batches

        s = np.array([x.pop("shape") for x in self.labels])  # hw
        ar = s[:, 0] / s[:, 1]  # aspect ratio
        irect = ar.argsort()
        self.im_files = [self.im_files[i] for i in irect]
        self.labels = [self.labels[i] for i in irect]
        ar = ar[irect]

        # Set training image shapes
        shapes = [[1, 1]] * nb
        for i in range(nb):
            ari = ar[bi == i]
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:
                shapes[i] = [maxi, 1]
            elif mini > 1:
                shapes[i] = [1, 1 / mini]

        self.batch_shapes = np.ceil(np.array(shapes) * self.imgsz / self.stride + self.pad).astype(int) * self.stride
        self.batch = bi  # batch index of image

    def __getitem__(self, index):
        """Returns transformed label information for given index."""
        return self.transforms(self.get_image_and_label(index))

    def get_image_and_label(self, index):
        """Get and return label information from the dataset."""
        label = deepcopy(self.labels[index])  # requires deepcopy() https://github.com/ultralytics/ultralytics/pull/1948
        label.pop("shape", None)  # shape is for rect, remove it
        label["img"], label["ori_shape"], label["resized_shape"] = self.load_image(index)
        label["ratio_pad"] = (
            label["resized_shape"][0] / label["ori_shape"][0],
            label["resized_shape"][1] / label["ori_shape"][1],
        )  # for evaluation
        if self.rect:
            label["rect_shape"] = self.batch_shapes[self.batch[index]]
        return self.update_labels_info(label)

    def __len__(self):
        """Returns the length of the labels list for the dataset."""
        return len(self.labels)

    def update_labels_info(self, label):
        """Customize your label format here."""
        return label

    def build_transforms(self, hyp=None):
        """
        Users can customize augmentations here.

        Example:
            ```python
            if self.augment:
                # Training transforms
                return Compose([])
            else:
                # Val transforms
                return Compose([])
            ```
        """
        raise NotImplementedError

    def get_labels(self):
        """
        Users can customize their own format here.

        Note:
            Ensure output is a dictionary with the following keys:
            ```python
            dict(
                im_file=im_file,
                shape=shape,  # format: (height, width)
                cls=cls,
                bboxes=bboxes,  # xywh
                segments=segments,  # xy
                keypoints=keypoints,  # xy
                normalized=True,  # or False
                bbox_format="xyxy",  # or xywh, ltwh
            )
            ```
        """
        raise NotImplementedError
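Since `get_labels` and `build_transforms` both raise `NotImplementedError`, `BaseDataset` is effectively abstract. The following is a minimal, hypothetical subclass sketch showing what a concrete implementation must supply; `MyDataset` and the dummy label values are illustrative only, and `update_labels_info` mirrors the `Instances` packing that `YOLODataset` performs:

```python
import numpy as np

from ultralytics.data.augment import Compose, Instances, LetterBox
from ultralytics.data.base import BaseDataset


class MyDataset(BaseDataset):  # hypothetical subclass for illustration
    def get_labels(self):
        """Return one dict per image, using the keys listed in BaseDataset.get_labels()."""
        return [
            dict(
                im_file=f,
                shape=(640, 640),  # (height, width); replace with real image shapes
                cls=np.zeros((0, 1), dtype=np.float32),  # dummy: no objects
                bboxes=np.zeros((0, 4), dtype=np.float32),  # xywh
                segments=[],
                keypoints=None,
                normalized=True,
                bbox_format="xywh",
            )
            for f in self.im_files
        ]

    def update_labels_info(self, label):
        """Pack the raw arrays into an Instances object, as YOLODataset does."""
        label["instances"] = Instances(
            label.pop("bboxes"),
            np.zeros((0, 1000, 2), dtype=np.float32),  # no segments in this sketch
            label.pop("keypoints", None),
            bbox_format=label.pop("bbox_format"),
            normalized=label.pop("normalized"),
        )
        return label

    def build_transforms(self, hyp=None):
        """Val-style transforms only; a training pipeline would add augmentations here."""
        return Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), scaleup=False)])
```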
@@ -1,186 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import os
import random
from pathlib import Path

import numpy as np
import torch
from PIL import Image
from torch.utils.data import dataloader, distributed

from ultralytics.data.loaders import (
    LOADERS,
    LoadImagesAndVideos,
    LoadPilAndNumpy,
    LoadScreenshots,
    LoadStreams,
    LoadTensor,
    SourceTypes,
    autocast_list,
)
from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS
from ultralytics.utils import RANK, colorstr
from ultralytics.utils.checks import check_file
from .dataset import YOLODataset
from .utils import PIN_MEMORY


class InfiniteDataLoader(dataloader.DataLoader):
    """
    Dataloader that reuses workers.

    Uses same syntax as vanilla DataLoader.
    """

    def __init__(self, *args, **kwargs):
        """Dataloader that infinitely recycles workers, inherits from DataLoader."""
        super().__init__(*args, **kwargs)
        object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler))
        self.iterator = super().__iter__()

    def __len__(self):
        """Returns the length of the batch sampler's sampler."""
        return len(self.batch_sampler.sampler)

    def __iter__(self):
        """Creates a sampler that repeats indefinitely."""
        for _ in range(len(self)):
            yield next(self.iterator)

    def reset(self):
        """
        Reset iterator.

        This is useful when we want to modify settings of dataset while training.
        """
        self.iterator = self._get_iterator()


class _RepeatSampler:
    """
    Sampler that repeats forever.

    Args:
        sampler (Dataset.sampler): The sampler to repeat.
    """

    def __init__(self, sampler):
        """Initializes an object that repeats a given sampler indefinitely."""
        self.sampler = sampler

    def __iter__(self):
        """Iterates over the 'sampler' and yields its contents."""
        while True:
            yield from iter(self.sampler)


def seed_worker(worker_id):  # noqa
    """Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader."""
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)


def build_yolo_dataset(cfg, img_path, batch, data, mode="train", rect=False, stride=32):
    """Build YOLO Dataset."""
    return YOLODataset(
        img_path=img_path,
        imgsz=cfg.imgsz,
        batch_size=batch,
        augment=mode == "train",  # augmentation
        hyp=cfg,  # TODO: probably add a get_hyps_from_cfg function
        rect=cfg.rect or rect,  # rectangular batches
        cache=cfg.cache or None,
        single_cls=cfg.single_cls or False,
        stride=int(stride),
        pad=0.0 if mode == "train" else 0.5,
        prefix=colorstr(f"{mode}: "),
        task=cfg.task,
        classes=cfg.classes,
        data=data,
        fraction=cfg.fraction if mode == "train" else 1.0,
    )


def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):
    """Return an InfiniteDataLoader or DataLoader for training or validation set."""
    batch = min(batch, len(dataset))
    nd = torch.cuda.device_count()  # number of CUDA devices
    nw = min([os.cpu_count() // max(nd, 1), workers])  # number of workers
    sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
    generator = torch.Generator()
    generator.manual_seed(6148914691236517205 + RANK)
    return InfiniteDataLoader(
        dataset=dataset,
        batch_size=batch,
        shuffle=shuffle and sampler is None,
        num_workers=nw,
        sampler=sampler,
        pin_memory=PIN_MEMORY,
        collate_fn=getattr(dataset, "collate_fn", None),
        worker_init_fn=seed_worker,
        generator=generator,
    )


def check_source(source):
    """Check source type and return corresponding flag values."""
    webcam, screenshot, from_img, in_memory, tensor = False, False, False, False, False
    if isinstance(source, (str, int, Path)):  # int for local usb camera
        source = str(source)
        is_file = Path(source).suffix[1:] in (IMG_FORMATS | VID_FORMATS)
        is_url = source.lower().startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://"))
        webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file)
        screenshot = source.lower() == "screen"
        if is_url and is_file:
            source = check_file(source)  # download
    elif isinstance(source, LOADERS):
        in_memory = True
    elif isinstance(source, (list, tuple)):
        source = autocast_list(source)  # convert all list elements to PIL or np arrays
        from_img = True
    elif isinstance(source, (Image.Image, np.ndarray)):
        from_img = True
    elif isinstance(source, torch.Tensor):
        tensor = True
    else:
        raise TypeError("Unsupported image type. For supported types see https://docs.ultralytics.com/modes/predict")

    return source, webcam, screenshot, from_img, in_memory, tensor


def load_inference_source(source=None, batch=1, vid_stride=1, buffer=False):
    """
    Loads an inference source for object detection and applies necessary transformations.

    Args:
        source (str, Path, Tensor, PIL.Image, np.ndarray): The input source for inference.
        batch (int, optional): Batch size for dataloaders. Default is 1.
        vid_stride (int, optional): The frame interval for video sources. Default is 1.
        buffer (bool, optional): Determines whether stream frames will be buffered. Default is False.

    Returns:
        dataset (Dataset): A dataset object for the specified input source.
    """
    source, stream, screenshot, from_img, in_memory, tensor = check_source(source)
    source_type = source.source_type if in_memory else SourceTypes(stream, screenshot, from_img, tensor)

    # Dataloader
    if tensor:
        dataset = LoadTensor(source)
    elif in_memory:
        dataset = source
    elif stream:
        dataset = LoadStreams(source, vid_stride=vid_stride, buffer=buffer)
    elif screenshot:
        dataset = LoadScreenshots(source)
    elif from_img:
        dataset = LoadPilAndNumpy(source)
    else:
        dataset = LoadImagesAndVideos(source, batch=batch, vid_stride=vid_stride)

    # Attach source types to the dataset
    setattr(dataset, "source_type", source_type)

    return dataset
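A short sketch of how `check_source` and `load_inference_source` compose at predict time (the sample image path is illustrative; a numpy array or PIL image would be routed to `LoadPilAndNumpy` instead of `LoadImagesAndVideos`):

```python
from ultralytics.data import load_inference_source

# A plain string path routes through check_source() to LoadImagesAndVideos
dataset = load_inference_source("ultralytics/assets/bus.jpg", batch=1)
print(dataset.source_type)  # SourceTypes with all flags False for a local file

for paths, imgs, info in dataset:  # each iteration yields one batch of (paths, images, metadata)
    print(paths, imgs[0].shape, info)
```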
@@ -1,542 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import json
from collections import defaultdict
from pathlib import Path

import cv2
import numpy as np

from ultralytics.utils import LOGGER, TQDM
from ultralytics.utils.files import increment_path


def coco91_to_coco80_class():
    """
    Converts 91-index COCO class IDs to 80-index COCO class IDs.

    Returns:
        (list): A list of 91 class IDs where the index represents the 80-index class ID and the value is the
            corresponding 91-index class ID.
    """
    return [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, None, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, None, 24, 25,
        None, None, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, None, 40, 41, 42, 43, 44, 45, 46, 47,
        48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, None, 60, None, None, 61, None, 62, 63, 64, 65, 66, 67, 68,
        69, 70, 71, 72, None, 73, 74, 75, 76, 77, 78, 79, None,
    ]


def coco80_to_coco91_class():
    """
    Converts 80-index (val2014) to 91-index (paper).
    For details see https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/.

    Example:
        ```python
        import numpy as np

        a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
        b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
        x1 = [list(a[i] == b).index(True) + 1 for i in range(80)]  # darknet to coco
        x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)]  # coco to darknet
        ```
    """
    return [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33,
        34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90,
    ]


def convert_coco(
    labels_dir="../coco/annotations/",
    save_dir="coco_converted/",
    use_segments=False,
    use_keypoints=False,
    cls91to80=True,
):
    """
    Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.

    Args:
        labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
        save_dir (str, optional): Path to directory to save results to.
        use_segments (bool, optional): Whether to include segmentation masks in the output.
        use_keypoints (bool, optional): Whether to include keypoint annotations in the output.
        cls91to80 (bool, optional): Whether to map 91 COCO class IDs to the corresponding 80 COCO class IDs.

    Example:
        ```python
        from ultralytics.data.converter import convert_coco

        convert_coco('../datasets/coco/annotations/', use_segments=True, use_keypoints=False, cls91to80=True)
        ```

    Output:
        Generates output files in the specified output directory.
    """

    # Create dataset directory
    save_dir = increment_path(save_dir)  # increment if save directory already exists
    for p in save_dir / "labels", save_dir / "images":
        p.mkdir(parents=True, exist_ok=True)  # make dir

    # Convert classes
    coco80 = coco91_to_coco80_class()

    # Import json
    for json_file in sorted(Path(labels_dir).resolve().glob("*.json")):
        fn = Path(save_dir) / "labels" / json_file.stem.replace("instances_", "")  # folder name
        fn.mkdir(parents=True, exist_ok=True)
        with open(json_file) as f:
            data = json.load(f)

        # Create image dict
        images = {f'{x["id"]:d}': x for x in data["images"]}
        # Create image-annotations dict
        imgToAnns = defaultdict(list)
        for ann in data["annotations"]:
            imgToAnns[ann["image_id"]].append(ann)

        # Write labels file
        for img_id, anns in TQDM(imgToAnns.items(), desc=f"Annotations {json_file}"):
            img = images[f"{img_id:d}"]
            h, w, f = img["height"], img["width"], img["file_name"]

            bboxes = []
            segments = []
            keypoints = []
            for ann in anns:
                if ann["iscrowd"]:
                    continue
                # The COCO box format is [top left x, top left y, width, height]
                box = np.array(ann["bbox"], dtype=np.float64)
                box[:2] += box[2:] / 2  # xy top-left corner to center
                box[[0, 2]] /= w  # normalize x
                box[[1, 3]] /= h  # normalize y
                if box[2] <= 0 or box[3] <= 0:  # if w <= 0 or h <= 0
                    continue

                cls = coco80[ann["category_id"] - 1] if cls91to80 else ann["category_id"] - 1  # class
                box = [cls] + box.tolist()
                if box not in bboxes:
                    bboxes.append(box)
                    if use_segments and ann.get("segmentation") is not None:
                        if len(ann["segmentation"]) == 0:
                            segments.append([])
                            continue
                        elif len(ann["segmentation"]) > 1:
                            s = merge_multi_segment(ann["segmentation"])
                            s = (np.concatenate(s, axis=0) / np.array([w, h])).reshape(-1).tolist()
                        else:
                            s = [j for i in ann["segmentation"] for j in i]  # all segments concatenated
                            s = (np.array(s).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
                        s = [cls] + s
                        segments.append(s)
                    if use_keypoints and ann.get("keypoints") is not None:
                        keypoints.append(
                            box + (np.array(ann["keypoints"]).reshape(-1, 3) / np.array([w, h, 1])).reshape(-1).tolist()
                        )

            # Write
            with open((fn / f).with_suffix(".txt"), "a") as file:
                for i in range(len(bboxes)):
                    if use_keypoints:
                        line = (*(keypoints[i]),)  # cls, box, keypoints
                    else:
                        line = (
                            *(segments[i] if use_segments and len(segments[i]) > 0 else bboxes[i]),
                        )  # cls, box or segments
                    file.write(("%g " * len(line)).rstrip() % line + "\n")

    LOGGER.info(f"COCO data converted successfully.\nResults saved to {save_dir.resolve()}")


def convert_dota_to_yolo_obb(dota_root_path: str):
    """
    Converts DOTA dataset annotations to YOLO OBB (Oriented Bounding Box) format.

    The function processes images in the 'train' and 'val' folders of the DOTA dataset. For each image, it reads the
    associated label from the original labels directory and writes new labels in YOLO OBB format to a new directory.

    Args:
        dota_root_path (str): The root directory path of the DOTA dataset.

    Example:
        ```python
        from ultralytics.data.converter import convert_dota_to_yolo_obb

        convert_dota_to_yolo_obb('path/to/DOTA')
        ```

    Notes:
        The directory structure assumed for the DOTA dataset:

            - DOTA
                ├─ images
                │   ├─ train
                │   └─ val
                └─ labels
                    ├─ train_original
                    └─ val_original

        After execution, the function will organize the labels into:

            - DOTA
                └─ labels
                    ├─ train
                    └─ val
    """
    dota_root_path = Path(dota_root_path)

    # Class names to indices mapping
    class_mapping = {
        "plane": 0,
        "ship": 1,
        "storage-tank": 2,
        "baseball-diamond": 3,
        "tennis-court": 4,
        "basketball-court": 5,
        "ground-track-field": 6,
        "harbor": 7,
        "bridge": 8,
        "large-vehicle": 9,
        "small-vehicle": 10,
        "helicopter": 11,
        "roundabout": 12,
        "soccer-ball-field": 13,
        "swimming-pool": 14,
        "container-crane": 15,
        "airport": 16,
        "helipad": 17,
    }

    def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
        """Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory."""
        orig_label_path = orig_label_dir / f"{image_name}.txt"
        save_path = save_dir / f"{image_name}.txt"

        with orig_label_path.open("r") as f, save_path.open("w") as g:
            lines = f.readlines()
            for line in lines:
                parts = line.strip().split()
                if len(parts) < 9:
                    continue
                class_name = parts[8]
                class_idx = class_mapping[class_name]
                coords = [float(p) for p in parts[:8]]
                normalized_coords = [
                    coords[i] / image_width if i % 2 == 0 else coords[i] / image_height for i in range(8)
                ]
                formatted_coords = ["{:.6g}".format(coord) for coord in normalized_coords]
                g.write(f"{class_idx} {' '.join(formatted_coords)}\n")

    for phase in ["train", "val"]:
        image_dir = dota_root_path / "images" / phase
        orig_label_dir = dota_root_path / "labels" / f"{phase}_original"
        save_dir = dota_root_path / "labels" / phase

        save_dir.mkdir(parents=True, exist_ok=True)

        image_paths = list(image_dir.iterdir())
        for image_path in TQDM(image_paths, desc=f"Processing {phase} images"):
            if image_path.suffix != ".png":
                continue
            image_name_without_ext = image_path.stem
            img = cv2.imread(str(image_path))
            h, w = img.shape[:2]
            convert_label(image_name_without_ext, w, h, orig_label_dir, save_dir)


def min_index(arr1, arr2):
    """
    Find a pair of indexes with the shortest distance between two arrays of 2D points.

    Args:
        arr1 (np.ndarray): A NumPy array of shape (N, 2) representing N 2D points.
        arr2 (np.ndarray): A NumPy array of shape (M, 2) representing M 2D points.

    Returns:
        (tuple): A tuple containing the indexes of the points with the shortest distance in arr1 and arr2 respectively.
    """
    dis = ((arr1[:, None, :] - arr2[None, :, :]) ** 2).sum(-1)
    return np.unravel_index(np.argmin(dis, axis=None), dis.shape)


def merge_multi_segment(segments):
    """
    Merge multiple segments into one list by connecting the coordinates with the minimum distance between each segment.
    This function connects these coordinates with a thin line to merge all segments into one.

    Args:
        segments (List[List]): Original segmentations in COCO's JSON file.
            Each element is a list of coordinates, like [segmentation1, segmentation2,...].

    Returns:
        s (List[np.ndarray]): A list of connected segments represented as NumPy arrays.
    """
    s = []
    segments = [np.array(i).reshape(-1, 2) for i in segments]
    idx_list = [[] for _ in range(len(segments))]

    # Record the indexes with min distance between each segment
    for i in range(1, len(segments)):
        idx1, idx2 = min_index(segments[i - 1], segments[i])
        idx_list[i - 1].append(idx1)
        idx_list[i].append(idx2)

    # Use two rounds to connect all the segments
    for k in range(2):
        # Forward connection
        if k == 0:
            for i, idx in enumerate(idx_list):
                # Middle segments have two indexes, reverse the index of middle segments
                if len(idx) == 2 and idx[0] > idx[1]:
                    idx = idx[::-1]
                    segments[i] = segments[i][::-1, :]

                segments[i] = np.roll(segments[i], -idx[0], axis=0)
                segments[i] = np.concatenate([segments[i], segments[i][:1]])
                # Deal with the first segment and the last one
                if i in [0, len(idx_list) - 1]:
                    s.append(segments[i])
                else:
                    idx = [0, idx[1] - idx[0]]
                    s.append(segments[i][idx[0] : idx[1] + 1])

        else:
            for i in range(len(idx_list) - 1, -1, -1):
                if i not in [0, len(idx_list) - 1]:
                    idx = idx_list[i]
                    nidx = abs(idx[1] - idx[0])
                    s.append(segments[i][nidx:])
    return s


def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
    """
    Converts existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB)
    in YOLO format. Generates segmentation data using SAM auto-annotator as needed.

    Args:
        im_dir (str | Path): Path to image directory to convert.
        save_dir (str | Path): Path to save the generated labels; labels will be saved
            into `labels-segment` at the same directory level as `im_dir` if save_dir is None. Default: None.
        sam_model (str): Segmentation model to use for intermediate segmentation data; optional.

    Notes:
        The input directory structure assumed for dataset:

            - im_dir
                ├─ 001.jpg
                ├─ ..
                └─ NNN.jpg
            - labels
                ├─ 001.txt
                ├─ ..
                └─ NNN.txt
    """
    from ultralytics.data import YOLODataset
    from ultralytics.utils.ops import xywh2xyxy
    from ultralytics.utils import LOGGER
    from ultralytics import SAM
    from tqdm import tqdm

    # NOTE: add placeholder to pass class index check
    dataset = YOLODataset(im_dir, data=dict(names=list(range(1000))))
    if len(dataset.labels[0]["segments"]) > 0:  # if it's segment data
        LOGGER.info("Segmentation labels detected, no need to generate new ones!")
        return

    LOGGER.info("Detection labels detected, generating segment labels by SAM model!")
    sam_model = SAM(sam_model)
    for l in tqdm(dataset.labels, total=len(dataset.labels), desc="Generating segment labels"):
        h, w = l["shape"]
        boxes = l["bboxes"]
        if len(boxes) == 0:  # skip empty labels
            continue
        boxes[:, [0, 2]] *= w
        boxes[:, [1, 3]] *= h
        im = cv2.imread(l["im_file"])
        sam_results = sam_model(im, bboxes=xywh2xyxy(boxes), verbose=False, save=False)
        l["segments"] = sam_results[0].masks.xyn

    save_dir = Path(save_dir) if save_dir else Path(im_dir).parent / "labels-segment"
    save_dir.mkdir(parents=True, exist_ok=True)
    for l in dataset.labels:
        texts = []
        lb_name = Path(l["im_file"]).with_suffix(".txt").name
        txt_file = save_dir / lb_name
        cls = l["cls"]
        for i, s in enumerate(l["segments"]):
            line = (int(cls[i]), *s.reshape(-1))
            texts.append(("%g " * len(line)).rstrip() % line)
        if texts:
            with open(txt_file, "a") as f:
                f.writelines(text + "\n" for text in texts)
    LOGGER.info(f"Generated segment labels saved in {save_dir}")
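The two geometry helpers above are easiest to see on toy data; a minimal sketch (the point arrays are made up for illustration):

```python
import numpy as np

from ultralytics.data.converter import merge_multi_segment, min_index

a = np.array([[0.0, 0.0], [1.0, 0.0], [1.0, 1.0]])
b = np.array([[5.0, 5.0], [1.2, 1.1]])
print(min_index(a, b))  # (2, 1): a[2] and b[1] are the closest pair of points

# merge_multi_segment() stitches COCO multi-part segmentations into one polygon,
# joining the parts at their closest points
merged = merge_multi_segment([a.reshape(-1).tolist(), b.reshape(-1).tolist()])
print(np.concatenate(merged, axis=0).shape)  # one connected (K, 2) point sequence
```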
@ -1,383 +0,0 @@
|
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
||||
import contextlib
|
||||
from itertools import repeat
|
||||
from multiprocessing.pool import ThreadPool
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import torch
|
||||
import torchvision
|
||||
from PIL import Image
|
||||
|
||||
from ultralytics.utils import LOCAL_RANK, NUM_THREADS, TQDM, colorstr, is_dir_writeable
|
||||
from ultralytics.utils.ops import resample_segments
|
||||
from .augment import Compose, Format, Instances, LetterBox, classify_augmentations, classify_transforms, v8_transforms
|
||||
from .base import BaseDataset
|
||||
from .utils import HELP_URL, LOGGER, get_hash, img2label_paths, verify_image, verify_image_label
|
||||
|
||||
# Ultralytics dataset *.cache version, >= 1.0.0 for YOLOv8
|
||||
DATASET_CACHE_VERSION = "1.0.3"
|
||||
|
||||
|
||||
class YOLODataset(BaseDataset):
|
||||
"""
|
||||
Dataset class for loading object detection and/or segmentation labels in YOLO format.
|
||||
|
||||
Args:
|
||||
data (dict, optional): A dataset YAML dictionary. Defaults to None.
|
||||
task (str): An explicit arg to point current task, Defaults to 'detect'.
|
||||
|
||||
Returns:
|
||||
(torch.utils.data.Dataset): A PyTorch dataset object that can be used for training an object detection model.
|
||||
"""
|
||||
|
||||
def __init__(self, *args, data=None, task="detect", **kwargs):
|
||||
"""Initializes the YOLODataset with optional configurations for segments and keypoints."""
|
||||
self.use_segments = task == "segment"
|
||||
self.use_keypoints = task == "pose"
|
||||
self.use_obb = task == "obb"
|
||||
self.data = data
|
||||
assert not (self.use_segments and self.use_keypoints), "Can not use both segments and keypoints."
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def cache_labels(self, path=Path("./labels.cache")):
|
||||
"""
|
||||
Cache dataset labels, check images and read shapes.
|
||||
|
||||
Args:
|
||||
path (Path): Path where to save the cache file. Default is Path('./labels.cache').
|
||||
|
||||
Returns:
|
||||
(dict): labels.
|
||||
"""
|
||||
x = {"labels": []}
|
||||
nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages
|
||||
desc = f"{self.prefix}Scanning {path.parent / path.stem}..."
|
||||
total = len(self.im_files)
|
||||
nkpt, ndim = self.data.get("kpt_shape", (0, 0))
|
||||
if self.use_keypoints and (nkpt <= 0 or ndim not in (2, 3)):
|
||||
raise ValueError(
|
||||
"'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of "
|
||||
"keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'"
|
||||
)
|
||||
with ThreadPool(NUM_THREADS) as pool:
|
||||
results = pool.imap(
|
||||
func=verify_image_label,
|
||||
iterable=zip(
|
||||
self.im_files,
|
||||
self.label_files,
|
||||
repeat(self.prefix),
|
||||
repeat(self.use_keypoints),
|
||||
repeat(len(self.data["names"])),
|
||||
repeat(nkpt),
|
||||
repeat(ndim),
|
||||
),
|
||||
)
|
||||
pbar = TQDM(results, desc=desc, total=total)
|
||||
for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar:
|
||||
nm += nm_f
|
||||
nf += nf_f
|
||||
ne += ne_f
|
||||
nc += nc_f
|
||||
if im_file:
|
||||
x["labels"].append(
|
||||
dict(
|
||||
im_file=im_file,
|
||||
shape=shape,
|
||||
cls=lb[:, 0:1], # n, 1
|
||||
bboxes=lb[:, 1:], # n, 4
|
||||
segments=segments,
|
||||
keypoints=keypoint,
|
||||
normalized=True,
|
||||
bbox_format="xywh",
|
||||
)
|
||||
)
|
||||
if msg:
|
||||
msgs.append(msg)
|
||||
pbar.desc = f"{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt"
|
||||
pbar.close()
|
||||
|
||||
if msgs:
|
||||
LOGGER.info("\n".join(msgs))
|
||||
if nf == 0:
|
||||
LOGGER.warning(f"{self.prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}")
|
||||
x["hash"] = get_hash(self.label_files + self.im_files)
|
||||
x["results"] = nf, nm, ne, nc, len(self.im_files)
|
||||
x["msgs"] = msgs # warnings
|
||||
save_dataset_cache_file(self.prefix, path, x)
|
||||
return x
|
||||
|
||||
def get_labels(self):
|
||||
"""Returns dictionary of labels for YOLO training."""
|
||||
self.label_files = img2label_paths(self.im_files)
|
||||
cache_path = Path(self.label_files[0]).parent.with_suffix(".cache")
|
||||
try:
|
||||
cache, exists = load_dataset_cache_file(cache_path), True # attempt to load a *.cache file
|
||||
assert cache["version"] == DATASET_CACHE_VERSION # matches current version
|
||||
assert cache["hash"] == get_hash(self.label_files + self.im_files) # identical hash
|
||||
except (FileNotFoundError, AssertionError, AttributeError):
|
||||
cache, exists = self.cache_labels(cache_path), False # run cache ops
|
||||
|
||||
# Display cache
|
||||
nf, nm, ne, nc, n = cache.pop("results") # found, missing, empty, corrupt, total
|
||||
if exists and LOCAL_RANK in (-1, 0):
|
||||
d = f"Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt"
|
||||
TQDM(None, desc=self.prefix + d, total=n, initial=n) # display results
|
||||
if cache["msgs"]:
|
||||
LOGGER.info("\n".join(cache["msgs"])) # display warnings
|
||||
|
||||
# Read cache
|
||||
[cache.pop(k) for k in ("hash", "version", "msgs")] # remove items
|
||||
labels = cache["labels"]
|
||||
if not labels:
|
||||
LOGGER.warning(f"WARNING ⚠️ No images found in {cache_path}, training may not work correctly. {HELP_URL}")
|
||||
self.im_files = [lb["im_file"] for lb in labels] # update im_files
|
||||
|
||||
# Check if the dataset is all boxes or all segments
|
||||
lengths = ((len(lb["cls"]), len(lb["bboxes"]), len(lb["segments"])) for lb in labels)
|
||||
len_cls, len_boxes, len_segments = (sum(x) for x in zip(*lengths))
|
||||
if len_segments and len_boxes != len_segments:
|
||||
LOGGER.warning(
|
||||
f"WARNING ⚠️ Box and segment counts should be equal, but got len(segments) = {len_segments}, "
|
||||
f"len(boxes) = {len_boxes}. To resolve this only boxes will be used and all segments will be removed. "
|
||||
"To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset."
|
||||
)
|
||||
for lb in labels:
|
||||
lb["segments"] = []
|
||||
if len_cls == 0:
|
||||
LOGGER.warning(f"WARNING ⚠️ No labels found in {cache_path}, training may not work correctly. {HELP_URL}")
|
||||
return labels
|
||||
|
||||
def build_transforms(self, hyp=None):
|
||||
"""Builds and appends transforms to the list."""
|
||||
if self.augment:
|
||||
hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0
|
||||
hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
|
||||
transforms = v8_transforms(self, self.imgsz, hyp)
|
||||
else:
|
||||
transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), scaleup=False)])
|
||||
transforms.append(
|
||||
Format(
|
||||
bbox_format="xywh",
|
||||
normalize=True,
|
||||
return_mask=self.use_segments,
|
||||
return_keypoint=self.use_keypoints,
|
||||
return_obb=self.use_obb,
|
||||
batch_idx=True,
|
||||
mask_ratio=hyp.mask_ratio,
|
||||
mask_overlap=hyp.overlap_mask,
|
||||
bgr=hyp.bgr if self.augment else 0.0, # only affect training.
|
||||
)
|
||||
)
|
||||
return transforms
|
||||
|
||||
def close_mosaic(self, hyp):
|
||||
"""Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations."""
|
||||
hyp.mosaic = 0.0 # set mosaic ratio=0.0
|
||||
hyp.copy_paste = 0.0 # keep the same behavior as previous v8 close-mosaic
|
||||
hyp.mixup = 0.0 # keep the same behavior as previous v8 close-mosaic
|
||||
self.transforms = self.build_transforms(hyp)
|
||||
|
||||
def update_labels_info(self, label):
|
||||
"""
|
||||
Custom your label format here.
|
||||
|
||||
Note:
|
||||
cls is not with bboxes now, classification and semantic segmentation need an independent cls label
|
||||
Can also support classification and semantic segmentation by adding or removing dict keys there.
|
||||
"""
|
||||
bboxes = label.pop("bboxes")
|
||||
segments = label.pop("segments", [])
|
||||
keypoints = label.pop("keypoints", None)
|
||||
bbox_format = label.pop("bbox_format")
|
||||
normalized = label.pop("normalized")
|
||||
|
||||
# NOTE: do NOT resample oriented boxes
|
||||
segment_resamples = 100 if self.use_obb else 1000
|
||||
if len(segments) > 0:
|
||||
# list[np.array(1000, 2)] * num_samples
|
||||
# (N, 1000, 2)
|
||||
segments = np.stack(resample_segments(segments, n=segment_resamples), axis=0)
|
||||
else:
|
||||
segments = np.zeros((0, segment_resamples, 2), dtype=np.float32)
|
||||
label["instances"] = Instances(bboxes, segments, keypoints, bbox_format=bbox_format, normalized=normalized)
|
||||
return label
|
||||
|
||||
@staticmethod
|
||||
def collate_fn(batch):
|
||||
"""Collates data samples into batches."""
|
||||
new_batch = {}
|
||||
keys = batch[0].keys()
|
||||
values = list(zip(*[list(b.values()) for b in batch]))
|
||||
for i, k in enumerate(keys):
|
||||
value = values[i]
|
||||
if k == "img":
|
||||
value = torch.stack(value, 0)
|
||||
if k in ["masks", "keypoints", "bboxes", "cls", "segments", "obb"]:
|
||||
value = torch.cat(value, 0)
|
||||
new_batch[k] = value
|
||||
new_batch["batch_idx"] = list(new_batch["batch_idx"])
|
||||
for i in range(len(new_batch["batch_idx"])):
|
||||
new_batch["batch_idx"][i] += i # add target image index for build_targets()
|
||||
new_batch["batch_idx"] = torch.cat(new_batch["batch_idx"], 0)
|
||||
return new_batch
|
||||
|
||||
|
||||

# Classification dataloaders -------------------------------------------------------------------------------------------
class ClassificationDataset(torchvision.datasets.ImageFolder):
    """
    Extends torchvision ImageFolder to support YOLO classification tasks, offering functionalities like image
    augmentation, caching, and verification. It's designed to efficiently handle large datasets for training deep
    learning models, with optional image transformations and caching mechanisms to speed up training.

    This class allows for augmentations using both torchvision and Albumentations libraries, and supports caching images
    in RAM or on disk to reduce IO overhead during training. Additionally, it implements a robust verification process
    to ensure data integrity and consistency.

    Attributes:
        cache_ram (bool): Indicates if caching in RAM is enabled.
        cache_disk (bool): Indicates if caching on disk is enabled.
        samples (list): A list of tuples, each containing the path to an image, its class index, path to its .npy cache
            file (if caching on disk), and optionally the loaded image array (if caching in RAM).
        torch_transforms (callable): PyTorch transforms to be applied to the images.
    """

    def __init__(self, root, args, augment=False, prefix=""):
        """
        Initialize the classification dataset with root directory, arguments, augmentations, and cache settings.

        Args:
            root (str): Path to the dataset directory where images are stored in a class-specific folder structure.
            args (Namespace): Configuration containing dataset-related settings such as image size, augmentation
                parameters, and cache settings. It includes attributes like `imgsz` (image size), `fraction` (fraction
                of data to use), `scale`, `fliplr`, `flipud`, `cache` (disk or RAM caching for faster training),
                `auto_augment`, `hsv_h`, `hsv_s`, `hsv_v`, and `crop_fraction`.
            augment (bool, optional): Whether to apply augmentations to the dataset. Default is False.
            prefix (str, optional): Prefix for logging and cache filenames, aiding in dataset identification and
                debugging. Default is an empty string.
        """
        super().__init__(root=root)
        if augment and args.fraction < 1.0:  # reduce training fraction
            self.samples = self.samples[: round(len(self.samples) * args.fraction)]
        self.prefix = colorstr(f"{prefix}: ") if prefix else ""
        self.cache_ram = args.cache is True or args.cache == "ram"  # cache images into RAM
        self.cache_disk = args.cache == "disk"  # cache images on hard drive as uncompressed *.npy files
        self.samples = self.verify_images()  # filter out bad images
        self.samples = [list(x) + [Path(x[0]).with_suffix(".npy"), None] for x in self.samples]  # file, index, npy, im
        scale = (1.0 - args.scale, 1.0)  # (0.08, 1.0)
        self.torch_transforms = (
            classify_augmentations(
                size=args.imgsz,
                scale=scale,
                hflip=args.fliplr,
                vflip=args.flipud,
                erasing=args.erasing,
                auto_augment=args.auto_augment,
                hsv_h=args.hsv_h,
                hsv_s=args.hsv_s,
                hsv_v=args.hsv_v,
            )
            if augment
            else classify_transforms(size=args.imgsz, crop_fraction=args.crop_fraction)
        )

    def __getitem__(self, i):
        """Returns subset of data and targets corresponding to given indices."""
        f, j, fn, im = self.samples[i]  # filename, index, filename.with_suffix('.npy'), image
        if self.cache_ram and im is None:
            im = self.samples[i][3] = cv2.imread(f)
        elif self.cache_disk:
            if not fn.exists():  # save *.npy cache if it doesn't exist yet
                np.save(fn.as_posix(), cv2.imread(f), allow_pickle=False)
            im = np.load(fn)
        else:  # read image
            im = cv2.imread(f)  # BGR
        # Convert NumPy array to PIL image
        im = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
        sample = self.torch_transforms(im)
        return {"img": sample, "cls": j}

    def __len__(self) -> int:
        """Return the total number of samples in the dataset."""
        return len(self.samples)

    def verify_images(self):
        """Verify all images in dataset."""
        desc = f"{self.prefix}Scanning {self.root}..."
        path = Path(self.root).with_suffix(".cache")  # *.cache file path

        with contextlib.suppress(FileNotFoundError, AssertionError, AttributeError):
            cache = load_dataset_cache_file(path)  # attempt to load a *.cache file
            assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
            assert cache["hash"] == get_hash([x[0] for x in self.samples])  # identical hash
            nf, nc, n, samples = cache.pop("results")  # found, corrupt, total, samples list
            if LOCAL_RANK in (-1, 0):
                d = f"{desc} {nf} images, {nc} corrupt"
                TQDM(None, desc=d, total=n, initial=n)
                if cache["msgs"]:
                    LOGGER.info("\n".join(cache["msgs"]))  # display warnings
            return samples

        # Run scan if *.cache retrieval failed
        nf, nc, msgs, samples, x = 0, 0, [], [], {}
        with ThreadPool(NUM_THREADS) as pool:
            results = pool.imap(func=verify_image, iterable=zip(self.samples, repeat(self.prefix)))
            pbar = TQDM(results, desc=desc, total=len(self.samples))
            for sample, nf_f, nc_f, msg in pbar:
                if nf_f:
                    samples.append(sample)
                if msg:
                    msgs.append(msg)
                nf += nf_f
                nc += nc_f
                pbar.desc = f"{desc} {nf} images, {nc} corrupt"
            pbar.close()
        if msgs:
            LOGGER.info("\n".join(msgs))
        x["hash"] = get_hash([x[0] for x in self.samples])
        x["results"] = nf, nc, len(samples), samples
        x["msgs"] = msgs  # warnings
        save_dataset_cache_file(self.prefix, path, x)
        return samples
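A minimal usage sketch for `ClassificationDataset`; the `SimpleNamespace` here stands in for the real training config, and its field values are illustrative defaults covering exactly the attributes `__init__` reads:

```python
from types import SimpleNamespace

args = SimpleNamespace(
    imgsz=224, fraction=1.0, scale=0.5, fliplr=0.5, flipud=0.0, cache=False,
    erasing=0.4, auto_augment=None, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, crop_fraction=1.0,
)
dataset = ClassificationDataset(root="path/to/dataset/train", args=args, augment=True, prefix="train")
sample = dataset[0]  # {"img": transformed image tensor, "cls": integer class index}
```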

def load_dataset_cache_file(path):
    """Load an Ultralytics *.cache dictionary from path."""
    import gc

    gc.disable()  # reduce pickle load time https://github.com/ultralytics/ultralytics/pull/1585
    cache = np.load(str(path), allow_pickle=True).item()  # load dict
    gc.enable()
    return cache


def save_dataset_cache_file(prefix, path, x):
    """Save an Ultralytics dataset *.cache dictionary x to path."""
    x["version"] = DATASET_CACHE_VERSION  # add cache version
    if is_dir_writeable(path.parent):
        if path.exists():
            path.unlink()  # remove *.cache file if exists
        np.save(str(path), x)  # save cache for next time
        path.with_suffix(".cache.npy").rename(path)  # remove .npy suffix
        LOGGER.info(f"{prefix}New cache created: {path}")
    else:
        LOGGER.warning(f"{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable, cache not saved.")
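A round-trip sketch of the two cache helpers above; the result tuple mirrors what `verify_images` writes, the file path is a placeholder, and `DATASET_CACHE_VERSION` is assumed to be defined earlier in this module:

```python
from pathlib import Path

x = {"hash": "d41d8cd9", "results": (8, 0, 8, []), "msgs": []}  # hypothetical scan output
save_dataset_cache_file("demo: ", Path("/tmp/demo.cache"), x)   # stamps x["version"] and saves
cache = load_dataset_cache_file(Path("/tmp/demo.cache"))
assert cache["version"] == DATASET_CACHE_VERSION
```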

# TODO: support semantic segmentation
class SemanticDataset(BaseDataset):
    """
    Semantic Segmentation Dataset.

    This class is responsible for handling datasets used for semantic segmentation tasks. It inherits functionalities
    from the BaseDataset class.

    Note:
        This class is currently a placeholder and needs to be populated with methods and attributes for supporting
        semantic segmentation tasks.
    """

    def __init__(self):
        """Initialize a SemanticDataset object."""
        super().__init__()
@ -1,5 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from .utils import plot_query_result

__all__ = ["plot_query_result"]
@ -1,472 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from io import BytesIO
from pathlib import Path
from typing import Any, List, Tuple, Union

import cv2
import numpy as np
import torch
from PIL import Image
from matplotlib import pyplot as plt
from pandas import DataFrame
from tqdm import tqdm

from ultralytics.data.augment import Format
from ultralytics.data.dataset import YOLODataset
from ultralytics.data.utils import check_det_dataset
from ultralytics.models.yolo.model import YOLO
from ultralytics.utils import LOGGER, IterableSimpleNamespace, checks, USER_CONFIG_DIR

from .utils import get_sim_index_schema, get_table_schema, plot_query_result, prompt_sql_query, sanitize_batch


class ExplorerDataset(YOLODataset):
    def __init__(self, *args, data: dict = None, **kwargs) -> None:
        super().__init__(*args, data=data, **kwargs)

    def load_image(self, i: int) -> Union[Tuple[np.ndarray, Tuple[int, int], Tuple[int, int]], Tuple[None, None, None]]:
        """Loads 1 image from dataset index 'i' without any resize ops."""
        im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
        if im is None:  # not cached in RAM
            if fn.exists():  # load npy
                im = np.load(fn)
            else:  # read image
                im = cv2.imread(f)  # BGR
                if im is None:
                    raise FileNotFoundError(f"Image Not Found {f}")
            h0, w0 = im.shape[:2]  # orig hw
            return im, (h0, w0), im.shape[:2]

        return self.ims[i], self.im_hw0[i], self.im_hw[i]

    def build_transforms(self, hyp: IterableSimpleNamespace = None):
        """Creates transforms for dataset images without resizing."""
        return Format(
            bbox_format="xyxy",
            normalize=False,
            return_mask=self.use_segments,
            return_keypoint=self.use_keypoints,
            batch_idx=True,
            mask_ratio=hyp.mask_ratio,
            mask_overlap=hyp.overlap_mask,
        )


class Explorer:
    def __init__(
        self,
        data: Union[str, Path] = "coco128.yaml",
        model: str = "yolov8n.pt",
        uri: str = USER_CONFIG_DIR / "explorer",
    ) -> None:
        # Note duckdb==0.10.0 bug https://github.com/ultralytics/ultralytics/pull/8181
        checks.check_requirements(["lancedb>=0.4.3", "duckdb<=0.9.2"])
        import lancedb

        self.connection = lancedb.connect(uri)
        self.table_name = Path(data).name.lower() + "_" + model.lower()
        self.sim_idx_base_name = (
            f"{self.table_name}_sim_idx".lower()
        )  # Use this name and append thres and top_k to reuse the table
        self.model = YOLO(model)
        self.data = data  # None
        self.choice_set = None

        self.table = None
        self.progress = 0

    def create_embeddings_table(self, force: bool = False, split: str = "train") -> None:
        """
        Create LanceDB table containing the embeddings of the images in the dataset. The table will be reused if it
        already exists. Pass force=True to overwrite the existing table.

        Args:
            force (bool): Whether to overwrite the existing table or not. Defaults to False.
            split (str): Split of the dataset to use. Defaults to 'train'.

        Example:
            ```python
            exp = Explorer()
            exp.create_embeddings_table()
            ```
        """
        if self.table is not None and not force:
            LOGGER.info("Table already exists. Reusing it. Pass force=True to overwrite it.")
            return
        if self.table_name in self.connection.table_names() and not force:
            LOGGER.info(f"Table {self.table_name} already exists. Reusing it. Pass force=True to overwrite it.")
            self.table = self.connection.open_table(self.table_name)
            self.progress = 1
            return
        if self.data is None:
            raise ValueError("Data must be provided to create embeddings table")

        data_info = check_det_dataset(self.data)
        if split not in data_info:
            raise ValueError(
                f"Split {split} is not found in the dataset. Available keys in the dataset are {list(data_info.keys())}"
            )

        choice_set = data_info[split]
        choice_set = choice_set if isinstance(choice_set, list) else [choice_set]
        self.choice_set = choice_set
        dataset = ExplorerDataset(img_path=choice_set, data=data_info, augment=False, cache=False, task=self.model.task)

        # Create the table schema
        batch = dataset[0]
        vector_size = self.model.embed(batch["im_file"], verbose=False)[0].shape[0]
        table = self.connection.create_table(self.table_name, schema=get_table_schema(vector_size), mode="overwrite")
        table.add(
            self._yield_batches(
                dataset,
                data_info,
                self.model,
                exclude_keys=["img", "ratio_pad", "resized_shape", "ori_shape", "batch_idx"],
            )
        )

        self.table = table

    def _yield_batches(self, dataset: ExplorerDataset, data_info: dict, model: YOLO, exclude_keys: List[str]):
        """Generates batches of data for embedding, excluding specified keys."""
        for i in tqdm(range(len(dataset))):
            self.progress = float(i + 1) / len(dataset)
            batch = dataset[i]
            for k in exclude_keys:
                batch.pop(k, None)
            batch = sanitize_batch(batch, data_info)
            batch["vector"] = model.embed(batch["im_file"], verbose=False)[0].detach().tolist()
            yield [batch]

    def query(
        self, imgs: Union[str, np.ndarray, List[str], List[np.ndarray]] = None, limit: int = 25
    ) -> Any:  # pyarrow.Table
        """
        Query the table for similar images. Accepts a single image or a list of images.

        Args:
            imgs (str or list): Path to the image or a list of paths to the images.
            limit (int): Number of results to return.

        Returns:
            (pyarrow.Table): An arrow table containing the results. Supports converting to:
                - pandas dataframe: `result.to_pandas()`
                - dict of lists: `result.to_pydict()`

        Example:
            ```python
            exp = Explorer()
            exp.create_embeddings_table()
            similar = exp.query(imgs='https://ultralytics.com/images/zidane.jpg')
            ```
        """
        if self.table is None:
            raise ValueError("Table is not created. Please create the table first.")
        if isinstance(imgs, str):
            imgs = [imgs]
        assert isinstance(imgs, list), f"imgs must be a string or a list of strings. Got {type(imgs)}"
        embeds = self.model.embed(imgs)
        # Get avg if multiple images are passed (len > 1)
        embeds = torch.mean(torch.stack(embeds), 0).cpu().numpy() if len(embeds) > 1 else embeds[0].cpu().numpy()
        return self.table.search(embeds).limit(limit).to_arrow()

    def sql_query(
        self, query: str, return_type: str = "pandas"
    ) -> Union[DataFrame, Any, None]:  # pandas.dataframe or pyarrow.Table
        """
        Run a SQL-like query on the table. Utilizes LanceDB predicate pushdown.

        Args:
            query (str): SQL query to run.
            return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'.

        Returns:
            (pyarrow.Table): An arrow table containing the results.

        Example:
            ```python
            exp = Explorer()
            exp.create_embeddings_table()
            query = "SELECT * FROM 'table' WHERE labels LIKE '%person%'"
            result = exp.sql_query(query)
            ```
        """
        assert return_type in {
            "pandas",
            "arrow",
        }, f"Return type should be either `pandas` or `arrow`, but got {return_type}"
        import duckdb

        if self.table is None:
            raise ValueError("Table is not created. Please create the table first.")

        # Note: using filter pushdown would be a better long term solution. Temporarily using duckdb for this.
        table = self.table.to_arrow()  # noqa NOTE: Don't comment this. This line is used by DuckDB
        if not query.startswith("SELECT") and not query.startswith("WHERE"):
            raise ValueError(
                f"Query must start with SELECT or WHERE. You can either pass the entire query or just the WHERE clause. Found {query}"
            )
        if query.startswith("WHERE"):
            query = f"SELECT * FROM 'table' {query}"
        LOGGER.info(f"Running query: {query}")

        rs = duckdb.sql(query)
        if return_type == "arrow":
            return rs.arrow()
        elif return_type == "pandas":
            return rs.df()

    def plot_sql_query(self, query: str, labels: bool = True) -> Image.Image:
        """
        Plot the results of a SQL-like query on the table.

        Args:
            query (str): SQL query to run.
            labels (bool): Whether to plot the labels or not.

        Returns:
            (PIL.Image): Image containing the plot.

        Example:
            ```python
            exp = Explorer()
            exp.create_embeddings_table()
            query = "SELECT * FROM 'table' WHERE labels LIKE '%person%'"
            result = exp.plot_sql_query(query)
            ```
        """
        result = self.sql_query(query, return_type="arrow")
        if len(result) == 0:
            LOGGER.info("No results found.")
            return None
        img = plot_query_result(result, plot_labels=labels)
        return Image.fromarray(img)

    def get_similar(
        self,
        img: Union[str, np.ndarray, List[str], List[np.ndarray]] = None,
        idx: Union[int, List[int]] = None,
        limit: int = 25,
        return_type: str = "pandas",
    ) -> Union[DataFrame, Any]:  # pandas.dataframe or pyarrow.Table
        """
        Query the table for similar images. Accepts a single image or a list of images.

        Args:
            img (str or list): Path to the image or a list of paths to the images.
            idx (int or list): Index of the image in the table or a list of indexes.
            limit (int): Number of results to return. Defaults to 25.
            return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'.

        Returns:
            (pandas.DataFrame): A dataframe containing the results.

        Example:
            ```python
            exp = Explorer()
            exp.create_embeddings_table()
            similar = exp.get_similar(img='https://ultralytics.com/images/zidane.jpg')
            ```
        """
        assert return_type in {
            "pandas",
            "arrow",
        }, f"Return type should be either `pandas` or `arrow`, but got {return_type}"
        img = self._check_imgs_or_idxs(img, idx)
        similar = self.query(img, limit=limit)

        if return_type == "arrow":
            return similar
        elif return_type == "pandas":
            return similar.to_pandas()

    def plot_similar(
        self,
        img: Union[str, np.ndarray, List[str], List[np.ndarray]] = None,
        idx: Union[int, List[int]] = None,
        limit: int = 25,
        labels: bool = True,
    ) -> Image.Image:
        """
        Plot the similar images. Accepts images or indexes.

        Args:
            img (str or list): Path to the image or a list of paths to the images.
            idx (int or list): Index of the image in the table or a list of indexes.
            labels (bool): Whether to plot the labels or not.
            limit (int): Number of results to return. Defaults to 25.

        Returns:
            (PIL.Image): Image containing the plot.

        Example:
            ```python
            exp = Explorer()
            exp.create_embeddings_table()
            similar = exp.plot_similar(img='https://ultralytics.com/images/zidane.jpg')
            ```
        """
        similar = self.get_similar(img, idx, limit, return_type="arrow")
        if len(similar) == 0:
            LOGGER.info("No results found.")
            return None
        img = plot_query_result(similar, plot_labels=labels)
        return Image.fromarray(img)

    def similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> DataFrame:
        """
        Calculate the similarity index of all the images in the table. Here, the index will contain the data points that
        are max_dist or closer to the image in the embedding space at a given index.

        Args:
            max_dist (float): Maximum L2 distance between the embeddings to consider. Defaults to 0.2.
            top_k (float): Percentage of the closest data points to consider when counting. Used to apply limit when
                running vector search. Defaults to None.
            force (bool): Whether to overwrite the existing similarity index or not. Defaults to False.

        Returns:
            (pandas.DataFrame): A dataframe containing the similarity index. Each row corresponds to an image, and columns
                include indices of similar images and their respective distances.

        Example:
            ```python
            exp = Explorer()
            exp.create_embeddings_table()
            sim_idx = exp.similarity_index()
            ```
        """
        if self.table is None:
            raise ValueError("Table is not created. Please create the table first.")
        sim_idx_table_name = f"{self.sim_idx_base_name}_thres_{max_dist}_top_{top_k}".lower()
        if sim_idx_table_name in self.connection.table_names() and not force:
            LOGGER.info("Similarity matrix already exists. Reusing it. Pass force=True to overwrite it.")
            return self.connection.open_table(sim_idx_table_name).to_pandas()

        if top_k and not (1.0 >= top_k >= 0.0):
            raise ValueError(f"top_k must be between 0.0 and 1.0. Got {top_k}")
        if max_dist < 0.0:
            raise ValueError(f"max_dist must be greater than 0. Got {max_dist}")

        top_k = int(top_k * len(self.table)) if top_k else len(self.table)
        top_k = max(top_k, 1)
        features = self.table.to_lance().to_table(columns=["vector", "im_file"]).to_pydict()
        im_files = features["im_file"]
        embeddings = features["vector"]

        sim_table = self.connection.create_table(sim_idx_table_name, schema=get_sim_index_schema(), mode="overwrite")

        def _yield_sim_idx():
            """Generates a dataframe with similarity indices and distances for images."""
            for i in tqdm(range(len(embeddings))):
                sim_idx = self.table.search(embeddings[i]).limit(top_k).to_pandas().query(f"_distance <= {max_dist}")
                yield [
                    {
                        "idx": i,
                        "im_file": im_files[i],
                        "count": len(sim_idx),
                        "sim_im_files": sim_idx["im_file"].tolist(),
                    }
                ]

        sim_table.add(_yield_sim_idx())
        self.sim_index = sim_table
        return sim_table.to_pandas()

    def plot_similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> Image:
        """
        Plot the similarity index of all the images in the table. Here, the index will contain the data points that are
        max_dist or closer to the image in the embedding space at a given index.

        Args:
            max_dist (float): Maximum L2 distance between the embeddings to consider. Defaults to 0.2.
            top_k (float): Percentage of closest data points to consider when counting. Used to apply limit when
                running vector search. Defaults to None.
            force (bool): Whether to overwrite the existing similarity index or not. Defaults to False.

        Returns:
            (PIL.Image): Image containing the plot.

        Example:
            ```python
            exp = Explorer()
            exp.create_embeddings_table()

            similarity_idx_plot = exp.plot_similarity_index()
            similarity_idx_plot.show()  # view image preview
            similarity_idx_plot.save('path/to/save/similarity_index_plot.png')  # save contents to file
            ```
        """
        sim_idx = self.similarity_index(max_dist=max_dist, top_k=top_k, force=force)
        sim_count = sim_idx["count"].tolist()
        sim_count = np.array(sim_count)

        indices = np.arange(len(sim_count))

        # Create the bar plot
        plt.bar(indices, sim_count)

        # Customize the plot (optional)
        plt.xlabel("data idx")
        plt.ylabel("Count")
        plt.title("Similarity Count")
        buffer = BytesIO()
        plt.savefig(buffer, format="png")
        buffer.seek(0)

        # Use Pillow to open the image from the buffer
        return Image.fromarray(np.array(Image.open(buffer)))

    def _check_imgs_or_idxs(
        self, img: Union[str, np.ndarray, List[str], List[np.ndarray], None], idx: Union[None, int, List[int]]
    ) -> List[np.ndarray]:
        if img is None and idx is None:
            raise ValueError("Either img or idx must be provided.")
        if img is not None and idx is not None:
            raise ValueError("Only one of img or idx must be provided.")
        if idx is not None:
            idx = idx if isinstance(idx, list) else [idx]
            img = self.table.to_lance().take(idx, columns=["im_file"]).to_pydict()["im_file"]

        return img if isinstance(img, list) else [img]

    def ask_ai(self, query):
        """
        Ask AI a question.

        Args:
            query (str): Question to ask.

        Returns:
            (pandas.DataFrame): A dataframe containing filtered results to the SQL query.

        Example:
            ```python
            exp = Explorer()
            exp.create_embeddings_table()
            answer = exp.ask_ai('Show images with 1 person and 2 dogs')
            ```
        """
        result = prompt_sql_query(query)
        try:
            df = self.sql_query(result)
        except Exception as e:
            LOGGER.error("AI generated query is not valid. Please try again with a different prompt")
            LOGGER.error(e)
            return None
        return df

    def visualize(self, result):
        """
        Visualize the results of a query. TODO.

        Args:
            result (pyarrow.Table): Table containing the results of a query.
        """
        pass

    def generate_report(self, result):
        """
        Generate a report of the dataset.

        TODO
        """
        pass
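Taken together, a typical Explorer session chains the methods above. A sketch using only the calls documented in this class (the dataset and model are the constructor's own defaults):

```python
exp = Explorer(data="coco128.yaml", model="yolov8n.pt")
exp.create_embeddings_table()

# Semantic search, SQL filtering, and near-duplicate analysis over the same table
similar = exp.get_similar(img="https://ultralytics.com/images/zidane.jpg", limit=10)
people = exp.sql_query("WHERE labels LIKE '%person%'")  # bare WHERE clauses are auto-prefixed
sim_idx = exp.similarity_index(max_dist=0.2)  # one row per image, listing its near-duplicates
```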
@ -1 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
@ -1,268 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import time
from threading import Thread

import pandas as pd

from ultralytics import Explorer
from ultralytics.utils import ROOT, SETTINGS
from ultralytics.utils.checks import check_requirements

check_requirements(("streamlit>=1.29.0", "streamlit-select>=0.3"))

import streamlit as st
from streamlit_select import image_select


def _get_explorer():
    """Initializes and returns an instance of the Explorer class."""
    exp = Explorer(data=st.session_state.get("dataset"), model=st.session_state.get("model"))
    thread = Thread(
        target=exp.create_embeddings_table, kwargs={"force": st.session_state.get("force_recreate_embeddings")}
    )
    thread.start()
    progress_bar = st.progress(0, text="Creating embeddings table...")
    while exp.progress < 1:
        time.sleep(0.1)
        progress_bar.progress(exp.progress, text=f"Progress: {exp.progress * 100}%")
    thread.join()
    st.session_state["explorer"] = exp
    progress_bar.empty()


def init_explorer_form():
    """Initializes the Explorer setup form with dataset and model selection."""
    datasets = ROOT / "cfg" / "datasets"
    ds = [d.name for d in datasets.glob("*.yaml")]
    models = [
        "yolov8n.pt",
        "yolov8s.pt",
        "yolov8m.pt",
        "yolov8l.pt",
        "yolov8x.pt",
        "yolov8n-seg.pt",
        "yolov8s-seg.pt",
        "yolov8m-seg.pt",
        "yolov8l-seg.pt",
        "yolov8x-seg.pt",
        "yolov8n-pose.pt",
        "yolov8s-pose.pt",
        "yolov8m-pose.pt",
        "yolov8l-pose.pt",
        "yolov8x-pose.pt",
    ]
    with st.form(key="explorer_init_form"):
        col1, col2 = st.columns(2)
        with col1:
            st.selectbox("Select dataset", ds, key="dataset", index=ds.index("coco128.yaml"))
        with col2:
            st.selectbox("Select model", models, key="model")
        st.checkbox("Force recreate embeddings", key="force_recreate_embeddings")

        st.form_submit_button("Explore", on_click=_get_explorer)


def query_form():
    """Sets up a Streamlit form for running SQL-like queries against the embeddings table."""
    with st.form("query_form"):
        col1, col2 = st.columns([0.8, 0.2])
        with col1:
            st.text_input(
                "Query",
                "WHERE labels LIKE '%person%' AND labels LIKE '%dog%'",
                label_visibility="collapsed",
                key="query",
            )
        with col2:
            st.form_submit_button("Query", on_click=run_sql_query)


def ai_query_form():
    """Sets up a Streamlit form for natural-language queries answered via AI-generated SQL."""
    with st.form("ai_query_form"):
        col1, col2 = st.columns([0.8, 0.2])
        with col1:
            st.text_input("Query", "Show images with 1 person and 1 dog", label_visibility="collapsed", key="ai_query")
        with col2:
            st.form_submit_button("Ask AI", on_click=run_ai_query)


def find_similar_imgs(imgs):
    """Runs a similarity search for the given images and stores the results in session state."""
    exp = st.session_state["explorer"]
    similar = exp.get_similar(img=imgs, limit=st.session_state.get("limit"), return_type="arrow")
    paths = similar.to_pydict()["im_file"]
    st.session_state["imgs"] = paths
    st.session_state["res"] = similar


def similarity_form(selected_imgs):
    """Initializes a form for running similarity searches over the selected images."""
    st.write("Similarity Search")
    with st.form("similarity_form"):
        subcol1, subcol2 = st.columns([1, 1])
        with subcol1:
            st.number_input(
                "limit", min_value=None, max_value=None, value=25, label_visibility="collapsed", key="limit"
            )

        with subcol2:
            disabled = not len(selected_imgs)
            st.write("Selected: ", len(selected_imgs))
            st.form_submit_button(
                "Search",
                disabled=disabled,
                on_click=find_similar_imgs,
                args=(selected_imgs,),
            )
        if disabled:
            st.error("Select at least one image to search.")


# def persist_reset_form():
#     with st.form("persist_reset"):
#         col1, col2 = st.columns([1, 1])
#         with col1:
#             st.form_submit_button("Reset", on_click=reset)
#
#         with col2:
#             st.form_submit_button("Persist", on_click=update_state, args=("PERSISTING", True))


def run_sql_query():
    """Executes an SQL query and stores the results in session state."""
    st.session_state["error"] = None
    query = st.session_state.get("query")
    if query.strip():
        exp = st.session_state["explorer"]
        res = exp.sql_query(query, return_type="arrow")
        st.session_state["imgs"] = res.to_pydict()["im_file"]
        st.session_state["res"] = res


def run_ai_query():
    """Executes an AI-generated SQL query and updates session state with the results."""
    if not SETTINGS["openai_api_key"]:
        st.session_state["error"] = (
            'OpenAI API key not found in settings. Please run yolo settings openai_api_key="..."'
        )
        return
    st.session_state["error"] = None
    query = st.session_state.get("ai_query")
    if query.strip():
        exp = st.session_state["explorer"]
        res = exp.ask_ai(query)
        if not isinstance(res, pd.DataFrame) or res.empty:
            st.session_state["error"] = "No results found using AI generated query. Try another query or rerun it."
            return
        st.session_state["imgs"] = res["im_file"].to_list()
        st.session_state["res"] = res


def reset_explorer():
    """Resets the explorer to its initial state by clearing session variables."""
    st.session_state["explorer"] = None
    st.session_state["imgs"] = None
    st.session_state["error"] = None


def utralytics_explorer_docs_callback():
    """Renders an Ultralytics Explorer API documentation panel with a link to the docs."""
    with st.container(border=True):
        st.image(
            "https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Original.svg",
            width=100,
        )
        st.markdown(
            "<p>This demo is built using Ultralytics Explorer API. Visit <a href='https://docs.ultralytics.com/datasets/explorer/'>API docs</a> to try examples & learn more</p>",
            unsafe_allow_html=True,
            help=None,
        )
        st.link_button("Ultralytics Explorer API", "https://docs.ultralytics.com/datasets/explorer/")


def layout():
    """Lays out the Explorer demo page: dataset selection, query forms, and the image grid."""
    st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
    st.markdown("<h1 style='text-align: center;'>Ultralytics Explorer Demo</h1>", unsafe_allow_html=True)

    if st.session_state.get("explorer") is None:
        init_explorer_form()
        return

    st.button(":arrow_backward: Select Dataset", on_click=reset_explorer)
    exp = st.session_state.get("explorer")
    col1, col2 = st.columns([0.75, 0.25], gap="small")
    imgs = []
    if st.session_state.get("error"):
        st.error(st.session_state["error"])
    else:
        if st.session_state.get("imgs"):
            imgs = st.session_state.get("imgs")
        else:
            imgs = exp.table.to_lance().to_table(columns=["im_file"]).to_pydict()["im_file"]
            st.session_state["res"] = exp.table.to_arrow()
    total_imgs, selected_imgs = len(imgs), []
    with col1:
        subcol1, subcol2, subcol3, subcol4, subcol5 = st.columns(5)
        with subcol1:
            st.write("Max Images Displayed:")
        with subcol2:
            num = st.number_input(
                "Max Images Displayed",
                min_value=0,
                max_value=total_imgs,
                value=min(500, total_imgs),
                key="num_imgs_displayed",
                label_visibility="collapsed",
            )
        with subcol3:
            st.write("Start Index:")
        with subcol4:
            start_idx = st.number_input(
                "Start Index",
                min_value=0,
                max_value=total_imgs,
                value=0,
                key="start_index",
                label_visibility="collapsed",
            )
        with subcol5:
            reset = st.button("Reset", use_container_width=False, key="reset")
            if reset:
                st.session_state["imgs"] = None
                st.experimental_rerun()

        query_form()
        ai_query_form()
        if total_imgs:
            labels, boxes, masks, kpts, classes = None, None, None, None, None
            task = exp.model.task
            if st.session_state.get("display_labels"):
                labels = st.session_state.get("res").to_pydict()["labels"][start_idx : start_idx + num]
                boxes = st.session_state.get("res").to_pydict()["bboxes"][start_idx : start_idx + num]
                masks = st.session_state.get("res").to_pydict()["masks"][start_idx : start_idx + num]
                kpts = st.session_state.get("res").to_pydict()["keypoints"][start_idx : start_idx + num]
                classes = st.session_state.get("res").to_pydict()["cls"][start_idx : start_idx + num]
            imgs_displayed = imgs[start_idx : start_idx + num]
            selected_imgs = image_select(
                f"Total samples: {total_imgs}",
                images=imgs_displayed,
                use_container_width=False,
                # indices=[i for i in range(num)] if select_all else None,
                labels=labels,
                classes=classes,
                bboxes=boxes,
                masks=masks if task == "segment" else None,
                kpts=kpts if task == "pose" else None,
            )

    with col2:
        similarity_form(selected_imgs)
        display_labels = st.checkbox("Labels", value=False, key="display_labels")
        utralytics_explorer_docs_callback()


if __name__ == "__main__":
    layout()
@ -1,166 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import getpass
from typing import List

import cv2
import numpy as np
import pandas as pd

from ultralytics.data.augment import LetterBox
from ultralytics.utils import LOGGER as logger
from ultralytics.utils import SETTINGS
from ultralytics.utils.checks import check_requirements
from ultralytics.utils.ops import xyxy2xywh
from ultralytics.utils.plotting import plot_images


def get_table_schema(vector_size):
    """Returns a LanceModel schema for the embeddings table with the specified vector size."""
    from lancedb.pydantic import LanceModel, Vector

    class Schema(LanceModel):
        im_file: str
        labels: List[str]
        cls: List[int]
        bboxes: List[List[float]]
        masks: List[List[List[int]]]
        keypoints: List[List[List[float]]]
        vector: Vector(vector_size)

    return Schema


def get_sim_index_schema():
    """Returns a LanceModel schema for the similarity-index table."""
    from lancedb.pydantic import LanceModel

    class Schema(LanceModel):
        idx: int
        im_file: str
        count: int
        sim_im_files: List[str]

    return Schema


def sanitize_batch(batch, dataset_info):
    """Sanitizes input batch for inference, ensuring correct format and dimensions."""
    batch["cls"] = batch["cls"].flatten().int().tolist()
    box_cls_pair = sorted(zip(batch["bboxes"].tolist(), batch["cls"]), key=lambda x: x[1])
    batch["bboxes"] = [box for box, _ in box_cls_pair]
    batch["cls"] = [cls for _, cls in box_cls_pair]
    batch["labels"] = [dataset_info["names"][i] for i in batch["cls"]]
    batch["masks"] = batch["masks"].tolist() if "masks" in batch else [[[]]]
    batch["keypoints"] = batch["keypoints"].tolist() if "keypoints" in batch else [[[]]]
    return batch


def plot_query_result(similar_set, plot_labels=True):
    """
    Plot images from the similar set.

    Args:
        similar_set (list): Pyarrow or pandas object containing the similar data points
        plot_labels (bool): Whether to plot labels or not
    """
    similar_set = (
        similar_set.to_dict(orient="list") if isinstance(similar_set, pd.DataFrame) else similar_set.to_pydict()
    )
    empty_masks = [[[]]]
    empty_boxes = [[]]
    images = similar_set.get("im_file", [])
    bboxes = similar_set.get("bboxes", []) if similar_set.get("bboxes") is not empty_boxes else []
    masks = similar_set.get("masks") if similar_set.get("masks")[0] != empty_masks else []
    kpts = similar_set.get("keypoints") if similar_set.get("keypoints")[0] != empty_masks else []
    cls = similar_set.get("cls", [])

    plot_size = 640
    imgs, batch_idx, plot_boxes, plot_masks, plot_kpts = [], [], [], [], []
    for i, imf in enumerate(images):
        im = cv2.imread(imf)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        h, w = im.shape[:2]
        r = min(plot_size / h, plot_size / w)
        imgs.append(LetterBox(plot_size, center=False)(image=im).transpose(2, 0, 1))
        if plot_labels:
            if len(bboxes) > i and len(bboxes[i]) > 0:
                box = np.array(bboxes[i], dtype=np.float32)
                box[:, [0, 2]] *= r
                box[:, [1, 3]] *= r
                plot_boxes.append(box)
            if len(masks) > i and len(masks[i]) > 0:
                mask = np.array(masks[i], dtype=np.uint8)[0]
                plot_masks.append(LetterBox(plot_size, center=False)(image=mask))
            if len(kpts) > i and kpts[i] is not None:
                kpt = np.array(kpts[i], dtype=np.float32)
                kpt[:, :, :2] *= r
                plot_kpts.append(kpt)
        batch_idx.append(np.ones(len(np.array(bboxes[i], dtype=np.float32))) * i)
    imgs = np.stack(imgs, axis=0)
    masks = np.stack(plot_masks, axis=0) if plot_masks else np.zeros(0, dtype=np.uint8)
    kpts = np.concatenate(plot_kpts, axis=0) if plot_kpts else np.zeros((0, 51), dtype=np.float32)
    boxes = xyxy2xywh(np.concatenate(plot_boxes, axis=0)) if plot_boxes else np.zeros(0, dtype=np.float32)
    batch_idx = np.concatenate(batch_idx, axis=0)
    cls = np.concatenate([np.array(c, dtype=np.int32) for c in cls], axis=0)

    return plot_images(
        imgs, batch_idx, cls, bboxes=boxes, masks=masks, kpts=kpts, max_subplots=len(images), save=False, threaded=False
    )


def prompt_sql_query(query):
    """Prompts an LLM to turn a natural-language request into a single SQL query over the embeddings table."""
    check_requirements("openai>=1.6.1")
    from openai import OpenAI

    if not SETTINGS["openai_api_key"]:
        logger.warning("OpenAI API key not found in settings. Please enter your API key below.")
        openai_api_key = getpass.getpass("OpenAI API key: ")
        SETTINGS.update({"openai_api_key": openai_api_key})
    openai = OpenAI(api_key=SETTINGS["openai_api_key"])

    messages = [
        {
            "role": "system",
            "content": """
                You are a helpful data scientist proficient in SQL. You need to output exactly one SQL query based on
                the following schema and a user request. You only need to output the format with fixed selection
                statement that selects everything from "'table'", like `SELECT * from 'table'`

                Schema:
                im_file: string not null
                labels: list<item: string> not null
                    child 0, item: string
                cls: list<item: int64> not null
                    child 0, item: int64
                bboxes: list<item: list<item: double>> not null
                    child 0, item: list<item: double>
                        child 0, item: double
                masks: list<item: list<item: list<item: int64>>> not null
                    child 0, item: list<item: list<item: int64>>
                        child 0, item: list<item: int64>
                            child 0, item: int64
                keypoints: list<item: list<item: list<item: double>>> not null
                    child 0, item: list<item: list<item: double>>
                        child 0, item: list<item: double>
                            child 0, item: double
                vector: fixed_size_list<item: float>[256] not null
                    child 0, item: float

                Some details about the schema:
                - the "labels" column contains the string values like 'person' and 'dog' for the respective objects
                    in each image
                - the "cls" column contains the integer values of these classes that map them to the labels

                Example of a correct query:
                request - Get all data points that contain 2 or more people and at least one dog
                correct query-
                SELECT * FROM 'table' WHERE ARRAY_LENGTH(cls) >= 2 AND ARRAY_LENGTH(FILTER(labels, x -> x = 'person')) >= 2 AND ARRAY_LENGTH(FILTER(labels, x -> x = 'dog')) >= 1;
            """,
        },
        {"role": "user", "content": f"{query}"},
    ]

    response = openai.chat.completions.create(model="gpt-3.5-turbo", messages=messages)
    return response.choices[0].message.content
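For reference, `plot_query_result` accepts either return type produced by the Explorer class. A minimal sketch (the query string is illustrative):

```python
exp = Explorer()
exp.create_embeddings_table()
res = exp.sql_query("WHERE labels LIKE '%dog%'", return_type="arrow")
mosaic = plot_query_result(res)  # annotated image mosaic of the matching samples
```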
@ -1,555 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import glob
import math
import os
import time
from dataclasses import dataclass
from pathlib import Path
from threading import Thread
from urllib.parse import urlparse

import cv2
import numpy as np
import requests
import torch
from PIL import Image

from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS
from ultralytics.utils import LOGGER, is_colab, is_kaggle, ops
from ultralytics.utils.checks import check_requirements


@dataclass
class SourceTypes:
    """Class to represent various types of input sources for predictions."""

    stream: bool = False
    screenshot: bool = False
    from_img: bool = False
    tensor: bool = False


class LoadStreams:
    """
    Stream Loader for various types of video streams. Supports RTSP, RTMP, HTTP, and TCP streams.

    Attributes:
        sources (str): The source input paths or URLs for the video streams.
        vid_stride (int): Video frame-rate stride, defaults to 1.
        buffer (bool): Whether to buffer input streams, defaults to False.
        running (bool): Flag to indicate if the streaming thread is running.
        mode (str): Set to 'stream' indicating real-time capture.
        imgs (list): List of image frames for each stream.
        fps (list): List of FPS for each stream.
        frames (list): List of total frames for each stream.
        threads (list): List of threads for each stream.
        shape (list): List of shapes for each stream.
        caps (list): List of cv2.VideoCapture objects for each stream.
        bs (int): Batch size for processing.

    Methods:
        __init__: Initialize the stream loader.
        update: Read stream frames in daemon thread.
        close: Close stream loader and release resources.
        __iter__: Returns an iterator object for the class.
        __next__: Returns source paths, transformed, and original images for processing.
        __len__: Return the length of the sources object.

    Example:
        ```bash
        yolo predict source='rtsp://example.com/media.mp4'
        ```
    """

    def __init__(self, sources="file.streams", vid_stride=1, buffer=False):
        """Initialize instance variables and check for consistent input stream shapes."""
        torch.backends.cudnn.benchmark = True  # faster for fixed-size inference
        self.buffer = buffer  # buffer input streams
        self.running = True  # running flag for Thread
        self.mode = "stream"
        self.vid_stride = vid_stride  # video frame-rate stride

        sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources]
        n = len(sources)
        self.bs = n
        self.fps = [0] * n  # frames per second
        self.frames = [0] * n
        self.threads = [None] * n
        self.caps = [None] * n  # video capture objects
        self.imgs = [[] for _ in range(n)]  # images
        self.shape = [[] for _ in range(n)]  # image shapes
        self.sources = [ops.clean_str(x) for x in sources]  # clean source names for later
        for i, s in enumerate(sources):  # index, source
            # Start thread to read frames from video stream
            st = f"{i + 1}/{n}: {s}... "
            if urlparse(s).hostname in ("www.youtube.com", "youtube.com", "youtu.be"):  # if source is YouTube video
                # YouTube format i.e. 'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/LNwODJXcvt4'
                s = get_best_youtube_url(s)
            s = eval(s) if s.isnumeric() else s  # i.e. s = '0' local webcam
            if s == 0 and (is_colab() or is_kaggle()):
                raise NotImplementedError(
                    "'source=0' webcam not supported in Colab and Kaggle notebooks. "
                    "Try running 'source=0' in a local environment."
                )
            self.caps[i] = cv2.VideoCapture(s)  # store video capture object
            if not self.caps[i].isOpened():
                raise ConnectionError(f"{st}Failed to open {s}")
            w = int(self.caps[i].get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(self.caps[i].get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = self.caps[i].get(cv2.CAP_PROP_FPS)  # warning: may return 0 or nan
            self.frames[i] = max(int(self.caps[i].get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float(
                "inf"
            )  # infinite stream fallback
            self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30  # 30 FPS fallback

            success, im = self.caps[i].read()  # guarantee first frame
            if not success or im is None:
                raise ConnectionError(f"{st}Failed to read images from {s}")
            self.imgs[i].append(im)
            self.shape[i] = im.shape
            self.threads[i] = Thread(target=self.update, args=([i, self.caps[i], s]), daemon=True)
            LOGGER.info(f"{st}Success ✅ ({self.frames[i]} frames of shape {w}x{h} at {self.fps[i]:.2f} FPS)")
            self.threads[i].start()
        LOGGER.info("")  # newline

    def update(self, i, cap, stream):
        """Read stream `i` frames in daemon thread."""
        n, f = 0, self.frames[i]  # frame number, frame array
        while self.running and cap.isOpened() and n < (f - 1):
            if len(self.imgs[i]) < 30:  # keep a <=30-image buffer
                n += 1
                cap.grab()  # .read() = .grab() followed by .retrieve()
                if n % self.vid_stride == 0:
                    success, im = cap.retrieve()
                    if not success:
                        im = np.zeros(self.shape[i], dtype=np.uint8)
                        LOGGER.warning("WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.")
                        cap.open(stream)  # re-open stream if signal was lost
                    if self.buffer:
                        self.imgs[i].append(im)
                    else:
                        self.imgs[i] = [im]
            else:
                time.sleep(0.01)  # wait until the buffer is empty

    def close(self):
        """Close stream loader and release resources."""
        self.running = False  # stop flag for Thread
        for thread in self.threads:
            if thread.is_alive():
                thread.join(timeout=5)  # Add timeout
        for cap in self.caps:  # Iterate through the stored VideoCapture objects
            try:
                cap.release()  # release video capture
            except Exception as e:
                LOGGER.warning(f"WARNING ⚠️ Could not release VideoCapture object: {e}")
        cv2.destroyAllWindows()

    def __iter__(self):
        """Iterates through YOLO image feed and re-opens unresponsive streams."""
        self.count = -1
        return self

    def __next__(self):
        """Returns source paths, transformed and original images for processing."""
        self.count += 1

        images = []
        for i, x in enumerate(self.imgs):
            # Wait until a frame is available in each buffer
            while not x:
                if not self.threads[i].is_alive() or cv2.waitKey(1) == ord("q"):  # q to quit
                    self.close()
                    raise StopIteration
                time.sleep(1 / min(self.fps))
                x = self.imgs[i]
                if not x:
                    LOGGER.warning(f"WARNING ⚠️ Waiting for stream {i}")

            # Get and remove the first frame from imgs buffer
            if self.buffer:
                images.append(x.pop(0))

            # Get the last frame, and clear the rest from the imgs buffer
            else:
                images.append(x.pop(-1) if x else np.zeros(self.shape[i], dtype=np.uint8))
                x.clear()

        return self.sources, images, [""] * self.bs

    def __len__(self):
        """Return the length of the sources object."""
        return self.bs  # 1E12 frames = 32 streams at 30 FPS for 30 years
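A minimal sketch of driving `LoadStreams` directly (the URL is a placeholder); each iteration yields one latest frame per stream until the reader thread stops or `q` is pressed:

```python
loader = LoadStreams("rtsp://example.com/media.mp4", vid_stride=1, buffer=False)
try:
    for sources, images, info in loader:  # images: list with one BGR numpy frame per stream
        frame = images[0]
        break  # process a single frame in this sketch
finally:
    loader.close()  # stop reader threads and release all captures
```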
|
||||
|
||||
class LoadScreenshots:
|
||||
"""
|
||||
YOLOv8 screenshot dataloader.
|
||||
|
||||
This class manages the loading of screenshot images for processing with YOLOv8.
|
||||
Suitable for use with `yolo predict source=screen`.
|
||||
|
||||
Attributes:
|
||||
source (str): The source input indicating which screen to capture.
|
||||
screen (int): The screen number to capture.
|
||||
left (int): The left coordinate for screen capture area.
|
||||
top (int): The top coordinate for screen capture area.
|
||||
width (int): The width of the screen capture area.
|
||||
height (int): The height of the screen capture area.
|
||||
mode (str): Set to 'stream' indicating real-time capture.
|
||||
frame (int): Counter for captured frames.
|
||||
sct (mss.mss): Screen capture object from `mss` library.
|
||||
bs (int): Batch size, set to 1.
|
||||
monitor (dict): Monitor configuration details.
|
||||
|
||||
Methods:
|
||||
__iter__: Returns an iterator object.
|
||||
__next__: Captures the next screenshot and returns it.
|
||||
"""
|
||||
|
||||
def __init__(self, source):
|
||||
"""Source = [screen_number left top width height] (pixels)."""
|
||||
check_requirements("mss")
|
||||
import mss # noqa
|
||||
|
||||
source, *params = source.split()
|
||||
self.screen, left, top, width, height = 0, None, None, None, None # default to full screen 0
|
||||
if len(params) == 1:
|
||||
self.screen = int(params[0])
|
||||
elif len(params) == 4:
|
||||
left, top, width, height = (int(x) for x in params)
|
||||
elif len(params) == 5:
|
||||
self.screen, left, top, width, height = (int(x) for x in params)
|
||||
self.mode = "stream"
|
||||
self.frame = 0
|
||||
self.sct = mss.mss()
|
||||
self.bs = 1
|
||||
self.fps = 30
|
||||
|
||||
# Parse monitor shape
|
||||
monitor = self.sct.monitors[self.screen]
|
||||
self.top = monitor["top"] if top is None else (monitor["top"] + top)
|
||||
self.left = monitor["left"] if left is None else (monitor["left"] + left)
|
||||
self.width = width or monitor["width"]
|
||||
self.height = height or monitor["height"]
|
||||
self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}
|
||||
|
||||
def __iter__(self):
|
||||
"""Returns an iterator of the object."""
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
"""mss screen capture: get raw pixels from the screen as np array."""
|
||||
im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3] # BGRA to BGR
|
||||
s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "
|
||||
|
||||
self.frame += 1
|
||||
return [str(self.screen)], [im0], [s] # screen, img, string
|
||||
|
||||
|
||||
class LoadImagesAndVideos:
|
||||
"""
|
||||
YOLOv8 image/video dataloader.
|
||||
|
||||
This class manages the loading and pre-processing of image and video data for YOLOv8. It supports loading from
|
||||
various formats, including single image files, video files, and lists of image and video paths.
|
||||
|
||||
Attributes:
|
||||
files (list): List of image and video file paths.
|
||||
nf (int): Total number of files (images and videos).
|
||||
video_flag (list): Flags indicating whether a file is a video (True) or an image (False).
|
||||
mode (str): Current mode, 'image' or 'video'.
|
||||
vid_stride (int): Stride for video frame-rate, defaults to 1.
|
||||
bs (int): Batch size, set to 1 for this class.
|
||||
cap (cv2.VideoCapture): Video capture object for OpenCV.
|
||||
frame (int): Frame counter for video.
|
||||
frames (int): Total number of frames in the video.
|
||||
count (int): Counter for iteration, initialized at 0 during `__iter__()`.
|
||||
|
||||
Methods:
|
||||
_new_video(path): Create a new cv2.VideoCapture object for a given video path.
|
||||
"""
|
||||
|
||||
def __init__(self, path, batch=1, vid_stride=1):
|
||||
"""Initialize the Dataloader and raise FileNotFoundError if file not found."""
|
||||
parent = None
|
||||
if isinstance(path, str) and Path(path).suffix == ".txt": # *.txt file with img/vid/dir on each line
|
||||
parent = Path(path).parent
|
||||
path = Path(path).read_text().splitlines() # list of sources
|
||||
files = []
|
||||
for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
|
||||
a = str(Path(p).absolute()) # do not use .resolve() https://github.com/ultralytics/ultralytics/issues/2912
|
||||
if "*" in a:
|
||||
files.extend(sorted(glob.glob(a, recursive=True))) # glob
|
||||
elif os.path.isdir(a):
|
||||
files.extend(sorted(glob.glob(os.path.join(a, "*.*")))) # dir
|
||||
elif os.path.isfile(a):
|
||||
files.append(a) # files (absolute or relative to CWD)
|
||||
elif parent and (parent / p).is_file():
|
||||
files.append(str((parent / p).absolute())) # files (relative to *.txt file parent)
|
||||
else:
|
||||
raise FileNotFoundError(f"{p} does not exist")
|
||||
|
||||
images = [x for x in files if x.split(".")[-1].lower() in IMG_FORMATS]
|
||||
videos = [x for x in files if x.split(".")[-1].lower() in VID_FORMATS]
|
||||
ni, nv = len(images), len(videos)
|
||||
|
||||
self.files = images + videos
|
||||
self.nf = ni + nv # number of files
|
||||
self.ni = ni # number of images
|
||||
self.video_flag = [False] * ni + [True] * nv
|
||||
self.mode = "image"
|
||||
self.vid_stride = vid_stride # video frame-rate stride
|
||||
self.bs = batch
|
||||
if any(videos):
|
||||
self._new_video(videos[0]) # new video
|
||||
else:
|
||||
self.cap = None
|
||||
if self.nf == 0:
|
||||
raise FileNotFoundError(
|
||||
f"No images or videos found in {p}. "
|
||||
f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}"
|
||||
)
|
||||
|
||||
def __iter__(self):
|
||||
"""Returns an iterator object for VideoStream or ImageFolder."""
|
||||
self.count = 0
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
"""Returns the next batch of images or video frames along with their paths and metadata."""
|
||||
paths, imgs, info = [], [], []
|
||||
while len(imgs) < self.bs:
|
||||
if self.count >= self.nf: # end of file list
|
||||
if len(imgs) > 0:
|
||||
return paths, imgs, info # return last partial batch
|
||||
else:
|
||||
raise StopIteration
|
||||
|
||||
path = self.files[self.count]
|
||||
if self.video_flag[self.count]:
|
||||
self.mode = "video"
|
||||
if not self.cap or not self.cap.isOpened():
|
||||
self._new_video(path)
|
||||
|
||||
for _ in range(self.vid_stride):
|
||||
success = self.cap.grab()
|
||||
if not success:
|
||||
break # end of video or failure
|
||||
|
||||
if success:
|
||||
success, im0 = self.cap.retrieve()
|
||||
if success:
|
||||
self.frame += 1
|
||||
paths.append(path)
|
||||
imgs.append(im0)
|
||||
info.append(f"video {self.count + 1}/{self.nf} (frame {self.frame}/{self.frames}) {path}: ")
|
||||
if self.frame == self.frames: # end of video
|
||||
self.count += 1
|
||||
self.cap.release()
|
||||
else:
|
||||
# Move to the next file if the current video ended or failed to open
|
||||
self.count += 1
|
||||
if self.cap:
|
||||
self.cap.release()
|
||||
if self.count < self.nf:
|
||||
self._new_video(self.files[self.count])
|
||||
else:
|
||||
self.mode = "image"
|
||||
im0 = cv2.imread(path) # BGR
|
||||
if im0 is None:
|
||||
raise FileNotFoundError(f"Image Not Found {path}")
|
||||
paths.append(path)
|
||||
imgs.append(im0)
|
||||
info.append(f"image {self.count + 1}/{self.nf} {path}: ")
|
||||
self.count += 1 # move to the next file
|
||||
if self.count >= self.ni: # end of image list
|
||||
break
|
||||
|
||||
return paths, imgs, info
|
||||
|
||||
def _new_video(self, path):
|
||||
"""Creates a new video capture object for the given path."""
|
||||
self.frame = 0
|
||||
self.cap = cv2.VideoCapture(path)
|
||||
self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
|
||||
if not self.cap.isOpened():
|
||||
raise FileNotFoundError(f"Failed to open video {path}")
|
||||
self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
|
||||
|
||||
def __len__(self):
|
||||
"""Returns the number of batches in the object."""
|
||||
return math.ceil(self.nf / self.bs) # number of files
|
||||
|
||||
|
||||
class LoadPilAndNumpy:
    """
    Load images from PIL and Numpy arrays for batch processing.

    This class is designed to manage loading and pre-processing of image data from both PIL and Numpy formats.
    It performs basic validation and format conversion to ensure that the images are in the required format for
    downstream processing.

    Attributes:
        paths (list): List of image paths or autogenerated filenames.
        im0 (list): List of images stored as Numpy arrays.
        mode (str): Type of data being processed, defaults to 'image'.
        bs (int): Batch size, equivalent to the length of `im0`.

    Methods:
        _single_check(im): Validate and format a single image to a Numpy array.
    """

    def __init__(self, im0):
        """Initialize PIL and Numpy Dataloader."""
        if not isinstance(im0, list):
            im0 = [im0]
        self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)]
        self.im0 = [self._single_check(im) for im in im0]
        self.mode = "image"
        self.bs = len(self.im0)

    @staticmethod
    def _single_check(im):
        """Validate and format an image to numpy array."""
        assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
        if isinstance(im, Image.Image):
            if im.mode != "RGB":
                im = im.convert("RGB")
            im = np.asarray(im)[:, :, ::-1]  # RGB to BGR
            im = np.ascontiguousarray(im)  # contiguous
        return im

    def __len__(self):
        """Returns the length of the 'im0' attribute."""
        return len(self.im0)

    def __next__(self):
        """Returns the single batch of paths, images and empty info strings, then stops iteration."""
        if self.count == 1:  # loop only once as it's batch inference
            raise StopIteration
        self.count += 1
        return self.paths, self.im0, [""] * self.bs

    def __iter__(self):
        """Enables iteration for class LoadPilAndNumpy."""
        self.count = 0
        return self
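
# Usage sketch (hypothetical filenames): a single PIL/numpy image or a list of them is accepted,
# and the loader yields exactly one batch before raising StopIteration.
#
#   from PIL import Image
#   loader = LoadPilAndNumpy([Image.open("a.jpg"), Image.open("b.jpg")])
#   paths, im0s, info = next(iter(loader))  # one batch of 2 BGR numpy arrays
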
class LoadTensor:
    """
    Load images from torch.Tensor data.

    This class manages the loading and pre-processing of image data from PyTorch tensors for further processing.

    Attributes:
        im0 (torch.Tensor): The input tensor containing the image(s).
        bs (int): Batch size, inferred from the shape of `im0`.
        mode (str): Current mode, set to 'image'.
        paths (list): List of image paths or filenames.
        count (int): Counter for iteration, initialized at 0 during `__iter__()`.

    Methods:
        _single_check(im, stride): Validate and possibly modify the input tensor.
    """

    def __init__(self, im0) -> None:
        """Initialize Tensor Dataloader."""
        self.im0 = self._single_check(im0)
        self.bs = self.im0.shape[0]
        self.mode = "image"
        self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)]

    @staticmethod
    def _single_check(im, stride=32):
        """Validate and format an image to torch.Tensor."""
        s = (
            f"WARNING ⚠️ torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) "
            f"divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible."
        )
        if len(im.shape) != 4:
            if len(im.shape) != 3:
                raise ValueError(s)
            LOGGER.warning(s)
            im = im.unsqueeze(0)
        if im.shape[2] % stride or im.shape[3] % stride:
            raise ValueError(s)
        if im.max() > 1.0 + torch.finfo(im.dtype).eps:  # torch.float32 eps is 1.2e-07
            LOGGER.warning(
                f"WARNING ⚠️ torch.Tensor inputs should be normalized 0.0-1.0 but max value is {im.max()}. "
                f"Dividing input by 255."
            )
            im = im.float() / 255.0

        return im

    def __iter__(self):
        """Returns an iterator object."""
        self.count = 0
        return self

    def __next__(self):
        """Return next item in the iterator."""
        if self.count == 1:
            raise StopIteration
        self.count += 1
        return self.paths, self.im0, [""] * self.bs

    def __len__(self):
        """Returns the batch size."""
        return self.bs
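
# Usage sketch (assumed shapes): inputs must be BCHW, stride-divisible and 0-1 normalized,
# e.g. (1, 3, 640, 640); a 3-D CHW tensor is unsqueezed to BCHW with a warning.
#
#   t = torch.rand(1, 3, 640, 640)  # already normalized to 0-1
#   loader = LoadTensor(t)
#   paths, im0s, info = next(iter(loader))
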
def autocast_list(source):
    """Merges a list of sources of different types into a list of numpy arrays or PIL images."""
    files = []
    for im in source:
        if isinstance(im, (str, Path)):  # filename or URI
            files.append(Image.open(requests.get(im, stream=True).raw if str(im).startswith("http") else im))
        elif isinstance(im, (Image.Image, np.ndarray)):  # PIL or np Image
            files.append(im)
        else:
            raise TypeError(
                f"type {type(im).__name__} is not a supported Ultralytics prediction source type. \n"
                f"See https://docs.ultralytics.com/modes/predict for supported source types."
            )

    return files
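
# Usage sketch (hypothetical filenames): mixed source types are normalized into one list that
# LoadPilAndNumpy can consume.
#
#   batch = autocast_list(["bus.jpg", Image.open("dog.jpg"), np.zeros((640, 640, 3), np.uint8)])
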
def get_best_youtube_url(url, use_pafy=True):
    """
    Retrieves the URL of the best quality MP4 video stream from a given YouTube video.

    This function uses the pafy or yt_dlp library to extract the video info from YouTube. It then finds the highest
    quality MP4 format that has a video codec but no audio codec, and returns the URL of this video stream.

    Args:
        url (str): The URL of the YouTube video.
        use_pafy (bool): Use the pafy package, default=True, otherwise use the yt_dlp package.

    Returns:
        (str): The URL of the best quality MP4 video stream, or None if no suitable stream is found.
    """
    if use_pafy:
        check_requirements(("pafy", "youtube_dl==2020.12.2"))
        import pafy  # noqa

        return pafy.new(url).getbestvideo(preftype="mp4").url
    else:
        check_requirements("yt-dlp")
        import yt_dlp

        with yt_dlp.YoutubeDL({"quiet": True}) as ydl:
            info_dict = ydl.extract_info(url, download=False)  # extract info
        for f in reversed(info_dict.get("formats", [])):  # reversed because best is usually last
            # Find a format with a video codec, no audio, *.mp4 extension, at least 1920 px wide or 1080 px tall
            good_size = (f.get("width") or 0) >= 1920 or (f.get("height") or 0) >= 1080
            if good_size and f["vcodec"] != "none" and f["acodec"] == "none" and f["ext"] == "mp4":
                return f.get("url")
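
# Usage sketch: pafy is pinned to youtube_dl==2020.12.2 and has aged poorly, so yt-dlp is
# generally the more robust backend (a judgment call, not a claim made by this file).
#
#   stream_url = get_best_youtube_url("https://youtu.be/LNwODJXcvt4", use_pafy=False)
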
# Define constants
LOADERS = (LoadStreams, LoadPilAndNumpy, LoadImagesAndVideos, LoadScreenshots)
@ -1,18 +0,0 @@
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download latest models from https://github.com/ultralytics/assets/releases
# Example usage: bash ultralytics/data/scripts/download_weights.sh
# parent
# └── weights
#     ├── yolov8n.pt  ← downloads here
#     ├── yolov8s.pt
#     └── ...

python - <<EOF
from ultralytics.utils.downloads import attempt_download_asset

assets = [f'yolov8{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '-cls', '-seg', '-pose')]
for x in assets:
    attempt_download_asset(f'weights/{x}')

EOF
@ -1,60 +0,0 @@
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download COCO 2017 dataset https://cocodataset.org
# Example usage: bash data/scripts/get_coco.sh
# parent
# ├── ultralytics
# └── datasets
#     └── coco  ← downloads here

# Arguments (optional) Usage: bash data/scripts/get_coco.sh --train --val --test --segments
if [ "$#" -gt 0 ]; then
  for opt in "$@"; do
    case "${opt}" in
    --train) train=true ;;
    --val) val=true ;;
    --test) test=true ;;
    --segments) segments=true ;;
    --sama) sama=true ;;
    esac
  done
else
  train=true
  val=true
  test=false
  segments=false
  sama=false
fi

# Download/unzip labels
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
if [ "$segments" == "true" ]; then
  f='coco2017labels-segments.zip' # 169 MB
elif [ "$sama" == "true" ]; then
  f='coco2017labels-segments-sama.zip' # 199 MB https://www.sama.com/sama-coco-dataset/
else
  f='coco2017labels.zip' # 46 MB
fi
echo 'Downloading' $url$f ' ...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &

# Download/unzip images
d='../datasets/coco/images' # unzip directory
url=http://images.cocodataset.org/zips/
if [ "$train" == "true" ]; then
  f='train2017.zip' # 19G, 118k images
  echo 'Downloading' $url$f '...'
  curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
fi
if [ "$val" == "true" ]; then
  f='val2017.zip' # 1G, 5k images
  echo 'Downloading' $url$f '...'
  curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
fi
if [ "$test" == "true" ]; then
  f='test2017.zip' # 7G, 41k images (optional)
  echo 'Downloading' $url$f '...'
  curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
fi
wait # finish background tasks
@ -1,17 +0,0 @@
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
# Example usage: bash data/scripts/get_coco128.sh
# parent
# ├── ultralytics
# └── datasets
#     └── coco128  ← downloads here

# Download/unzip images and labels
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
f='coco128.zip' # or 'coco128-segments.zip', 68 MB
echo 'Downloading' $url$f ' ...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &

wait # finish background tasks
@ -1,51 +0,0 @@
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download ILSVRC2012 ImageNet dataset https://image-net.org
# Example usage: bash data/scripts/get_imagenet.sh
# parent
# ├── ultralytics
# └── datasets
#     └── imagenet  ← downloads here

# Arguments (optional) Usage: bash data/scripts/get_imagenet.sh --train --val
if [ "$#" -gt 0 ]; then
  for opt in "$@"; do
    case "${opt}" in
    --train) train=true ;;
    --val) val=true ;;
    esac
  done
else
  train=true
  val=true
fi

# Make dir
d='../datasets/imagenet' # unzip directory
mkdir -p $d && cd $d

# Download/unzip train
if [ "$train" == "true" ]; then
  wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar # download 138G, 1281167 images
  mkdir train && mv ILSVRC2012_img_train.tar train/ && cd train
  tar -xf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar
  find . -name "*.tar" | while read NAME; do
    mkdir -p "${NAME%.tar}"
    tar -xf "${NAME}" -C "${NAME%.tar}"
    rm -f "${NAME}"
  done
  cd ..
fi

# Download/unzip val
if [ "$val" == "true" ]; then
  wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar # download 6.3G, 50000 images
  mkdir val && mv ILSVRC2012_img_val.tar val/ && cd val && tar -xf ILSVRC2012_img_val.tar
  wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash # move into subdirs
fi

# Delete corrupted image (optional: PNG under JPEG name that may cause dataloaders to fail)
# rm train/n04266014/n04266014_10835.JPEG

# TFRecords (optional)
# wget https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_lsvrc_2015_synsets.txt
@ -1,288 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import itertools
from glob import glob
from math import ceil
from pathlib import Path

import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm

from ultralytics.data.utils import exif_size, img2label_paths
from ultralytics.utils.checks import check_requirements

check_requirements("shapely")
from shapely.geometry import Polygon


def bbox_iof(polygon1, bbox2, eps=1e-6):
    """
    Calculate intersection-over-foreground (IoF) between polygons and bounding boxes.

    Args:
        polygon1 (np.ndarray): Polygon coordinates, (n, 8).
        bbox2 (np.ndarray): Bounding boxes, (n, 4).
    """
    polygon1 = polygon1.reshape(-1, 4, 2)
    lt_point = np.min(polygon1, axis=-2)
    rb_point = np.max(polygon1, axis=-2)
    bbox1 = np.concatenate([lt_point, rb_point], axis=-1)

    lt = np.maximum(bbox1[:, None, :2], bbox2[..., :2])
    rb = np.minimum(bbox1[:, None, 2:], bbox2[..., 2:])
    wh = np.clip(rb - lt, 0, np.inf)
    h_overlaps = wh[..., 0] * wh[..., 1]

    l, t, r, b = (bbox2[..., i] for i in range(4))
    polygon2 = np.stack([l, t, r, t, r, b, l, b], axis=-1).reshape(-1, 4, 2)

    sg_polys1 = [Polygon(p) for p in polygon1]
    sg_polys2 = [Polygon(p) for p in polygon2]
    overlaps = np.zeros(h_overlaps.shape)
    for p in zip(*np.nonzero(h_overlaps)):
        overlaps[p] = sg_polys1[p[0]].intersection(sg_polys2[p[-1]]).area
    unions = np.array([p.area for p in sg_polys1], dtype=np.float32)
    unions = unions[..., None]

    unions = np.clip(unions, eps, np.inf)
    outputs = overlaps / unions
    if outputs.ndim == 1:
        outputs = outputs[..., None]
    return outputs
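
# Worked example (toy numbers, not from the original file): a unit-square polygon against a box
# covering its right half gives IoF = intersection area / polygon area = 0.5 / 1.0 = 0.5.
#
#   poly = np.array([[0, 0, 1, 0, 1, 1, 0, 1]], dtype=np.float32)  # (1, 8) xyxyxyxy
#   box = np.array([[0.5, 0.0, 1.5, 1.0]], dtype=np.float32)  # (1, 4) xyxy
#   bbox_iof(poly, box)  # -> array([[0.5]])
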
def load_yolo_dota(data_root, split="train"):
    """
    Load DOTA dataset.

    Args:
        data_root (str): Data root.
        split (str): The split of the dataset, either 'train' or 'val'.

    Notes:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
    """
    assert split in ["train", "val"]
    im_dir = Path(data_root) / "images" / split
    assert im_dir.exists(), f"Can't find {im_dir}, please check your data root."
    im_files = glob(str(Path(data_root) / "images" / split / "*"))
    lb_files = img2label_paths(im_files)
    annos = []
    for im_file, lb_file in zip(im_files, lb_files):
        w, h = exif_size(Image.open(im_file))
        with open(lb_file) as f:
            lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
            lb = np.array(lb, dtype=np.float32)
        annos.append(dict(ori_size=(h, w), label=lb, filepath=im_file))
    return annos


def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.01):
    """
    Get the coordinates of sliding windows.

    Args:
        im_size (tuple): Original image size, (h, w).
        crop_sizes (List(int)): Crop sizes of windows.
        gaps (List(int)): Gaps between crops.
        im_rate_thr (float): Threshold on the fraction of each window's area that lies inside the image.
    """
    h, w = im_size
    windows = []
    for crop_size, gap in zip(crop_sizes, gaps):
        assert crop_size > gap, f"invalid crop_size gap pair [{crop_size} {gap}]"
        step = crop_size - gap

        xn = 1 if w <= crop_size else ceil((w - crop_size) / step + 1)
        xs = [step * i for i in range(xn)]
        if len(xs) > 1 and xs[-1] + crop_size > w:
            xs[-1] = w - crop_size

        yn = 1 if h <= crop_size else ceil((h - crop_size) / step + 1)
        ys = [step * i for i in range(yn)]
        if len(ys) > 1 and ys[-1] + crop_size > h:
            ys[-1] = h - crop_size

        start = np.array(list(itertools.product(xs, ys)), dtype=np.int64)
        stop = start + crop_size
        windows.append(np.concatenate([start, stop], axis=1))
    windows = np.concatenate(windows, axis=0)

    im_in_wins = windows.copy()
    im_in_wins[:, 0::2] = np.clip(im_in_wins[:, 0::2], 0, w)
    im_in_wins[:, 1::2] = np.clip(im_in_wins[:, 1::2], 0, h)
    im_areas = (im_in_wins[:, 2] - im_in_wins[:, 0]) * (im_in_wins[:, 3] - im_in_wins[:, 1])
    win_areas = (windows[:, 2] - windows[:, 0]) * (windows[:, 3] - windows[:, 1])
    im_rates = im_areas / win_areas
    if not (im_rates > im_rate_thr).any():
        max_rate = im_rates.max()
        im_rates[abs(im_rates - max_rate) < eps] = 1
    return windows[im_rates > im_rate_thr]
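
# Worked example (toy size): for a 1536x1536 image with crop_size=1024 and gap=200, the step is
# 824 and ceil((1536 - 1024) / 824 + 1) = 2 starts per axis, the last clamped to 1536 - 1024 = 512,
# giving a 2x2 grid of fully-inside windows.
#
#   get_windows((1536, 1536))  # -> (4, 4) int64 array of xyxy windows
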
def get_window_obj(anno, windows, iof_thr=0.7):
    """Get objects for each window."""
    h, w = anno["ori_size"]
    label = anno["label"]
    if len(label):
        label[:, 1::2] *= w
        label[:, 2::2] *= h
        iofs = bbox_iof(label[:, 1:], windows)
        # Coordinates are unnormalized here and not yet shifted to the window origin
        return [(label[iofs[:, i] >= iof_thr]) for i in range(len(windows))]  # window_anns
    else:
        return [np.zeros((0, 9), dtype=np.float32) for _ in range(len(windows))]  # window_anns


def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
    """
    Crop images and save new labels.

    Args:
        anno (dict): Annotation dict, including `filepath`, `label`, `ori_size` as its keys.
        windows (list): A list of window coordinates.
        window_objs (list): A list of labels inside each window.
        im_dir (str): The output directory path of images.
        lb_dir (str): The output directory path of labels.

    Notes:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
    """
    im = cv2.imread(anno["filepath"])
    name = Path(anno["filepath"]).stem
    for i, window in enumerate(windows):
        x_start, y_start, x_stop, y_stop = window.tolist()
        new_name = f"{name}__{x_stop - x_start}__{x_start}___{y_start}"
        patch_im = im[y_start:y_stop, x_start:x_stop]
        ph, pw = patch_im.shape[:2]

        cv2.imwrite(str(Path(im_dir) / f"{new_name}.jpg"), patch_im)
        label = window_objs[i]
        if len(label) == 0:
            continue
        label[:, 1::2] -= x_start
        label[:, 2::2] -= y_start
        label[:, 1::2] /= pw
        label[:, 2::2] /= ph

        with open(Path(lb_dir) / f"{new_name}.txt", "w") as f:
            for lb in label:
                formatted_coords = ["{:.6g}".format(coord) for coord in lb[1:]]
                f.write(f"{int(lb[0])} {' '.join(formatted_coords)}\n")


def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=[1024], gaps=[200]):
    """
    Split both images and labels.

    Notes:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - split
                - labels
                    - split
        and the output directory structure is:
            - save_dir
                - images
                    - split
                - labels
                    - split
    """
    im_dir = Path(save_dir) / "images" / split
    im_dir.mkdir(parents=True, exist_ok=True)
    lb_dir = Path(save_dir) / "labels" / split
    lb_dir.mkdir(parents=True, exist_ok=True)

    annos = load_yolo_dota(data_root, split=split)
    for anno in tqdm(annos, total=len(annos), desc=split):
        windows = get_windows(anno["ori_size"], crop_sizes, gaps)
        window_objs = get_window_obj(anno, windows)
        crop_and_save(anno, windows, window_objs, str(im_dir), str(lb_dir))


def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
    """
    Split train and val set of DOTA.

    Notes:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
        and the output directory structure is:
            - save_dir
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
    """
    crop_sizes, gaps = [], []
    for r in rates:
        crop_sizes.append(int(crop_size / r))
        gaps.append(int(gap / r))
    for split in ["train", "val"]:
        split_images_and_labels(data_root, save_dir, split, crop_sizes, gaps)


def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
    """
    Split test set of DOTA; labels are not included within this set.

    Notes:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - test
        and the output directory structure is:
            - save_dir
                - images
                    - test
    """
    crop_sizes, gaps = [], []
    for r in rates:
        crop_sizes.append(int(crop_size / r))
        gaps.append(int(gap / r))
    save_dir = Path(save_dir) / "images" / "test"
    save_dir.mkdir(parents=True, exist_ok=True)

    im_dir = Path(data_root) / "images" / "test"
    assert im_dir.exists(), f"Can't find {im_dir}, please check your data root."
    im_files = glob(str(im_dir / "*"))
    for im_file in tqdm(im_files, total=len(im_files), desc="test"):
        w, h = exif_size(Image.open(im_file))
        windows = get_windows((h, w), crop_sizes=crop_sizes, gaps=gaps)
        im = cv2.imread(im_file)
        name = Path(im_file).stem
        for window in windows:
            x_start, y_start, x_stop, y_stop = window.tolist()
            new_name = f"{name}__{x_stop - x_start}__{x_start}___{y_start}"
            patch_im = im[y_start:y_stop, x_start:x_stop]
            cv2.imwrite(str(save_dir / f"{new_name}.jpg"), patch_im)


if __name__ == "__main__":
    split_trainval(data_root="DOTAv2", save_dir="DOTAv2-split")
    split_test(data_root="DOTAv2", save_dir="DOTAv2-split")
@ -1,651 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import contextlib
import hashlib
import json
import os
import random
import subprocess
import time
import zipfile
from multiprocessing.pool import ThreadPool
from pathlib import Path
from tarfile import is_tarfile

import cv2
import numpy as np
from PIL import Image, ImageOps

from ultralytics.nn.autobackend import check_class_names
from ultralytics.utils import (
    DATASETS_DIR,
    LOGGER,
    NUM_THREADS,
    ROOT,
    SETTINGS_YAML,
    TQDM,
    clean_url,
    colorstr,
    emojis,
    yaml_load,
    yaml_save,
)
from ultralytics.utils.checks import check_file, check_font, is_ascii
from ultralytics.utils.downloads import download, safe_download, unzip_file
from ultralytics.utils.ops import segments2boxes

HELP_URL = "See https://docs.ultralytics.com/datasets/detect for dataset formatting guidance."
IMG_FORMATS = {"bmp", "dng", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp", "pfm"}  # image suffixes
VID_FORMATS = {"asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm"}  # video suffixes
PIN_MEMORY = str(os.getenv("PIN_MEMORY", True)).lower() == "true"  # global pin_memory for dataloaders


def img2label_paths(img_paths):
    """Define label paths as a function of image paths."""
    sa, sb = f"{os.sep}images{os.sep}", f"{os.sep}labels{os.sep}"  # /images/, /labels/ substrings
    return [sb.join(x.rsplit(sa, 1)).rsplit(".", 1)[0] + ".txt" for x in img_paths]


def get_hash(paths):
    """Returns a single hash value of a list of paths (files or dirs)."""
    size = sum(os.path.getsize(p) for p in paths if os.path.exists(p))  # sizes
    h = hashlib.sha256(str(size).encode())  # hash sizes
    h.update("".join(paths).encode())  # hash paths
    return h.hexdigest()  # return hash
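
# Usage sketch (hypothetical paths): the hash mixes the total file size with the joined path
# string, so adding, renaming, or resizing any file invalidates the cache key.
#
#   key = get_hash(["datasets/coco8/images/train", "datasets/coco8/labels/train"])
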
def exif_size(img: Image.Image):
    """Returns exif-corrected PIL size."""
    s = img.size  # (width, height)
    if img.format == "JPEG":  # only support JPEG images
        with contextlib.suppress(Exception):
            exif = img.getexif()
            if exif:
                rotation = exif.get(274, None)  # the EXIF key for the orientation tag is 274
                if rotation in [6, 8]:  # rotation 270 or 90
                    s = s[1], s[0]
    return s


def verify_image(args):
    """Verify one image."""
    (im_file, cls), prefix = args
    # Number (found, corrupt), message
    nf, nc, msg = 0, 0, ""
    try:
        im = Image.open(im_file)
        im.verify()  # PIL verify
        shape = exif_size(im)  # image size
        shape = (shape[1], shape[0])  # hw
        assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels"
        assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}"
        if im.format.lower() in ("jpg", "jpeg"):
            with open(im_file, "rb") as f:
                f.seek(-2, 2)
                if f.read() != b"\xff\xd9":  # corrupt JPEG
                    ImageOps.exif_transpose(Image.open(im_file)).save(im_file, "JPEG", subsampling=0, quality=100)
                    msg = f"{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved"
        nf = 1
    except Exception as e:
        nc = 1
        msg = f"{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}"
    return (im_file, cls), nf, nc, msg


def verify_image_label(args):
    """Verify one image-label pair."""
    im_file, lb_file, prefix, keypoint, num_cls, nkpt, ndim = args
    # Number (missing, found, empty, corrupt), message, segments, keypoints
    nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, "", [], None
    try:
        # Verify images
        im = Image.open(im_file)
        im.verify()  # PIL verify
        shape = exif_size(im)  # image size
        shape = (shape[1], shape[0])  # hw
        assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels"
        assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}"
        if im.format.lower() in ("jpg", "jpeg"):
            with open(im_file, "rb") as f:
                f.seek(-2, 2)
                if f.read() != b"\xff\xd9":  # corrupt JPEG
                    ImageOps.exif_transpose(Image.open(im_file)).save(im_file, "JPEG", subsampling=0, quality=100)
                    msg = f"{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved"

        # Verify labels
        if os.path.isfile(lb_file):
            nf = 1  # label found
            with open(lb_file) as f:
                lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
                if any(len(x) > 6 for x in lb) and (not keypoint):  # is segment
                    classes = np.array([x[0] for x in lb], dtype=np.float32)
                    segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb]  # (cls, xy1...)
                    lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
                lb = np.array(lb, dtype=np.float32)
            nl = len(lb)
            if nl:
                if keypoint:
                    assert lb.shape[1] == (5 + nkpt * ndim), f"labels require {(5 + nkpt * ndim)} columns each"
                    points = lb[:, 5:].reshape(-1, ndim)[:, :2]
                else:
                    assert lb.shape[1] == 5, f"labels require 5 columns, {lb.shape[1]} columns detected"
                    points = lb[:, 1:]
                assert points.max() <= 1, f"non-normalized or out of bounds coordinates {points[points > 1]}"
                assert lb.min() >= 0, f"negative label values {lb[lb < 0]}"

                # All labels
                max_cls = lb[:, 0].max()  # max class index
                assert max_cls <= num_cls, (
                    f"Label class {int(max_cls)} exceeds dataset class count {num_cls}. "
                    f"Possible class labels are 0-{num_cls - 1}"
                )
                _, i = np.unique(lb, axis=0, return_index=True)
                if len(i) < nl:  # duplicate row check
                    lb = lb[i]  # remove duplicates
                    if segments:
                        segments = [segments[x] for x in i]
                    msg = f"{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed"
            else:
                ne = 1  # label empty
                lb = np.zeros((0, (5 + nkpt * ndim) if keypoint else 5), dtype=np.float32)
        else:
            nm = 1  # label missing
            lb = np.zeros((0, (5 + nkpt * ndim) if keypoint else 5), dtype=np.float32)
        if keypoint:
            keypoints = lb[:, 5:].reshape(-1, nkpt, ndim)
            if ndim == 2:
                kpt_mask = np.where((keypoints[..., 0] < 0) | (keypoints[..., 1] < 0), 0.0, 1.0).astype(np.float32)
                keypoints = np.concatenate([keypoints, kpt_mask[..., None]], axis=-1)  # (nl, nkpt, 3)
        lb = lb[:, :5]
        return im_file, lb, shape, segments, keypoints, nm, nf, ne, nc, msg
    except Exception as e:
        nc = 1
        msg = f"{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}"
        return [None, None, None, None, None, nm, nf, ne, nc, msg]


def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1):
    """
    Convert a list of polygons to a binary mask of the specified image size.

    Args:
        imgsz (tuple): The size of the image as (height, width).
        polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where
            N is the number of polygons, and M is the number of points such that M % 2 = 0.
        color (int, optional): The color value to fill in the polygons on the mask. Defaults to 1.
        downsample_ratio (int, optional): Factor by which to downsample the mask. Defaults to 1.

    Returns:
        (np.ndarray): A binary mask of the specified image size with the polygons filled in.
    """
    mask = np.zeros(imgsz, dtype=np.uint8)
    polygons = np.asarray(polygons, dtype=np.int32)
    polygons = polygons.reshape((polygons.shape[0], -1, 2))
    cv2.fillPoly(mask, polygons, color=color)
    nh, nw = (imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio)
    # Note: fillPoly first then resize is trying to keep the same loss calculation method when mask-ratio=1
    return cv2.resize(mask, (nw, nh))
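
# Worked example (toy polygon): rasterize a triangle into a 4x4 mask; with downsample_ratio=2
# the filled mask is resized to 2x2 after rasterization.
#
#   tri = np.array([0, 0, 3, 0, 0, 3], dtype=np.float32)  # x1, y1, x2, y2, x3, y3
#   m = polygon2mask((4, 4), [tri], color=1, downsample_ratio=2)  # -> (2, 2) uint8 mask
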
def polygons2masks(imgsz, polygons, color, downsample_ratio=1):
    """
    Convert a list of polygons to a set of binary masks of the specified image size.

    Args:
        imgsz (tuple): The size of the image as (height, width).
        polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where
            N is the number of polygons, and M is the number of points such that M % 2 = 0.
        color (int): The color value to fill in the polygons on the masks.
        downsample_ratio (int, optional): Factor by which to downsample each mask. Defaults to 1.

    Returns:
        (np.ndarray): A set of binary masks of the specified image size with the polygons filled in.
    """
    return np.array([polygon2mask(imgsz, [x.reshape(-1)], color, downsample_ratio) for x in polygons])


def polygons2masks_overlap(imgsz, segments, downsample_ratio=1):
    """Return an overlap mask in which each segment is drawn with a distinct index, largest areas first."""
    masks = np.zeros(
        (imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio),
        dtype=np.int32 if len(segments) > 255 else np.uint8,
    )
    areas = []
    ms = []
    for si in range(len(segments)):
        mask = polygon2mask(imgsz, [segments[si].reshape(-1)], downsample_ratio=downsample_ratio, color=1)
        ms.append(mask)
        areas.append(mask.sum())
    areas = np.asarray(areas)
    index = np.argsort(-areas)
    ms = np.array(ms)[index]
    for i in range(len(segments)):
        mask = ms[i] * (i + 1)
        masks = masks + mask
        masks = np.clip(masks, a_min=0, a_max=i + 1)
    return masks, index


def find_dataset_yaml(path: Path) -> Path:
    """
    Find and return the YAML file associated with a Detect, Segment or Pose dataset.

    This function searches for a YAML file at the root level of the provided directory first, and if not found, it
    performs a recursive search. It prefers YAML files that have the same stem as the provided path. An AssertionError
    is raised if no YAML file is found or if multiple YAML files are found.

    Args:
        path (Path): The directory path to search for the YAML file.

    Returns:
        (Path): The path of the found YAML file.
    """
    files = list(path.glob("*.yaml")) or list(path.rglob("*.yaml"))  # try root level first and then recursive
    assert files, f"No YAML file found in '{path.resolve()}'"
    if len(files) > 1:
        files = [f for f in files if f.stem == path.stem]  # prefer *.yaml files that match
    assert len(files) == 1, f"Expected 1 YAML file in '{path.resolve()}', but found {len(files)}.\n{files}"
    return files[0]


def check_det_dataset(dataset, autodownload=True):
    """
    Download, verify, and/or unzip a dataset if not found locally.

    This function checks the availability of a specified dataset, and if not found, it has the option to download and
    unzip the dataset. It then reads and parses the accompanying YAML data, ensuring key requirements are met and also
    resolves paths related to the dataset.

    Args:
        dataset (str): Path to the dataset or dataset descriptor (like a YAML file).
        autodownload (bool, optional): Whether to automatically download the dataset if not found. Defaults to True.

    Returns:
        (dict): Parsed dataset information and paths.
    """

    file = check_file(dataset)

    # Download (optional)
    extract_dir = ""
    if zipfile.is_zipfile(file) or is_tarfile(file):
        new_dir = safe_download(file, dir=DATASETS_DIR, unzip=True, delete=False)
        file = find_dataset_yaml(DATASETS_DIR / new_dir)
        extract_dir, autodownload = file.parent, False

    # Read YAML
    data = yaml_load(file, append_filename=True)  # dictionary

    # Checks
    for k in "train", "val":
        if k not in data:
            if k != "val" or "validation" not in data:
                raise SyntaxError(
                    emojis(f"{dataset} '{k}:' key missing ❌.\n'train' and 'val' are required in all data YAMLs.")
                )
            LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.")
            data["val"] = data.pop("validation")  # replace 'validation' key with 'val' key
    if "names" not in data and "nc" not in data:
        raise SyntaxError(emojis(f"{dataset} key missing ❌.\nEither 'names' or 'nc' is required in all data YAMLs."))
    if "names" in data and "nc" in data and len(data["names"]) != data["nc"]:
        raise SyntaxError(emojis(f"{dataset} 'names' length {len(data['names'])} and 'nc: {data['nc']}' must match."))
    if "names" not in data:
        data["names"] = [f"class_{i}" for i in range(data["nc"])]
    else:
        data["nc"] = len(data["names"])

    data["names"] = check_class_names(data["names"])

    # Resolve paths
    path = Path(extract_dir or data.get("path") or Path(data.get("yaml_file", "")).parent)  # dataset root
    if not path.is_absolute():
        path = (DATASETS_DIR / path).resolve()

    # Set paths
    data["path"] = path  # download scripts
    for k in "train", "val", "test":
        if data.get(k):  # prepend path
            if isinstance(data[k], str):
                x = (path / data[k]).resolve()
                if not x.exists() and data[k].startswith("../"):
                    x = (path / data[k][3:]).resolve()
                data[k] = str(x)
            else:
                data[k] = [str((path / x).resolve()) for x in data[k]]

    # Parse YAML
    val, s = (data.get(x) for x in ("val", "download"))
    if val:
        val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])]  # val path
        if not all(x.exists() for x in val):
            name = clean_url(dataset)  # dataset name with URL auth stripped
            m = f"\nDataset '{name}' images not found ⚠️, missing path '{[x for x in val if not x.exists()][0]}'"
            if s and autodownload:
                LOGGER.warning(m)
            else:
                m += f"\nNote dataset download directory is '{DATASETS_DIR}'. You can update this in '{SETTINGS_YAML}'"
                raise FileNotFoundError(m)
            t = time.time()
            r = None  # success
            if s.startswith("http") and s.endswith(".zip"):  # URL
                safe_download(url=s, dir=DATASETS_DIR, delete=True)
            elif s.startswith("bash "):  # bash script
                LOGGER.info(f"Running {s} ...")
                r = os.system(s)
            else:  # python script
                exec(s, {"yaml": data})
            dt = f"({round(time.time() - t, 1)}s)"
            s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f"failure {dt} ❌"
            LOGGER.info(f"Dataset download {s}\n")
    check_font("Arial.ttf" if is_ascii(data["names"]) else "Arial.Unicode.ttf")  # download fonts

    return data  # dictionary


def check_cls_dataset(dataset, split=""):
    """
    Checks a classification dataset such as Imagenet.

    This function accepts a `dataset` name and attempts to retrieve the corresponding dataset information.
    If the dataset is not found locally, it attempts to download the dataset from the internet and save it locally.

    Args:
        dataset (str | Path): The name of the dataset.
        split (str, optional): The split of the dataset. Either 'val', 'test', or ''. Defaults to ''.

    Returns:
        (dict): A dictionary containing the following keys:
            - 'train' (Path): The directory path containing the training set of the dataset.
            - 'val' (Path): The directory path containing the validation set of the dataset.
            - 'test' (Path): The directory path containing the test set of the dataset.
            - 'nc' (int): The number of classes in the dataset.
            - 'names' (dict): A dictionary of class names in the dataset.
    """

    # Download (optional if dataset=https://file.zip is passed directly)
    if str(dataset).startswith(("http:/", "https:/")):
        dataset = safe_download(dataset, dir=DATASETS_DIR, unzip=True, delete=False)
    elif Path(dataset).suffix in (".zip", ".tar", ".gz"):
        file = check_file(dataset)
        dataset = safe_download(file, dir=DATASETS_DIR, unzip=True, delete=False)

    dataset = Path(dataset)
    data_dir = (dataset if dataset.is_dir() else (DATASETS_DIR / dataset)).resolve()
    if not data_dir.is_dir():
        LOGGER.warning(f"\nDataset not found ⚠️, missing path {data_dir}, attempting download...")
        t = time.time()
        if str(dataset) == "imagenet":
            subprocess.run(f"bash {ROOT / 'data/scripts/get_imagenet.sh'}", shell=True, check=True)
        else:
            url = f"https://github.com/ultralytics/yolov5/releases/download/v1.0/{dataset}.zip"
            download(url, dir=data_dir.parent)
        s = f"Dataset download success ✅ ({time.time() - t:.1f}s), saved to {colorstr('bold', data_dir)}\n"
        LOGGER.info(s)
    train_set = data_dir / "train"
    val_set = (
        data_dir / "val"
        if (data_dir / "val").exists()
        else data_dir / "validation"
        if (data_dir / "validation").exists()
        else None
    )  # data/val or data/validation
    test_set = data_dir / "test" if (data_dir / "test").exists() else None  # data/test
    if split == "val" and not val_set:
        LOGGER.warning("WARNING ⚠️ Dataset 'split=val' not found, using 'split=test' instead.")
    elif split == "test" and not test_set:
        LOGGER.warning("WARNING ⚠️ Dataset 'split=test' not found, using 'split=val' instead.")

    nc = len([x for x in (data_dir / "train").glob("*") if x.is_dir()])  # number of classes
    names = [x.name for x in (data_dir / "train").iterdir() if x.is_dir()]  # class names list
    names = dict(enumerate(sorted(names)))

    # Print to console
    for k, v in {"train": train_set, "val": val_set, "test": test_set}.items():
        prefix = f'{colorstr(f"{k}:")} {v}...'
        if v is None:
            LOGGER.info(prefix)
        else:
            files = [path for path in v.rglob("*.*") if path.suffix[1:].lower() in IMG_FORMATS]
            nf = len(files)  # number of files
            nd = len({file.parent for file in files})  # number of directories
            if nf == 0:
                if k == "train":
                    raise FileNotFoundError(emojis(f"{dataset} '{k}:' no training images found ❌ "))
                else:
                    LOGGER.warning(f"{prefix} found {nf} images in {nd} classes: WARNING ⚠️ no images found")
            elif nd != nc:
                LOGGER.warning(f"{prefix} found {nf} images in {nd} classes: ERROR ❌️ requires {nc} classes, not {nd}")
            else:
                LOGGER.info(f"{prefix} found {nf} images in {nd} classes ✅ ")

    return {"train": train_set, "val": val_set, "test": test_set, "nc": nc, "names": names}
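
# Usage sketch (illustrative names): the return value is a plain dict rather than a YAML-backed
# config, e.g.
#
#   data = check_cls_dataset("imagenet10")
#   data["nc"], data["names"]  # e.g. 10, {0: "n01440764", 1: "n01443537", ...}
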
class HUBDatasetStats:
    """
    A class for generating HUB dataset JSON and `-hub` dataset directory.

    Args:
        path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco8.yaml'.
        task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'.
        autodownload (bool): Attempt to download dataset if not found locally. Default is False.

    Example:
        Download *.zip files from https://github.com/ultralytics/hub/tree/main/example_datasets
        i.e. https://github.com/ultralytics/hub/raw/main/example_datasets/coco8.zip for coco8.zip.
        ```python
        from ultralytics.data.utils import HUBDatasetStats

        stats = HUBDatasetStats('path/to/coco8.zip', task='detect')  # detect dataset
        stats = HUBDatasetStats('path/to/coco8-seg.zip', task='segment')  # segment dataset
        stats = HUBDatasetStats('path/to/coco8-pose.zip', task='pose')  # pose dataset
        stats = HUBDatasetStats('path/to/imagenet10.zip', task='classify')  # classification dataset

        stats.get_json(save=True)
        stats.process_images()
        ```
    """

    def __init__(self, path="coco8.yaml", task="detect", autodownload=False):
        """Initialize class."""
        path = Path(path).resolve()
        LOGGER.info(f"Starting HUB dataset checks for {path}....")

        self.task = task  # detect, segment, pose, classify
        if self.task == "classify":
            unzip_dir = unzip_file(path)
            data = check_cls_dataset(unzip_dir)
            data["path"] = unzip_dir
        else:  # detect, segment, pose
            _, data_dir, yaml_path = self._unzip(Path(path))
            try:
                # Load YAML with checks
                data = yaml_load(yaml_path)
                data["path"] = ""  # strip path since YAML should be in dataset root for all HUB datasets
                yaml_save(yaml_path, data)
                data = check_det_dataset(yaml_path, autodownload)  # dict
                data["path"] = data_dir  # YAML path should be set to '' (relative) or parent (absolute)
            except Exception as e:
                raise Exception("error/HUB/dataset_stats/init") from e

        self.hub_dir = Path(f'{data["path"]}-hub')
        self.im_dir = self.hub_dir / "images"
        self.stats = {"nc": len(data["names"]), "names": list(data["names"].values())}  # statistics dictionary
        self.data = data

    @staticmethod
    def _unzip(path):
        """Unzip data.zip."""
        if not str(path).endswith(".zip"):  # path is data.yaml
            return False, None, path
        unzip_dir = unzip_file(path, path=path.parent)
        assert unzip_dir.is_dir(), (
            f"Error unzipping {path}, {unzip_dir} not found. " f"path/to/abc.zip MUST unzip to path/to/abc/"
        )
        return True, str(unzip_dir), find_dataset_yaml(unzip_dir)  # zipped, data_dir, yaml_path

    def _hub_ops(self, f):
        """Saves a compressed image for HUB previews."""
        compress_one_image(f, self.im_dir / Path(f).name)  # save to dataset-hub

    def get_json(self, save=False, verbose=False):
        """Return dataset JSON for Ultralytics HUB."""

        def _round(labels):
            """Update labels to integer class and 4 decimal place floats."""
            if self.task == "detect":
                coordinates = labels["bboxes"]
            elif self.task == "segment":
                coordinates = [x.flatten() for x in labels["segments"]]
            elif self.task == "pose":
                n = labels["keypoints"].shape[0]
                coordinates = np.concatenate((labels["bboxes"], labels["keypoints"].reshape(n, -1)), 1)
            else:
                raise ValueError("Undefined dataset task.")
            zipped = zip(labels["cls"], coordinates)
            return [[int(c[0]), *(round(float(x), 4) for x in points)] for c, points in zipped]

        for split in "train", "val", "test":
            self.stats[split] = None  # predefine
            path = self.data.get(split)

            # Check split
            if path is None:  # no split
                continue
            files = [f for f in Path(path).rglob("*.*") if f.suffix[1:].lower() in IMG_FORMATS]  # image files in split
            if not files:  # no images
                continue

            # Get dataset statistics
            if self.task == "classify":
                from torchvision.datasets import ImageFolder

                dataset = ImageFolder(self.data[split])

                x = np.zeros(len(dataset.classes)).astype(int)
                for im in dataset.imgs:
                    x[im[1]] += 1

                self.stats[split] = {
                    "instance_stats": {"total": len(dataset), "per_class": x.tolist()},
                    "image_stats": {"total": len(dataset), "unlabelled": 0, "per_class": x.tolist()},
                    "labels": [{Path(k).name: v} for k, v in dataset.imgs],
                }
            else:
                from ultralytics.data import YOLODataset

                dataset = YOLODataset(img_path=self.data[split], data=self.data, task=self.task)
                x = np.array(
                    [
                        np.bincount(label["cls"].astype(int).flatten(), minlength=self.data["nc"])
                        for label in TQDM(dataset.labels, total=len(dataset), desc="Statistics")
                    ]
                )  # shape(128x80)
                self.stats[split] = {
                    "instance_stats": {"total": int(x.sum()), "per_class": x.sum(0).tolist()},
                    "image_stats": {
                        "total": len(dataset),
                        "unlabelled": int(np.all(x == 0, 1).sum()),
                        "per_class": (x > 0).sum(0).tolist(),
                    },
                    "labels": [{Path(k).name: _round(v)} for k, v in zip(dataset.im_files, dataset.labels)],
                }

        # Save, print and return
        if save:
            self.hub_dir.mkdir(parents=True, exist_ok=True)  # makes dataset-hub/
            stats_path = self.hub_dir / "stats.json"
            LOGGER.info(f"Saving {stats_path.resolve()}...")
            with open(stats_path, "w") as f:
                json.dump(self.stats, f)  # save stats.json
        if verbose:
            LOGGER.info(json.dumps(self.stats, indent=2, sort_keys=False))
        return self.stats

    def process_images(self):
        """Compress images for Ultralytics HUB."""
        from ultralytics.data import YOLODataset  # ClassificationDataset

        self.im_dir.mkdir(parents=True, exist_ok=True)  # makes dataset-hub/images/
        for split in "train", "val", "test":
            if self.data.get(split) is None:
                continue
            dataset = YOLODataset(img_path=self.data[split], data=self.data)
            with ThreadPool(NUM_THREADS) as pool:
                for _ in TQDM(pool.imap(self._hub_ops, dataset.im_files), total=len(dataset), desc=f"{split} images"):
                    pass
        LOGGER.info(f"Done. All images saved to {self.im_dir}")
        return self.im_dir


def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
    """
    Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the
    Python Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it
    will not be resized.

    Args:
        f (str): The path to the input image file.
        f_new (str, optional): The path to the output image file. If not specified, the input file will be overwritten.
        max_dim (int, optional): The maximum dimension (width or height) of the output image. Default is 1920 pixels.
        quality (int, optional): The image compression quality as a percentage. Default is 50%.

    Example:
        ```python
        from pathlib import Path
        from ultralytics.data.utils import compress_one_image

        for f in Path('path/to/dataset').rglob('*.jpg'):
            compress_one_image(f)
        ```
    """

    try:  # use PIL
        im = Image.open(f)
        r = max_dim / max(im.height, im.width)  # ratio
        if r < 1.0:  # image too large
            im = im.resize((int(im.width * r), int(im.height * r)))
        im.save(f_new or f, "JPEG", quality=quality, optimize=True)  # save
    except Exception as e:  # use OpenCV
        LOGGER.info(f"WARNING ⚠️ HUB ops PIL failure {f}: {e}")
        im = cv2.imread(f)
        im_height, im_width = im.shape[:2]
        r = max_dim / max(im_height, im_width)  # ratio
        if r < 1.0:  # image too large
            im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA)
        cv2.imwrite(str(f_new or f), im)


def autosplit(path=DATASETS_DIR / "coco8/images", weights=(0.9, 0.1, 0.0), annotated_only=False):
    """
    Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files.

    Args:
        path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco8/images'.
        weights (list | tuple, optional): Train, validation, and test split fractions. Defaults to (0.9, 0.1, 0.0).
        annotated_only (bool, optional): If True, only images with an associated txt file are used. Defaults to False.

    Example:
        ```python
        from ultralytics.data.utils import autosplit

        autosplit()
        ```
    """

    path = Path(path)  # images dir
    files = sorted(x for x in path.rglob("*.*") if x.suffix[1:].lower() in IMG_FORMATS)  # image files only
    n = len(files)  # number of files
    random.seed(0)  # for reproducibility
    indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split

    txt = ["autosplit_train.txt", "autosplit_val.txt", "autosplit_test.txt"]  # 3 txt files
    for x in txt:
        if (path.parent / x).exists():
            (path.parent / x).unlink()  # remove existing

    LOGGER.info(f"Autosplitting images from {path}" + ", using *.txt labeled images only" * annotated_only)
    for i, img in TQDM(zip(indices, files), total=n):
        if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
            with open(path.parent / txt[i], "a") as f:
                f.write(f"./{img.relative_to(path.parent).as_posix()}" + "\n")  # add image to txt file
@ -1 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
File diff suppressed because it is too large
@ -1,823 +0,0 @@
|
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
||||
|
||||
import inspect
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from ultralytics.cfg import TASK2DATA, get_cfg, get_save_dir
|
||||
from ultralytics.hub.utils import HUB_WEB_ROOT
|
||||
from ultralytics.nn.tasks import attempt_load_one_weight, guess_model_task, nn, yaml_model_load
|
||||
from ultralytics.utils import ASSETS, DEFAULT_CFG_DICT, LOGGER, RANK, SETTINGS, callbacks, checks, emojis, yaml_load
|
||||
|
||||
|
||||
class Model(nn.Module):
|
||||
"""
|
||||
A base class for implementing YOLO models, unifying APIs across different model types.
|
||||
|
||||
This class provides a common interface for various operations related to YOLO models, such as training,
|
||||
validation, prediction, exporting, and benchmarking. It handles different types of models, including those
|
||||
loaded from local files, Ultralytics HUB, or Triton Server. The class is designed to be flexible and
|
||||
extendable for different tasks and model configurations.
|
||||
|
||||
Args:
|
||||
model (Union[str, Path], optional): Path or name of the model to load or create. This can be a local file
|
||||
path, a model name from Ultralytics HUB, or a Triton Server model. Defaults to 'yolov8n.pt'.
|
||||
task (Any, optional): The task type associated with the YOLO model. This can be used to specify the model's
|
||||
application domain, such as object detection, segmentation, etc. Defaults to None.
|
||||
verbose (bool, optional): If True, enables verbose output during the model's operations. Defaults to False.
|
||||
|
||||
Attributes:
|
||||
callbacks (dict): A dictionary of callback functions for various events during model operations.
|
||||
predictor (BasePredictor): The predictor object used for making predictions.
|
||||
model (nn.Module): The underlying PyTorch model.
|
||||
trainer (BaseTrainer): The trainer object used for training the model.
|
||||
ckpt (dict): The checkpoint data if the model is loaded from a *.pt file.
|
||||
cfg (str): The configuration of the model if loaded from a *.yaml file.
|
||||
ckpt_path (str): The path to the checkpoint file.
|
||||
overrides (dict): A dictionary of overrides for model configuration.
|
||||
metrics (dict): The latest training/validation metrics.
|
||||
session (HUBTrainingSession): The Ultralytics HUB session, if applicable.
|
||||
task (str): The type of task the model is intended for.
|
||||
model_name (str): The name of the model.
|
||||
|
||||
Methods:
|
||||
__call__: Alias for the predict method, enabling the model instance to be callable.
|
||||
_new: Initializes a new model based on a configuration file.
|
||||
_load: Loads a model from a checkpoint file.
|
||||
_check_is_pytorch_model: Ensures that the model is a PyTorch model.
|
||||
reset_weights: Resets the model's weights to their initial state.
|
||||
load: Loads model weights from a specified file.
|
||||
save: Saves the current state of the model to a file.
|
||||
info: Logs or returns information about the model.
|
||||
fuse: Fuses Conv2d and BatchNorm2d layers for optimized inference.
|
||||
predict: Performs object detection predictions.
|
||||
track: Performs object tracking.
|
||||
val: Validates the model on a dataset.
|
||||
benchmark: Benchmarks the model on various export formats.
|
||||
export: Exports the model to different formats.
|
||||
train: Trains the model on a dataset.
|
||||
tune: Performs hyperparameter tuning.
|
||||
_apply: Applies a function to the model's tensors.
|
||||
add_callback: Adds a callback function for an event.
|
||||
clear_callback: Clears all callbacks for an event.
|
||||
reset_callbacks: Resets all callbacks to their default functions.
|
||||
_get_hub_session: Retrieves or creates an Ultralytics HUB session.
|
||||
is_triton_model: Checks if a model is a Triton Server model.
|
||||
is_hub_model: Checks if a model is an Ultralytics HUB model.
|
||||
_reset_ckpt_args: Resets checkpoint arguments when loading a PyTorch model.
|
||||
_smart_load: Loads the appropriate module based on the model task.
|
||||
task_map: Provides a mapping from model tasks to corresponding classes.
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If the specified model file does not exist or is inaccessible.
|
||||
ValueError: If the model file or configuration is invalid or unsupported.
|
||||
ImportError: If required dependencies for specific model types (like HUB SDK) are not installed.
|
||||
TypeError: If the model is not a PyTorch model when required.
|
||||
AttributeError: If required attributes or methods are not implemented or available.
|
||||
NotImplementedError: If a specific model task or mode is not supported.
|
||||
"""
|
||||
|
||||
    def __init__(
        self,
        model: Union[str, Path] = "yolov8n.pt",
        task: str = None,
        verbose: bool = False,
    ) -> None:
        """
        Initializes a new instance of the YOLO model class.

        This constructor sets up the model based on the provided model path or name. It handles various types of
        model sources, including local files, Ultralytics HUB models, and Triton Server models. The method
        initializes several important attributes of the model and prepares it for operations like training,
        prediction, or export.

        Args:
            model (Union[str, Path], optional): The path or model file to load or create. This can be a local
                file path, a model name from Ultralytics HUB, or a Triton Server model. Defaults to 'yolov8n.pt'.
            task (Any, optional): The task type associated with the YOLO model, specifying its application domain.
                Defaults to None.
            verbose (bool, optional): If True, enables verbose output during the model's initialization and
                subsequent operations. Defaults to False.

        Raises:
            FileNotFoundError: If the specified model file does not exist or is inaccessible.
            ValueError: If the model file or configuration is invalid or unsupported.
            ImportError: If required dependencies for specific model types (like HUB SDK) are not installed.
        """
        super().__init__()
        self.callbacks = callbacks.get_default_callbacks()
        self.predictor = None  # reuse predictor
        self.model = None  # model object
        self.trainer = None  # trainer object
        self.ckpt = None  # if loaded from *.pt
        self.cfg = None  # if loaded from *.yaml
        self.ckpt_path = None
        self.overrides = {}  # overrides for trainer object
        self.metrics = None  # validation/training metrics
        self.session = None  # HUB session
        self.task = task  # task type
        model = str(model).strip()

        # Check if Ultralytics HUB model from https://hub.ultralytics.com
        if self.is_hub_model(model):
            # Fetch model from HUB
            checks.check_requirements("hub-sdk>=0.0.6")
            self.session = self._get_hub_session(model)
            model = self.session.model_file

        # Check if Triton Server model
        elif self.is_triton_model(model):
            self.model_name = self.model = model
            self.task = task
            return

        # Load or create new YOLO model
        if Path(model).suffix in (".yaml", ".yml"):
            self._new(model, task=task, verbose=verbose)
        else:
            self._load(model, task=task)

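    # Illustrative usage sketch (not part of the original source): assuming a concrete
    # subclass such as `ultralytics.YOLO`, the constructor accepts a local weights file,
    # a YAML config, a HUB model ID, or a Triton URL. The Triton endpoint below is a
    # hypothetical example:
    #
    #   from ultralytics import YOLO
    #   model = YOLO("yolov8n.pt")    # load pretrained weights
    #   model = YOLO("yolov8n.yaml")  # build a new model from a config
    #   model = YOLO("http://localhost:8000/v2/models/yolov8n")  # Triton Server model
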
    def __call__(
        self,
        source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None,
        stream: bool = False,
        **kwargs,
    ) -> list:
        """
        An alias for the predict method, enabling the model instance to be callable.

        This method simplifies the process of making predictions by allowing the model instance to be called
        directly with the required arguments for prediction.

        Args:
            source (str | Path | int | PIL.Image | np.ndarray, optional): The source of the image for making
                predictions. Accepts various types, including file paths, URLs, PIL images, and numpy arrays.
                Defaults to None.
            stream (bool, optional): If True, treats the input source as a continuous stream for predictions.
                Defaults to False.
            **kwargs (any): Additional keyword arguments for configuring the prediction process.

        Returns:
            (List[ultralytics.engine.results.Results]): A list of prediction results, encapsulated in the Results class.
        """
        return self.predict(source, stream, **kwargs)

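    # Hedged note (not from the original file): because __call__ forwards to predict(),
    # these two calls are equivalent:
    #
    #   results = model("bus.jpg")
    #   results = model.predict("bus.jpg")
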
    @staticmethod
    def _get_hub_session(model: str):
        """Creates a session for Hub Training."""
        from ultralytics.hub.session import HUBTrainingSession

        session = HUBTrainingSession(model)
        return session if session.client.authenticated else None

    @staticmethod
    def is_triton_model(model: str) -> bool:
        """Is model a Triton Server URL string, i.e. <scheme>://<netloc>/<endpoint>/<task_name>"""
        from urllib.parse import urlsplit

        url = urlsplit(model)
        return url.netloc and url.path and url.scheme in {"http", "grpc"}

    @staticmethod
    def is_hub_model(model: str) -> bool:
        """Check if the provided model is a HUB model."""
        return any(
            (
                model.startswith(f"{HUB_WEB_ROOT}/models/"),  # i.e. https://hub.ultralytics.com/models/MODEL_ID
                [len(x) for x in model.split("_")] == [42, 20],  # APIKEY_MODEL
                len(model) == 20 and not Path(model).exists() and all(x not in model for x in "./\\"),  # MODEL
            )
        )

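    # For reference (a hedged note, not in the original file), is_hub_model() accepts
    # three shapes of input, matching the three checks above:
    #
    #   f"{HUB_WEB_ROOT}/models/MODEL_ID"  # full HUB URL
    #   "APIKEY_MODELID"                   # 42-char API key + '_' + 20-char model ID
    #   "MODELID"                          # bare 20-char model ID, not an existing path
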
    def _new(self, cfg: str, task=None, model=None, verbose=False) -> None:
        """
        Initializes a new model and infers the task type from the model definitions.

        Args:
            cfg (str): model configuration file
            task (str | None): model task
            model (BaseModel): Customized model.
            verbose (bool): display model info on load
        """
        cfg_dict = yaml_model_load(cfg)
        self.cfg = cfg
        self.task = task or guess_model_task(cfg_dict)
        self.model = (model or self._smart_load("model"))(cfg_dict, verbose=verbose and RANK == -1)  # build model
        self.overrides["model"] = self.cfg
        self.overrides["task"] = self.task

        # Below added to allow export from YAMLs
        self.model.args = {**DEFAULT_CFG_DICT, **self.overrides}  # combine default and model args (prefer model args)
        self.model.task = self.task
        self.model_name = cfg

    def _load(self, weights: str, task=None) -> None:
        """
        Initializes a new model and infers the task type from the model head.

        Args:
            weights (str): model checkpoint to be loaded
            task (str | None): model task
        """
        if weights.lower().startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://")):
            weights = checks.check_file(weights)  # automatically download and return local filename
        weights = checks.check_model_file_from_stem(weights)  # add suffix, i.e. yolov8n -> yolov8n.pt

        if Path(weights).suffix == ".pt":
            self.model, self.ckpt = attempt_load_one_weight(weights)
            self.task = self.model.args["task"]
            self.overrides = self.model.args = self._reset_ckpt_args(self.model.args)
            self.ckpt_path = self.model.pt_path
        else:
            weights = checks.check_file(weights)  # runs in all cases, not redundant with above call
            self.model, self.ckpt = weights, None
            self.task = task or guess_model_task(weights)
            self.ckpt_path = weights
        self.overrides["model"] = weights
        self.overrides["task"] = self.task
        self.model_name = weights

    def _check_is_pytorch_model(self) -> None:
        """Raises TypeError if the model is not a PyTorch model."""
        pt_str = isinstance(self.model, (str, Path)) and Path(self.model).suffix == ".pt"
        pt_module = isinstance(self.model, nn.Module)
        if not (pt_module or pt_str):
            raise TypeError(
                f"model='{self.model}' should be a *.pt PyTorch model to run this method, but is a different format. "
                f"PyTorch models can train, val, predict and export, i.e. 'model.train(data=...)', but exported "
                f"formats like ONNX, TensorRT etc. only support 'predict' and 'val' modes, "
                f"i.e. 'yolo predict model=yolov8n.onnx'.\nTo run CUDA or MPS inference please pass the device "
                f"argument directly in your inference command, i.e. 'model.predict(source=..., device=0)'"
            )

    def reset_weights(self) -> "Model":
        """
        Resets the model parameters to randomly initialized values, effectively discarding all training information.

        This method iterates through all modules in the model and resets their parameters if they have a
        'reset_parameters' method. It also ensures that all parameters have 'requires_grad' set to True, enabling
        them to be updated during training.

        Returns:
            self (ultralytics.engine.model.Model): The instance of the class with reset weights.

        Raises:
            AssertionError: If the model is not a PyTorch model.
        """
        self._check_is_pytorch_model()
        for m in self.model.modules():
            if hasattr(m, "reset_parameters"):
                m.reset_parameters()
        for p in self.model.parameters():
            p.requires_grad = True
        return self

    def load(self, weights: Union[str, Path] = "yolov8n.pt") -> "Model":
        """
        Loads parameters from the specified weights file into the model.

        This method supports loading weights from a file or directly from a weights object. It matches parameters by
        name and shape and transfers them to the model.

        Args:
            weights (str | Path): Path to the weights file or a weights object. Defaults to 'yolov8n.pt'.

        Returns:
            self (ultralytics.engine.model.Model): The instance of the class with loaded weights.

        Raises:
            AssertionError: If the model is not a PyTorch model.
        """
        self._check_is_pytorch_model()
        if isinstance(weights, (str, Path)):
            weights, self.ckpt = attempt_load_one_weight(weights)
        self.model.load(weights)
        return self

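    # Hedged usage sketch (not from the original file): reset_weights() and load()
    # both return self, so they chain naturally, e.g. transferring pretrained
    # weights into a model freshly built from a YAML config:
    #
    #   model = YOLO("yolov8n.yaml").load("yolov8n.pt")  # build, then transfer weights
    #   model = model.reset_weights()                    # or discard all training
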
    def save(self, filename: Union[str, Path] = "saved_model.pt", use_dill=True) -> None:
        """
        Saves the current model state to a file.

        This method exports the model's checkpoint (ckpt) to the specified filename.

        Args:
            filename (str | Path): The name of the file to save the model to. Defaults to 'saved_model.pt'.
            use_dill (bool): Whether to try using dill for serialization if available. Defaults to True.

        Raises:
            AssertionError: If the model is not a PyTorch model.
        """
        self._check_is_pytorch_model()
        from datetime import datetime

        from ultralytics import __version__

        updates = {
            "date": datetime.now().isoformat(),
            "version": __version__,
            "license": "AGPL-3.0 License (https://ultralytics.com/license)",
            "docs": "https://docs.ultralytics.com",
        }
        torch.save({**self.ckpt, **updates}, filename, use_dill=use_dill)

    def info(self, detailed: bool = False, verbose: bool = True):
        """
        Logs or returns model information.

        This method provides an overview or detailed information about the model, depending on the arguments passed.
        It can control the verbosity of the output.

        Args:
            detailed (bool): If True, shows detailed information about the model. Defaults to False.
            verbose (bool): If True, prints the information. If False, returns the information. Defaults to True.

        Returns:
            (list): Various types of information about the model, depending on the 'detailed' and 'verbose' parameters.

        Raises:
            AssertionError: If the model is not a PyTorch model.
        """
        self._check_is_pytorch_model()
        return self.model.info(detailed=detailed, verbose=verbose)

    def fuse(self):
        """
        Fuses Conv2d and BatchNorm2d layers in the model.

        This method optimizes the model by fusing Conv2d and BatchNorm2d layers, which can improve inference speed.

        Raises:
            AssertionError: If the model is not a PyTorch model.
        """
        self._check_is_pytorch_model()
        self.model.fuse()

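    # Background note (an addition, hedged): Conv-BN fusion folds a BatchNorm2d with
    # running statistics (mean mu, variance sigma^2) and affine parameters (gamma,
    # beta) into the preceding Conv2d, so a single conv suffices at inference:
    #
    #   W_fused = W * gamma / sqrt(sigma^2 + eps)
    #   b_fused = beta + (b - mu) * gamma / sqrt(sigma^2 + eps)
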
    def embed(
        self,
        source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None,
        stream: bool = False,
        **kwargs,
    ) -> list:
        """
        Generates image embeddings based on the provided source.

        This method is a wrapper around the 'predict()' method, focusing on generating embeddings from an image
        source. It allows customization of the embedding process through various keyword arguments.

        Args:
            source (str | int | PIL.Image | np.ndarray): The source of the image for generating embeddings.
                The source can be a file path, URL, PIL image, numpy array, etc. Defaults to None.
            stream (bool): If True, predictions are streamed. Defaults to False.
            **kwargs (any): Additional keyword arguments for configuring the embedding process.

        Returns:
            (List[torch.Tensor]): A list containing the image embeddings.

        Raises:
            AssertionError: If the model is not a PyTorch model.
        """
        if not kwargs.get("embed"):
            kwargs["embed"] = [len(self.model.model) - 2]  # embed second-to-last layer if no indices passed
        return self.predict(source, stream, **kwargs)

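    # Hedged usage sketch (not part of the original file): embeddings default to the
    # second-to-last layer, but explicit layer indices can be passed (the index below
    # is hypothetical):
    #
    #   vectors = model.embed("bus.jpg")              # List[torch.Tensor]
    #   vectors = model.embed("bus.jpg", embed=[10])  # embeddings from a specific layer
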
    def predict(
        self,
        source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None,
        stream: bool = False,
        predictor=None,
        **kwargs,
    ) -> list:
        """
        Performs predictions on the given image source using the YOLO model.

        This method facilitates the prediction process, allowing various configurations through keyword arguments.
        It supports predictions with custom predictors or the default predictor method. The method handles different
        types of image sources and can operate in a streaming mode. It also provides support for SAM-type models
        through 'prompts'.

        The method sets up a new predictor if not already present and updates its arguments with each call.
        It also issues a warning and uses default assets if the 'source' is not provided. The method determines if it
        is being called from the command line interface and adjusts its behavior accordingly, including setting
        defaults for confidence threshold and saving behavior.

        Args:
            source (str | int | PIL.Image | np.ndarray, optional): The source of the image for making predictions.
                Accepts various types, including file paths, URLs, PIL images, and numpy arrays. Defaults to ASSETS.
            stream (bool, optional): Treats the input source as a continuous stream for predictions. Defaults to False.
            predictor (BasePredictor, optional): An instance of a custom predictor class for making predictions.
                If None, the method uses a default predictor. Defaults to None.
            **kwargs (any): Additional keyword arguments for configuring the prediction process. These arguments allow
                for further customization of the prediction behavior.

        Returns:
            (List[ultralytics.engine.results.Results]): A list of prediction results, encapsulated in the Results class.

        Raises:
            AttributeError: If the predictor is not properly set up.
        """
        if source is None:
            source = ASSETS
            LOGGER.warning(f"WARNING ⚠️ 'source' is missing. Using 'source={source}'.")

        is_cli = (sys.argv[0].endswith("yolo") or sys.argv[0].endswith("ultralytics")) and any(
            x in sys.argv for x in ("predict", "track", "mode=predict", "mode=track")
        )

        custom = {"conf": 0.25, "batch": 1, "save": is_cli, "mode": "predict"}  # method defaults
        args = {**self.overrides, **custom, **kwargs}  # highest priority args on the right
        prompts = args.pop("prompts", None)  # for SAM-type models

        if not self.predictor:
            self.predictor = predictor or self._smart_load("predictor")(overrides=args, _callbacks=self.callbacks)
            self.predictor.setup_model(model=self.model, verbose=is_cli)
        else:  # only update args if predictor is already setup
            self.predictor.args = get_cfg(self.predictor.args, args)
            if "project" in args or "name" in args:
                self.predictor.save_dir = get_save_dir(self.predictor.args)
        if prompts and hasattr(self.predictor, "set_prompts"):  # for SAM-type models
            self.predictor.set_prompts(prompts)
        return self.predictor.predict_cli(source=source) if is_cli else self.predictor(source=source, stream=stream)

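    # Hedged usage sketch (not from the original file): argument priority is
    # self.overrides < method defaults < per-call kwargs, so kwargs always win:
    #
    #   results = model.predict("bus.jpg", conf=0.5, imgsz=320)  # list of Results
    #   for r in model.predict("video.mp4", stream=True):        # generator, low memory
    #       print(r.boxes)
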
    def track(
        self,
        source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None,
        stream: bool = False,
        persist: bool = False,
        **kwargs,
    ) -> list:
        """
        Conducts object tracking on the specified input source using the registered trackers.

        This method performs object tracking using the model's predictors and optionally registered trackers. It is
        capable of handling different types of input sources such as file paths or video streams. The method supports
        customization of the tracking process through various keyword arguments. It registers trackers if they are
        not already present and optionally persists them based on the 'persist' flag.

        The method sets a default confidence threshold specifically for ByteTrack-based tracking, which requires low
        confidence predictions as input. The tracking mode is explicitly set in the keyword arguments.

        Args:
            source (str, optional): The input source for object tracking. It can be a file path, URL, or video stream.
            stream (bool, optional): Treats the input source as a continuous video stream. Defaults to False.
            persist (bool, optional): Persists the trackers between different calls to this method. Defaults to False.
            **kwargs (any): Additional keyword arguments for configuring the tracking process. These arguments allow
                for further customization of the tracking behavior.

        Returns:
            (List[ultralytics.engine.results.Results]): A list of tracking results, encapsulated in the Results class.

        Raises:
            AttributeError: If the predictor does not have registered trackers.
        """
        if not hasattr(self.predictor, "trackers"):
            from ultralytics.trackers import register_tracker

            register_tracker(self, persist)
        kwargs["conf"] = kwargs.get("conf") or 0.1  # ByteTrack-based method needs low confidence predictions as input
        kwargs["batch"] = kwargs.get("batch") or 1  # batch-size 1 for tracking in videos
        kwargs["mode"] = "track"
        return self.predict(source=source, stream=stream, **kwargs)

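    # Hedged usage sketch (not from the original file): tracking streams frames and
    # keeps object identities across them; persist=True carries trackers between calls:
    #
    #   for r in model.track("video.mp4", stream=True, persist=True):
    #       print(r.boxes.id)  # per-object track IDs, if assigned
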
    def val(
        self,
        validator=None,
        **kwargs,
    ):
        """
        Validates the model using a specified dataset and validation configuration.

        This method facilitates the model validation process, allowing for a range of customization through various
        settings and configurations. It supports validation with a custom validator or the default validation
        approach. The method combines default configurations, method-specific defaults, and user-provided arguments
        to configure the validation process. After validation, it updates the model's metrics with the results
        obtained from the validator.

        The method supports various arguments that allow customization of the validation process. For a comprehensive
        list of all configurable options, users should refer to the 'configuration' section in the documentation.

        Args:
            validator (BaseValidator, optional): An instance of a custom validator class for validating the model. If
                None, the method uses a default validator. Defaults to None.
            **kwargs (any): Arbitrary keyword arguments representing the validation configuration. These arguments are
                used to customize various aspects of the validation process.

        Returns:
            (dict): Validation metrics obtained from the validation process.

        Raises:
            AssertionError: If the model is not a PyTorch model.
        """
        custom = {"rect": True}  # method defaults
        args = {**self.overrides, **custom, **kwargs, "mode": "val"}  # highest priority args on the right

        validator = (validator or self._smart_load("validator"))(args=args, _callbacks=self.callbacks)
        validator(model=self.model)
        self.metrics = validator.metrics
        return validator.metrics

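    # Hedged usage sketch (not from the original file); the metrics attribute path
    # below assumes a detection-task validator:
    #
    #   metrics = model.val(data="coco8.yaml", imgsz=640)
    #   print(metrics.box.map)  # mAP50-95
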
    def benchmark(
        self,
        **kwargs,
    ):
        """
        Benchmarks the model across various export formats to evaluate performance.

        This method assesses the model's performance in different export formats, such as ONNX, TorchScript, etc.
        It uses the 'benchmark' function from the ultralytics.utils.benchmarks module. The benchmarking is configured
        using a combination of default configuration values, model-specific arguments, method-specific defaults, and
        any additional user-provided keyword arguments.

        The method supports various arguments that allow customization of the benchmarking process, such as dataset
        choice, image size, precision modes, device selection, and verbosity. For a comprehensive list of all
        configurable options, users should refer to the 'configuration' section in the documentation.

        Args:
            **kwargs (any): Arbitrary keyword arguments to customize the benchmarking process. These are combined with
                default configurations, model-specific arguments, and method defaults.

        Returns:
            (dict): A dictionary containing the results of the benchmarking process.

        Raises:
            AssertionError: If the model is not a PyTorch model.
        """
        self._check_is_pytorch_model()
        from ultralytics.utils.benchmarks import benchmark

        custom = {"verbose": False}  # method defaults
        args = {**DEFAULT_CFG_DICT, **self.model.args, **custom, **kwargs, "mode": "benchmark"}
        return benchmark(
            model=self,
            data=kwargs.get("data"),  # if no 'data' argument passed set data=None for default datasets
            imgsz=args["imgsz"],
            half=args["half"],
            int8=args["int8"],
            device=args["device"],
            verbose=kwargs.get("verbose"),
        )

    def export(
        self,
        **kwargs,
    ):
        """
        Exports the model to a different format suitable for deployment.

        This method facilitates the export of the model to various formats (e.g., ONNX, TorchScript) for deployment
        purposes. It uses the 'Exporter' class for the export process, combining model-specific overrides, method
        defaults, and any additional arguments provided. The combined arguments are used to configure export settings.

        The method supports a wide range of arguments to customize the export process. For a comprehensive list of
        all possible arguments, refer to the 'configuration' section in the documentation.

        Args:
            **kwargs (any): Arbitrary keyword arguments to customize the export process. These are combined with the
                model's overrides and method defaults.

        Returns:
            (object): The exported model in the specified format, or an object related to the export process.

        Raises:
            AssertionError: If the model is not a PyTorch model.
        """
        self._check_is_pytorch_model()
        from .exporter import Exporter

        custom = {"imgsz": self.model.args["imgsz"], "batch": 1, "data": None, "verbose": False}  # method defaults
        args = {**self.overrides, **custom, **kwargs, "mode": "export"}  # highest priority args on the right
        return Exporter(overrides=args, _callbacks=self.callbacks)(model=self.model)

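    # Hedged usage sketch (not from the original file):
    #
    #   path = model.export(format="onnx", imgsz=640)  # returns the exported file path
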
    def train(
        self,
        trainer=None,
        **kwargs,
    ):
        """
        Trains the model using the specified dataset and training configuration.

        This method facilitates model training with a range of customizable settings and configurations. It supports
        training with a custom trainer or the default training approach defined in the method. The method handles
        different scenarios, such as resuming training from a checkpoint, integrating with Ultralytics HUB, and
        updating model and configuration after training.

        When using Ultralytics HUB, if the session already has a loaded model, the method prioritizes HUB training
        arguments and issues a warning if local arguments are provided. It checks for pip updates and combines
        default configurations, method-specific defaults, and user-provided arguments to configure the training
        process. After training, it updates the model and its configurations, and optionally attaches metrics.

        Args:
            trainer (BaseTrainer, optional): An instance of a custom trainer class for training the model. If None, the
                method uses a default trainer. Defaults to None.
            **kwargs (any): Arbitrary keyword arguments representing the training configuration. These arguments are
                used to customize various aspects of the training process.

        Returns:
            (dict | None): Training metrics if available and training is successful; otherwise, None.

        Raises:
            AssertionError: If the model is not a PyTorch model.
            PermissionError: If there is a permission issue with the HUB session.
            ModuleNotFoundError: If the HUB SDK is not installed.
        """
        self._check_is_pytorch_model()
        if hasattr(self.session, "model") and self.session.model.id:  # Ultralytics HUB session with loaded model
            if any(kwargs):
                LOGGER.warning("WARNING ⚠️ using HUB training arguments, ignoring local training arguments.")
            kwargs = self.session.train_args  # overwrite kwargs

        checks.check_pip_update_available()

        overrides = yaml_load(checks.check_yaml(kwargs["cfg"])) if kwargs.get("cfg") else self.overrides
        custom = {"data": DEFAULT_CFG_DICT["data"] or TASK2DATA[self.task]}  # method defaults
        args = {**overrides, **custom, **kwargs, "mode": "train"}  # highest priority args on the right
        if args.get("resume"):
            args["resume"] = self.ckpt_path

        self.trainer = (trainer or self._smart_load("trainer"))(overrides=args, _callbacks=self.callbacks)
        if not args.get("resume"):  # manually set model only if not resuming
            self.trainer.model = self.trainer.get_model(weights=self.model if self.ckpt else None, cfg=self.model.yaml)
            self.model = self.trainer.model

        if SETTINGS["hub"] is True and not self.session:
            # Create a model in HUB
            try:
                self.session = self._get_hub_session(self.model_name)
                if self.session:
                    self.session.create_model(args)
                    # Check model was created
                    if not getattr(self.session.model, "id", None):
                        self.session = None
            except (PermissionError, ModuleNotFoundError):
                # Ignore PermissionError and ModuleNotFoundError which indicates hub-sdk not installed
                pass

        self.trainer.hub_session = self.session  # attach optional HUB session
        self.trainer.train()
        # Update model and cfg after training
        if RANK in (-1, 0):
            ckpt = self.trainer.best if self.trainer.best.exists() else self.trainer.last
            self.model, _ = attempt_load_one_weight(ckpt)
            self.overrides = self.model.args
            self.metrics = getattr(self.trainer.validator, "metrics", None)  # TODO: no metrics returned by DDP
        return self.metrics

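    # Hedged usage sketch (not from the original file):
    #
    #   results = model.train(data="coco8.yaml", epochs=10, imgsz=640, lr0=0.01)
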
    def tune(
        self,
        use_ray=False,
        iterations=10,
        *args,
        **kwargs,
    ):
        """
        Conducts hyperparameter tuning for the model, with an option to use Ray Tune.

        This method supports two modes of hyperparameter tuning: using Ray Tune or a custom tuning method.
        When Ray Tune is enabled, it leverages the 'run_ray_tune' function from the ultralytics.utils.tuner module.
        Otherwise, it uses the internal 'Tuner' class for tuning. The method combines default, overridden, and
        custom arguments to configure the tuning process.

        Args:
            use_ray (bool): If True, uses Ray Tune for hyperparameter tuning. Defaults to False.
            iterations (int): The number of tuning iterations to perform. Defaults to 10.
            *args (list): Variable length argument list for additional arguments.
            **kwargs (any): Arbitrary keyword arguments. These are combined with the model's overrides and defaults.

        Returns:
            (dict): A dictionary containing the results of the hyperparameter search.

        Raises:
            AssertionError: If the model is not a PyTorch model.
        """
        self._check_is_pytorch_model()
        if use_ray:
            from ultralytics.utils.tuner import run_ray_tune

            return run_ray_tune(self, max_samples=iterations, *args, **kwargs)
        else:
            from .tuner import Tuner

            custom = {}  # method defaults
            args = {**self.overrides, **custom, **kwargs, "mode": "train"}  # highest priority args on the right
            return Tuner(args=args, _callbacks=self.callbacks)(model=self, iterations=iterations)

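    # Hedged usage sketch (not from the original file):
    #
    #   best = model.tune(data="coco8.yaml", iterations=30, use_ray=False)
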
    def _apply(self, fn) -> "Model":
        """Apply to(), cpu(), cuda(), half(), float() to model tensors that are not parameters or registered buffers."""
        self._check_is_pytorch_model()
        self = super()._apply(fn)  # noqa
        self.predictor = None  # reset predictor as device may have changed
        self.overrides["device"] = self.device  # was str(self.device) i.e. device(type='cuda', index=0) -> 'cuda:0'
        return self

    @property
    def names(self) -> list:
        """
        Retrieves the class names associated with the loaded model.

        This property returns the class names if they are defined in the model. It checks the class names for
        validity using the 'check_class_names' function from the ultralytics.nn.autobackend module.

        Returns:
            (list | None): The class names of the model if available, otherwise None.
        """
        from ultralytics.nn.autobackend import check_class_names

        return check_class_names(self.model.names) if hasattr(self.model, "names") else None

    @property
    def device(self) -> torch.device:
        """
        Retrieves the device on which the model's parameters are allocated.

        This property is used to determine whether the model's parameters are on CPU or GPU. It only applies to
        models that are instances of nn.Module.

        Returns:
            (torch.device | None): The device (CPU/GPU) of the model if it is a PyTorch model, otherwise None.
        """
        return next(self.model.parameters()).device if isinstance(self.model, nn.Module) else None

    @property
    def transforms(self):
        """
        Retrieves the transformations applied to the input data of the loaded model.

        This property returns the transformations if they are defined in the model.

        Returns:
            (object | None): The transform object of the model if available, otherwise None.
        """
        return self.model.transforms if hasattr(self.model, "transforms") else None

    def add_callback(self, event: str, func) -> None:
        """
        Adds a callback function for a specified event.

        This method allows the user to register a custom callback function that is triggered on a specific event
        during model training or inference.

        Args:
            event (str): The name of the event to attach the callback to.
            func (callable): The callback function to be registered.

        Raises:
            ValueError: If the event name is not recognized.
        """
        self.callbacks[event].append(func)

    def clear_callback(self, event: str) -> None:
        """
        Clears all callback functions registered for a specified event.

        This method removes all custom and default callback functions associated with the given event.

        Args:
            event (str): The name of the event for which to clear the callbacks.

        Raises:
            ValueError: If the event name is not recognized.
        """
        self.callbacks[event] = []

    def reset_callbacks(self) -> None:
        """
        Resets all callbacks to their default functions.

        This method reinstates the default callback functions for all events, removing any custom callbacks that
        were added previously.
        """
        for event in callbacks.default_callbacks.keys():
            self.callbacks[event] = [callbacks.default_callbacks[event][0]]

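    # Hedged usage sketch (not from the original file): event names follow the
    # ultralytics callback registry, and callbacks receive the emitting object
    # (e.g. the trainer):
    #
    #   def log_epoch(trainer):
    #       print(f"epoch {trainer.epoch} done")
    #
    #   model.add_callback("on_train_epoch_end", log_epoch)
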
    @staticmethod
    def _reset_ckpt_args(args: dict) -> dict:
        """Reset arguments when loading a PyTorch model."""
        include = {"imgsz", "data", "task", "single_cls"}  # only remember these arguments when loading a PyTorch model
        return {k: v for k, v in args.items() if k in include}

    # def __getattr__(self, attr):
    #    """Raises error if object has no requested attribute."""
    #    name = self.__class__.__name__
    #    raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}")

    def _smart_load(self, key: str):
        """Load model/trainer/validator/predictor."""
        try:
            return self.task_map[self.task][key]
        except Exception as e:
            name = self.__class__.__name__
            mode = inspect.stack()[1][3]  # get the function name.
            raise NotImplementedError(
                emojis(f"WARNING ⚠️ '{name}' model does not support '{mode}' mode for '{self.task}' task yet.")
            ) from e

    @property
    def task_map(self) -> dict:
        """
        Map head to model, trainer, validator, and predictor classes.

        Returns:
            task_map (dict): The map of model task to mode classes.
        """
        raise NotImplementedError("Please provide task map for your model!")
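
    # Hedged sketch (not from the original file): a subclass would typically return a
    # nested dict keyed by task, then by component; the class names below assume the
    # YOLO detection task:
    #
    #   {"detect": {"model": DetectionModel, "trainer": DetectionTrainer,
    #               "validator": DetectionValidator, "predictor": DetectionPredictor}}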
@ -1,397 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Run prediction on images, videos, directories, globs, YouTube, webcam, streams, etc.

Usage - sources:
    $ yolo mode=predict model=yolov8n.pt source=0                               # webcam
                                                img.jpg                         # image
                                                vid.mp4                         # video
                                                screen                          # screenshot
                                                path/                           # directory
                                                list.txt                        # list of images
                                                list.streams                    # list of streams
                                                'path/*.jpg'                    # glob
                                                'https://youtu.be/LNwODJXcvt4'  # YouTube
                                                'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP, TCP stream

Usage - formats:
    $ yolo mode=predict model=yolov8n.pt                 # PyTorch
                              yolov8n.torchscript        # TorchScript
                              yolov8n.onnx               # ONNX Runtime or OpenCV DNN with dnn=True
                              yolov8n_openvino_model     # OpenVINO
                              yolov8n.engine             # TensorRT
                              yolov8n.mlpackage          # CoreML (macOS-only)
                              yolov8n_saved_model        # TensorFlow SavedModel
                              yolov8n.pb                 # TensorFlow GraphDef
                              yolov8n.tflite             # TensorFlow Lite
                              yolov8n_edgetpu.tflite     # TensorFlow Edge TPU
                              yolov8n_paddle_model       # PaddlePaddle
                              yolov8n_ncnn_model         # NCNN
"""

import platform
import re
import threading
from pathlib import Path

import cv2
import numpy as np
import torch

from ultralytics.cfg import get_cfg, get_save_dir
from ultralytics.data import load_inference_source
from ultralytics.data.augment import LetterBox, classify_transforms
from ultralytics.nn.autobackend import AutoBackend
from ultralytics.utils import DEFAULT_CFG, LOGGER, MACOS, WINDOWS, callbacks, colorstr, ops
from ultralytics.utils.checks import check_imgsz, check_imshow
from ultralytics.utils.files import increment_path
from ultralytics.utils.torch_utils import select_device, smart_inference_mode

STREAM_WARNING = """
WARNING ⚠️ inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs
"""


class BasePredictor:
    """
    BasePredictor.

    A base class for creating predictors.

    Attributes:
        args (SimpleNamespace): Configuration for the predictor.
        save_dir (Path): Directory to save results.
        done_warmup (bool): Whether the predictor has finished setup.
        model (nn.Module): Model used for prediction.
        data (dict): Data configuration.
        device (torch.device): Device used for prediction.
        dataset (Dataset): Dataset used for prediction.
        vid_writer (dict): Dictionary of {save_path: video_writer, ...} writer for saving video output.
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """
        Initializes the BasePredictor class.

        Args:
            cfg (str, optional): Path to a configuration file. Defaults to DEFAULT_CFG.
            overrides (dict, optional): Configuration overrides. Defaults to None.
            _callbacks (dict, optional): Dictionary of callback functions. Defaults to None.
        """
        self.args = get_cfg(cfg, overrides)
        self.save_dir = get_save_dir(self.args)
        if self.args.conf is None:
            self.args.conf = 0.25  # default conf=0.25
        self.done_warmup = False
        if self.args.show:
            self.args.show = check_imshow(warn=True)

        # Usable if setup is done
        self.model = None
        self.data = self.args.data  # data_dict
        self.imgsz = None
        self.device = None
        self.dataset = None
        self.vid_writer = {}  # dict of {save_path: video_writer, ...}
        self.plotted_img = None
        self.source_type = None
        self.seen = 0
        self.windows = []
        self.batch = None
        self.results = None
        self.transforms = None
        self.callbacks = _callbacks or callbacks.get_default_callbacks()
        self.txt_path = None
        self._lock = threading.Lock()  # for automatic thread-safe inference
        callbacks.add_integration_callbacks(self)

    def preprocess(self, im):
        """
        Prepares input image before inference.

        Args:
            im (torch.Tensor | List(np.ndarray)): BCHW for tensor, [(HWC) x B] for list.

        Returns:
            (torch.Tensor): A BCHW tensor on the model device, scaled to [0, 1] when converted from uint8 input.
        """
        not_tensor = not isinstance(im, torch.Tensor)
        if not_tensor:
            im = np.stack(self.pre_transform(im))
            im = im[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW, (n, 3, h, w)
            im = np.ascontiguousarray(im)  # contiguous
            im = torch.from_numpy(im)

        im = im.to(self.device)
        im = im.half() if self.model.fp16 else im.float()  # uint8 to fp16/32
        if not_tensor:
            im /= 255  # 0 - 255 to 0.0 - 1.0
        return im

    def inference(self, im, *args, **kwargs):
        """Runs inference on a given image using the specified model and arguments."""
        visualize = (
            increment_path(self.save_dir / Path(self.batch[0][0]).stem, mkdir=True)
            if self.args.visualize and (not self.source_type.tensor)
            else False
        )
        return self.model(im, augment=self.args.augment, visualize=visualize, embed=self.args.embed, *args, **kwargs)

    def pre_transform(self, im):
        """
        Pre-transform input image before inference.

        Args:
            im (List(np.ndarray)): (N, 3, h, w) for tensor, [(h, w, 3) x N] for list.

        Returns:
            (list): A list of transformed images.
        """
        same_shapes = len({x.shape for x in im}) == 1
        letterbox = LetterBox(self.imgsz, auto=same_shapes and self.model.pt, stride=self.model.stride)
        return [letterbox(image=x) for x in im]

    def postprocess(self, preds, img, orig_imgs):
        """Post-processes predictions for an image and returns them."""
        return preds

    def __call__(self, source=None, model=None, stream=False, *args, **kwargs):
        """Performs inference on an image or stream."""
        self.stream = stream
        if stream:
            return self.stream_inference(source, model, *args, **kwargs)
        else:
            return list(self.stream_inference(source, model, *args, **kwargs))  # merge list of Result into one

    def predict_cli(self, source=None, model=None):
        """
        Method used for CLI prediction.

        It always streams results through a generator rather than accumulating outputs, since CLI mode does not
        require them to be retained.
        """
        gen = self.stream_inference(source, model)
        for _ in gen:  # noqa, running CLI inference without accumulating any outputs (do not modify)
            pass

    def setup_source(self, source):
        """Sets up source and inference mode."""
        self.imgsz = check_imgsz(self.args.imgsz, stride=self.model.stride, min_dim=2)  # check image size
        self.transforms = (
            getattr(
                self.model.model,
                "transforms",
                classify_transforms(self.imgsz[0], crop_fraction=self.args.crop_fraction),
            )
            if self.args.task == "classify"
            else None
        )
        self.dataset = load_inference_source(
            source=source,
            batch=self.args.batch,
            vid_stride=self.args.vid_stride,
            buffer=self.args.stream_buffer,
        )
        self.source_type = self.dataset.source_type
        if not getattr(self, "stream", True) and (
            self.source_type.stream
            or self.source_type.screenshot
            or len(self.dataset) > 1000  # many images
            or any(getattr(self.dataset, "video_flag", [False]))
        ):  # videos
            LOGGER.warning(STREAM_WARNING)
        self.vid_writer = {}

    @smart_inference_mode()
    def stream_inference(self, source=None, model=None, *args, **kwargs):
        """Streams real-time inference on camera feed and saves results to file."""
        if self.args.verbose:
            LOGGER.info("")

        # Setup model
        if not self.model:
            self.setup_model(model)

        with self._lock:  # for thread-safe inference
            # Setup source every time predict is called
            self.setup_source(source if source is not None else self.args.source)

            # Check if save_dir/ label file exists
            if self.args.save or self.args.save_txt:
                (self.save_dir / "labels" if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)

            # Warmup model
            if not self.done_warmup:
                self.model.warmup(imgsz=(1 if self.model.pt or self.model.triton else self.dataset.bs, 3, *self.imgsz))
                self.done_warmup = True

            self.seen, self.windows, self.batch = 0, [], None
            profilers = (
                ops.Profile(device=self.device),
                ops.Profile(device=self.device),
                ops.Profile(device=self.device),
            )
            self.run_callbacks("on_predict_start")
            for self.batch in self.dataset:
                self.run_callbacks("on_predict_batch_start")
                paths, im0s, s = self.batch

                # Preprocess
                with profilers[0]:
                    im = self.preprocess(im0s)

                # Inference
                with profilers[1]:
                    preds = self.inference(im, *args, **kwargs)
                    if self.args.embed:
                        yield from [preds] if isinstance(preds, torch.Tensor) else preds  # yield embedding tensors
                        continue

                # Postprocess
                with profilers[2]:
                    self.results = self.postprocess(preds, im, im0s)
                self.run_callbacks("on_predict_postprocess_end")

                # Visualize, save, write results
                n = len(im0s)
                for i in range(n):
                    self.seen += 1
                    self.results[i].speed = {
                        "preprocess": profilers[0].dt * 1e3 / n,
                        "inference": profilers[1].dt * 1e3 / n,
                        "postprocess": profilers[2].dt * 1e3 / n,
                    }
                    if self.args.verbose or self.args.save or self.args.save_txt or self.args.show:
                        s[i] += self.write_results(i, Path(paths[i]), im, s)

                # Print batch results
                if self.args.verbose:
                    LOGGER.info("\n".join(s))

                self.run_callbacks("on_predict_batch_end")
                yield from self.results

        # Release assets
        for v in self.vid_writer.values():
            if isinstance(v, cv2.VideoWriter):
                v.release()

        # Print final results
        if self.args.verbose and self.seen:
            t = tuple(x.t / self.seen * 1e3 for x in profilers)  # speeds per image
            LOGGER.info(
                f"Speed: %.1fms preprocess, %.1fms inference, %.1fms postprocess per image at shape "
                f"{(min(self.args.batch, self.seen), 3, *im.shape[2:])}" % t
            )
        if self.args.save or self.args.save_txt or self.args.save_crop:
            nl = len(list(self.save_dir.glob("labels/*.txt")))  # number of labels
            s = f"\n{nl} label{'s' * (nl > 1)} saved to {self.save_dir / 'labels'}" if self.args.save_txt else ""
            LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}{s}")
        self.run_callbacks("on_predict_end")

    def setup_model(self, model, verbose=True):
        """Initialize YOLO model with given parameters and set it to evaluation mode."""
        self.model = AutoBackend(
            weights=model or self.args.model,
            device=select_device(self.args.device, verbose=verbose),
            dnn=self.args.dnn,
            data=self.args.data,
            fp16=self.args.half,
            batch=self.args.batch,
            fuse=True,
            verbose=verbose,
        )

        self.device = self.model.device  # update device
        self.args.half = self.model.fp16  # update half
        self.model.eval()

    def write_results(self, i, p, im, s):
        """Write inference results to a file or directory."""
        string = ""  # print string
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        if self.source_type.stream or self.source_type.from_img or self.source_type.tensor:  # batch_size >= 1
            string += f"{i}: "
            frame = self.dataset.count
        else:
            match = re.search(r"frame (\d+)/", s[i])
            frame = int(match.group(1)) if match else None  # None if frame undetermined

        self.txt_path = self.save_dir / "labels" / (p.stem + ("" if self.dataset.mode == "image" else f"_{frame}"))
        string += "%gx%g " % im.shape[2:]
        result = self.results[i]
        result.save_dir = self.save_dir.__str__()  # used in other locations
        string += result.verbose() + f"{result.speed['inference']:.1f}ms"

        # Add predictions to image
        if self.args.save or self.args.show:
            self.plotted_img = result.plot(
                line_width=self.args.line_width,
                boxes=self.args.show_boxes,
                conf=self.args.show_conf,
                labels=self.args.show_labels,
                im_gpu=None if self.args.retina_masks else im[i],
            )

        # Save results
        if self.args.save_txt:
            result.save_txt(f"{self.txt_path}.txt", save_conf=self.args.save_conf)
        if self.args.save_crop:
            result.save_crop(save_dir=self.save_dir / "crops", file_name=self.txt_path.stem)
        if self.args.show:
            self.show(str(p))
        if self.args.save:
            self.save_predicted_images(str(self.save_dir / (p.name or "tmp.jpg")), frame)

        return string

    def save_predicted_images(self, save_path="", frame=0):
        """Save video predictions as mp4 at specified path."""
        im = self.plotted_img

        # Save videos and streams
        if self.dataset.mode in {"stream", "video"}:
            fps = self.dataset.fps if self.dataset.mode == "video" else 30
            frames_path = f'{save_path.split(".", 1)[0]}_frames/'
            if save_path not in self.vid_writer:  # new video
                if self.args.save_frames:
                    Path(frames_path).mkdir(parents=True, exist_ok=True)
                suffix, fourcc = (".mp4", "avc1") if MACOS else (".avi", "WMV2") if WINDOWS else (".avi", "MJPG")
                self.vid_writer[save_path] = cv2.VideoWriter(
                    filename=str(Path(save_path).with_suffix(suffix)),
                    fourcc=cv2.VideoWriter_fourcc(*fourcc),
                    fps=fps,  # integer required, floats produce error in MP4 codec
                    frameSize=(im.shape[1], im.shape[0]),  # (width, height)
                )

            # Save video
            self.vid_writer[save_path].write(im)
            if self.args.save_frames:
                cv2.imwrite(f"{frames_path}{frame}.jpg", im)

        # Save images
        else:
            cv2.imwrite(save_path, im)

    def show(self, p=""):
        """Display an image in a window using OpenCV imshow()."""
        im = self.plotted_img
        if platform.system() == "Linux" and p not in self.windows:
            self.windows.append(p)
            cv2.namedWindow(p, cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
            cv2.resizeWindow(p, im.shape[1], im.shape[0])  # (width, height)
        cv2.imshow(p, im)
        cv2.waitKey(300 if self.dataset.mode == "image" else 1)  # 300 ms for images, 1 ms for video/stream

    def run_callbacks(self, event: str):
        """Runs all registered callbacks for a specific event."""
        for callback in self.callbacks.get(event, []):
            callback(self)

    def add_callback(self, event: str, func):
        """Add callback."""
        self.callbacks[event].append(func)
@ -1,743 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Ultralytics Results, Boxes and Masks classes for handling inference results.

Usage: See https://docs.ultralytics.com/modes/predict/
"""

from copy import deepcopy
from functools import lru_cache
from pathlib import Path

import numpy as np
import torch

from ultralytics.data.augment import LetterBox
from ultralytics.utils import LOGGER, SimpleClass, ops
from ultralytics.utils.plotting import Annotator, colors, save_one_box
from ultralytics.utils.torch_utils import smart_inference_mode


class BaseTensor(SimpleClass):
    """Base tensor class with additional methods for easy manipulation and device handling."""

    def __init__(self, data, orig_shape) -> None:
        """
        Initialize BaseTensor with data and original shape.

        Args:
            data (torch.Tensor | np.ndarray): Predictions, such as bboxes, masks and keypoints.
            orig_shape (tuple): Original shape of image.
        """
        assert isinstance(data, (torch.Tensor, np.ndarray))
        self.data = data
        self.orig_shape = orig_shape

    @property
    def shape(self):
        """Return the shape of the data tensor."""
        return self.data.shape

    def cpu(self):
        """Return a copy of the tensor on CPU memory."""
        return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape)

    def numpy(self):
        """Return a copy of the tensor as a numpy array."""
        return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape)

    def cuda(self):
        """Return a copy of the tensor on GPU memory."""
        return self.__class__(torch.as_tensor(self.data).cuda(), self.orig_shape)

    def to(self, *args, **kwargs):
        """Return a copy of the tensor with the specified device and dtype."""
        return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape)

    def __len__(self):  # override len(results)
        """Return the length of the data tensor."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return a BaseTensor with the specified index of the data tensor."""
        return self.__class__(self.data[idx], self.orig_shape)


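# Hedged usage sketch (not from the original file): BaseTensor subclasses such as
# Boxes carry orig_shape alongside the data, so device moves preserve it:
#
#   boxes = results[0].boxes        # a BaseTensor subclass
#   arr = boxes.cpu().numpy().data  # np.ndarray on host memory, orig_shape retained

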
class Results(SimpleClass):
    """
    A class for storing and manipulating inference results.

    Attributes:
        orig_img (numpy.ndarray): Original image as a numpy array.
        orig_shape (tuple): Original image shape in (height, width) format.
        boxes (Boxes, optional): Object containing detection bounding boxes.
        masks (Masks, optional): Object containing detection masks.
        probs (Probs, optional): Object containing class probabilities for classification tasks.
        keypoints (Keypoints, optional): Object containing detected keypoints for each object.
        speed (dict): Dictionary of preprocess, inference, and postprocess speeds (ms/image).
        names (dict): Dictionary of class names.
        path (str): Path to the image file.

    Methods:
        update(boxes=None, masks=None, probs=None, obb=None): Updates object attributes with new detection results.
        cpu(): Returns a copy of the Results object with all tensors on CPU memory.
        numpy(): Returns a copy of the Results object with all tensors as numpy arrays.
        cuda(): Returns a copy of the Results object with all tensors on GPU memory.
        to(*args, **kwargs): Returns a copy of the Results object with tensors on a specified device and dtype.
        new(): Returns a new Results object with the same image, path, and names.
        plot(...): Plots detection results on an input image, returning an annotated image.
        show(): Show annotated results to screen.
        save(filename): Save annotated results to file.
        verbose(): Returns a log string for each task, detailing detections and classifications.
        save_txt(txt_file, save_conf=False): Saves detection results to a text file.
        save_crop(save_dir, file_name=Path("im.jpg")): Saves cropped detection images.
        tojson(normalize=False): Converts detection results to JSON format.
    """

    def __init__(self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None, obb=None) -> None:
        """
        Initialize the Results class.

        Args:
            orig_img (numpy.ndarray): The original image as a numpy array.
            path (str): The path to the image file.
            names (dict): A dictionary of class names.
            boxes (torch.tensor, optional): A 2D tensor of bounding box coordinates for each detection.
            masks (torch.tensor, optional): A 3D tensor of detection masks, where each mask is a binary image.
            probs (torch.tensor, optional): A 1D tensor of probabilities of each class for classification task.
            keypoints (torch.tensor, optional): A 2D tensor of keypoint coordinates for each detection.
            obb (torch.tensor, optional): A 2D tensor of oriented bounding box coordinates for each detection.
        """
        self.orig_img = orig_img
        self.orig_shape = orig_img.shape[:2]
        self.boxes = Boxes(boxes, self.orig_shape) if boxes is not None else None  # native size boxes
        self.masks = Masks(masks, self.orig_shape) if masks is not None else None  # native size or imgsz masks
        self.probs = Probs(probs) if probs is not None else None
        self.keypoints = Keypoints(keypoints, self.orig_shape) if keypoints is not None else None
        self.obb = OBB(obb, self.orig_shape) if obb is not None else None
        self.speed = {"preprocess": None, "inference": None, "postprocess": None}  # milliseconds per image
        self.names = names
        self.path = path
        self.save_dir = None
        self._keys = "boxes", "masks", "probs", "keypoints", "obb"

    def __getitem__(self, idx):
        """Return a Results object for the specified index."""
        return self._apply("__getitem__", idx)

    def __len__(self):
        """Return the number of detections in the Results object."""
        for k in self._keys:
            v = getattr(self, k)
            if v is not None:
                return len(v)

    def update(self, boxes=None, masks=None, probs=None, obb=None):
        """Update the boxes, masks, and probs attributes of the Results object."""
        if boxes is not None:
            self.boxes = Boxes(ops.clip_boxes(boxes, self.orig_shape), self.orig_shape)
        if masks is not None:
            self.masks = Masks(masks, self.orig_shape)
        if probs is not None:
            self.probs = probs
        if obb is not None:
            self.obb = OBB(obb, self.orig_shape)

    def _apply(self, fn, *args, **kwargs):
        """
        Applies a function to all non-empty attributes and returns a new Results object with modified attributes.
        This function is internally called by methods like .to(), .cuda(), .cpu(), etc.

        Args:
            fn (str): The name of the function to apply.
            *args: Variable length argument list to pass to the function.
            **kwargs: Arbitrary keyword arguments to pass to the function.

        Returns:
            Results: A new Results object with attributes modified by the applied function.
        """
        r = self.new()
        for k in self._keys:
            v = getattr(self, k)
            if v is not None:
                setattr(r, k, getattr(v, fn)(*args, **kwargs))
        return r

    def cpu(self):
        """Return a copy of the Results object with all tensors on CPU memory."""
        return self._apply("cpu")

    def numpy(self):
        """Return a copy of the Results object with all tensors as numpy arrays."""
        return self._apply("numpy")

    def cuda(self):
        """Return a copy of the Results object with all tensors on GPU memory."""
        return self._apply("cuda")

    def to(self, *args, **kwargs):
        """Return a copy of the Results object with tensors on the specified device and dtype."""
        return self._apply("to", *args, **kwargs)

    def new(self):
        """Return a new Results object with the same image, path, and names."""
        return Results(orig_img=self.orig_img, path=self.path, names=self.names)

def plot(
|
||||
self,
|
||||
conf=True,
|
||||
line_width=None,
|
||||
font_size=None,
|
||||
font="Arial.ttf",
|
||||
pil=False,
|
||||
img=None,
|
||||
im_gpu=None,
|
||||
kpt_radius=5,
|
||||
kpt_line=True,
|
||||
labels=True,
|
||||
boxes=True,
|
||||
masks=True,
|
||||
probs=True,
|
||||
show=False,
|
||||
save=False,
|
||||
filename=None,
|
||||
):
|
||||
"""
|
||||
Plots the detection results on an input RGB image. Accepts a numpy array (cv2) or a PIL Image.
|
||||
|
||||
Args:
|
||||
conf (bool): Whether to plot the detection confidence score.
|
||||
line_width (float, optional): The line width of the bounding boxes. If None, it is scaled to the image size.
|
||||
font_size (float, optional): The font size of the text. If None, it is scaled to the image size.
|
||||
font (str): The font to use for the text.
|
||||
pil (bool): Whether to return the image as a PIL Image.
|
||||
img (numpy.ndarray): Plot to another image. if not, plot to original image.
|
||||
im_gpu (torch.Tensor): Normalized image in gpu with shape (1, 3, 640, 640), for faster mask plotting.
|
||||
kpt_radius (int, optional): Radius of the drawn keypoints. Default is 5.
|
||||
kpt_line (bool): Whether to draw lines connecting keypoints.
|
||||
labels (bool): Whether to plot the label of bounding boxes.
|
||||
boxes (bool): Whether to plot the bounding boxes.
|
||||
masks (bool): Whether to plot the masks.
|
||||
probs (bool): Whether to plot classification probability
|
||||
show (bool): Whether to display the annotated image directly.
|
||||
save (bool): Whether to save the annotated image to `filename`.
|
||||
filename (str): Filename to save image to if save is True.
|
||||
|
||||
Returns:
|
||||
(numpy.ndarray): A numpy array of the annotated image.
|
||||
|
||||
Example:
|
||||
```python
|
||||
from PIL import Image
|
||||
from ultralytics import YOLO
|
||||
|
||||
model = YOLO('yolov8n.pt')
|
||||
results = model('bus.jpg') # results list
|
||||
for r in results:
|
||||
im_array = r.plot() # plot a BGR numpy array of predictions
|
||||
im = Image.fromarray(im_array[..., ::-1]) # RGB PIL image
|
||||
im.show() # show image
|
||||
im.save('results.jpg') # save image
|
||||
```
|
||||
"""
|
||||
        if img is None and isinstance(self.orig_img, torch.Tensor):
            img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).to(torch.uint8).cpu().numpy()

        names = self.names
        is_obb = self.obb is not None
        pred_boxes, show_boxes = self.obb if is_obb else self.boxes, boxes
        pred_masks, show_masks = self.masks, masks
        pred_probs, show_probs = self.probs, probs
        annotator = Annotator(
            deepcopy(self.orig_img if img is None else img),
            line_width,
            font_size,
            font,
            pil or (pred_probs is not None and show_probs),  # Classify tasks default to pil=True
            example=names,
        )

        # Plot Segment results
        if pred_masks and show_masks:
            if im_gpu is None:
                img = LetterBox(pred_masks.shape[1:])(image=annotator.result())
                im_gpu = (
                    torch.as_tensor(img, dtype=torch.float16, device=pred_masks.data.device)
                    .permute(2, 0, 1)
                    .flip(0)
                    .contiguous()
                    / 255
                )
            idx = pred_boxes.cls if pred_boxes else range(len(pred_masks))
            annotator.masks(pred_masks.data, colors=[colors(x, True) for x in idx], im_gpu=im_gpu)

        # Plot Detect results
        if pred_boxes is not None and show_boxes:
            for d in reversed(pred_boxes):
                c, conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item())
                name = ("" if id is None else f"id:{id} ") + names[c]
                label = (f"{name} {conf:.2f}" if conf else name) if labels else None
                box = d.xyxyxyxy.reshape(-1, 4, 2).squeeze() if is_obb else d.xyxy.squeeze()
                annotator.box_label(box, label, color=colors(c, True), rotated=is_obb)

        # Plot Classify results
        if pred_probs is not None and show_probs:
            text = ",\n".join(f"{names[j] if names else j} {pred_probs.data[j]:.2f}" for j in pred_probs.top5)
            x = round(self.orig_shape[0] * 0.03)
            annotator.text([x, x], text, txt_color=(255, 255, 255))  # TODO: allow setting colors

        # Plot Pose results
        if self.keypoints is not None:
            for k in reversed(self.keypoints.data):
                annotator.kpts(k, self.orig_shape, radius=kpt_radius, kpt_line=kpt_line)

        # Show results
        if show:
            annotator.show(self.path)

        # Save results
        if save:
            annotator.save(filename)

        return annotator.result()

    def show(self, *args, **kwargs):
        """Show annotated results image."""
        self.plot(show=True, *args, **kwargs)

    def save(self, filename=None, *args, **kwargs):
        """Save annotated results image."""
        if not filename:
            filename = f"results_{Path(self.path).name}"
        self.plot(save=True, filename=filename, *args, **kwargs)
        return filename

    def verbose(self):
        """Return log string for each task."""
        log_string = ""
        probs = self.probs
        boxes = self.boxes
        if len(self) == 0:
            return log_string if probs is not None else f"{log_string}(no detections), "
        if probs is not None:
            log_string += f"{', '.join(f'{self.names[j]} {probs.data[j]:.2f}' for j in probs.top5)}, "
        if boxes:
            for c in boxes.cls.unique():
                n = (boxes.cls == c).sum()  # detections per class
                log_string += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "
        return log_string

    def save_txt(self, txt_file, save_conf=False):
        """
        Save predictions into txt file.

        Args:
            txt_file (str): txt file path.
            save_conf (bool): save confidence score or not.
        """
        is_obb = self.obb is not None
        boxes = self.obb if is_obb else self.boxes
        masks = self.masks
        probs = self.probs
        kpts = self.keypoints
        texts = []
        if probs is not None:
            # Classify
            for j in probs.top5:
                texts.append(f"{probs.data[j]:.2f} {self.names[j]}")
        elif boxes:
            # Detect/segment/pose
            for j, d in enumerate(boxes):
                c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item())
                line = (c, *(d.xyxyxyxyn.view(-1) if is_obb else d.xywhn.view(-1)))
                if masks:
                    seg = masks[j].xyn[0].copy().reshape(-1)  # reversed mask.xyn, (n,2) to (n*2)
                    line = (c, *seg)
                if kpts is not None:
                    kpt = torch.cat((kpts[j].xyn, kpts[j].conf[..., None]), 2) if kpts[j].has_visible else kpts[j].xyn
                    line += (*kpt.reshape(-1).tolist(),)
                line += (conf,) * save_conf + (() if id is None else (id,))
                texts.append(("%g " * len(line)).rstrip() % line)

        if texts:
            Path(txt_file).parent.mkdir(parents=True, exist_ok=True)  # make directory
            with open(txt_file, "a") as f:
                f.writelines(text + "\n" for text in texts)

    def save_crop(self, save_dir, file_name=Path("im.jpg")):
        """
        Save cropped predictions to `save_dir/cls/file_name.jpg`.

        Args:
            save_dir (str | pathlib.Path): Save path.
            file_name (str | pathlib.Path): File name.
        """
        if self.probs is not None:
            LOGGER.warning("WARNING ⚠️ Classify task does not support `save_crop`.")
            return
        if self.obb is not None:
            LOGGER.warning("WARNING ⚠️ OBB task does not support `save_crop`.")
            return
        for d in self.boxes:
            save_one_box(
                d.xyxy,
                self.orig_img.copy(),
                file=Path(save_dir) / self.names[int(d.cls)] / f"{Path(file_name)}.jpg",
                BGR=True,
            )

    def summary(self, normalize=False, decimals=5):
        """Convert the results to a summarized format."""
        if self.probs is not None:
            LOGGER.warning("Warning: Classify results do not support the `summary()` method yet.")
            return

        # Create list of detection dictionaries
        results = []
        data = self.boxes.data.cpu().tolist()
        h, w = self.orig_shape if normalize else (1, 1)
        for i, row in enumerate(data):  # xyxy, track_id if tracking, conf, class_id
            box = {
                "x1": round(row[0] / w, decimals),
                "y1": round(row[1] / h, decimals),
                "x2": round(row[2] / w, decimals),
                "y2": round(row[3] / h, decimals),
            }
            conf = round(row[-2], decimals)
            class_id = int(row[-1])
            result = {"name": self.names[class_id], "class": class_id, "confidence": conf, "box": box}
            if self.boxes.is_track:
                result["track_id"] = int(row[-3])  # track ID
            if self.masks:
                result["segments"] = {
                    "x": (self.masks.xy[i][:, 0] / w).round(decimals).tolist(),
                    "y": (self.masks.xy[i][:, 1] / h).round(decimals).tolist(),
                }
            if self.keypoints is not None:
                x, y, visible = self.keypoints[i].data[0].cpu().unbind(dim=1)  # torch Tensor
                result["keypoints"] = {
                    "x": (x / w).numpy().round(decimals).tolist(),  # decimals named argument required
                    "y": (y / h).numpy().round(decimals).tolist(),
                    "visible": visible.numpy().round(decimals).tolist(),
                }
            results.append(result)

        return results

    def tojson(self, normalize=False, decimals=5):
        """Convert the results to JSON format."""
        import json

        return json.dumps(self.summary(normalize=normalize, decimals=decimals), indent=2)
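
# --- Usage sketch (illustrative, not part of the original module) ---
# The Results methods above compose naturally: summary() builds the list of
# per-detection dicts that tojson() serializes, and save_txt() writes label
# lines in YOLO format. A minimal sketch, assuming 'yolov8n.pt' weights and
# a local 'bus.jpg' image are available:
#
#     from ultralytics import YOLO
#
#     for r in YOLO("yolov8n.pt")("bus.jpg"):
#         print(r.verbose())               # e.g. "4 persons, 1 bus, "
#         print(r.tojson(normalize=True))  # JSON string via summary()
#         r.save_txt("labels/bus.txt", save_conf=True)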


class Boxes(BaseTensor):
    """
    Manages detection boxes, providing easy access and manipulation of box coordinates, confidence scores, class
    identifiers, and optional tracking IDs. Supports multiple formats for box coordinates, including both absolute and
    normalized forms.

    Attributes:
        data (torch.Tensor): The raw tensor containing detection boxes and their associated data.
        orig_shape (tuple): The original image size as a tuple (height, width), used for normalization.
        is_track (bool): Indicates whether tracking IDs are included in the box data.

    Properties:
        xyxy (torch.Tensor | numpy.ndarray): Boxes in [x1, y1, x2, y2] format.
        conf (torch.Tensor | numpy.ndarray): Confidence scores for each box.
        cls (torch.Tensor | numpy.ndarray): Class labels for each box.
        id (torch.Tensor | numpy.ndarray, optional): Tracking IDs for each box, if available.
        xywh (torch.Tensor | numpy.ndarray): Boxes in [x, y, width, height] format, calculated on demand.
        xyxyn (torch.Tensor | numpy.ndarray): Normalized [x1, y1, x2, y2] boxes, relative to `orig_shape`.
        xywhn (torch.Tensor | numpy.ndarray): Normalized [x, y, width, height] boxes, relative to `orig_shape`.

    Methods:
        cpu(): Moves the boxes to CPU memory.
        numpy(): Converts the boxes to a numpy array format.
        cuda(): Moves the boxes to CUDA (GPU) memory.
        to(device, dtype=None): Moves the boxes to the specified device.
    """

    def __init__(self, boxes, orig_shape) -> None:
        """
        Initialize the Boxes class.

        Args:
            boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes, with
                shape (num_boxes, 6) or (num_boxes, 7). The last two columns contain confidence and class values.
                If present, the third last column contains track IDs.
            orig_shape (tuple): Original image size, in the format (height, width).
        """
        if boxes.ndim == 1:
            boxes = boxes[None, :]
        n = boxes.shape[-1]
        assert n in (6, 7), f"expected 6 or 7 values but got {n}"  # xyxy, track_id, conf, cls
        super().__init__(boxes, orig_shape)
        self.is_track = n == 7
        self.orig_shape = orig_shape

    @property
    def xyxy(self):
        """Return the boxes in xyxy format."""
        return self.data[:, :4]

    @property
    def conf(self):
        """Return the confidence values of the boxes."""
        return self.data[:, -2]

    @property
    def cls(self):
        """Return the class values of the boxes."""
        return self.data[:, -1]

    @property
    def id(self):
        """Return the track IDs of the boxes (if available)."""
        return self.data[:, -3] if self.is_track else None

    @property
    @lru_cache(maxsize=2)  # maxsize 1 should suffice
    def xywh(self):
        """Return the boxes in xywh format."""
        return ops.xyxy2xywh(self.xyxy)

    @property
    @lru_cache(maxsize=2)
    def xyxyn(self):
        """Return the boxes in xyxy format normalized by original image size."""
        xyxy = self.xyxy.clone() if isinstance(self.xyxy, torch.Tensor) else np.copy(self.xyxy)
        xyxy[..., [0, 2]] /= self.orig_shape[1]
        xyxy[..., [1, 3]] /= self.orig_shape[0]
        return xyxy

    @property
    @lru_cache(maxsize=2)
    def xywhn(self):
        """Return the boxes in xywh format normalized by original image size."""
        xywh = ops.xyxy2xywh(self.xyxy)
        xywh[..., [0, 2]] /= self.orig_shape[1]
        xywh[..., [1, 3]] /= self.orig_shape[0]
        return xywh
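
# --- Usage sketch (illustrative, not part of the original module) ---
# Boxes is a thin view over the raw (num_boxes, 6 or 7) tensor whose columns
# are [x1, y1, x2, y2, (track_id), conf, cls], which is why conf, cls and id
# index from the right. A minimal sketch, assuming 'r' is a Results object
# from a detect-task prediction:
#
#     b = r.boxes.cpu()
#     print(b.xyxy)         # (N, 4) pixel corner coordinates
#     print(b.xywhn)        # (N, 4) centers/sizes normalized by orig_shape
#     print(b.conf, b.cls)  # (N,) scores and class indices
#     print(b.id)           # (N,) track IDs, or None when not tracking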


class Masks(BaseTensor):
    """
    A class for storing and manipulating detection masks.

    Attributes:
        xy (list): A list of segments in pixel coordinates.
        xyn (list): A list of normalized segments.

    Methods:
        cpu(): Returns the masks tensor on CPU memory.
        numpy(): Returns the masks tensor as a numpy array.
        cuda(): Returns the masks tensor on GPU memory.
        to(device, dtype): Returns the masks tensor with the specified device and dtype.
    """

    def __init__(self, masks, orig_shape) -> None:
        """Initialize the Masks class with the given masks tensor and original image shape."""
        if masks.ndim == 2:
            masks = masks[None, :]
        super().__init__(masks, orig_shape)

    @property
    @lru_cache(maxsize=1)
    def xyn(self):
        """Return normalized segments."""
        return [
            ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=True)
            for x in ops.masks2segments(self.data)
        ]

    @property
    @lru_cache(maxsize=1)
    def xy(self):
        """Return segments in pixel coordinates."""
        return [
            ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=False)
            for x in ops.masks2segments(self.data)
        ]


class Keypoints(BaseTensor):
    """
    A class for storing and manipulating detection keypoints.

    Attributes:
        xy (torch.Tensor): A collection of keypoints containing x, y coordinates for each detection.
        xyn (torch.Tensor): A normalized version of xy with coordinates in the range [0, 1].
        conf (torch.Tensor): Confidence values associated with keypoints if available, otherwise None.

    Methods:
        cpu(): Returns a copy of the keypoints tensor on CPU memory.
        numpy(): Returns a copy of the keypoints tensor as a numpy array.
        cuda(): Returns a copy of the keypoints tensor on GPU memory.
        to(device, dtype): Returns a copy of the keypoints tensor with the specified device and dtype.
    """

    @smart_inference_mode()  # avoid keypoints < conf in-place error
    def __init__(self, keypoints, orig_shape) -> None:
        """Initializes the Keypoints object with detection keypoints and original image size."""
        if keypoints.ndim == 2:
            keypoints = keypoints[None, :]
        if keypoints.shape[2] == 3:  # x, y, conf
            mask = keypoints[..., 2] < 0.5  # points with conf < 0.5 (not visible)
            keypoints[..., :2][mask] = 0
        super().__init__(keypoints, orig_shape)
        self.has_visible = self.data.shape[-1] == 3

    @property
    @lru_cache(maxsize=1)
    def xy(self):
        """Returns x, y coordinates of keypoints."""
        return self.data[..., :2]

    @property
    @lru_cache(maxsize=1)
    def xyn(self):
        """Returns normalized x, y coordinates of keypoints."""
        xy = self.xy.clone() if isinstance(self.xy, torch.Tensor) else np.copy(self.xy)
        xy[..., 0] /= self.orig_shape[1]
        xy[..., 1] /= self.orig_shape[0]
        return xy

    @property
    @lru_cache(maxsize=1)
    def conf(self):
        """Returns confidence values of keypoints if available, else None."""
        return self.data[..., 2] if self.has_visible else None
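
# --- Usage sketch (illustrative, not part of the original module) ---
# __init__ above zeroes the x, y of any keypoint whose confidence is below
# 0.5, so low-confidence points read back as (0, 0). A minimal sketch,
# assuming 'r' is a Results object from a pose-task prediction:
#
#     k = r.keypoints
#     print(k.xy.shape)  # (num_dets, num_kpts, 2) pixel coordinates
#     print(k.xyn[0])    # first detection, coordinates scaled to [0, 1]
#     if k.conf is not None:
#         visible = k.conf[0] >= 0.5  # per-keypoint visibility mask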


class Probs(BaseTensor):
    """
    A class for storing and manipulating classification predictions.

    Attributes:
        top1 (int): Index of the top 1 class.
        top5 (list[int]): Indices of the top 5 classes.
        top1conf (torch.Tensor): Confidence of the top 1 class.
        top5conf (torch.Tensor): Confidences of the top 5 classes.

    Methods:
        cpu(): Returns a copy of the probs tensor on CPU memory.
        numpy(): Returns a copy of the probs tensor as a numpy array.
        cuda(): Returns a copy of the probs tensor on GPU memory.
        to(): Returns a copy of the probs tensor with the specified device and dtype.
    """

    def __init__(self, probs, orig_shape=None) -> None:
        """Initialize the Probs class with classification probabilities and optional original shape of the image."""
        super().__init__(probs, orig_shape)

    @property
    @lru_cache(maxsize=1)
    def top1(self):
        """Return the index of top 1."""
        return int(self.data.argmax())

    @property
    @lru_cache(maxsize=1)
    def top5(self):
        """Return the indices of top 5."""
        return (-self.data).argsort(0)[:5].tolist()  # this way works with both torch and numpy.

    @property
    @lru_cache(maxsize=1)
    def top1conf(self):
        """Return the confidence of top 1."""
        return self.data[self.top1]

    @property
    @lru_cache(maxsize=1)
    def top5conf(self):
        """Return the confidences of top 5."""
        return self.data[self.top5]
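
# --- Usage sketch (illustrative, not part of the original module) ---
# top5 is computed with (-data).argsort() so it works for both torch and
# numpy data; top1conf and top5conf simply index back into the raw vector.
# A minimal sketch, assuming 'r' is a Results object from a classify-task
# prediction:
#
#     p = r.probs
#     print(r.names[p.top1], float(p.top1conf))  # best class and score
#     for j, c in zip(p.top5, p.top5conf):       # top-5 classes
#         print(f"{r.names[j]}: {float(c):.2f}")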


class OBB(BaseTensor):
    """
    A class for storing and manipulating Oriented Bounding Boxes (OBB).

    Args:
        boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes,
            with shape (num_boxes, 7) or (num_boxes, 8). The last two columns contain confidence and class values.
            If present, the third last column contains track IDs, and the fifth column from the left contains rotation.
        orig_shape (tuple): Original image size, in the format (height, width).

    Attributes:
        xywhr (torch.Tensor | numpy.ndarray): The boxes in [x_center, y_center, width, height, rotation] format.
        conf (torch.Tensor | numpy.ndarray): The confidence values of the boxes.
        cls (torch.Tensor | numpy.ndarray): The class values of the boxes.
        id (torch.Tensor | numpy.ndarray): The track IDs of the boxes (if available).
        xyxyxyxyn (torch.Tensor | numpy.ndarray): The rotated boxes in xyxyxyxy format normalized by orig image size.
        xyxyxyxy (torch.Tensor | numpy.ndarray): The rotated boxes in xyxyxyxy format.
        xyxy (torch.Tensor | numpy.ndarray): The horizontal (axis-aligned) boxes in xyxy format.
        data (torch.Tensor): The raw OBB tensor (alias for `boxes`).

    Methods:
        cpu(): Move the object to CPU memory.
        numpy(): Convert the object to a numpy array.
        cuda(): Move the object to CUDA memory.
        to(*args, **kwargs): Move the object to the specified device.
    """

    def __init__(self, boxes, orig_shape) -> None:
        """Initialize an OBB instance with oriented box data and the original image shape."""
        if boxes.ndim == 1:
            boxes = boxes[None, :]
        n = boxes.shape[-1]
        assert n in (7, 8), f"expected 7 or 8 values but got {n}"  # xywh, rotation, track_id, conf, cls
        super().__init__(boxes, orig_shape)
        self.is_track = n == 8
        self.orig_shape = orig_shape

    @property
    def xywhr(self):
        """Return the rotated boxes in xywhr format."""
        return self.data[:, :5]

    @property
    def conf(self):
        """Return the confidence values of the boxes."""
        return self.data[:, -2]

    @property
    def cls(self):
        """Return the class values of the boxes."""
        return self.data[:, -1]

    @property
    def id(self):
        """Return the track IDs of the boxes (if available)."""
        return self.data[:, -3] if self.is_track else None

    @property
    @lru_cache(maxsize=2)
    def xyxyxyxy(self):
        """Return the boxes in xyxyxyxy format, (N, 4, 2)."""
        return ops.xywhr2xyxyxyxy(self.xywhr)

    @property
    @lru_cache(maxsize=2)
    def xyxyxyxyn(self):
        """Return the boxes in xyxyxyxy format normalized by original image size, (N, 4, 2)."""
        xyxyxyxyn = self.xyxyxyxy.clone() if isinstance(self.xyxyxyxy, torch.Tensor) else np.copy(self.xyxyxyxy)
        xyxyxyxyn[..., 0] /= self.orig_shape[1]
        xyxyxyxyn[..., 1] /= self.orig_shape[0]
        return xyxyxyxyn

    @property
    @lru_cache(maxsize=2)
    def xyxy(self):
        """
        Return the horizontal boxes in xyxy format, (N, 4).

        Accepts both torch and numpy boxes.
        """
        x = self.xyxyxyxy[..., 0]
        y = self.xyxyxyxy[..., 1]
        if isinstance(self.data, np.ndarray):  # numpy min/max along an axis return arrays directly, no .values
            return np.stack([x.min(1), y.min(1), x.max(1), y.max(1)], axis=-1)
        return torch.stack([x.amin(1), y.amin(1), x.amax(1), y.amax(1)], dim=-1)  # torch equivalents
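
# --- Usage sketch (illustrative, not part of the original module) ---
# An OBB row is [cx, cy, w, h, rotation, (track_id), conf, cls]; xyxyxyxy
# expands each row to its four rotated corners and xyxy collapses those
# corners to an axis-aligned hull. A minimal sketch, assuming 'r' is a
# Results object from an obb-task prediction:
#
#     o = r.obb
#     print(o.xywhr[0])     # center, size and rotation of the first box
#     print(o.xyxyxyxy[0])  # its four corners, shape (4, 2)
#     print(o.xyxy[0])      # axis-aligned box enclosing those corners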
@ -1,756 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Train a model on a dataset.

Usage:
    $ yolo mode=train model=yolov8n.pt data=coco128.yaml imgsz=640 epochs=100 batch=16
"""

import math
import os
import subprocess
import time
import warnings
from copy import deepcopy
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import torch
from torch import distributed as dist
from torch import nn, optim

from ultralytics.cfg import get_cfg, get_save_dir
from ultralytics.data.utils import check_cls_dataset, check_det_dataset
from ultralytics.nn.tasks import attempt_load_one_weight, attempt_load_weights
from ultralytics.utils import (
    DEFAULT_CFG,
    LOGGER,
    RANK,
    TQDM,
    __version__,
    callbacks,
    clean_url,
    colorstr,
    emojis,
    yaml_save,
)
from ultralytics.utils.autobatch import check_train_batch_size
from ultralytics.utils.checks import check_amp, check_file, check_imgsz, check_model_file_from_stem, print_args
from ultralytics.utils.dist import ddp_cleanup, generate_ddp_command
from ultralytics.utils.files import get_latest_run
from ultralytics.utils.torch_utils import (
    EarlyStopping,
    ModelEMA,
    de_parallel,
    init_seeds,
    one_cycle,
    select_device,
    strip_optimizer,
)


class BaseTrainer:
    """
    BaseTrainer.

    A base class for creating trainers.

    Attributes:
        args (SimpleNamespace): Configuration for the trainer.
        validator (BaseValidator): Validator instance.
        model (nn.Module): Model instance.
        callbacks (defaultdict): Dictionary of callbacks.
        save_dir (Path): Directory to save results.
        wdir (Path): Directory to save weights.
        last (Path): Path to the last checkpoint.
        best (Path): Path to the best checkpoint.
        save_period (int): Save checkpoint every x epochs (disabled if < 1).
        batch_size (int): Batch size for training.
        epochs (int): Number of epochs to train for.
        start_epoch (int): Starting epoch for training.
        device (torch.device): Device to use for training.
        amp (bool): Flag to enable AMP (Automatic Mixed Precision).
        scaler (amp.GradScaler): Gradient scaler for AMP.
        data (str): Path to data.
        trainset (torch.utils.data.Dataset): Training dataset.
        testset (torch.utils.data.Dataset): Testing dataset.
        ema (nn.Module): EMA (Exponential Moving Average) of the model.
        resume (bool): Resume training from a checkpoint.
        lf (callable): Learning rate lambda applied by the scheduler.
        scheduler (torch.optim.lr_scheduler._LRScheduler): Learning rate scheduler.
        best_fitness (float): The best fitness value achieved.
        fitness (float): Current fitness value.
        loss (float): Current loss value.
        tloss (float): Total loss value.
        loss_names (list): List of loss names.
        csv (Path): Path to results CSV file.
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """
        Initializes the BaseTrainer class.

        Args:
            cfg (str, optional): Path to a configuration file. Defaults to DEFAULT_CFG.
            overrides (dict, optional): Configuration overrides. Defaults to None.
        """
        self.args = get_cfg(cfg, overrides)
        self.check_resume(overrides)
        self.device = select_device(self.args.device, self.args.batch)
        self.validator = None
        self.metrics = None
        self.plots = {}
        init_seeds(self.args.seed + 1 + RANK, deterministic=self.args.deterministic)

        # Dirs
        self.save_dir = get_save_dir(self.args)
        self.args.name = self.save_dir.name  # update name for loggers
        self.wdir = self.save_dir / "weights"  # weights dir
        if RANK in (-1, 0):
            self.wdir.mkdir(parents=True, exist_ok=True)  # make dir
            self.args.save_dir = str(self.save_dir)
            yaml_save(self.save_dir / "args.yaml", vars(self.args))  # save run args
        self.last, self.best = self.wdir / "last.pt", self.wdir / "best.pt"  # checkpoint paths
        self.save_period = self.args.save_period

        self.batch_size = self.args.batch
        self.epochs = self.args.epochs
        self.start_epoch = 0
        if RANK == -1:
            print_args(vars(self.args))

        # Device
        if self.device.type in ("cpu", "mps"):
            self.args.workers = 0  # faster CPU training as time dominated by inference, not dataloading

        # Model and Dataset
        self.model = check_model_file_from_stem(self.args.model)  # add suffix, i.e. yolov8n -> yolov8n.pt
        try:
            if self.args.task == "classify":
                self.data = check_cls_dataset(self.args.data)
            elif self.args.data.split(".")[-1] in ("yaml", "yml") or self.args.task in (
                "detect",
                "segment",
                "pose",
                "obb",
            ):
                self.data = check_det_dataset(self.args.data)
                if "yaml_file" in self.data:
                    self.args.data = self.data["yaml_file"]  # for validating 'yolo train data=url.zip' usage
        except Exception as e:
            raise RuntimeError(emojis(f"Dataset '{clean_url(self.args.data)}' error ❌ {e}")) from e

        self.trainset, self.testset = self.get_dataset(self.data)
        self.ema = None

        # Optimization utils init
        self.lf = None
        self.scheduler = None

        # Epoch level metrics
        self.best_fitness = None
        self.fitness = None
        self.loss = None
        self.tloss = None
        self.loss_names = ["Loss"]
        self.csv = self.save_dir / "results.csv"
        self.plot_idx = [0, 1, 2]

        # Callbacks
        self.callbacks = _callbacks or callbacks.get_default_callbacks()
        if RANK in (-1, 0):
            callbacks.add_integration_callbacks(self)

    def add_callback(self, event: str, callback):
        """Appends the given callback."""
        self.callbacks[event].append(callback)

    def set_callback(self, event: str, callback):
        """Overrides the existing callbacks with the given callback."""
        self.callbacks[event] = [callback]

    def run_callbacks(self, event: str):
        """Run all existing callbacks associated with a particular event."""
        for callback in self.callbacks.get(event, []):
            callback(self)

    def train(self):
        """Run training, allowing device='' or device=None on Multi-GPU systems to default to device=0."""
        if isinstance(self.args.device, str) and len(self.args.device):  # i.e. device='0' or device='0,1,2,3'
            world_size = len(self.args.device.split(","))
        elif isinstance(self.args.device, (tuple, list)):  # i.e. device=[0, 1, 2, 3] (multi-GPU from CLI is list)
            world_size = len(self.args.device)
        elif torch.cuda.is_available():  # i.e. device=None or device='' or device=number
            world_size = 1  # default to device 0
        else:  # i.e. device='cpu' or 'mps'
            world_size = 0

        # Run subprocess if DDP training, else train normally
        if world_size > 1 and "LOCAL_RANK" not in os.environ:
            # Argument checks
            if self.args.rect:
                LOGGER.warning("WARNING ⚠️ 'rect=True' is incompatible with Multi-GPU training, setting 'rect=False'")
                self.args.rect = False
            if self.args.batch == -1:
                LOGGER.warning(
                    "WARNING ⚠️ 'batch=-1' for AutoBatch is incompatible with Multi-GPU training, setting "
                    "default 'batch=16'"
                )
                self.args.batch = 16

            # Command
            cmd, file = generate_ddp_command(world_size, self)
            try:
                LOGGER.info(f'{colorstr("DDP:")} debug command {" ".join(cmd)}')
                subprocess.run(cmd, check=True)
            except Exception as e:
                raise e
            finally:
                ddp_cleanup(self, str(file))

        else:
            self._do_train(world_size)

    def _setup_scheduler(self):
        """Initialize training learning rate scheduler."""
        if self.args.cos_lr:
            self.lf = one_cycle(1, self.args.lrf, self.epochs)  # cosine 1->hyp['lrf']
        else:
            self.lf = lambda x: max(1 - x / self.epochs, 0) * (1.0 - self.args.lrf) + self.args.lrf  # linear
        self.scheduler = optim.lr_scheduler.LambdaLR(self.optimizer, lr_lambda=self.lf)
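
    # --- Illustrative sketch (not part of the original file) ---
    # Both branches above build a multiplier lf(epoch) that LambdaLR applies
    # to each parameter group's initial lr. For example, with epochs=100 and
    # lrf=0.01, the linear branch gives:
    #
    #     lf = lambda x: max(1 - x / 100, 0) * (1.0 - 0.01) + 0.01
    #     lf(0)    # -> 1.0    (full lr0 at the start)
    #     lf(50)   # -> 0.505  (roughly half way down)
    #     lf(100)  # -> 0.01   (lr0 * lrf at the end)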

    def _setup_ddp(self, world_size):
        """Initializes and sets the DistributedDataParallel parameters for training."""
        torch.cuda.set_device(RANK)
        self.device = torch.device("cuda", RANK)
        # LOGGER.info(f'DDP info: RANK {RANK}, WORLD_SIZE {world_size}, DEVICE {self.device}')
        os.environ["NCCL_BLOCKING_WAIT"] = "1"  # set to enforce timeout
        dist.init_process_group(
            backend="nccl" if dist.is_nccl_available() else "gloo",
            timeout=timedelta(seconds=10800),  # 3 hours
            rank=RANK,
            world_size=world_size,
        )

    def _setup_train(self, world_size):
        """Builds dataloaders and optimizer on correct rank process."""

        # Model
        self.run_callbacks("on_pretrain_routine_start")
        ckpt = self.setup_model()
        self.model = self.model.to(self.device)
        self.set_model_attributes()

        # Freeze layers
        freeze_list = (
            self.args.freeze
            if isinstance(self.args.freeze, list)
            else range(self.args.freeze)
            if isinstance(self.args.freeze, int)
            else []
        )
        always_freeze_names = [".dfl"]  # always freeze these layers
        freeze_layer_names = [f"model.{x}." for x in freeze_list] + always_freeze_names
        for k, v in self.model.named_parameters():
            # v.register_hook(lambda x: torch.nan_to_num(x))  # NaN to 0 (commented for erratic training results)
            if any(x in k for x in freeze_layer_names):
                LOGGER.info(f"Freezing layer '{k}'")
                v.requires_grad = False
            elif not v.requires_grad and v.dtype.is_floating_point:  # only floating point Tensor can require gradients
                LOGGER.info(
                    f"WARNING ⚠️ setting 'requires_grad=True' for frozen layer '{k}'. "
                    "See ultralytics.engine.trainer for customization of frozen layers."
                )
                v.requires_grad = True

        # Check AMP
        self.amp = torch.tensor(self.args.amp).to(self.device)  # True or False
        if self.amp and RANK in (-1, 0):  # Single-GPU and DDP
            callbacks_backup = callbacks.default_callbacks.copy()  # backup callbacks as check_amp() resets them
            self.amp = torch.tensor(check_amp(self.model), device=self.device)
            callbacks.default_callbacks = callbacks_backup  # restore callbacks
        if RANK > -1 and world_size > 1:  # DDP
            dist.broadcast(self.amp, src=0)  # broadcast the tensor from rank 0 to all other ranks (returns None)
        self.amp = bool(self.amp)  # as boolean
        self.scaler = torch.cuda.amp.GradScaler(enabled=self.amp)
        if world_size > 1:
            self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[RANK])

        # Check imgsz
        gs = max(int(self.model.stride.max() if hasattr(self.model, "stride") else 32), 32)  # grid size (max stride)
        self.args.imgsz = check_imgsz(self.args.imgsz, stride=gs, floor=gs, max_dim=1)
        self.stride = gs  # for multiscale training

        # Batch size
        if self.batch_size == -1 and RANK == -1:  # single-GPU only, estimate best batch size
            self.args.batch = self.batch_size = check_train_batch_size(self.model, self.args.imgsz, self.amp)

        # Dataloaders
        batch_size = self.batch_size // max(world_size, 1)
        self.train_loader = self.get_dataloader(self.trainset, batch_size=batch_size, rank=RANK, mode="train")
        if RANK in (-1, 0):
            # Note: When training DOTA dataset, double batch size could get OOM on images with >2000 objects.
            self.test_loader = self.get_dataloader(
                self.testset, batch_size=batch_size if self.args.task == "obb" else batch_size * 2, rank=-1, mode="val"
            )
            self.validator = self.get_validator()
            metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix="val")
            self.metrics = dict(zip(metric_keys, [0] * len(metric_keys)))
            self.ema = ModelEMA(self.model)
            if self.args.plots:
                self.plot_training_labels()

        # Optimizer
        self.accumulate = max(round(self.args.nbs / self.batch_size), 1)  # accumulate loss before optimizing
        weight_decay = self.args.weight_decay * self.batch_size * self.accumulate / self.args.nbs  # scale weight_decay
        iterations = math.ceil(len(self.train_loader.dataset) / max(self.batch_size, self.args.nbs)) * self.epochs
        self.optimizer = self.build_optimizer(
            model=self.model,
            name=self.args.optimizer,
            lr=self.args.lr0,
            momentum=self.args.momentum,
            decay=weight_decay,
            iterations=iterations,
        )
        # Scheduler
        self._setup_scheduler()
        self.stopper, self.stop = EarlyStopping(patience=self.args.patience), False
        self.resume_training(ckpt)
        self.scheduler.last_epoch = self.start_epoch - 1  # do not move
        self.run_callbacks("on_pretrain_routine_end")

    def _do_train(self, world_size=1):
        """Run the training loop, then evaluate and plot at the end if specified by arguments."""
        if world_size > 1:
            self._setup_ddp(world_size)
        self._setup_train(world_size)

        nb = len(self.train_loader)  # number of batches
        nw = max(round(self.args.warmup_epochs * nb), 100) if self.args.warmup_epochs > 0 else -1  # warmup iterations
        last_opt_step = -1
        self.epoch_time = None
        self.epoch_time_start = time.time()
        self.train_time_start = time.time()
        self.run_callbacks("on_train_start")
        LOGGER.info(
            f'Image sizes {self.args.imgsz} train, {self.args.imgsz} val\n'
            f'Using {self.train_loader.num_workers * (world_size or 1)} dataloader workers\n'
            f"Logging results to {colorstr('bold', self.save_dir)}\n"
            f'Starting training for ' + (f"{self.args.time} hours..." if self.args.time else f"{self.epochs} epochs...")
        )
        if self.args.close_mosaic:
            base_idx = (self.epochs - self.args.close_mosaic) * nb
            self.plot_idx.extend([base_idx, base_idx + 1, base_idx + 2])
        epoch = self.start_epoch
        while True:
            self.epoch = epoch
            self.run_callbacks("on_train_epoch_start")
            self.model.train()
            if RANK != -1:
                self.train_loader.sampler.set_epoch(epoch)
            pbar = enumerate(self.train_loader)
            # Update dataloader attributes (optional)
            if epoch == (self.epochs - self.args.close_mosaic):
                self._close_dataloader_mosaic()
                self.train_loader.reset()

            if RANK in (-1, 0):
                LOGGER.info(self.progress_string())
                pbar = TQDM(enumerate(self.train_loader), total=nb)
            self.tloss = None
            self.optimizer.zero_grad()
            for i, batch in pbar:
                self.run_callbacks("on_train_batch_start")
                # Warmup
                ni = i + nb * epoch
                if ni <= nw:
                    xi = [0, nw]  # x interp
                    self.accumulate = max(1, int(np.interp(ni, xi, [1, self.args.nbs / self.batch_size]).round()))
                    for j, x in enumerate(self.optimizer.param_groups):
                        # Bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                        x["lr"] = np.interp(
                            ni, xi, [self.args.warmup_bias_lr if j == 0 else 0.0, x["initial_lr"] * self.lf(epoch)]
                        )
                        if "momentum" in x:
                            x["momentum"] = np.interp(ni, xi, [self.args.warmup_momentum, self.args.momentum])

                # Forward
                with torch.cuda.amp.autocast(self.amp):
                    batch = self.preprocess_batch(batch)
                    self.loss, self.loss_items = self.model(batch)
                    if RANK != -1:
                        self.loss *= world_size
                    self.tloss = (
                        (self.tloss * i + self.loss_items) / (i + 1) if self.tloss is not None else self.loss_items
                    )

                # Backward
                self.scaler.scale(self.loss).backward()

                # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
                if ni - last_opt_step >= self.accumulate:
                    self.optimizer_step()
                    last_opt_step = ni

                    # Timed stopping
                    if self.args.time:
                        self.stop = (time.time() - self.train_time_start) > (self.args.time * 3600)
                        if RANK != -1:  # if DDP training
                            broadcast_list = [self.stop if RANK == 0 else None]
                            dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
                            self.stop = broadcast_list[0]
                        if self.stop:  # training time exceeded
                            break

                # Log
                mem = f"{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G"  # (GB)
                loss_len = self.tloss.shape[0] if len(self.tloss.shape) else 1
                losses = self.tloss if loss_len > 1 else torch.unsqueeze(self.tloss, 0)
                if RANK in (-1, 0):
                    pbar.set_description(
                        ("%11s" * 2 + "%11.4g" * (2 + loss_len))
                        % (f"{epoch + 1}/{self.epochs}", mem, *losses, batch["cls"].shape[0], batch["img"].shape[-1])
                    )
                    self.run_callbacks("on_batch_end")
                    if self.args.plots and ni in self.plot_idx:
                        self.plot_training_samples(batch, ni)

                self.run_callbacks("on_train_batch_end")

            self.lr = {f"lr/pg{ir}": x["lr"] for ir, x in enumerate(self.optimizer.param_groups)}  # for loggers
            self.run_callbacks("on_train_epoch_end")
            if RANK in (-1, 0):
                final_epoch = epoch + 1 == self.epochs
                self.ema.update_attr(self.model, include=["yaml", "nc", "args", "names", "stride", "class_weights"])

                # Validation
                if (
                    (self.args.val and (((epoch + 1) % self.args.val_period == 0) or (self.epochs - epoch) <= 10))
                    or final_epoch
                    or self.stopper.possible_stop
                    or self.stop
                ):
                    self.metrics, self.fitness = self.validate()
                self.save_metrics(metrics={**self.label_loss_items(self.tloss), **self.metrics, **self.lr})
                self.stop |= self.stopper(epoch + 1, self.fitness) or final_epoch
                if self.args.time:
                    self.stop |= (time.time() - self.train_time_start) > (self.args.time * 3600)

                # Save model
                if self.args.save or final_epoch:
                    self.save_model()
                    self.run_callbacks("on_model_save")

            # Scheduler
            t = time.time()
            self.epoch_time = t - self.epoch_time_start
            self.epoch_time_start = t
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")  # suppress 'Detected lr_scheduler.step() before optimizer.step()'
                if self.args.time:
                    mean_epoch_time = (t - self.train_time_start) / (epoch - self.start_epoch + 1)
                    self.epochs = self.args.epochs = math.ceil(self.args.time * 3600 / mean_epoch_time)
                    self._setup_scheduler()
                    self.scheduler.last_epoch = self.epoch  # do not move
                    self.stop |= epoch >= self.epochs  # stop if exceeded epochs
                self.scheduler.step()
            self.run_callbacks("on_fit_epoch_end")
            torch.cuda.empty_cache()  # clear GPU memory at end of epoch, may help reduce CUDA out of memory errors

            # Early Stopping
            if RANK != -1:  # if DDP training
                broadcast_list = [self.stop if RANK == 0 else None]
                dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
                self.stop = broadcast_list[0]
            if self.stop:
                break  # must break all DDP ranks
            epoch += 1

        if RANK in (-1, 0):
            # Do final val with best.pt
            LOGGER.info(
                f"\n{epoch - self.start_epoch + 1} epochs completed in "
                f"{(time.time() - self.train_time_start) / 3600:.3f} hours."
            )
            self.final_eval()
            if self.args.plots:
                self.plot_metrics()
            self.run_callbacks("on_train_end")
        torch.cuda.empty_cache()
        self.run_callbacks("teardown")

    def save_model(self):
        """Save model training checkpoints with additional metadata."""
        import pandas as pd  # scope for faster startup

        metrics = {**self.metrics, **{"fitness": self.fitness}}
        results = {k.strip(): v for k, v in pd.read_csv(self.csv).to_dict(orient="list").items()}
        ckpt = {
            "epoch": self.epoch,
            "best_fitness": self.best_fitness,
            "model": deepcopy(de_parallel(self.model)).half(),
            "ema": deepcopy(self.ema.ema).half(),
            "updates": self.ema.updates,
            "optimizer": self.optimizer.state_dict(),
            "train_args": vars(self.args),  # save as dict
            "train_metrics": metrics,
            "train_results": results,
            "date": datetime.now().isoformat(),
            "version": __version__,
            "license": "AGPL-3.0 (https://ultralytics.com/license)",
            "docs": "https://docs.ultralytics.com",
        }

        # Save last and best
        torch.save(ckpt, self.last)
        if self.best_fitness == self.fitness:
            torch.save(ckpt, self.best)
        if (self.save_period > 0) and (self.epoch > 0) and (self.epoch % self.save_period == 0):
            torch.save(ckpt, self.wdir / f"epoch{self.epoch}.pt")

    @staticmethod
    def get_dataset(data):
        """
        Get train, val path from data dict if it exists.

        Returns None if data format is not recognized.
        """
        return data["train"], data.get("val") or data.get("test")

    def setup_model(self):
        """Load/create/download model for any task."""
        if isinstance(self.model, torch.nn.Module):  # if model is loaded beforehand. No setup needed
            return

        model, weights = self.model, None
        ckpt = None
        if str(model).endswith(".pt"):
            weights, ckpt = attempt_load_one_weight(model)
            cfg = ckpt["model"].yaml
        else:
            cfg = model
        self.model = self.get_model(cfg=cfg, weights=weights, verbose=RANK == -1)  # calls Model(cfg, weights)
        return ckpt

    def optimizer_step(self):
        """Perform a single step of the training optimizer with gradient clipping and EMA update."""
        self.scaler.unscale_(self.optimizer)  # unscale gradients
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=10.0)  # clip gradients
        self.scaler.step(self.optimizer)
        self.scaler.update()
        self.optimizer.zero_grad()
        if self.ema:
            self.ema.update(self.model)

    def preprocess_batch(self, batch):
        """Allows custom preprocessing model inputs and ground truths depending on task type."""
        return batch

    def validate(self):
        """
        Runs validation on test set using self.validator.

        The returned dict is expected to contain "fitness" key.
        """
        metrics = self.validator(self)
        fitness = metrics.pop("fitness", -self.loss.detach().cpu().numpy())  # use loss as fitness measure if not found
        if not self.best_fitness or self.best_fitness < fitness:
            self.best_fitness = fitness
        return metrics, fitness

    def get_model(self, cfg=None, weights=None, verbose=True):
        """Get model and raise NotImplementedError for loading cfg files."""
        raise NotImplementedError("This task trainer doesn't support loading cfg files")

    def get_validator(self):
        """Returns a NotImplementedError when the get_validator function is called."""
        raise NotImplementedError("get_validator function not implemented in trainer")

    def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode="train"):
        """Returns dataloader derived from torch.data.Dataloader."""
        raise NotImplementedError("get_dataloader function not implemented in trainer")

    def build_dataset(self, img_path, mode="train", batch=None):
        """Build dataset."""
        raise NotImplementedError("build_dataset function not implemented in trainer")

    def label_loss_items(self, loss_items=None, prefix="train"):
        """
        Returns a loss dict with labelled training loss items tensor.

        Note:
            This is not needed for classification but necessary for segmentation & detection
        """
        return {"loss": loss_items} if loss_items is not None else ["loss"]

    def set_model_attributes(self):
        """To set or update model parameters before training."""
        self.model.names = self.data["names"]

    def build_targets(self, preds, targets):
        """Builds target tensors for training YOLO model."""
        pass

    def progress_string(self):
        """Returns a string describing training progress."""
        return ""

    # TODO: may need to put these following functions into callback
    def plot_training_samples(self, batch, ni):
        """Plots training samples during YOLO training."""
        pass

    def plot_training_labels(self):
        """Plots training labels for YOLO model."""
        pass

    def save_metrics(self, metrics):
        """Saves training metrics to a CSV file."""
        keys, vals = list(metrics.keys()), list(metrics.values())
        n = len(metrics) + 1  # number of cols
        s = "" if self.csv.exists() else (("%23s," * n % tuple(["epoch"] + keys)).rstrip(",") + "\n")  # header
        with open(self.csv, "a") as f:
            f.write(s + ("%23.5g," * n % tuple([self.epoch + 1] + vals)).rstrip(",") + "\n")

    def plot_metrics(self):
        """Plot and display metrics visually."""
        pass

    def on_plot(self, name, data=None):
        """Registers plots (e.g. to be consumed in callbacks)."""
        path = Path(name)
        self.plots[path] = {"data": data, "timestamp": time.time()}

    def final_eval(self):
        """Performs final evaluation and validation for object detection YOLO model."""
        for f in self.last, self.best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is self.best:
                    LOGGER.info(f"\nValidating {f}...")
                    self.validator.args.plots = self.args.plots
                    self.metrics = self.validator(model=f)
                    self.metrics.pop("fitness", None)
                    self.run_callbacks("on_fit_epoch_end")

    def check_resume(self, overrides):
        """Check if resume checkpoint exists and update arguments accordingly."""
        resume = self.args.resume
        if resume:
            try:
                exists = isinstance(resume, (str, Path)) and Path(resume).exists()
                last = Path(check_file(resume) if exists else get_latest_run())

                # Check that resume data YAML exists, otherwise strip to force re-download of dataset
                ckpt_args = attempt_load_weights(last).args
                if not Path(ckpt_args["data"]).exists():
                    ckpt_args["data"] = self.args.data

                resume = True
                self.args = get_cfg(ckpt_args)
                self.args.model = self.args.resume = str(last)  # reinstate model
                for k in "imgsz", "batch", "device":  # allow arg updates to reduce memory or update device on resume
                    if k in overrides:
                        setattr(self.args, k, overrides[k])

            except Exception as e:
                raise FileNotFoundError(
                    "Resume checkpoint not found. Please pass a valid checkpoint to resume from, "
                    "i.e. 'yolo train resume model=path/to/last.pt'"
                ) from e
        self.resume = resume

    def resume_training(self, ckpt):
        """Resume YOLO training from given epoch and best fitness."""
        if ckpt is None or not self.resume:
            return
        best_fitness = 0.0
        start_epoch = ckpt["epoch"] + 1
        if ckpt["optimizer"] is not None:
            self.optimizer.load_state_dict(ckpt["optimizer"])  # optimizer
            best_fitness = ckpt["best_fitness"]
        if self.ema and ckpt.get("ema"):
            self.ema.ema.load_state_dict(ckpt["ema"].float().state_dict())  # EMA
            self.ema.updates = ckpt["updates"]
        assert start_epoch > 0, (
            f"{self.args.model} training to {self.epochs} epochs is finished, nothing to resume.\n"
            f"Start a new training without resuming, i.e. 'yolo train model={self.args.model}'"
        )
        LOGGER.info(f"Resuming training {self.args.model} from epoch {start_epoch + 1} to {self.epochs} total epochs")
        if self.epochs < start_epoch:
            LOGGER.info(
                f"{self.model} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {self.epochs} more epochs."
            )
            self.epochs += ckpt["epoch"]  # finetune additional epochs
        self.best_fitness = best_fitness
        self.start_epoch = start_epoch
        if start_epoch > (self.epochs - self.args.close_mosaic):
            self._close_dataloader_mosaic()

    def _close_dataloader_mosaic(self):
        """Update dataloaders to stop using mosaic augmentation."""
        if hasattr(self.train_loader.dataset, "mosaic"):
            self.train_loader.dataset.mosaic = False
        if hasattr(self.train_loader.dataset, "close_mosaic"):
            LOGGER.info("Closing dataloader mosaic")
            self.train_loader.dataset.close_mosaic(hyp=self.args)

    def build_optimizer(self, model, name="auto", lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5):
        """
        Constructs an optimizer for the given model, based on the specified optimizer name, learning rate, momentum,
        weight decay, and number of iterations.

        Args:
            model (torch.nn.Module): The model for which to build an optimizer.
            name (str, optional): The name of the optimizer to use. If 'auto', the optimizer is selected
                based on the number of iterations. Default: 'auto'.
            lr (float, optional): The learning rate for the optimizer. Default: 0.001.
            momentum (float, optional): The momentum factor for the optimizer. Default: 0.9.
            decay (float, optional): The weight decay for the optimizer. Default: 1e-5.
            iterations (float, optional): The number of iterations, which determines the optimizer if
                name is 'auto'. Default: 1e5.

        Returns:
            (torch.optim.Optimizer): The constructed optimizer.
        """

        g = [], [], []  # optimizer parameter groups
        bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k)  # normalization layers, i.e. BatchNorm2d()
        if name == "auto":
            LOGGER.info(
                f"{colorstr('optimizer:')} 'optimizer=auto' found, "
                f"ignoring 'lr0={self.args.lr0}' and 'momentum={self.args.momentum}' and "
                f"determining best 'optimizer', 'lr0' and 'momentum' automatically... "
            )
            nc = getattr(model, "nc", 10)  # number of classes
            lr_fit = round(0.002 * 5 / (4 + nc), 6)  # lr0 fit equation to 6 decimal places
            name, lr, momentum = ("SGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)
            self.args.warmup_bias_lr = 0.0  # no higher than 0.01 for Adam

        for module_name, module in model.named_modules():
            for param_name, param in module.named_parameters(recurse=False):
                fullname = f"{module_name}.{param_name}" if module_name else param_name
                if "bias" in fullname:  # bias (no decay)
                    g[2].append(param)
                elif isinstance(module, bn):  # weight (no decay)
                    g[1].append(param)
                else:  # weight (with decay)
                    g[0].append(param)

        if name in ("Adam", "Adamax", "AdamW", "NAdam", "RAdam"):
            optimizer = getattr(optim, name, optim.Adam)(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
        elif name == "RMSProp":
            optimizer = optim.RMSprop(g[2], lr=lr, momentum=momentum)
        elif name == "SGD":
            optimizer = optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)
        else:
            raise NotImplementedError(
                f"Optimizer '{name}' not found in list of available optimizers "
                f"[Adam, AdamW, NAdam, RAdam, RMSProp, SGD, auto]. "
                "To request support for additional optimizers please visit https://github.com/ultralytics/ultralytics."
            )

        optimizer.add_param_group({"params": g[0], "weight_decay": decay})  # add g0 with weight_decay
        optimizer.add_param_group({"params": g[1], "weight_decay": 0.0})  # add g1 (BatchNorm2d weights)
        LOGGER.info(
            f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
            f'{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias(decay=0.0)'
        )
        return optimizer
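
    # --- Illustrative sketch (not part of the original file) ---
    # The three groups built above decay only plain weights: biases (g[2])
    # and normalization-layer weights (g[1]) get weight_decay=0.0, the usual
    # recipe for YOLO-style training. The same split can be reproduced on
    # any module, e.g.:
    #
    #     import torch.nn as nn
    #     m = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))
    #     decay, no_decay, biases = [], [], []
    #     for mod in m.modules():
    #         for pname, p in mod.named_parameters(recurse=False):
    #             (biases if "bias" in pname else
    #              no_decay if isinstance(mod, nn.BatchNorm2d) else
    #              decay).append(p)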
@ -1,242 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
This module provides functionalities for hyperparameter tuning of the Ultralytics YOLO models for object detection,
instance segmentation, image classification, pose estimation, and multi-object tracking.

Hyperparameter tuning is the process of systematically searching for the optimal set of hyperparameters
that yield the best model performance. This is particularly crucial in deep learning models like YOLO,
where small changes in hyperparameters can lead to significant differences in model accuracy and efficiency.

Example:
    Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
    ```python
    from ultralytics import YOLO

    model = YOLO('yolov8n.pt')
    model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False)
    ```
"""

import random
import shutil
import subprocess
import time

import numpy as np
import torch

from ultralytics.cfg import get_cfg, get_save_dir
from ultralytics.utils import DEFAULT_CFG, LOGGER, callbacks, colorstr, remove_colorstr, yaml_print, yaml_save
from ultralytics.utils.plotting import plot_tune_results


class Tuner:
    """
    Class responsible for hyperparameter tuning of YOLO models.

    The class evolves YOLO model hyperparameters over a given number of iterations
    by mutating them according to the search space and retraining the model to evaluate their performance.

    Attributes:
        space (dict): Hyperparameter search space containing bounds and scaling factors for mutation.
        tune_dir (Path): Directory where evolution logs and results will be saved.
        tune_csv (Path): Path to the CSV file where evolution logs are saved.

    Methods:
        _mutate(hyp: dict) -> dict:
            Mutates the given hyperparameters within the bounds specified in `self.space`.

        __call__():
            Executes the hyperparameter evolution across multiple iterations.

    Example:
        Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
        ```python
        from ultralytics import YOLO

        model = YOLO('yolov8n.pt')
        model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False)
        ```

        Tune with custom search space.
        ```python
        from ultralytics import YOLO

        model = YOLO('yolov8n.pt')
        model.tune(space={key1: val1, key2: val2})  # custom search space dictionary
        ```
    """

    def __init__(self, args=DEFAULT_CFG, _callbacks=None):
        """
        Initialize the Tuner with configurations.

        Args:
            args (dict, optional): Configuration for hyperparameter evolution.
        """
        self.space = args.pop("space", None) or {  # key: (min, max, gain(optional))
            # 'optimizer': tune.choice(['SGD', 'Adam', 'AdamW', 'NAdam', 'RAdam', 'RMSProp']),
            "lr0": (1e-5, 1e-1),  # initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
            "lrf": (0.0001, 0.1),  # final OneCycleLR learning rate (lr0 * lrf)
            "momentum": (0.7, 0.98, 0.3),  # SGD momentum/Adam beta1
            "weight_decay": (0.0, 0.001),  # optimizer weight decay 5e-4
            "warmup_epochs": (0.0, 5.0),  # warmup epochs (fractions ok)
            "warmup_momentum": (0.0, 0.95),  # warmup initial momentum
            "box": (1.0, 20.0),  # box loss gain
            "cls": (0.2, 4.0),  # cls loss gain (scale with pixels)
            "dfl": (0.4, 6.0),  # dfl loss gain
            "hsv_h": (0.0, 0.1),  # image HSV-Hue augmentation (fraction)
            "hsv_s": (0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
            "hsv_v": (0.0, 0.9),  # image HSV-Value augmentation (fraction)
            "degrees": (0.0, 45.0),  # image rotation (+/- deg)
            "translate": (0.0, 0.9),  # image translation (+/- fraction)
            "scale": (0.0, 0.95),  # image scale (+/- gain)
            "shear": (0.0, 10.0),  # image shear (+/- deg)
            "perspective": (0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
            "flipud": (0.0, 1.0),  # image flip up-down (probability)
            "fliplr": (0.0, 1.0),  # image flip left-right (probability)
            "bgr": (0.0, 1.0),  # image channel bgr (probability)
            "mosaic": (0.0, 1.0),  # image mosaic (probability)
            "mixup": (0.0, 1.0),  # image mixup (probability)
            "copy_paste": (0.0, 1.0),  # segment copy-paste (probability)
        }
        self.args = get_cfg(overrides=args)
        self.tune_dir = get_save_dir(self.args, name="tune")
        self.tune_csv = self.tune_dir / "tune_results.csv"
        self.callbacks = _callbacks or callbacks.get_default_callbacks()
        self.prefix = colorstr("Tuner: ")
        callbacks.add_integration_callbacks(self)
        LOGGER.info(
            f"{self.prefix}Initialized Tuner instance with 'tune_dir={self.tune_dir}'\n"
            f"{self.prefix}💡 Learn about tuning at https://docs.ultralytics.com/guides/hyperparameter-tuning"
        )
def _mutate(self, parent="single", n=5, mutation=0.8, sigma=0.2):
|
||||
"""
|
||||
Mutates the hyperparameters based on bounds and scaling factors specified in `self.space`.
|
||||
|
||||
Args:
|
||||
parent (str): Parent selection method: 'single' or 'weighted'.
|
||||
n (int): Number of parents to consider.
|
||||
mutation (float): Probability of a parameter mutation in any given iteration.
|
||||
sigma (float): Standard deviation for Gaussian random number generator.
|
||||
|
||||
Returns:
|
||||
(dict): A dictionary containing mutated hyperparameters.
|
||||
"""
|
||||
if self.tune_csv.exists(): # if CSV file exists: select best hyps and mutate
|
||||
# Select parent(s)
|
||||
x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1)
|
||||
fitness = x[:, 0] # first column
|
||||
n = min(n, len(x)) # number of previous results to consider
|
||||
x = x[np.argsort(-fitness)][:n] # top n mutations
|
||||
w = x[:, 0] - x[:, 0].min() + 1e-6 # weights (sum > 0)
|
||||
if parent == "single" or len(x) == 1:
|
||||
# x = x[random.randint(0, n - 1)] # random selection
|
||||
x = x[random.choices(range(n), weights=w)[0]] # weighted selection
|
||||
elif parent == "weighted":
|
||||
x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
|
||||
|
||||
# Mutate
|
||||
r = np.random # method
|
||||
r.seed(int(time.time()))
|
||||
g = np.array([v[2] if len(v) == 3 else 1.0 for k, v in self.space.items()]) # gains 0-1
|
||||
ng = len(self.space)
|
||||
v = np.ones(ng)
|
||||
while all(v == 1): # mutate until a change occurs (prevent duplicates)
|
||||
v = (g * (r.random(ng) < mutation) * r.randn(ng) * r.random() * sigma + 1).clip(0.3, 3.0)
|
||||
hyp = {k: float(x[i + 1] * v[i]) for i, k in enumerate(self.space.keys())}
|
||||
else:
|
||||
hyp = {k: getattr(self.args, k) for k in self.space.keys()}
|
||||
|
||||
# Constrain to limits
|
||||
for k, v in self.space.items():
|
||||
hyp[k] = max(hyp[k], v[0]) # lower limit
|
||||
hyp[k] = min(hyp[k], v[1]) # upper limit
|
||||
hyp[k] = round(hyp[k], 5) # significant digits
|
||||
|
||||
return hyp
|
||||
|
||||
def __call__(self, model=None, iterations=10, cleanup=True):
|
||||
"""
|
||||
Executes the hyperparameter evolution process when the Tuner instance is called.
|
||||
|
||||
This method iterates through the number of iterations, performing the following steps in each iteration:
|
||||
1. Load the existing hyperparameters or initialize new ones.
|
||||
2. Mutate the hyperparameters using the `mutate` method.
|
||||
3. Train a YOLO model with the mutated hyperparameters.
|
||||
4. Log the fitness score and mutated hyperparameters to a CSV file.
|
||||
|
||||
Args:
|
||||
model (Model): A pre-initialized YOLO model to be used for training.
|
||||
iterations (int): The number of generations to run the evolution for.
|
||||
cleanup (bool): Whether to delete iteration weights to reduce storage space used during tuning.
|
||||
|
||||
Note:
|
||||
The method utilizes the `self.tune_csv` Path object to read and log hyperparameters and fitness scores.
|
||||
Ensure this path is set correctly in the Tuner instance.
|
||||
"""
|
||||
|
||||
t0 = time.time()
|
||||
best_save_dir, best_metrics = None, None
|
||||
(self.tune_dir / "weights").mkdir(parents=True, exist_ok=True)
|
||||
for i in range(iterations):
|
||||
# Mutate hyperparameters
|
||||
mutated_hyp = self._mutate()
|
||||
LOGGER.info(f"{self.prefix}Starting iteration {i + 1}/{iterations} with hyperparameters: {mutated_hyp}")
|
||||
|
||||
metrics = {}
|
||||
train_args = {**vars(self.args), **mutated_hyp}
|
||||
save_dir = get_save_dir(get_cfg(train_args))
|
||||
weights_dir = save_dir / "weights"
|
||||
try:
|
||||
# Train YOLO model with mutated hyperparameters (run in subprocess to avoid dataloader hang)
|
||||
cmd = ["yolo", "train", *(f"{k}={v}" for k, v in train_args.items())]
|
||||
return_code = subprocess.run(cmd, check=True).returncode
|
||||
ckpt_file = weights_dir / ("best.pt" if (weights_dir / "best.pt").exists() else "last.pt")
|
||||
metrics = torch.load(ckpt_file)["train_metrics"]
|
||||
assert return_code == 0, "training failed"
|
||||
|
||||
except Exception as e:
|
||||
LOGGER.warning(f"WARNING ❌️ training failure for hyperparameter tuning iteration {i + 1}\n{e}")
|
||||
|
||||
# Save results and mutated_hyp to CSV
|
||||
fitness = metrics.get("fitness", 0.0)
|
||||
log_row = [round(fitness, 5)] + [mutated_hyp[k] for k in self.space.keys()]
|
||||
headers = "" if self.tune_csv.exists() else (",".join(["fitness"] + list(self.space.keys())) + "\n")
|
||||
with open(self.tune_csv, "a") as f:
|
||||
f.write(headers + ",".join(map(str, log_row)) + "\n")
|
||||
|
||||
# Get best results
|
||||
x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1)
|
||||
fitness = x[:, 0] # first column
|
||||
best_idx = fitness.argmax()
|
||||
best_is_current = best_idx == i
|
||||
if best_is_current:
|
||||
best_save_dir = save_dir
|
||||
best_metrics = {k: round(v, 5) for k, v in metrics.items()}
|
||||
for ckpt in weights_dir.glob("*.pt"):
|
||||
shutil.copy2(ckpt, self.tune_dir / "weights")
|
||||
elif cleanup:
|
||||
shutil.rmtree(ckpt_file.parent) # remove iteration weights/ dir to reduce storage space
|
||||
|
||||
# Plot tune results
|
||||
plot_tune_results(self.tune_csv)
|
||||
|
||||
# Save and print tune results
|
||||
header = (
|
||||
f'{self.prefix}{i + 1}/{iterations} iterations complete ✅ ({time.time() - t0:.2f}s)\n'
|
||||
f'{self.prefix}Results saved to {colorstr("bold", self.tune_dir)}\n'
|
||||
f'{self.prefix}Best fitness={fitness[best_idx]} observed at iteration {best_idx + 1}\n'
|
||||
f'{self.prefix}Best fitness metrics are {best_metrics}\n'
|
||||
f'{self.prefix}Best fitness model is {best_save_dir}\n'
|
||||
f'{self.prefix}Best fitness hyperparameters are printed below.\n'
|
||||
)
|
||||
LOGGER.info("\n" + header)
|
||||
data = {k: float(x[best_idx, i + 1]) for i, k in enumerate(self.space.keys())}
|
||||
yaml_save(
|
||||
self.tune_dir / "best_hyperparameters.yaml",
|
||||
data=data,
|
||||
header=remove_colorstr(header.replace(self.prefix, "# ")) + "\n",
|
||||
)
|
||||
yaml_print(self.tune_dir / "best_hyperparameters.yaml")
|
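The `_mutate` step above is the heart of the evolutionary loop: pick a high-fitness parent row from the CSV, then multiply each hyperparameter by a clipped Gaussian gain and re-constrain it to its bounds. A stripped-down sketch of that mutation rule, assuming a `space` dict of plain `(min, max)` bounds (standalone and illustrative, not the Tuner API):

```python
import numpy as np


def mutate(parent: dict, space: dict, mutation=0.8, sigma=0.2) -> dict:
    """Multiply each value by a clipped Gaussian gain, then constrain it to its (min, max) bounds."""
    rng = np.random.default_rng()
    ng = len(space)
    v = np.ones(ng)
    while (v == 1).all():  # retry until at least one parameter actually changes
        v = (rng.random(ng) < mutation) * rng.standard_normal(ng) * rng.random() * sigma + 1
        v = v.clip(0.3, 3.0)  # limit each gain to [0.3, 3.0], as in the Tuner
    return {
        k: round(min(max(parent[k] * v[i], lo), hi), 5)
        for i, (k, (lo, hi)) in enumerate(space.items())
    }


# Example: mutate({"lr0": 0.01, "momentum": 0.9}, {"lr0": (1e-5, 1e-1), "momentum": (0.7, 0.98)})
```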
@ -1,345 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Check a model's accuracy on a test or val split of a dataset.

Usage:
    $ yolo mode=val model=yolov8n.pt data=coco128.yaml imgsz=640

Usage - formats:
    $ yolo mode=val model=yolov8n.pt                 # PyTorch
                          yolov8n.torchscript        # TorchScript
                          yolov8n.onnx               # ONNX Runtime or OpenCV DNN with dnn=True
                          yolov8n_openvino_model     # OpenVINO
                          yolov8n.engine             # TensorRT
                          yolov8n.mlpackage          # CoreML (macOS-only)
                          yolov8n_saved_model        # TensorFlow SavedModel
                          yolov8n.pb                 # TensorFlow GraphDef
                          yolov8n.tflite             # TensorFlow Lite
                          yolov8n_edgetpu.tflite     # TensorFlow Edge TPU
                          yolov8n_paddle_model       # PaddlePaddle
                          yolov8n_ncnn_model         # NCNN
"""

import json
import time
from pathlib import Path

import numpy as np
import torch

from ultralytics.cfg import get_cfg, get_save_dir
from ultralytics.data.utils import check_cls_dataset, check_det_dataset
from ultralytics.nn.autobackend import AutoBackend
from ultralytics.utils import LOGGER, TQDM, callbacks, colorstr, emojis
from ultralytics.utils.checks import check_imgsz
from ultralytics.utils.ops import Profile
from ultralytics.utils.torch_utils import de_parallel, select_device, smart_inference_mode


class BaseValidator:
    """
    BaseValidator.

    A base class for creating validators.

    Attributes:
        args (SimpleNamespace): Configuration for the validator.
        dataloader (DataLoader): Dataloader to use for validation.
        pbar (tqdm): Progress bar to update during validation.
        model (nn.Module): Model to validate.
        data (dict): Data dictionary.
        device (torch.device): Device to use for validation.
        batch_i (int): Current batch index.
        training (bool): Whether the model is in training mode.
        names (dict): Class names.
        seen: Records the number of images seen so far during validation.
        stats: Placeholder for statistics during validation.
        confusion_matrix: Placeholder for a confusion matrix.
        nc: Number of classes.
        iouv (torch.Tensor): IoU thresholds from 0.50 to 0.95 in steps of 0.05.
        jdict (dict): Dictionary to store JSON validation results.
        speed (dict): Dictionary with keys 'preprocess', 'inference', 'loss', 'postprocess' and their respective
            batch processing times in milliseconds.
        save_dir (Path): Directory to save results.
        plots (dict): Dictionary to store plots for visualization.
        callbacks (dict): Dictionary to store various callback functions.
    """

    def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
        """
        Initializes a BaseValidator instance.

        Args:
            dataloader (torch.utils.data.DataLoader): Dataloader to be used for validation.
            save_dir (Path, optional): Directory to save results.
            pbar (tqdm.tqdm): Progress bar for displaying progress.
            args (SimpleNamespace): Configuration for the validator.
            _callbacks (dict): Dictionary to store various callback functions.
        """
        self.args = get_cfg(overrides=args)
        self.dataloader = dataloader
        self.pbar = pbar
        self.stride = None
        self.data = None
        self.device = None
        self.batch_i = None
        self.training = True
        self.names = None
        self.seen = None
        self.stats = None
        self.confusion_matrix = None
        self.nc = None
        self.iouv = None
        self.jdict = None
        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}

        self.save_dir = save_dir or get_save_dir(self.args)
        (self.save_dir / "labels" if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)
        if self.args.conf is None:
            self.args.conf = 0.001  # default conf=0.001
        self.args.imgsz = check_imgsz(self.args.imgsz, max_dim=1)

        self.plots = {}
        self.callbacks = _callbacks or callbacks.get_default_callbacks()

    @smart_inference_mode()
    def __call__(self, trainer=None, model=None):
        """Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer
        gets priority).
        """
        self.training = trainer is not None
        augment = self.args.augment and (not self.training)
        if self.training:
            self.device = trainer.device
            self.data = trainer.data
            # self.args.half = self.device.type != "cpu"  # force FP16 val during training
            model = trainer.ema.ema or trainer.model
            model = model.half() if self.args.half else model.float()
            # self.model = model
            self.loss = torch.zeros_like(trainer.loss_items, device=trainer.device)
            self.args.plots &= trainer.stopper.possible_stop or (trainer.epoch == trainer.epochs - 1)
            model.eval()
        else:
            callbacks.add_integration_callbacks(self)
            model = AutoBackend(
                weights=model or self.args.model,
                device=select_device(self.args.device, self.args.batch),
                dnn=self.args.dnn,
                data=self.args.data,
                fp16=self.args.half,
            )
            # self.model = model
            self.device = model.device  # update device
            self.args.half = model.fp16  # update half
            stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
            imgsz = check_imgsz(self.args.imgsz, stride=stride)
            if engine:
                self.args.batch = model.batch_size
            elif not pt and not jit:
                self.args.batch = 1  # export.py models default to batch-size 1
                LOGGER.info(f"Forcing batch=1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models")

            if str(self.args.data).split(".")[-1] in ("yaml", "yml"):
                self.data = check_det_dataset(self.args.data)
            elif self.args.task == "classify":
                self.data = check_cls_dataset(self.args.data, split=self.args.split)
            else:
                raise FileNotFoundError(emojis(f"Dataset '{self.args.data}' for task={self.args.task} not found ❌"))

            if self.device.type in ("cpu", "mps"):
                self.args.workers = 0  # faster CPU val as time dominated by inference, not dataloading
            if not pt:
                self.args.rect = False
            self.stride = model.stride  # used in get_dataloader() for padding
            self.dataloader = self.dataloader or self.get_dataloader(self.data.get(self.args.split), self.args.batch)

            model.eval()
            model.warmup(imgsz=(1 if pt else self.args.batch, 3, imgsz, imgsz))  # warmup

        self.run_callbacks("on_val_start")
        dt = (
            Profile(device=self.device),
            Profile(device=self.device),
            Profile(device=self.device),
            Profile(device=self.device),
        )
        bar = TQDM(self.dataloader, desc=self.get_desc(), total=len(self.dataloader))
        self.init_metrics(de_parallel(model))
        self.jdict = []  # empty before each val
        for batch_i, batch in enumerate(bar):
            self.run_callbacks("on_val_batch_start")
            self.batch_i = batch_i
            # Preprocess
            with dt[0]:
                batch = self.preprocess(batch)

            # Inference
            with dt[1]:
                preds = model(batch["img"], augment=augment)

            # Loss
            with dt[2]:
                if self.training:
                    self.loss += model.loss(batch, preds)[1]

            # Postprocess
            with dt[3]:
                preds = self.postprocess(preds)

            self.update_metrics(preds, batch)
            if self.args.plots and batch_i < 3:
                self.plot_val_samples(batch, batch_i)
                self.plot_predictions(batch, preds, batch_i)

            self.run_callbacks("on_val_batch_end")
        stats = self.get_stats()
        self.check_stats(stats)
        self.speed = dict(zip(self.speed.keys(), (x.t / len(self.dataloader.dataset) * 1e3 for x in dt)))
        self.finalize_metrics()
        if not (self.args.save_json and self.is_coco and len(self.jdict)):
            self.print_results()
        self.run_callbacks("on_val_end")
        if self.training:
            model.float()
            if self.args.save_json and self.jdict:
                with open(str(self.save_dir / "predictions.json"), "w") as f:
                    LOGGER.info(f"Saving {f.name}...")
                    json.dump(self.jdict, f)  # flatten and save
                stats = self.eval_json(stats)  # update stats
            stats["fitness"] = stats["metrics/mAP50-95(B)"]
            results = {**stats, **trainer.label_loss_items(self.loss.cpu() / len(self.dataloader), prefix="val")}
            return {k: round(float(v), 5) for k, v in results.items()}  # return results as 5 decimal place floats
        else:
            LOGGER.info(
                "Speed: %.1fms preprocess, %.1fms inference, %.1fms loss, %.1fms postprocess per image"
                % tuple(self.speed.values())
            )
            if self.args.save_json and self.jdict:
                with open(str(self.save_dir / "predictions.json"), "w") as f:
                    LOGGER.info(f"Saving {f.name}...")
                    json.dump(self.jdict, f)  # flatten and save
                stats = self.eval_json(stats)  # update stats
            if self.args.plots or self.args.save_json:
                LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}")
            return stats

    def match_predictions(self, pred_classes, true_classes, iou, use_scipy=False):
        """
        Matches predictions to ground truth objects (pred_classes, true_classes) using IoU.

        Args:
            pred_classes (torch.Tensor): Predicted class indices of shape(N,).
            true_classes (torch.Tensor): Target class indices of shape(M,).
            iou (torch.Tensor): An NxM tensor containing the pairwise IoU values for predictions and ground truth.
            use_scipy (bool): Whether to use scipy for matching (more precise).

        Returns:
            (torch.Tensor): Correct tensor of shape(N,10) for 10 IoU thresholds.
        """
        # Dx10 matrix, where D - detections, 10 - IoU thresholds
        correct = np.zeros((pred_classes.shape[0], self.iouv.shape[0])).astype(bool)
        # LxD matrix where L - labels (rows), D - detections (columns)
        correct_class = true_classes[:, None] == pred_classes
        iou = iou * correct_class  # zero out the wrong classes
        iou = iou.cpu().numpy()
        for i, threshold in enumerate(self.iouv.cpu().tolist()):
            if use_scipy:
                # WARNING: known issue that reduces mAP in https://github.com/ultralytics/ultralytics/pull/4708
                import scipy  # scope import to avoid importing for all commands

                cost_matrix = iou * (iou >= threshold)
                if cost_matrix.any():
                    labels_idx, detections_idx = scipy.optimize.linear_sum_assignment(cost_matrix, maximize=True)
                    valid = cost_matrix[labels_idx, detections_idx] > 0
                    if valid.any():
                        correct[detections_idx[valid], i] = True
            else:
                matches = np.nonzero(iou >= threshold)  # IoU >= threshold and classes match
                matches = np.array(matches).T
                if matches.shape[0]:
                    if matches.shape[0] > 1:
                        matches = matches[iou[matches[:, 0], matches[:, 1]].argsort()[::-1]]
                        matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                        # matches = matches[matches[:, 2].argsort()[::-1]]
                        matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
                    correct[matches[:, 1].astype(int), i] = True
        return torch.tensor(correct, dtype=torch.bool, device=pred_classes.device)

    def add_callback(self, event: str, callback):
        """Appends the given callback."""
        self.callbacks[event].append(callback)

    def run_callbacks(self, event: str):
        """Runs all callbacks associated with a specified event."""
        for callback in self.callbacks.get(event, []):
            callback(self)

    def get_dataloader(self, dataset_path, batch_size):
        """Get data loader from dataset path and batch size."""
        raise NotImplementedError("get_dataloader function not implemented for this validator")

    def build_dataset(self, img_path):
        """Build dataset."""
        raise NotImplementedError("build_dataset function not implemented in validator")

    def preprocess(self, batch):
        """Preprocesses an input batch."""
        return batch

    def postprocess(self, preds):
        """Post-processes the predictions."""
        return preds

    def init_metrics(self, model):
        """Initialize performance metrics for the YOLO model."""
        pass

    def update_metrics(self, preds, batch):
        """Updates metrics based on predictions and batch."""
        pass

    def finalize_metrics(self, *args, **kwargs):
        """Finalizes and returns all metrics."""
        pass

    def get_stats(self):
        """Returns statistics about the model's performance."""
        return {}

    def check_stats(self, stats):
        """Checks statistics."""
        pass

    def print_results(self):
        """Prints the results of the model's predictions."""
        pass

    def get_desc(self):
        """Get description of the YOLO model."""
        pass

    @property
    def metric_keys(self):
        """Returns the metric keys used in YOLO training/validation."""
        return []

    def on_plot(self, name, data=None):
        """Registers plots (e.g. to be consumed in callbacks)."""
        self.plots[Path(name)] = {"data": data, "timestamp": time.time()}

    # TODO: may need to put these following functions into callback
    def plot_val_samples(self, batch, ni):
        """Plots validation samples during training."""
        pass

    def plot_predictions(self, batch, preds, ni):
        """Plots YOLO model predictions on batch images."""
        pass

    def pred_to_json(self, preds, batch):
        """Convert predictions to JSON format."""
        pass

    def eval_json(self, stats):
        """Evaluate and return JSON format of prediction statistics."""
        pass
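The non-scipy branch of `match_predictions` is a greedy one-to-one matching: sort candidate (label, detection) pairs by IoU, then keep only the first occurrence of each detection and each label. A small self-contained sketch of that rule (the function name is illustrative, not the validator API):

```python
import numpy as np


def greedy_match(iou: np.ndarray, threshold: float) -> np.ndarray:
    """Return (label_idx, det_idx) pairs matched one-to-one, highest IoU first.

    `iou` is an LxD matrix with wrong-class entries already zeroed out.
    """
    matches = np.argwhere(iou >= threshold)  # candidate (label, detection) pairs
    if matches.shape[0] > 1:
        matches = matches[iou[matches[:, 0], matches[:, 1]].argsort()[::-1]]  # sort best IoU first
        matches = matches[np.unique(matches[:, 1], return_index=True)[1]]  # at most one label per detection
        matches = matches[np.unique(matches[:, 0], return_index=True)[1]]  # at most one detection per label
    return matches


# Example: two labels, three detections
iou = np.array([[0.9, 0.6, 0.0],
                [0.0, 0.7, 0.3]])
print(greedy_match(iou, 0.5))  # matches label 0 to detection 0 and label 1 to detection 1
```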
@ -1,128 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import requests

from ultralytics.data.utils import HUBDatasetStats
from ultralytics.hub.auth import Auth
from ultralytics.hub.utils import HUB_API_ROOT, HUB_WEB_ROOT, PREFIX
from ultralytics.utils import LOGGER, SETTINGS, checks


def login(api_key: str = None, save=True) -> bool:
    """
    Log in to the Ultralytics HUB API using the provided API key.

    The session is not stored; a new session is created when needed using the saved SETTINGS or the HUB_API_KEY
    environment variable if successfully authenticated.

    Args:
        api_key (str, optional): API key to use for authentication.
            If not provided, it will be retrieved from SETTINGS or the HUB_API_KEY environment variable.
        save (bool, optional): Whether to save the API key to SETTINGS if authentication is successful.

    Returns:
        (bool): True if authentication is successful, False otherwise.
    """
    checks.check_requirements("hub-sdk>=0.0.6")
    from hub_sdk import HUBClient

    api_key_url = f"{HUB_WEB_ROOT}/settings?tab=api+keys"  # set the redirect URL
    saved_key = SETTINGS.get("api_key")
    active_key = api_key or saved_key
    credentials = {"api_key": active_key} if active_key and active_key != "" else None  # set credentials

    client = HUBClient(credentials)  # initialize HUBClient

    if client.authenticated:
        # Successfully authenticated with HUB

        if save and client.api_key != saved_key:
            SETTINGS.update({"api_key": client.api_key})  # update settings with valid API key

        # Set message based on whether key was provided or retrieved from settings
        log_message = (
            "New authentication successful ✅" if client.api_key == api_key or not credentials else "Authenticated ✅"
        )
        LOGGER.info(f"{PREFIX}{log_message}")

        return True
    else:
        # Failed to authenticate with HUB
        LOGGER.info(f"{PREFIX}Get API key from {api_key_url} and then run 'yolo hub login API_KEY'")
        return False


def logout():
    """
    Log out of Ultralytics HUB by removing the API key from the settings file. To log in again, use 'yolo hub login'.

    Example:
        ```python
        from ultralytics import hub

        hub.logout()
        ```
    """
    SETTINGS["api_key"] = ""
    SETTINGS.save()
    LOGGER.info(f"{PREFIX}logged out ✅. To log in again, use 'yolo hub login'.")


def reset_model(model_id=""):
    """Reset a trained model to an untrained state."""
    r = requests.post(f"{HUB_API_ROOT}/model-reset", json={"modelId": model_id}, headers={"x-api-key": Auth().api_key})
    if r.status_code == 200:
        LOGGER.info(f"{PREFIX}Model reset successfully")
        return
    LOGGER.warning(f"{PREFIX}Model reset failure {r.status_code} {r.reason}")


def export_fmts_hub():
    """Returns a list of HUB-supported export formats."""
    from ultralytics.engine.exporter import export_formats

    return list(export_formats()["Argument"][1:]) + ["ultralytics_tflite", "ultralytics_coreml"]


def export_model(model_id="", format="torchscript"):
    """Export a model to the specified format."""
    assert format in export_fmts_hub(), f"Unsupported export format '{format}', valid formats are {export_fmts_hub()}"
    r = requests.post(
        f"{HUB_API_ROOT}/v1/models/{model_id}/export", json={"format": format}, headers={"x-api-key": Auth().api_key}
    )
    assert r.status_code == 200, f"{PREFIX}{format} export failure {r.status_code} {r.reason}"
    LOGGER.info(f"{PREFIX}{format} export started ✅")


def get_export(model_id="", format="torchscript"):
    """Get an exported model dictionary with download URL."""
    assert format in export_fmts_hub(), f"Unsupported export format '{format}', valid formats are {export_fmts_hub()}"
    r = requests.post(
        f"{HUB_API_ROOT}/get-export",
        json={"apiKey": Auth().api_key, "modelId": model_id, "format": format},
        headers={"x-api-key": Auth().api_key},
    )
    assert r.status_code == 200, f"{PREFIX}{format} get_export failure {r.status_code} {r.reason}"
    return r.json()


def check_dataset(path="", task="detect"):
    """
    Check a HUB dataset Zip file for errors before it is uploaded to the HUB. Usage examples are given below.

    Args:
        path (str, optional): Path to data.zip (with data.yaml inside data.zip). Defaults to ''.
        task (str, optional): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Defaults to 'detect'.

    Example:
        ```python
        from ultralytics.hub import check_dataset

        check_dataset('path/to/coco8.zip', task='detect')  # detect dataset
        check_dataset('path/to/coco8-seg.zip', task='segment')  # segment dataset
        check_dataset('path/to/coco8-pose.zip', task='pose')  # pose dataset
        ```
    """
    HUBDatasetStats(path=path, task=task).get_json()
    LOGGER.info(f"Checks completed correctly ✅. Upload this dataset to {HUB_WEB_ROOT}/datasets/.")
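Taken together, these helpers cover the whole HUB round trip: authenticate, kick off a server-side export, and fetch the download URL. A hedged usage sketch, where the API key and model ID are placeholders and the functions behave as defined above:

```python
from ultralytics import hub

# Authenticate once; a valid key is saved to SETTINGS for future sessions
hub.login("YOUR_API_KEY")  # placeholder key

MODEL_ID = "XXXXXXXXXXXXXXXXXXXX"  # hypothetical 20-character HUB model ID

hub.export_model(model_id=MODEL_ID, format="onnx")  # start a server-side export
info = hub.get_export(model_id=MODEL_ID, format="onnx")  # dict containing the download URL
print(info)

hub.logout()
```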
@ -1,136 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import requests

from ultralytics.hub.utils import HUB_API_ROOT, HUB_WEB_ROOT, PREFIX, request_with_credentials
from ultralytics.utils import LOGGER, SETTINGS, emojis, is_colab

API_KEY_URL = f"{HUB_WEB_ROOT}/settings?tab=api+keys"


class Auth:
    """
    Manages authentication processes including API key handling, cookie-based authentication, and header generation.

    The class supports different methods of authentication:
    1. Directly using an API key.
    2. Authenticating using browser cookies (specifically in Google Colab).
    3. Prompting the user to enter an API key.

    Attributes:
        id_token (str or bool): Token used for identity verification, initialized as False.
        api_key (str or bool): API key for authentication, initialized as False.
        model_key (bool): Placeholder for model key, initialized as False.
    """

    id_token = api_key = model_key = False

    def __init__(self, api_key="", verbose=False):
        """
        Initialize the Auth class with an optional API key.

        Args:
            api_key (str, optional): May be an API key or a combined API key and model ID, i.e. key_id.
        """
        # Split the input API key in case it contains a combined key_model and keep only the API key part
        api_key = api_key.split("_")[0]

        # Set API key attribute as value passed or SETTINGS API key if none passed
        self.api_key = api_key or SETTINGS.get("api_key", "")

        # If an API key is provided
        if self.api_key:
            # If the provided API key matches the API key in the SETTINGS
            if self.api_key == SETTINGS.get("api_key"):
                # Log that the user is already logged in
                if verbose:
                    LOGGER.info(f"{PREFIX}Authenticated ✅")
                return
            else:
                # Attempt to authenticate with the provided API key
                success = self.authenticate()
        # If the API key is not provided and the environment is a Google Colab notebook
        elif is_colab():
            # Attempt to authenticate using browser cookies
            success = self.auth_with_cookies()
        else:
            # Request an API key
            success = self.request_api_key()

        # Update SETTINGS with the new API key after successful authentication
        if success:
            SETTINGS.update({"api_key": self.api_key})
            # Log that the new login was successful
            if verbose:
                LOGGER.info(f"{PREFIX}New authentication successful ✅")
        elif verbose:
            LOGGER.info(f"{PREFIX}Get API key from {API_KEY_URL} and then run 'yolo hub login API_KEY'")

    def request_api_key(self, max_attempts=3):
        """
        Prompt the user to input their API key.

        Returns True once authentication succeeds, otherwise raises a ConnectionError after max_attempts.
        """
        import getpass

        for attempts in range(max_attempts):
            LOGGER.info(f"{PREFIX}Login. Attempt {attempts + 1} of {max_attempts}")
            input_key = getpass.getpass(f"Enter API key from {API_KEY_URL} ")
            self.api_key = input_key.split("_")[0]  # remove model id if present
            if self.authenticate():
                return True
        raise ConnectionError(emojis(f"{PREFIX}Failed to authenticate ❌"))

    def authenticate(self) -> bool:
        """
        Attempt to authenticate with the server using either id_token or API key.

        Returns:
            (bool): True if authentication is successful, False otherwise.
        """
        try:
            if header := self.get_auth_header():
                r = requests.post(f"{HUB_API_ROOT}/v1/auth", headers=header)
                if not r.json().get("success", False):
                    raise ConnectionError("Unable to authenticate.")
                return True
            raise ConnectionError("User has not authenticated locally.")
        except ConnectionError:
            self.id_token = self.api_key = False  # reset invalid
            LOGGER.warning(f"{PREFIX}Invalid API key ⚠️")
            return False

    def auth_with_cookies(self) -> bool:
        """
        Attempt to fetch authentication via cookies and set id_token. User must be logged in to HUB and running in a
        supported browser.

        Returns:
            (bool): True if authentication is successful, False otherwise.
        """
        if not is_colab():
            return False  # currently only works with Colab
        try:
            authn = request_with_credentials(f"{HUB_API_ROOT}/v1/auth/auto")
            if authn.get("success", False):
                self.id_token = authn.get("data", {}).get("idToken", None)
                self.authenticate()
                return True
            raise ConnectionError("Unable to fetch browser authentication details.")
        except ConnectionError:
            self.id_token = False  # reset invalid
            return False

    def get_auth_header(self):
        """
        Get the authentication header for making API requests.

        Returns:
            (dict): The authentication header if id_token or API key is set, None otherwise.
        """
        if self.id_token:
            return {"authorization": f"Bearer {self.id_token}"}
        elif self.api_key:
            return {"x-api-key": self.api_key}
        # else returns None
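The precedence in `get_auth_header` is worth noting: a bearer `id_token` (from Colab cookies) wins over the `x-api-key` header. A hedged usage sketch, with the key as a placeholder:

```python
from ultralytics.hub.auth import Auth

auth = Auth(api_key="YOUR_API_KEY", verbose=True)  # placeholder key; may also be "key_modelid"
header = auth.get_auth_header()  # {"authorization": "Bearer ..."}, {"x-api-key": ...}, or None
print(header)
```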
@ -1,355 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import threading
import time
from http import HTTPStatus
from pathlib import Path

import requests

from ultralytics.hub.utils import HUB_WEB_ROOT, HELP_MSG, PREFIX, TQDM
from ultralytics.utils import LOGGER, SETTINGS, __version__, checks, emojis, is_colab
from ultralytics.utils.errors import HUBModelError

AGENT_NAME = f"python-{__version__}-colab" if is_colab() else f"python-{__version__}-local"


class HUBTrainingSession:
    """
    HUB training session for Ultralytics HUB YOLO models. Handles model initialization, heartbeats, and checkpointing.

    Attributes:
        agent_id (str): Identifier for the instance communicating with the server.
        model_id (str): Identifier for the YOLO model being trained.
        model_url (str): URL for the model in Ultralytics HUB.
        api_url (str): API URL for the model in Ultralytics HUB.
        auth_header (dict): Authentication header for the Ultralytics HUB API requests.
        rate_limits (dict): Rate limits for different API calls (in seconds).
        timers (dict): Timers for rate limiting.
        metrics_queue (dict): Queue for the model's metrics.
        model (dict): Model data fetched from Ultralytics HUB.
        alive (bool): Indicates if the heartbeat loop is active.
    """

    def __init__(self, identifier):
        """
        Initialize the HUBTrainingSession with the provided model identifier.

        Args:
            identifier (str): Model identifier used to initialize the HUB training session.
                It can be a URL string or a model key with a specific format.

        Raises:
            ValueError: If the provided model identifier is invalid.
            ConnectionError: If connecting with global API key is not supported.
            ModuleNotFoundError: If hub-sdk package is not installed.
        """
        from hub_sdk import HUBClient

        self.rate_limits = {
            "metrics": 3.0,
            "ckpt": 900.0,
            "heartbeat": 300.0,
        }  # rate limits (seconds)
        self.metrics_queue = {}  # holds metrics for each epoch until upload
        self.metrics_upload_failed_queue = {}  # holds metrics for each epoch if upload failed
        self.timers = {}  # holds timers in ultralytics/utils/callbacks/hub.py

        # Parse input
        api_key, model_id, self.filename = self._parse_identifier(identifier)

        # Get credentials
        active_key = api_key or SETTINGS.get("api_key")
        credentials = {"api_key": active_key} if active_key else None  # set credentials

        # Initialize client
        self.client = HUBClient(credentials)

        if model_id:
            self.load_model(model_id)  # load existing model
        else:
            self.model = self.client.model()  # load empty model

    def load_model(self, model_id):
        """Loads an existing model from Ultralytics HUB using the provided model identifier."""
        self.model = self.client.model(model_id)
        if not self.model.data:  # then model does not exist
            raise ValueError(emojis("❌ The specified HUB model does not exist"))  # TODO: improve error handling

        self.model_url = f"{HUB_WEB_ROOT}/models/{self.model.id}"

        self._set_train_args()

        # Start heartbeats for HUB to monitor agent
        self.model.start_heartbeat(self.rate_limits["heartbeat"])
        LOGGER.info(f"{PREFIX}View model at {self.model_url} 🚀")

    def create_model(self, model_args):
        """Initializes a HUB training session with the specified model identifier."""
        payload = {
            "config": {
                "batchSize": model_args.get("batch", -1),
                "epochs": model_args.get("epochs", 300),
                "imageSize": model_args.get("imgsz", 640),
                "patience": model_args.get("patience", 100),
                "device": model_args.get("device", ""),
                "cache": model_args.get("cache", "ram"),
            },
            "dataset": {"name": model_args.get("data")},
            "lineage": {
                "architecture": {
                    "name": self.filename.replace(".pt", "").replace(".yaml", ""),
                },
                "parent": {},
            },
            "meta": {"name": self.filename},
        }

        if self.filename.endswith(".pt"):
            payload["lineage"]["parent"]["name"] = self.filename

        self.model.create_model(payload)

        # Model could not be created
        # TODO: improve error handling
        if not self.model.id:
            return

        self.model_url = f"{HUB_WEB_ROOT}/models/{self.model.id}"

        # Start heartbeats for HUB to monitor agent
        self.model.start_heartbeat(self.rate_limits["heartbeat"])

        LOGGER.info(f"{PREFIX}View model at {self.model_url} 🚀")

    def _parse_identifier(self, identifier):
        """
        Parses the given identifier to determine the type of identifier and extract relevant components.

        The method supports different identifier formats:
        - A HUB URL, which starts with HUB_WEB_ROOT followed by '/models/'
        - An identifier containing an API key and a model ID separated by an underscore
        - An identifier that is solely a model ID of a fixed length
        - A local filename that ends with '.pt' or '.yaml'

        Args:
            identifier (str): The identifier string to be parsed.

        Returns:
            (tuple): A tuple containing the API key, model ID, and filename as applicable.

        Raises:
            HUBModelError: If the identifier format is not recognized.
        """

        # Initialize variables
        api_key, model_id, filename = None, None, None

        # Check if identifier is a HUB URL
        if identifier.startswith(f"{HUB_WEB_ROOT}/models/"):
            # Extract the model_id after the HUB_WEB_ROOT URL
            model_id = identifier.split(f"{HUB_WEB_ROOT}/models/")[-1]
        else:
            # Split the identifier based on underscores only if it's not a HUB URL
            parts = identifier.split("_")

            # Check if identifier is in the format of API key and model ID
            if len(parts) == 2 and len(parts[0]) == 42 and len(parts[1]) == 20:
                api_key, model_id = parts
            # Check if identifier is a single model ID
            elif len(parts) == 1 and len(parts[0]) == 20:
                model_id = parts[0]
            # Check if identifier is a local filename
            elif identifier.endswith(".pt") or identifier.endswith(".yaml"):
                filename = identifier
            else:
                raise HUBModelError(
                    f"model='{identifier}' could not be parsed. Check format is correct. "
                    f"Supported formats are Ultralytics HUB URL, apiKey_modelId, modelId, local pt or yaml file."
                )

        return api_key, model_id, filename

    def _set_train_args(self):
        """
        Initializes training arguments and creates a model entry on the Ultralytics HUB.

        This method sets up training arguments based on the model's state and updates them with any additional
        arguments provided. It handles different states of the model, such as whether it's resumable, pretrained,
        or requires specific file setup.

        Raises:
            ValueError: If the model is already trained, if required dataset information is missing, or if there are
                issues with the provided training arguments.
        """
        if self.model.is_trained():
            raise ValueError(emojis(f"Model is already trained and uploaded to {self.model_url} 🚀"))

        if self.model.is_resumable():
            # Model has saved weights
            self.train_args = {"data": self.model.get_dataset_url(), "resume": True}
            self.model_file = self.model.get_weights_url("last")
        else:
            # Model has no saved weights
            self.train_args = self.model.data.get("train_args")  # new response

            # Set the model file as either a *.pt or *.yaml file
            self.model_file = (
                self.model.get_weights_url("parent") if self.model.is_pretrained() else self.model.get_architecture()
            )

        if "data" not in self.train_args:
            # RF bug - datasets are sometimes not exported
            raise ValueError("Dataset may still be processing. Please wait a minute and try again.")

        self.model_file = checks.check_yolov5u_filename(self.model_file, verbose=False)  # YOLOv5->YOLOv5u
        self.model_id = self.model.id

    def request_queue(
        self,
        request_func,
        retry=3,
        timeout=30,
        thread=True,
        verbose=True,
        progress_total=None,
        *args,
        **kwargs,
    ):
        """Execute `request_func` with retries and an overall timeout, optionally in a daemon thread."""

        def retry_request():
            """Attempts to call `request_func` with retries, timeout, and optional threading."""
            t0 = time.time()  # record the start time for the timeout
            response = None  # ensure the name is bound even if the timeout is hit before the first attempt
            for i in range(retry + 1):
                if (time.time() - t0) > timeout:
                    LOGGER.warning(f"{PREFIX}Timeout for request reached. {HELP_MSG}")
                    break  # timeout reached, exit loop

                response = request_func(*args, **kwargs)
                if response is None:
                    LOGGER.warning(f"{PREFIX}Received no response from the request. {HELP_MSG}")
                    time.sleep(2**i)  # exponential backoff before retrying
                    continue  # skip further processing and retry

                if progress_total:
                    self._show_upload_progress(progress_total, response)

                if HTTPStatus.OK <= response.status_code < HTTPStatus.MULTIPLE_CHOICES:
                    # If request related to metrics upload
                    if kwargs.get("metrics"):
                        self.metrics_upload_failed_queue = {}
                    return response  # success, no need to retry

                if i == 0:
                    # Initial attempt, check status code and provide messages
                    message = self._get_failure_message(response, retry, timeout)

                    if verbose:
                        LOGGER.warning(f"{PREFIX}{message} {HELP_MSG} ({response.status_code})")

                if not self._should_retry(response.status_code):
                    LOGGER.warning(f"{PREFIX}Request failed. {HELP_MSG} ({response.status_code})")
                    break  # not an error that should be retried, exit loop

                time.sleep(2**i)  # exponential backoff for retries

            # If request related to metrics upload and retries were exceeded
            if response is None and kwargs.get("metrics"):
                self.metrics_upload_failed_queue.update(kwargs.get("metrics", None))

            return response

        if thread:
            # Start a new thread to run the retry_request function
            threading.Thread(target=retry_request, daemon=True).start()
        else:
            # If running in the main thread, call retry_request directly
            return retry_request()

    def _should_retry(self, status_code):
        """Determines if a request should be retried based on the HTTP status code."""
        retry_codes = {
            HTTPStatus.REQUEST_TIMEOUT,
            HTTPStatus.BAD_GATEWAY,
            HTTPStatus.GATEWAY_TIMEOUT,
        }
        return status_code in retry_codes

    def _get_failure_message(self, response: requests.Response, retry: int, timeout: int):
        """
        Generate a retry message based on the response status code.

        Args:
            response: The HTTP response object.
            retry: The number of retry attempts allowed.
            timeout: The maximum timeout duration.

        Returns:
            (str): The retry message.
        """
        if self._should_retry(response.status_code):
            return f"Retrying {retry}x for {timeout}s." if retry else ""
        elif response.status_code == HTTPStatus.TOO_MANY_REQUESTS:  # rate limit
            headers = response.headers
            return (
                f"Rate limit reached ({headers['X-RateLimit-Remaining']}/{headers['X-RateLimit-Limit']}). "
                f"Please retry after {headers['Retry-After']}s."
            )
        else:
            try:
                return response.json().get("message", "No JSON message.")
            except AttributeError:
                return "Unable to read JSON."

    def upload_metrics(self):
        """Upload model metrics to Ultralytics HUB."""
        return self.request_queue(self.model.upload_metrics, metrics=self.metrics_queue.copy(), thread=True)

    def upload_model(
        self,
        epoch: int,
        weights: str,
        is_best: bool = False,
        map: float = 0.0,
        final: bool = False,
    ) -> None:
        """
        Upload a model checkpoint to Ultralytics HUB.

        Args:
            epoch (int): The current training epoch.
            weights (str): Path to the model weights file.
            is_best (bool): Indicates if the current model is the best one so far.
            map (float): Mean average precision of the model.
            final (bool): Indicates if the model is the final model after training.
        """
        if Path(weights).is_file():
            progress_total = Path(weights).stat().st_size if final else None  # only show progress if final
            self.request_queue(
                self.model.upload_model,
                epoch=epoch,
                weights=weights,
                is_best=is_best,
                map=map,
                final=final,
                retry=10,
                timeout=3600,
                thread=not final,
                progress_total=progress_total,
            )
        else:
            LOGGER.warning(f"{PREFIX}WARNING ⚠️ Model upload issue. Missing model {weights}.")

    def _show_upload_progress(self, content_length: int, response: requests.Response) -> None:
        """
        Display a progress bar to track the upload progress of a file.

        Args:
            content_length (int): The total size of the content to be uploaded in bytes.
            response (requests.Response): The response object from the file upload request.

        Returns:
            None
        """
        with TQDM(total=content_length, unit="B", unit_scale=True, unit_divisor=1024) as pbar:
            for data in response.iter_content(chunk_size=1024):
                pbar.update(len(data))
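The retry loop in `request_queue` combines three independent guards: an overall wall-clock timeout, a per-status retry whitelist, and exponential backoff between attempts. A generic sketch of the same pattern, decoupled from the HUB session (names are illustrative; `request_func` is assumed to return an object with a `status_code` attribute, or None):

```python
import time

RETRYABLE = {408, 502, 504}  # request timeout, bad gateway, gateway timeout


def call_with_backoff(request_func, retry=3, timeout=30):
    """Call request_func until success, a non-retryable status, or the timeout elapses."""
    t0 = time.time()
    response = None
    for i in range(retry + 1):
        if time.time() - t0 > timeout:
            break  # overall wall-clock budget exhausted
        response = request_func()
        if response is not None and 200 <= response.status_code < 300:
            return response  # success
        if response is not None and response.status_code not in RETRYABLE:
            break  # permanent failure, retrying will not help
        time.sleep(2**i)  # exponential backoff: 1s, 2s, 4s, ...
    return response
```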
@ -1,247 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import os
import platform
import random
import sys
import threading
import time
from pathlib import Path
from typing import Any

import requests

from ultralytics.utils import (
    ENVIRONMENT,
    LOGGER,
    ONLINE,
    RANK,
    SETTINGS,
    TESTS_RUNNING,
    TQDM,
    TryExcept,
    __version__,
    colorstr,
    get_git_origin_url,
    is_colab,
    is_git_dir,
    is_pip_package,
)
from ultralytics.utils.downloads import GITHUB_ASSETS_NAMES

HUB_API_ROOT = os.environ.get("ULTRALYTICS_HUB_API", "https://api.ultralytics.com")
HUB_WEB_ROOT = os.environ.get("ULTRALYTICS_HUB_WEB", "https://hub.ultralytics.com")

PREFIX = colorstr("Ultralytics HUB: ")
HELP_MSG = "If this issue persists please visit https://github.com/ultralytics/hub/issues for assistance."


def request_with_credentials(url: str) -> Any:
    """
    Make an AJAX request with cookies attached in a Google Colab environment.

    Args:
        url (str): The URL to make the request to.

    Returns:
        (Any): The response data from the AJAX request.

    Raises:
        OSError: If the function is not run in a Google Colab environment.
    """
    if not is_colab():
        raise OSError("request_with_credentials() must run in a Colab environment")
    from google.colab import output  # noqa
    from IPython import display  # noqa

    display.display(
        display.Javascript(
            """
            window._hub_tmp = new Promise((resolve, reject) => {
                const timeout = setTimeout(() => reject("Failed authenticating existing browser session"), 5000)
                fetch("%s", {
                    method: 'POST',
                    credentials: 'include'
                })
                    .then((response) => resolve(response.json()))
                    .then((json) => {
                        clearTimeout(timeout);
                    }).catch((err) => {
                        clearTimeout(timeout);
                        reject(err);
                    });
            });
            """
            % url
        )
    )
    return output.eval_js("_hub_tmp")


def requests_with_progress(method, url, **kwargs):
    """
    Make an HTTP request using the specified method and URL, with an optional progress bar.

    Args:
        method (str): The HTTP method to use (e.g. 'GET', 'POST').
        url (str): The URL to send the request to.
        **kwargs (any): Additional keyword arguments to pass to the underlying `requests.request` function.

    Returns:
        (requests.Response): The response object from the HTTP request.

    Note:
        - If 'progress' is set to True, the progress bar will display the download progress for responses with a known
          content length.
        - If 'progress' is a number then the progress bar will display assuming content length = progress.
    """
    progress = kwargs.pop("progress", False)
    if not progress:
        return requests.request(method, url, **kwargs)
    response = requests.request(method, url, stream=True, **kwargs)
    total = int(response.headers.get("content-length", 0) if isinstance(progress, bool) else progress)  # total size
    try:
        pbar = TQDM(total=total, unit="B", unit_scale=True, unit_divisor=1024)
        for data in response.iter_content(chunk_size=1024):
            pbar.update(len(data))
        pbar.close()
    except requests.exceptions.ChunkedEncodingError:  # avoid 'Connection broken: IncompleteRead' warnings
        response.close()
    return response


def smart_request(method, url, retry=3, timeout=30, thread=True, code=-1, verbose=True, progress=False, **kwargs):
    """
    Makes an HTTP request using the 'requests' library, with exponential backoff retries up to a specified timeout.

    Args:
        method (str): The HTTP method to use for the request. Choices are 'post' and 'get'.
        url (str): The URL to make the request to.
        retry (int, optional): Number of retries to attempt before giving up. Default is 3.
        timeout (int, optional): Timeout in seconds after which the function will give up retrying. Default is 30.
        thread (bool, optional): Whether to execute the request in a separate daemon thread. Default is True.
        code (int, optional): An identifier for the request, used for logging purposes. Default is -1.
        verbose (bool, optional): A flag to determine whether to print out to console or not. Default is True.
        progress (bool, optional): Whether to show a progress bar during the request. Default is False.
        **kwargs (any): Keyword arguments to be passed to the requests function specified in method.

    Returns:
        (requests.Response): The HTTP response object. If the request is executed in a separate thread, returns None.
    """
    retry_codes = (408, 500)  # retry only these codes

    @TryExcept(verbose=verbose)
    def func(func_method, func_url, **func_kwargs):
        """Make HTTP requests with retries and timeouts, with optional progress tracking."""
        r = None  # response
        t0 = time.time()  # initial time for timer
        for i in range(retry + 1):
            if (time.time() - t0) > timeout:
                break
            r = requests_with_progress(func_method, func_url, **func_kwargs)  # i.e. get(url, data, json, files)
            if r.status_code < 300:  # return codes in the 2xx range are generally considered "good" or "successful"
                break
            try:
                m = r.json().get("message", "No JSON message.")
            except AttributeError:
                m = "Unable to read JSON."
            if i == 0:
                if r.status_code in retry_codes:
                    m += f" Retrying {retry}x for {timeout}s." if retry else ""
                elif r.status_code == 429:  # rate limit
                    h = r.headers  # response headers
                    m = (
                        f"Rate limit reached ({h['X-RateLimit-Remaining']}/{h['X-RateLimit-Limit']}). "
                        f"Please retry after {h['Retry-After']}s."
                    )
                if verbose:
                    LOGGER.warning(f"{PREFIX}{m} {HELP_MSG} ({r.status_code} #{code})")
                if r.status_code not in retry_codes:
                    return r
            time.sleep(2**i)  # exponential backoff
        return r

    args = method, url
    kwargs["progress"] = progress
    if thread:
        threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True).start()
    else:
        return func(*args, **kwargs)


class Events:
    """
    A class for collecting anonymous event analytics. Event analytics are enabled when sync=True in settings and
    disabled when sync=False. Run 'yolo settings' to see and update the settings YAML file.

    Attributes:
        url (str): The URL to send anonymous events.
        rate_limit (float): The rate limit in seconds for sending events.
        metadata (dict): A dictionary containing metadata about the environment.
        enabled (bool): A flag to enable or disable Events based on certain conditions.
    """

    url = "https://www.google-analytics.com/mp/collect?measurement_id=G-X8NCJYTQXM&api_secret=QLQrATrNSwGRFRLE-cbHJw"

    def __init__(self):
        """Initializes the Events object with default values for events, rate_limit, and metadata."""
        self.events = []  # events list
        self.rate_limit = 60.0  # rate limit (seconds)
        self.t = 0.0  # rate limit timer (seconds)
        self.metadata = {
            "cli": Path(sys.argv[0]).name == "yolo",
            "install": "git" if is_git_dir() else "pip" if is_pip_package() else "other",
            "python": ".".join(platform.python_version_tuple()[:2]),  # i.e. 3.10
            "version": __version__,
            "env": ENVIRONMENT,
            "session_id": round(random.random() * 1e15),
            "engagement_time_msec": 1000,
        }
        self.enabled = (
            SETTINGS["sync"]
            and RANK in (-1, 0)
            and not TESTS_RUNNING
            and ONLINE
            and (is_pip_package() or get_git_origin_url() == "https://github.com/ultralytics/ultralytics.git")
        )

    def __call__(self, cfg):
        """
        Attempts to add a new event to the events list and send events if the rate limit is reached.

        Args:
            cfg (IterableSimpleNamespace): The configuration object containing mode and task information.
        """
        if not self.enabled:
            # Events disabled, do nothing
            return

        # Attempt to add to events
        if len(self.events) < 25:  # Events list limited to 25 events (drop any events past this)
            params = {
                **self.metadata,
                "task": cfg.task,
                "model": cfg.model if cfg.model in GITHUB_ASSETS_NAMES else "custom",
            }
            if cfg.mode == "export":
                params["format"] = cfg.format
            self.events.append({"name": cfg.mode, "params": params})

        # Check rate limit
        t = time.time()
        if (t - self.t) < self.rate_limit:
            # Time is under rate limiter, wait to send
            return

        # Time is over rate limiter, send now
        data = {"client_id": SETTINGS["uuid"], "events": self.events}  # SHA-256 anonymized UUID hash and events list

        # POST equivalent to requests.post(self.url, json=data)
        smart_request("post", self.url, json=data, retry=0, verbose=False)

        # Reset events and rate limit timer
        self.events = []
        self.t = t


# Run below code on hub/utils init -------------------------------------------------------------------------------------
events = Events()
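The `Events` class implements a simple client-side batching pattern: buffer up to 25 events, and flush the whole batch at most once per `rate_limit` window. A generic sketch of that pattern, with an illustrative `send` callback rather than the Google Analytics endpoint above:

```python
import time


class RateLimitedQueue:
    """Buffer items and flush them in one batch at most once per `rate_limit` seconds."""

    def __init__(self, send, rate_limit=60.0, max_items=25):
        self.send = send  # callback taking a list of items
        self.rate_limit = rate_limit
        self.max_items = max_items
        self.items = []
        self.t = 0.0  # timestamp of the last flush

    def add(self, item):
        if len(self.items) < self.max_items:  # drop items beyond the cap
            self.items.append(item)
        now = time.time()
        if now - self.t < self.rate_limit:
            return  # under the rate limit, wait for a later call to flush
        self.send(self.items)  # over the rate limit, flush the batch now
        self.items = []
        self.t = now


# Usage: queue = RateLimitedQueue(send=print); queue.add({"name": "train"})
```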
@ -1,8 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from .rtdetr import RTDETR
from .sam import SAM
from .yolo import YOLO, YOLOWorld
from .yolov10 import YOLOv10

__all__ = "YOLO", "RTDETR", "SAM", "YOLOWorld", "YOLOv10"  # allow simpler import