diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a004332b..a75167bc 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -11,7 +11,7 @@ ci:
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v4.4.0
+ rev: v4.5.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
@@ -22,7 +22,7 @@ repos:
- id: detect-private-key
- repo: https://github.com/asottile/pyupgrade
- rev: v3.14.0
+ rev: v3.15.0
hooks:
- id: pyupgrade
name: Upgrade code
@@ -77,3 +77,8 @@ repos:
# rev: v1.5.0
# hooks:
# - id: dead
+
+# - repo: https://github.com/ultralytics/pre-commit
+# rev: bd60a414f80a53fb8f593d3bfed4701fc47e4b23
+# hooks:
+# - id: capitalize-comments
diff --git a/docs/build_reference.py b/docs/build_reference.py
index a237ae06..e3adfe9b 100644
--- a/docs/build_reference.py
+++ b/docs/build_reference.py
@@ -6,7 +6,6 @@ reference section of *.md files composed of classes and functions, and also crea
Note: Must be run from repository root directory. Do not run from docs directory.
"""
-import os
import re
from collections import defaultdict
from pathlib import Path
@@ -18,17 +17,9 @@ CODE_DIR = ROOT
REFERENCE_DIR = ROOT.parent / 'docs/reference'
-def extract_classes_and_functions(filepath: Path):
- """
- Extracts class and function names from a given Python file.
-
- Args:
- filepath (Path): The path to the Python file.
-
- Returns:
- (tuple): A tuple containing lists of class and function names.
- """
- content = Path(filepath).read_text()
+def extract_classes_and_functions(filepath: Path) -> tuple:
+ """Extracts class and function names from a given Python file."""
+ content = filepath.read_text()
class_pattern = r'(?:^|\n)class\s(\w+)(?:\(|:)'
func_pattern = r'(?:^|\n)def\s(\w+)\('
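
As a quick aside (not part of the commit), the two patterns above only match top-level `class` and `def` statements, since each must follow a newline or the start of the file; a minimal sketch with a hypothetical source string:

```python
import re

class_pattern = r'(?:^|\n)class\s(\w+)(?:\(|:)'
func_pattern = r'(?:^|\n)def\s(\w+)\('

# Hypothetical module source; the indented method is deliberately not matched
sample = "class Foo(Base):\n    def method(self):\n        pass\n\ndef bar(x):\n    return x\n"
print(re.findall(class_pattern, sample))  # ['Foo']
print(re.findall(func_pattern, sample))   # ['bar']
```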
@@ -39,26 +30,17 @@ def extract_classes_and_functions(filepath: Path):
def create_markdown(py_filepath: Path, module_path: str, classes: list, functions: list):
- """
- Creates a Markdown file containing the API reference for the given Python module.
-
- Args:
- py_filepath (Path): The path to the Python file.
- module_path (str): The import path for the Python module.
- classes (list): A list of class names within the module.
- functions (list): A list of function names within the module.
- """
+ """Creates a Markdown file containing the API reference for the given Python module."""
md_filepath = py_filepath.with_suffix('.md')
# Read existing content and keep header content between first two ---
header_content = ''
if md_filepath.exists():
- with open(md_filepath) as file:
- existing_content = file.read()
- header_parts = existing_content.split('---')
- for part in header_parts:
- if 'description:' in part or 'comments:' in part:
- header_content += f'---{part}---\n\n'
+ existing_content = md_filepath.read_text()
+ header_parts = existing_content.split('---')
+ for part in header_parts:
+ if 'description:' in part or 'comments:' in part:
+ header_content += f'---{part}---\n\n'
module_name = module_path.replace('.__init__', '')
module_path = module_path.replace('.', '/')
@@ -74,43 +56,24 @@ def create_markdown(py_filepath: Path, module_path: str, classes: list, function
if not md_content.endswith('\n'):
md_content += '\n'
- os.makedirs(os.path.dirname(md_filepath), exist_ok=True)
- with open(md_filepath, 'w') as file:
- file.write(md_content)
+ md_filepath.parent.mkdir(parents=True, exist_ok=True)
+ md_filepath.write_text(md_content)
return md_filepath.relative_to(NEW_YAML_DIR)
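
For reference, the replaced `os.makedirs` + `open` pair and the new two-line pathlib form do the same work; a standalone sketch with an illustrative path:

```python
from pathlib import Path

md_filepath = Path('docs/reference/example.md')  # illustrative path
md_filepath.parent.mkdir(parents=True, exist_ok=True)  # replaces os.makedirs(os.path.dirname(...), exist_ok=True)
md_filepath.write_text('# Example\n')  # replaces open(...) + file.write(...)
```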
-def nested_dict():
- """
- Creates and returns a nested defaultdict.
-
- Returns:
- (defaultdict): A nested defaultdict object.
- """
+def nested_dict() -> defaultdict:
+ """Creates and returns a nested defaultdict."""
return defaultdict(nested_dict)
-def sort_nested_dict(d: dict):
- """
- Sorts a nested dictionary recursively.
-
- Args:
- d (dict): The dictionary to sort.
-
- Returns:
- (dict): The sorted dictionary.
- """
+def sort_nested_dict(d: dict) -> dict:
+ """Sorts a nested dictionary recursively."""
return {key: sort_nested_dict(value) if isinstance(value, dict) else value for key, value in sorted(d.items())}
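
The two helpers above can be exercised in isolation (restated here for self-containment, with toy keys):

```python
from collections import defaultdict

def nested_dict() -> defaultdict:
    """Creates and returns a nested defaultdict."""
    return defaultdict(nested_dict)

def sort_nested_dict(d: dict) -> dict:
    """Sorts a nested dictionary recursively."""
    return {key: sort_nested_dict(value) if isinstance(value, dict) else value for key, value in sorted(d.items())}

tree = nested_dict()
tree['utils']['ops'] = 'ops.md'
tree['data']['augment'] = 'augment.md'
print(sort_nested_dict(tree))  # {'data': {'augment': 'augment.md'}, 'utils': {'ops': 'ops.md'}}
```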
def create_nav_menu_yaml(nav_items: list):
- """
- Creates a YAML file for the navigation menu based on the provided list of items.
-
- Args:
- nav_items (list): A list of relative file paths to Markdown files for the navigation menu.
- """
+ """Creates a YAML file for the navigation menu based on the provided list of items."""
nav_tree = nested_dict()
for item_str in nav_items:
@@ -136,26 +99,26 @@ def create_nav_menu_yaml(nav_items: list):
yaml_str += f"{indent}- {k}: {str(v).replace('docs/', '')}\n"
return yaml_str
- with open(NEW_YAML_DIR / 'nav_menu_updated.yml', 'w') as file:
- yaml_str = _dict_to_yaml(nav_tree_sorted)
- file.write(yaml_str)
+ # Print updated YAML reference section
+ print('Scan complete, new mkdocs.yaml reference section is:\n\n', _dict_to_yaml(nav_tree_sorted))
+
+ # Save new YAML reference section
+ # (NEW_YAML_DIR / 'nav_menu_updated.yml').write_text(_dict_to_yaml(nav_tree_sorted))
def main():
"""Main function to extract class and function names, create Markdown files, and generate a YAML navigation menu."""
nav_items = []
- for root, _, files in os.walk(CODE_DIR):
- for file in files:
- if file.endswith('.py'):
- py_filepath = Path(root) / file
- classes, functions = extract_classes_and_functions(py_filepath)
- if classes or functions:
- py_filepath_rel = py_filepath.relative_to(CODE_DIR)
- md_filepath = REFERENCE_DIR / py_filepath_rel
- module_path = f"ultralytics.{py_filepath_rel.with_suffix('').as_posix().replace('/', '.')}"
- md_rel_filepath = create_markdown(md_filepath, module_path, classes, functions)
- nav_items.append(str(md_rel_filepath))
+ for py_filepath in CODE_DIR.rglob('*.py'):
+ classes, functions = extract_classes_and_functions(py_filepath)
+
+ if classes or functions:
+ py_filepath_rel = py_filepath.relative_to(CODE_DIR)
+ md_filepath = REFERENCE_DIR / py_filepath_rel
+ module_path = f"ultralytics.{py_filepath_rel.with_suffix('').as_posix().replace('/', '.')}"
+ md_rel_filepath = create_markdown(md_filepath, module_path, classes, functions)
+ nav_items.append(str(md_rel_filepath))
create_nav_menu_yaml(nav_items)
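
The rewritten loop relies on `Path.rglob`, which replaces the nested `os.walk` + `endswith` filtering with a single recursive glob; a standalone sketch (the directory name is illustrative):

```python
from pathlib import Path

CODE_DIR = Path('ultralytics')  # illustrative root directory

# One recursive glob replaces os.walk + file.endswith('.py') filtering
for py_filepath in CODE_DIR.rglob('*.py'):
    print(py_filepath.relative_to(CODE_DIR).as_posix())
```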
diff --git a/docs/reference/utils/callbacks/wb.md b/docs/reference/utils/callbacks/wb.md
index a5869a5e..74d1ef80 100644
--- a/docs/reference/utils/callbacks/wb.md
+++ b/docs/reference/utils/callbacks/wb.md
@@ -9,6 +9,14 @@ keywords: Ultralytics, callbacks, _log_plots, on_fit_epoch_end, on_train_end
Full source code for this file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/wb.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/wb.py). Help us fix any issues you see by submitting a [Pull Request](https://docs.ultralytics.com/help/contributing/) 🛠️. Thank you 🙏!
+---
+## ::: ultralytics.utils.callbacks.wb._custom_table
+
+
+---
+## ::: ultralytics.utils.callbacks.wb._plot_curve
+
+
---
## ::: ultralytics.utils.callbacks.wb._log_plots
diff --git a/examples/YOLOv8-Region-Counter/yolov8_region_counter.py b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py
index dd0e476f..ada269ed 100644
--- a/examples/YOLOv8-Region-Counter/yolov8_region_counter.py
+++ b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py
@@ -11,7 +11,7 @@ from ultralytics import YOLO
from ultralytics.utils.files import increment_path
from ultralytics.utils.plotting import Annotator, colors
-track_history = defaultdict(lambda: [])
+track_history = defaultdict(list)
current_region = None
counting_regions = [
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 788651b4..7a3fd992 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -97,8 +97,8 @@ def test_fastsam(task='segment', model=WEIGHTS_DIR / 'FastSAM-s.pt', data='coco8
ann = prompt_process.text_prompt(text='a photo of a dog')
# Point prompt
- # points default [[0,0]] [[x1,y1],[x2,y2]]
- # point_label default [0] [1,0] 0:background, 1:foreground
+ # Points default [[0,0]] [[x1,y1],[x2,y2]]
+ # Point_label default [0] [1,0] 0:background, 1:foreground
ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1])
prompt_process.plot(annotations=ann, output='./')
diff --git a/tests/test_engine.py b/tests/test_engine.py
index 2d789583..ce328efc 100644
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -70,7 +70,7 @@ def test_segment():
CFG.imgsz = 32
# YOLO(CFG_SEG).train(**overrides) # works
- # trainer
+ # Trainer
trainer = segment.SegmentationTrainer(overrides=overrides)
trainer.add_callback('on_train_start', test_func)
assert test_func in trainer.callbacks['on_train_start'], 'callback test failed'
diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py
index e3948bd4..da166d88 100644
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-__version__ = '8.0.196'
+__version__ = '8.0.197'
from ultralytics.models import RTDETR, SAM, YOLO
from ultralytics.models.fastsam import FastSAM
diff --git a/ultralytics/data/augment.py b/ultralytics/data/augment.py
index 7fb32477..f500aa03 100644
--- a/ultralytics/data/augment.py
+++ b/ultralytics/data/augment.py
@@ -491,7 +491,7 @@ class RandomPerspective:
border = labels.pop('mosaic_border', self.border)
self.size = img.shape[1] + border[1] * 2, img.shape[0] + border[0] * 2 # w, h
# M is affine matrix
- # scale for func:`box_candidates`
+ # Scale for func:`box_candidates`
img, M, scale = self.affine_transform(img, border)
bboxes = self.apply_bboxes(instances.bboxes, M)
@@ -894,7 +894,7 @@ class Format:
return labels
def _format_img(self, img):
- """Format the image for YOLOv5 from Numpy array to PyTorch tensor."""
+ """Format the image for YOLO from Numpy array to PyTorch tensor."""
if len(img.shape) < 3:
img = np.expand_dims(img, -1)
img = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1])
diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py
index 4451df17..27ceb578 100644
--- a/ultralytics/data/converter.py
+++ b/ultralytics/data/converter.py
@@ -1,14 +1,14 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import json
-import shutil
from collections import defaultdict
from pathlib import Path
import cv2
import numpy as np
-from ultralytics.utils import TQDM
+from ultralytics.utils import LOGGER, TQDM
+from ultralytics.utils.files import increment_path
def coco91_to_coco80_class():
@@ -48,12 +48,12 @@ def coco80_to_coco91_class(): #
def convert_coco(labels_dir='../coco/annotations/',
- save_dir='.',
+ save_dir='coco_converted/',
use_segments=False,
use_keypoints=False,
cls91to80=True):
"""
- Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
+ Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.
Args:
labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
@@ -74,9 +74,7 @@ def convert_coco(labels_dir='../coco/annotations/',
"""
# Create dataset directory
- save_dir = Path(save_dir)
- if save_dir.exists():
- shutil.rmtree(save_dir) # delete dir
+ save_dir = increment_path(save_dir) # increment if save directory already exists
for p in save_dir / 'labels', save_dir / 'images':
p.mkdir(parents=True, exist_ok=True) # make dir
@@ -147,6 +145,8 @@ def convert_coco(labels_dir='../coco/annotations/',
if use_segments and len(segments[i]) > 0 else bboxes[i]), # cls, box or segments
file.write(('%g ' * len(line)).rstrip() % line + '\n')
+ LOGGER.info(f'COCO data converted successfully.\nResults saved to {save_dir.resolve()}')
+
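
Switching from `shutil.rmtree` to `increment_path` means a re-run no longer deletes previous conversions; a hedged sketch of the new behavior (assumes the `ultralytics` package is installed):

```python
from ultralytics.utils.files import increment_path

# Returns 'coco_converted/' unchanged if it does not exist yet;
# otherwise increments the name: coco_converted2, coco_converted3, ...
save_dir = increment_path('coco_converted/')
for p in save_dir / 'labels', save_dir / 'images':
    p.mkdir(parents=True, exist_ok=True)
```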
def convert_dota_to_yolo_obb(dota_root_path: str):
"""
@@ -271,26 +271,25 @@ def merge_multi_segment(segments):
segments = [np.array(i).reshape(-1, 2) for i in segments]
idx_list = [[] for _ in range(len(segments))]
- # record the indexes with min distance between each segment
+ # Record the indexes with min distance between each segment
for i in range(1, len(segments)):
idx1, idx2 = min_index(segments[i - 1], segments[i])
idx_list[i - 1].append(idx1)
idx_list[i].append(idx2)
- # use two round to connect all the segments
+ # Use two rounds to connect all the segments
for k in range(2):
- # forward connection
+ # Forward connection
if k == 0:
for i, idx in enumerate(idx_list):
- # middle segments have two indexes
- # reverse the index of middle segments
+ # Middle segments have two indexes; reverse the segment if the indexes are out of order
if len(idx) == 2 and idx[0] > idx[1]:
idx = idx[::-1]
segments[i] = segments[i][::-1, :]
segments[i] = np.roll(segments[i], -idx[0], axis=0)
segments[i] = np.concatenate([segments[i], segments[i][:1]])
- # deal with the first segment and the last one
+ # Deal with the first segment and the last one
if i in [0, len(idx_list) - 1]:
s.append(segments[i])
else:
diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py
index f0f431ea..068311ef 100644
--- a/ultralytics/data/dataset.py
+++ b/ultralytics/data/dataset.py
@@ -162,7 +162,7 @@ class YOLODataset(BaseDataset):
def update_labels_info(self, label):
"""Custom your label format here."""
# NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
- # we can make it also support classification and semantic segmentation by add or remove some dict keys there.
+ # We can make it also support classification and semantic segmentation by adding or removing some dict keys there.
bboxes = label.pop('bboxes')
segments = label.pop('segments')
keypoints = label.pop('keypoints', None)
diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py
index 6905847a..9b953080 100644
--- a/ultralytics/engine/trainer.py
+++ b/ultralytics/engine/trainer.py
@@ -526,7 +526,7 @@ class BaseTrainer:
# TODO: may need to put these following functions into callback
def plot_training_samples(self, batch, ni):
- """Plots training samples during YOLOv5 training."""
+ """Plots training samples during YOLO training."""
pass
def plot_training_labels(self):
diff --git a/ultralytics/hub/session.py b/ultralytics/hub/session.py
index 9870ca16..d2fd89a9 100644
--- a/ultralytics/hub/session.py
+++ b/ultralytics/hub/session.py
@@ -23,7 +23,7 @@ class HUBTrainingSession:
Attributes:
agent_id (str): Identifier for the instance communicating with the server.
- model_id (str): Identifier for the YOLOv5 model being trained.
+ model_id (str): Identifier for the YOLO model being trained.
model_url (str): URL for the model in Ultralytics HUB.
api_url (str): API URL for the model in Ultralytics HUB.
auth_header (dict): Authentication header for the Ultralytics HUB API requests.
diff --git a/ultralytics/models/fastsam/prompt.py b/ultralytics/models/fastsam/prompt.py
index d381075f..72ea5ef8 100644
--- a/ultralytics/models/fastsam/prompt.py
+++ b/ultralytics/models/fastsam/prompt.py
@@ -120,7 +120,7 @@ class FastSAMPrompt:
result_name = os.path.basename(ann.path)
image = ann.orig_img[..., ::-1] # BGR to RGB
original_h, original_w = ann.orig_shape
- # for macOS only
+ # For macOS only
# plt.switch_backend('TkAgg')
plt.figure(figsize=(original_w / 100, original_h / 100))
# Add subplot with no margin.
diff --git a/ultralytics/models/fastsam/utils.py b/ultralytics/models/fastsam/utils.py
index e99fd623..480e9039 100644
--- a/ultralytics/models/fastsam/utils.py
+++ b/ultralytics/models/fastsam/utils.py
@@ -42,23 +42,23 @@ def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=Fals
high_iou_indices (torch.Tensor): Indices of boxes with IoU > thres
"""
boxes = adjust_bboxes_to_image_border(boxes, image_shape)
- # obtain coordinates for intersections
+ # Obtain coordinates for intersections
x1 = torch.max(box1[0], boxes[:, 0])
y1 = torch.max(box1[1], boxes[:, 1])
x2 = torch.min(box1[2], boxes[:, 2])
y2 = torch.min(box1[3], boxes[:, 3])
- # compute the area of intersection
+ # Compute the area of intersection
intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
- # compute the area of both individual boxes
+ # Compute the area of both individual boxes
box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
- # compute the area of union
+ # Compute the area of union
union = box1_area + box2_area - intersection
- # compute the IoU
+ # Compute the IoU
iou = intersection / union # Should be shape (n, )
if raw_output:
return 0 if iou.numel() == 0 else iou
diff --git a/ultralytics/models/rtdetr/val.py b/ultralytics/models/rtdetr/val.py
index a6af67ab..468026be 100644
--- a/ultralytics/models/rtdetr/val.py
+++ b/ultralytics/models/rtdetr/val.py
@@ -99,10 +99,10 @@ class RTDETRValidator(DetectionValidator):
for i, bbox in enumerate(bboxes): # (300, 4)
bbox = ops.xywh2xyxy(bbox)
score, cls = scores[i].max(-1) # (300, )
- # Do not need threshold for evaluation as only got 300 boxes here.
+ # No threshold needed for evaluation since there are only 300 boxes here
# idx = score > self.args.conf
pred = torch.cat([bbox, score[..., None], cls[..., None]], dim=-1) # filter
- # sort by confidence to correctly get internal metrics.
+ # Sort by confidence to correctly get internal metrics
pred = pred[score.argsort(descending=True)]
outputs[i] = pred # [idx]
diff --git a/ultralytics/models/sam/modules/encoders.py b/ultralytics/models/sam/modules/encoders.py
index 67b35970..30e6014e 100644
--- a/ultralytics/models/sam/modules/encoders.py
+++ b/ultralytics/models/sam/modules/encoders.py
@@ -304,11 +304,11 @@ class PositionEmbeddingRandom(nn.Module):
def _pe_encoding(self, coords: torch.Tensor) -> torch.Tensor:
"""Positionally encode points that are normalized to [0,1]."""
- # assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape
+ # Assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape
coords = 2 * coords - 1
coords = coords @ self.positional_encoding_gaussian_matrix
coords = 2 * np.pi * coords
- # outputs d_1 x ... x d_n x C shape
+ # Outputs d_1 x ... x d_n x C shape
return torch.cat([torch.sin(coords), torch.cos(coords)], dim=-1)
def forward(self, size: Tuple[int, int]) -> torch.Tensor:
@@ -429,7 +429,7 @@ class Attention(nn.Module):
self.use_rel_pos = use_rel_pos
if self.use_rel_pos:
assert (input_size is not None), 'Input size must be provided if using relative positional encoding.'
- # initialize relative positional embeddings
+ # Initialize relative positional embeddings
self.rel_pos_h = nn.Parameter(torch.zeros(2 * input_size[0] - 1, head_dim))
self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))
diff --git a/ultralytics/models/sam/modules/tiny_encoder.py b/ultralytics/models/sam/modules/tiny_encoder.py
index 595286fe..9955a261 100644
--- a/ultralytics/models/sam/modules/tiny_encoder.py
+++ b/ultralytics/models/sam/modules/tiny_encoder.py
@@ -172,7 +172,7 @@ class ConvLayer(nn.Module):
self.depth = depth
self.use_checkpoint = use_checkpoint
- # build blocks
+ # Build blocks
self.blocks = nn.ModuleList([
MBConv(
dim,
@@ -182,7 +182,7 @@ class ConvLayer(nn.Module):
drop_path[i] if isinstance(drop_path, list) else drop_path,
) for i in range(depth)])
- # patch merging layer
+ # Patch merging layer
self.downsample = None if downsample is None else downsample(
input_resolution, dim=dim, out_dim=out_dim, activation=activation)
@@ -393,11 +393,11 @@ class TinyViTBlock(nn.Module):
pH, pW = H + pad_b, W + pad_r
nH = pH // self.window_size
nW = pW // self.window_size
- # window partition
+ # Window partition
x = x.view(B, nH, self.window_size, nW, self.window_size,
C).transpose(2, 3).reshape(B * nH * nW, self.window_size * self.window_size, C)
x = self.attn(x)
- # window reverse
+ # Window reverse
x = x.view(B, nH, nW, self.window_size, self.window_size, C).transpose(2, 3).reshape(B, pH, pW, C)
if padding:
@@ -467,7 +467,7 @@ class BasicLayer(nn.Module):
self.depth = depth
self.use_checkpoint = use_checkpoint
- # build blocks
+ # Build blocks
self.blocks = nn.ModuleList([
TinyViTBlock(
dim=dim,
@@ -481,7 +481,7 @@ class BasicLayer(nn.Module):
activation=activation,
) for i in range(depth)])
- # patch merging layer
+ # Patch merging layer
self.downsample = None if downsample is None else downsample(
input_resolution, dim=dim, out_dim=out_dim, activation=activation)
@@ -593,10 +593,10 @@ class TinyViT(nn.Module):
patches_resolution = self.patch_embed.patches_resolution
self.patches_resolution = patches_resolution
- # stochastic depth
+ # Stochastic depth
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule
- # build layers
+ # Build layers
self.layers = nn.ModuleList()
for i_layer in range(self.num_layers):
kwargs = dict(
@@ -628,7 +628,7 @@ class TinyViT(nn.Module):
self.norm_head = nn.LayerNorm(embed_dims[-1])
self.head = nn.Linear(embed_dims[-1], num_classes) if num_classes > 0 else torch.nn.Identity()
- # init weights
+ # Init weights
self.apply(self._init_weights)
self.set_layer_lr_decay(layer_lr_decay)
self.neck = nn.Sequential(
@@ -653,7 +653,7 @@ class TinyViT(nn.Module):
"""Sets the learning rate decay for each layer in the TinyViT model."""
decay_rate = layer_lr_decay
- # layers -> blocks (depth)
+ # Layers -> blocks (depth)
depth = sum(self.depths)
lr_scales = [decay_rate ** (depth - i - 1) for i in range(depth)]
diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py
index 1a3b8dfb..26a49b54 100644
--- a/ultralytics/models/sam/predict.py
+++ b/ultralytics/models/sam/predict.py
@@ -414,8 +414,7 @@ class Predictor(BasePredictor):
unchanged = unchanged and not changed
new_masks.append(torch.as_tensor(mask).unsqueeze(0))
- # Give score=0 to changed masks and score=1 to unchanged masks
- # so NMS will prefer ones that didn't need postprocessing
+ # Give score=0 to changed masks and 1 to unchanged masks so NMS prefers masks not needing postprocessing
scores.append(float(unchanged))
# Recalculate boxes and remove any new duplicates
diff --git a/ultralytics/models/utils/loss.py b/ultralytics/models/utils/loss.py
index a61d20a9..abb54958 100644
--- a/ultralytics/models/utils/loss.py
+++ b/ultralytics/models/utils/loss.py
@@ -66,7 +66,7 @@ class DETRLoss(nn.Module):
def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=''):
"""Computes the classification loss based on predictions, target values, and ground truth scores."""
- # logits: [b, query, num_classes], gt_class: list[[n, 1]]
+ # Logits: [b, query, num_classes], gt_class: list[[n, 1]]
name_class = f'loss_class{postfix}'
bs, nq = pred_scores.shape[:2]
# one_hot = F.one_hot(targets, self.nc + 1)[..., :-1] # (bs, num_queries, num_classes)
@@ -90,7 +90,7 @@ class DETRLoss(nn.Module):
"""Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding
boxes.
"""
- # boxes: [b, query, 4], gt_bbox: list[[n, 4]]
+ # Boxes: [b, query, 4], gt_bbox: list[[n, 4]]
name_bbox = f'loss_bbox{postfix}'
name_giou = f'loss_giou{postfix}'
diff --git a/ultralytics/models/utils/ops.py b/ultralytics/models/utils/ops.py
index 99357d19..902756db 100644
--- a/ultralytics/models/utils/ops.py
+++ b/ultralytics/models/utils/ops.py
@@ -188,7 +188,7 @@ def get_cdn_group(batch,
num_group = num_dn // max_nums
num_group = 1 if num_group == 0 else num_group
- # pad gt to max_num of a batch
+ # Pad gt to max_num of a batch
bs = len(gt_groups)
gt_cls = batch['cls'] # (bs*num, )
gt_bbox = batch['bboxes'] # bs*num, 4
@@ -204,10 +204,10 @@ def get_cdn_group(batch,
neg_idx = torch.arange(total_num * num_group, dtype=torch.long, device=gt_bbox.device) + num_group * total_num
if cls_noise_ratio > 0:
- # half of bbox prob
+ # Half of bbox prob
mask = torch.rand(dn_cls.shape) < (cls_noise_ratio * 0.5)
idx = torch.nonzero(mask).squeeze(-1)
- # randomly put a new one here
+ # Randomly put a new one here
new_label = torch.randint_like(idx, 0, num_classes, dtype=dn_cls.dtype, device=dn_cls.device)
dn_cls[idx] = new_label
@@ -240,9 +240,9 @@ def get_cdn_group(batch,
tgt_size = num_dn + num_queries
attn_mask = torch.zeros([tgt_size, tgt_size], dtype=torch.bool)
- # match query cannot see the reconstruct
+ # Match queries cannot see the reconstructed samples
attn_mask[num_dn:, :num_dn] = True
- # reconstruct cannot see each other
+ # Reconstructed samples cannot see each other
for i in range(num_group):
if i == 0:
attn_mask[max_nums * 2 * i:max_nums * 2 * (i + 1), max_nums * 2 * (i + 1):num_dn] = True
diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py
index 9a65395f..778dcec3 100644
--- a/ultralytics/nn/modules/block.py
+++ b/ultralytics/nn/modules/block.py
@@ -307,7 +307,7 @@ class Bottleneck(nn.Module):
self.add = shortcut and c1 == c2
def forward(self, x):
- """'forward()' applies the YOLOv5 FPN to input data."""
+ """'forward()' applies the YOLO FPN to input data."""
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py
index 5ac4e73c..95c6457f 100644
--- a/ultralytics/nn/modules/head.py
+++ b/ultralytics/nn/modules/head.py
@@ -192,7 +192,7 @@ class RTDETRDecoder(nn.Module):
dropout=0.,
act=nn.ReLU(),
eval_idx=-1,
- # training args
+ # Training args
nd=100, # num denoising
label_noise_ratio=0.5,
box_noise_scale=1.0,
@@ -225,7 +225,7 @@ class RTDETRDecoder(nn.Module):
self.num_queries = nq
self.num_decoder_layers = ndl
- # backbone feature projection
+ # Backbone feature projection
self.input_proj = nn.ModuleList(nn.Sequential(nn.Conv2d(x, hd, 1, bias=False), nn.BatchNorm2d(hd)) for x in ch)
# NOTE: simplified version but it's not consistent with .pt weights.
# self.input_proj = nn.ModuleList(Conv(x, hd, act=False) for x in ch)
@@ -234,24 +234,24 @@ class RTDETRDecoder(nn.Module):
decoder_layer = DeformableTransformerDecoderLayer(hd, nh, d_ffn, dropout, act, self.nl, ndp)
self.decoder = DeformableTransformerDecoder(hd, decoder_layer, ndl, eval_idx)
- # denoising part
+ # Denoising part
self.denoising_class_embed = nn.Embedding(nc, hd)
self.num_denoising = nd
self.label_noise_ratio = label_noise_ratio
self.box_noise_scale = box_noise_scale
- # decoder embedding
+ # Decoder embedding
self.learnt_init_query = learnt_init_query
if learnt_init_query:
self.tgt_embed = nn.Embedding(nq, hd)
self.query_pos_head = MLP(4, 2 * hd, hd, num_layers=2)
- # encoder head
+ # Encoder head
self.enc_output = nn.Sequential(nn.Linear(hd, hd), nn.LayerNorm(hd))
self.enc_score_head = nn.Linear(hd, nc)
self.enc_bbox_head = MLP(hd, hd, 4, num_layers=3)
- # decoder head
+ # Decoder head
self.dec_score_head = nn.ModuleList([nn.Linear(hd, nc) for _ in range(ndl)])
self.dec_bbox_head = nn.ModuleList([MLP(hd, hd, 4, num_layers=3) for _ in range(ndl)])
@@ -261,10 +261,10 @@ class RTDETRDecoder(nn.Module):
"""Runs the forward pass of the module, returning bounding box and classification scores for the input."""
from ultralytics.models.utils.ops import get_cdn_group
- # input projection and embedding
+ # Input projection and embedding
feats, shapes = self._get_encoder_input(x)
- # prepare denoising training
+ # Prepare denoising training
dn_embed, dn_bbox, attn_mask, dn_meta = \
get_cdn_group(batch,
self.nc,
@@ -278,7 +278,7 @@ class RTDETRDecoder(nn.Module):
embed, refer_bbox, enc_bboxes, enc_scores = \
self._get_decoder_input(feats, shapes, dn_embed, dn_bbox)
- # decoder
+ # Decoder
dec_bboxes, dec_scores = self.decoder(embed,
refer_bbox,
feats,
@@ -316,9 +316,9 @@ class RTDETRDecoder(nn.Module):
def _get_encoder_input(self, x):
"""Processes and returns encoder inputs by getting projection features from input and concatenating them."""
- # get projection features
+ # Get projection features
x = [self.input_proj[i](feat) for i, feat in enumerate(x)]
- # get encoder inputs
+ # Get encoder inputs
feats = []
shapes = []
for feat in x:
@@ -335,13 +335,13 @@ class RTDETRDecoder(nn.Module):
def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None):
"""Generates and prepares the input required for the decoder from the provided features and shapes."""
bs = len(feats)
- # prepare input for decoder
+ # Prepare input for decoder
anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
features = self.enc_output(valid_mask * feats) # bs, h*w, 256
enc_outputs_scores = self.enc_score_head(features) # (bs, h*w, nc)
- # query selection
+ # Query selection
# (bs, num_queries)
topk_ind = torch.topk(enc_outputs_scores.max(-1).values, self.num_queries, dim=1).indices.view(-1)
# (bs, num_queries)
@@ -352,7 +352,7 @@ class RTDETRDecoder(nn.Module):
# (bs, num_queries, 4)
top_k_anchors = anchors[:, topk_ind].view(bs, self.num_queries, -1)
- # dynamic anchors + static content
+ # Dynamic anchors + static content
refer_bbox = self.enc_bbox_head(top_k_features) + top_k_anchors
enc_bboxes = refer_bbox.sigmoid()
@@ -373,7 +373,7 @@ class RTDETRDecoder(nn.Module):
# TODO
def _reset_parameters(self):
"""Initializes or resets the parameters of the model's various components with predefined weights and biases."""
- # class and bbox head init
+ # Class and bbox head init
bias_cls = bias_init_with_prob(0.01) / 80 * self.nc
# NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets.
# linear_init_(self.enc_score_head)
diff --git a/ultralytics/nn/modules/transformer.py b/ultralytics/nn/modules/transformer.py
index 4b7c0868..493ca3c6 100644
--- a/ultralytics/nn/modules/transformer.py
+++ b/ultralytics/nn/modules/transformer.py
@@ -81,7 +81,7 @@ class AIFI(TransformerEncoderLayer):
"""Forward pass for the AIFI transformer layer."""
c, h, w = x.shape[1:]
pos_embed = self.build_2d_sincos_position_embedding(w, h, c)
- # flatten [B, C, H, W] to [B, HxW, C]
+ # Flatten [B, C, H, W] to [B, HxW, C]
x = super().forward(x.flatten(2).permute(0, 2, 1), pos=pos_embed.to(device=x.device, dtype=x.dtype))
return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous()
@@ -213,7 +213,7 @@ class MSDeformAttn(nn.Module):
if d_model % n_heads != 0:
raise ValueError(f'd_model must be divisible by n_heads, but got {d_model} and {n_heads}')
_d_per_head = d_model // n_heads
- # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation
+ # Better to set _d_per_head to a power of 2, which is more efficient in a CUDA implementation
assert _d_per_head * n_heads == d_model, '`d_model` must be divisible by `n_heads`'
self.im2col_step = 64
diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py
index 7633a73c..55aa51a3 100644
--- a/ultralytics/nn/tasks.py
+++ b/ultralytics/nn/tasks.py
@@ -277,7 +277,7 @@ class DetectionModel(BaseModel):
return torch.cat((x, y, wh, cls), dim)
def _clip_augmented(self, y):
- """Clip YOLOv5 augmented inference tails."""
+ """Clip YOLO augmented inference tails."""
nl = self.model[-1].nl # number of detection layers (P3-P5)
g = sum(4 ** x for x in range(nl)) # grid points
e = 1 # exclude layer count
@@ -491,7 +491,7 @@ class Ensemble(nn.ModuleList):
super().__init__()
def forward(self, x, augment=False, profile=False, visualize=False):
- """Function generates the YOLOv5 network's final layer."""
+ """Function generates the YOLO network's final layer."""
y = [module(x, augment, profile, visualize)[0] for module in self]
# y = torch.stack(y).max(0)[0] # max ensemble
# y = torch.stack(y).mean(0) # mean ensemble
diff --git a/ultralytics/utils/callbacks/mlflow.py b/ultralytics/utils/callbacks/mlflow.py
index efd580b3..3e8ddc33 100644
--- a/ultralytics/utils/callbacks/mlflow.py
+++ b/ultralytics/utils/callbacks/mlflow.py
@@ -38,7 +38,7 @@ def on_pretrain_routine_end(trainer):
if not active_run:
active_run = mlflow.start_run(experiment_id=experiment.experiment_id, run_name=run_name)
LOGGER.info(f'{prefix}Using run_id({active_run.info.run_id}) at {mlflow_location}')
- run.log_params(vars(trainer.model.args))
+ run.log_params(trainer.args)
except Exception as err:
LOGGER.error(f'{prefix}Failing init - {repr(err)}')
LOGGER.warning(f'{prefix}Continuing without Mlflow')
diff --git a/ultralytics/utils/callbacks/wb.py b/ultralytics/utils/callbacks/wb.py
index b901e3cc..32842a3d 100644
--- a/ultralytics/utils/callbacks/wb.py
+++ b/ultralytics/utils/callbacks/wb.py
@@ -8,7 +8,10 @@ try:
assert SETTINGS['wandb'] is True # verify integration is enabled
import wandb as wb
- assert hasattr(wb, '__version__')
+ assert hasattr(wb, '__version__') # verify package is not a directory
+
+ import numpy as np
+ import pandas as pd
_processed_plots = {}
@@ -16,6 +19,83 @@ except (ImportError, AssertionError):
wb = None
+def _custom_table(x, y, classes, title='Precision Recall Curve', x_axis_title='Recall', y_axis_title='Precision'):
+ """
+ Create and log a custom metric visualization to wandb.plot.pr_curve.
+
+ This function crafts a custom metric visualization that mimics the behavior of wandb's default precision-recall curve
+ while allowing for enhanced customization. The visual metric is useful for monitoring model performance across different classes.
+
+ Args:
+ x (List): Values for the x-axis; expected to have length N.
+ y (List): Corresponding values for the y-axis; also expected to have length N.
+ classes (List): Labels identifying the class of each point; length N.
+ title (str, optional): Title for the plot; defaults to 'Precision Recall Curve'.
+ x_axis_title (str, optional): Label for the x-axis; defaults to 'Recall'.
+ y_axis_title (str, optional): Label for the y-axis; defaults to 'Precision'.
+
+ Returns:
+ (wandb.Object): A wandb object suitable for logging, showcasing the crafted metric visualization.
+ """
+ df = pd.DataFrame({'class': classes, 'y': y, 'x': x}).round(3)
+ fields = {'x': 'x', 'y': 'y', 'class': 'class'}
+ string_fields = {'title': title, 'x-axis-title': x_axis_title, 'y-axis-title': y_axis_title}
+ return wb.plot_table('wandb/area-under-curve/v0',
+ wb.Table(dataframe=df),
+ fields=fields,
+ string_fields=string_fields)
+
+
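
A minimal sketch of calling `_custom_table` directly, assuming an active wandb run (project name and data are placeholders):

```python
import wandb as wb

wb.init(project='demo')  # hypothetical project
x = [0.0, 0.5, 1.0]
y = [1.0, 0.8, 0.2]
classes = ['mean', 'mean', 'mean']
wb.log({'curves/PR': _custom_table(x, y, classes)}, commit=False)
wb.finish()
```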
+def _plot_curve(x,
+ y,
+ names=None,
+ id='precision-recall',
+ title='Precision Recall Curve',
+ x_title='Recall',
+ y_title='Precision',
+ num_x=100,
+ only_mean=False):
+ """
+ Log a metric curve visualization.
+
+ This function generates a metric curve based on input data and logs the visualization to wandb.
+ The curve can represent aggregated data (mean) or individual class data, depending on the 'only_mean' flag.
+
+ Args:
+ x (np.ndarray): Data points for the x-axis with length N.
+ y (np.ndarray): Corresponding data points for the y-axis with shape CxN, where C represents the number of classes.
+ names (list, optional): Names of the classes corresponding to the y-axis data; length C. Defaults to an empty list.
+ id (str, optional): Unique identifier for the logged data in wandb. Defaults to 'precision-recall'.
+ title (str, optional): Title for the visualization plot. Defaults to 'Precision Recall Curve'.
+ x_title (str, optional): Label for the x-axis. Defaults to 'Recall'.
+ y_title (str, optional): Label for the y-axis. Defaults to 'Precision'.
+ num_x (int, optional): Number of interpolated data points for visualization. Defaults to 100.
+ only_mean (bool, optional): Flag to indicate if only the mean curve should be plotted. Defaults to False.
+
+ Note:
+ The function leverages the '_custom_table' function to generate the actual visualization.
+ """
+ # Create new x
+ if names is None:
+ names = []
+ x_new = np.linspace(x[0], x[-1], num_x).round(5)
+
+ # Create arrays for logging
+ x_log = x_new.tolist()
+ y_log = np.interp(x_new, x, np.mean(y, axis=0)).round(3).tolist()
+
+ if only_mean:
+ table = wb.Table(data=list(zip(x_log, y_log)), columns=[x_title, y_title])
+ wb.run.log({title: wb.plot.line(table, x_title, y_title, title=title)})
+ else:
+ classes = ['mean'] * len(x_log)
+ for i, yi in enumerate(y):
+ x_log.extend(x_new) # add new x
+ y_log.extend(np.interp(x_new, x, yi)) # interpolate y to new x
+ classes.extend([names[i]] * len(x_new)) # add class names
+ wb.log({id: _custom_table(x_log, y_log, classes, title, x_title, y_title)}, commit=False)
+
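
The resampling step in `_plot_curve` in isolation (pure NumPy, toy values, fewer points than the function's default `num_x=100`):

```python
import numpy as np

x = np.array([0.0, 0.5, 1.0])
y = np.array([[1.0, 0.8, 0.2],   # class 0 curve
              [0.9, 0.7, 0.1]])  # class 1 curve
x_new = np.linspace(x[0], x[-1], 5).round(5)
mean_curve = np.interp(x_new, x, np.mean(y, axis=0)).round(3)
print(x_new)       # [0.   0.25 0.5  0.75 1.  ]
print(mean_curve)  # [0.95 0.85 0.75 0.45 0.15]
```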
+
def _log_plots(plots, step):
"""Logs plots from the input dictionary if they haven't been logged already at the specified step."""
for name, params in plots.items():
@@ -55,6 +135,17 @@ def on_train_end(trainer):
if trainer.best.exists():
art.add_file(trainer.best)
wb.run.log_artifact(art, aliases=['best'])
+ for curve_name, curve_values in zip(trainer.validator.metrics.curves, trainer.validator.metrics.curves_results):
+ x, y, x_title, y_title = curve_values
+ _plot_curve(
+ x,
+ y,
+ names=list(trainer.validator.metrics.names.values()),
+ id=f'curves/{curve_name}',
+ title=curve_name,
+ x_title=x_title,
+ y_title=y_title,
+ )
wb.run.finish() # required or run continues on dashboard
diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py
index 184ce06e..a8c1608a 100644
--- a/ultralytics/utils/checks.py
+++ b/ultralytics/utils/checks.py
@@ -165,16 +165,16 @@ def check_version(current: str = '0.0.0',
Example:
```python
- # check if current version is exactly 22.04
+ # Check if current version is exactly 22.04
check_version(current='22.04', required='==22.04')
- # check if current version is greater than or equal to 22.04
+ # Check if current version is greater than or equal to 22.04
check_version(current='22.10', required='22.04') # assumes '>=' inequality if none passed
- # check if current version is less than or equal to 22.04
+ # Check if current version is less than or equal to 22.04
check_version(current='22.04', required='<=22.04')
- # check if current version is between 20.04 (inclusive) and 22.04 (exclusive)
+ # Check if current version is between 20.04 (inclusive) and 22.04 (exclusive)
check_version(current='21.10', required='>20.04,<22.04')
```
"""
diff --git a/ultralytics/utils/files.py b/ultralytics/utils/files.py
index 82e8ebfe..dee298c7 100644
--- a/ultralytics/utils/files.py
+++ b/ultralytics/utils/files.py
@@ -45,7 +45,7 @@ def spaces_in_path(path):
with ultralytics.utils.files import spaces_in_path
with spaces_in_path('/path/with spaces') as new_path:
- # your code here
+ # Your code here
```
"""
diff --git a/ultralytics/utils/instance.py b/ultralytics/utils/instance.py
index 3f57a094..7df1453d 100644
--- a/ultralytics/utils/instance.py
+++ b/ultralytics/utils/instance.py
@@ -219,7 +219,7 @@ class Instances:
self.normalized = normalized
if len(segments) > 0:
- # list[np.array(1000, 2)] * num_samples
+ # List[np.array(1000, 2)] * num_samples
segments = resample_segments(segments)
# (N, 1000, 2)
segments = np.stack(segments, axis=0)
diff --git a/ultralytics/utils/loss.py b/ultralytics/utils/loss.py
index f76960d6..916f91c5 100644
--- a/ultralytics/utils/loss.py
+++ b/ultralytics/utils/loss.py
@@ -176,13 +176,13 @@ class v8DetectionLoss:
imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w)
anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)
- # targets
+ # Targets
targets = torch.cat((batch['batch_idx'].view(-1, 1), batch['cls'].view(-1, 1), batch['bboxes']), 1)
targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy
mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)
- # pboxes
+ # Pboxes
pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4)
_, target_bboxes, target_scores, fg_mask, _ = self.assigner(
@@ -191,11 +191,11 @@ class v8DetectionLoss:
target_scores_sum = max(target_scores.sum(), 1)
- # cls loss
+ # Cls loss
# loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way
loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE
- # bbox loss
+ # Bbox loss
if fg_mask.sum():
target_bboxes /= stride_tensor
loss[0], loss[2] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores,
@@ -224,7 +224,7 @@ class v8SegmentationLoss(v8DetectionLoss):
pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
(self.reg_max * 4, self.nc), 1)
- # b, grids, ..
+ # B, grids, ..
pred_scores = pred_scores.permute(0, 2, 1).contiguous()
pred_distri = pred_distri.permute(0, 2, 1).contiguous()
pred_masks = pred_masks.permute(0, 2, 1).contiguous()
@@ -233,7 +233,7 @@ class v8SegmentationLoss(v8DetectionLoss):
imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w)
anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)
- # targets
+ # Targets
try:
batch_idx = batch['batch_idx'].view(-1, 1)
targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1)
@@ -247,7 +247,7 @@ class v8SegmentationLoss(v8DetectionLoss):
"correctly formatted 'segment' dataset using 'data=coco128-seg.yaml' "
'as an example.\nSee https://docs.ultralytics.com/tasks/segment/ for help.') from e
- # pboxes
+ # Pboxes
pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4)
_, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
@@ -256,15 +256,15 @@ class v8SegmentationLoss(v8DetectionLoss):
target_scores_sum = max(target_scores.sum(), 1)
- # cls loss
+ # Cls loss
# loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way
loss[2] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE
if fg_mask.sum():
- # bbox loss
+ # Bbox loss
loss[0], loss[3] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes / stride_tensor,
target_scores, target_scores_sum, fg_mask)
- # masks loss
+ # Masks loss
masks = batch['masks'].to(self.device).float()
if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample
masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0]
@@ -344,13 +344,13 @@ class v8SegmentationLoss(v8DetectionLoss):
_, _, mask_h, mask_w = proto.shape
loss = 0
- # normalize to 0-1
+ # Normalize to 0-1
target_bboxes_normalized = target_bboxes / imgsz[[1, 0, 1, 0]]
- # areas of target bboxes
+ # Areas of target bboxes
marea = xyxy2xywh(target_bboxes_normalized)[..., 2:].prod(2)
- # normalize to mask size
+ # Normalize to mask size
mxyxy = target_bboxes_normalized * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=proto.device)
for i, single_i in enumerate(zip(fg_mask, target_gt_idx, pred_masks, proto, mxyxy, marea, masks)):
@@ -393,7 +393,7 @@ class v8PoseLoss(v8DetectionLoss):
pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
(self.reg_max * 4, self.nc), 1)
- # b, grids, ..
+ # B, grids, ..
pred_scores = pred_scores.permute(0, 2, 1).contiguous()
pred_distri = pred_distri.permute(0, 2, 1).contiguous()
pred_kpts = pred_kpts.permute(0, 2, 1).contiguous()
@@ -402,7 +402,7 @@ class v8PoseLoss(v8DetectionLoss):
imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w)
anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)
- # targets
+ # Targets
batch_size = pred_scores.shape[0]
batch_idx = batch['batch_idx'].view(-1, 1)
targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1)
@@ -410,7 +410,7 @@ class v8PoseLoss(v8DetectionLoss):
gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy
mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)
- # pboxes
+ # Pboxes
pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4)
pred_kpts = self.kpts_decode(anchor_points, pred_kpts.view(batch_size, -1, *self.kpt_shape)) # (b, h*w, 17, 3)
@@ -420,11 +420,11 @@ class v8PoseLoss(v8DetectionLoss):
target_scores_sum = max(target_scores.sum(), 1)
- # cls loss
+ # Cls loss
# loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way
loss[3] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE
- # bbox loss
+ # Bbox loss
if fg_mask.sum():
target_bboxes /= stride_tensor
loss[0], loss[4] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores,
diff --git a/ultralytics/utils/metrics.py b/ultralytics/utils/metrics.py
index ad0168d5..16f8019f 100644
--- a/ultralytics/utils/metrics.py
+++ b/ultralytics/utils/metrics.py
@@ -36,7 +36,7 @@ def bbox_ioa(box1, box2, iou=False, eps=1e-7):
inter_area = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * \
(np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)).clip(0)
- # box2 area
+ # Box2 area
area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
if iou:
box1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
@@ -440,13 +440,18 @@ def ap_per_class(tp,
Returns:
(tuple): A tuple of twelve arrays, where:
- tp (np.ndarray): True positive counts for each class.
- fp (np.ndarray): False positive counts for each class.
- p (np.ndarray): Precision values at each confidence threshold.
- r (np.ndarray): Recall values at each confidence threshold.
- f1 (np.ndarray): F1-score values at each confidence threshold.
- ap (np.ndarray): Average precision for each class at different IoU thresholds.
- unique_classes (np.ndarray): An array of unique classes that have data.
+ tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
+ fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
+ p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,).
+ r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,).
+ f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,).
+ ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10).
+ unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,).
+ p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000).
+ r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000).
+ f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000).
+ x (np.ndarray): X-axis values for the curves. Shape: (1000,).
+ prec_values (np.ndarray): Precision values at mAP@0.5 for each class. Shape: (nc, 1000).
"""
# Sort by objectness
@@ -458,8 +463,10 @@ def ap_per_class(tp,
nc = unique_classes.shape[0] # number of classes, number of detections
# Create Precision-Recall curve and compute AP for each class
- px, py = np.linspace(0, 1, 1000), [] # for plotting
- ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
+ x, prec_values = np.linspace(0, 1, 1000), []
+
+ # Average precision, precision and recall curves
+ ap, p_curve, r_curve = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
for ci, c in enumerate(unique_classes):
i = pred_cls == c
n_l = nt[ci] # number of labels
@@ -473,33 +480,35 @@ def ap_per_class(tp,
# Recall
recall = tpc / (n_l + eps) # recall curve
- r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases
+ r_curve[ci] = np.interp(-x, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases
# Precision
precision = tpc / (tpc + fpc) # precision curve
- p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score
+ p_curve[ci] = np.interp(-x, -conf[i], precision[:, 0], left=1) # p at pr_score
# AP from recall-precision curve
for j in range(tp.shape[1]):
ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
if plot and j == 0:
- py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
+ prec_values.append(np.interp(x, mrec, mpre)) # precision at mAP@0.5
+
+ prec_values = np.array(prec_values) # (nc, 1000)
# Compute F1 (harmonic mean of precision and recall)
- f1 = 2 * p * r / (p + r + eps)
+ f1_curve = 2 * p_curve * r_curve / (p_curve + r_curve + eps)
names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data
names = dict(enumerate(names)) # to dict
if plot:
- plot_pr_curve(px, py, ap, save_dir / f'{prefix}PR_curve.png', names, on_plot=on_plot)
- plot_mc_curve(px, f1, save_dir / f'{prefix}F1_curve.png', names, ylabel='F1', on_plot=on_plot)
- plot_mc_curve(px, p, save_dir / f'{prefix}P_curve.png', names, ylabel='Precision', on_plot=on_plot)
- plot_mc_curve(px, r, save_dir / f'{prefix}R_curve.png', names, ylabel='Recall', on_plot=on_plot)
+ plot_pr_curve(x, prec_values, ap, save_dir / f'{prefix}PR_curve.png', names, on_plot=on_plot)
+ plot_mc_curve(x, f1_curve, save_dir / f'{prefix}F1_curve.png', names, ylabel='F1', on_plot=on_plot)
+ plot_mc_curve(x, p_curve, save_dir / f'{prefix}P_curve.png', names, ylabel='Precision', on_plot=on_plot)
+ plot_mc_curve(x, r_curve, save_dir / f'{prefix}R_curve.png', names, ylabel='Recall', on_plot=on_plot)
- i = smooth(f1.mean(0), 0.1).argmax() # max F1 index
- p, r, f1 = p[:, i], r[:, i], f1[:, i]
+ i = smooth(f1_curve.mean(0), 0.1).argmax() # max F1 index
+ p, r, f1 = p_curve[:, i], r_curve[:, i], f1_curve[:, i] # max-F1 precision, recall, F1 values
tp = (r * nt).round() # true positives
fp = (tp / (p + eps) - tp).round() # false positives
- return tp, fp, p, r, f1, ap, unique_classes.astype(int)
+ return tp, fp, p, r, f1, ap, unique_classes.astype(int), p_curve, r_curve, f1_curve, x, prec_values
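
The per-class `p`, `r`, `f1` values in the new return tuple are all taken at the single max-F1 confidence index `i` computed above; the selection in isolation (pure NumPy, toy curves, omitting the commit's `smooth()` box filter):

```python
import numpy as np

x = np.linspace(0, 1, 1000)              # confidence axis
f1_curve = np.stack([np.sin(np.pi * x),  # toy per-class F1 curves
                     np.sin(np.pi * x) ** 2])

i = f1_curve.mean(0).argmax()            # index of max mean F1
p_r_f1_at_i = f1_curve[:, i]             # per-class values at that single threshold
print(round(float(x[i]), 3), p_r_f1_at_i.round(3))  # ~0.5 [1. 1.]
```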
class Metric(SimpleClass):
@@ -645,7 +654,19 @@ class Metric(SimpleClass):
Updates the class attributes `self.p`, `self.r`, `self.f1`, `self.all_ap`, and `self.ap_class_index` based
on the values provided in the `results` tuple.
"""
- self.p, self.r, self.f1, self.all_ap, self.ap_class_index = results
+ (self.p, self.r, self.f1, self.all_ap, self.ap_class_index, self.p_curve, self.r_curve, self.f1_curve, self.px,
+ self.prec_values) = results
+
+ @property
+ def curves(self):
+ """Returns a list of curves for accessing specific metrics curves."""
+ return []
+
+ @property
+ def curves_results(self):
+ """Returns a list of curves for accessing specific metrics curves."""
+ return [[self.px, self.prec_values, 'Recall', 'Precision'], [self.px, self.f1_curve, 'Confidence', 'F1'],
+ [self.px, self.p_curve, 'Confidence', 'Precision'], [self.px, self.r_curve, 'Confidence', 'Recall']]
class DetMetrics(SimpleClass):
@@ -676,6 +697,8 @@ class DetMetrics(SimpleClass):
fitness: Computes the fitness score based on the computed detection metrics.
ap_class_index: Returns a list of class indices sorted by their average precision (AP) values.
results_dict: Returns a dictionary that maps detection metric keys to their computed values.
+ curves: Returns a list of curves for accessing specific metrics curves.
+ curves_results: Returns a list of curve results for plotting specific metrics curves.
"""
def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
@@ -686,6 +709,7 @@ class DetMetrics(SimpleClass):
self.names = names
self.box = Metric()
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
+ self.task = 'detect'
def process(self, tp, conf, pred_cls, target_cls):
"""Process predicted results for object detection and update metrics."""
@@ -733,6 +757,16 @@ class DetMetrics(SimpleClass):
"""Returns dictionary of computed performance metrics and statistics."""
return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))
+ @property
+ def curves(self):
+ """Returns a list of curves for accessing specific metrics curves."""
+ return ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
+
+ @property
+ def curves_results(self):
+ """Returns dictionary of computed performance metrics and statistics."""
+ return self.box.curves_results
+
class SegmentMetrics(SimpleClass):
"""
@@ -772,6 +806,7 @@ class SegmentMetrics(SimpleClass):
self.box = Metric()
self.seg = Metric()
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
+ self.task = 'segment'
def process(self, tp_b, tp_m, conf, pred_cls, target_cls):
"""
@@ -843,6 +878,18 @@ class SegmentMetrics(SimpleClass):
"""Returns results of object detection model for evaluation."""
return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))
+ @property
+ def curves(self):
+ """Returns a list of curves for accessing specific metrics curves."""
+ return [
+ 'Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)',
+ 'Precision-Recall(M)', 'F1-Confidence(M)', 'Precision-Confidence(M)', 'Recall-Confidence(M)']
+
+ @property
+ def curves_results(self):
+ """Returns dictionary of computed performance metrics and statistics."""
+ return self.box.curves_results + self.seg.curves_results
+
class PoseMetrics(SegmentMetrics):
"""
@@ -883,6 +930,7 @@ class PoseMetrics(SegmentMetrics):
self.box = Metric()
self.pose = Metric()
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
+ self.task = 'pose'
def process(self, tp_b, tp_p, conf, pred_cls, target_cls):
"""
@@ -944,6 +992,18 @@ class PoseMetrics(SegmentMetrics):
"""Computes classification metrics and speed using the `targets` and `pred` inputs."""
return self.pose.fitness() + self.box.fitness()
+ @property
+ def curves(self):
+ """Returns a list of curves for accessing specific metrics curves."""
+ return [
+ 'Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)',
+ 'Precision-Recall(P)', 'F1-Confidence(P)', 'Precision-Confidence(P)', 'Recall-Confidence(P)']
+
+ @property
+ def curves_results(self):
+ """Returns dictionary of computed performance metrics and statistics."""
+ return self.box.curves_results + self.pose.curves_results
+
class ClassifyMetrics(SimpleClass):
"""
@@ -968,6 +1028,7 @@ class ClassifyMetrics(SimpleClass):
self.top1 = 0
self.top5 = 0
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
+ self.task = 'classify'
def process(self, targets, pred):
"""Target classes and predicted classes."""
@@ -990,3 +1051,13 @@ class ClassifyMetrics(SimpleClass):
def keys(self):
"""Returns a list of keys for the results_dict property."""
return ['metrics/accuracy_top1', 'metrics/accuracy_top5']
+
+ @property
+ def curves(self):
+ """Returns a list of curves for accessing specific metrics curves."""
+ return []
+
+ @property
+ def curves_results(self):
+ """Returns a list of curves for accessing specific metrics curves."""
+ return []
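
The new paired `curves`/`curves_results` properties form the contract the wandb callback iterates in `on_train_end`; a self-contained sketch of that contract with a toy stand-in class:

```python
import numpy as np

class ToyMetrics:
    """Stand-in following the new curves/curves_results contract (illustrative only)."""

    def __init__(self):
        self.px = np.linspace(0, 1, 1000)
        self.prec_values = np.random.rand(3, 1000)  # 3 classes x 1000 points

    @property
    def curves(self):
        return ['Precision-Recall(B)']

    @property
    def curves_results(self):
        return [[self.px, self.prec_values, 'Recall', 'Precision']]

m = ToyMetrics()
for curve_name, (x, y, x_title, y_title) in zip(m.curves, m.curves_results):
    print(curve_name, x.shape, y.shape, x_title, y_title)
```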
diff --git a/ultralytics/utils/tal.py b/ultralytics/utils/tal.py
index 432e7a70..1d10b7aa 100644
--- a/ultralytics/utils/tal.py
+++ b/ultralytics/utils/tal.py
@@ -193,7 +193,7 @@ class TaskAlignedAssigner(nn.Module):
# Expand topk_idxs for each value of k and add 1 at the specified positions
count_tensor.scatter_add_(-1, topk_idxs[:, :, k:k + 1], ones)
# count_tensor.scatter_add_(-1, topk_idxs, torch.ones_like(topk_idxs, dtype=torch.int8, device=topk_idxs.device))
- # filter invalid bboxes
+ # Filter invalid bboxes
count_tensor.masked_fill_(count_tensor > 1, 0)
return count_tensor.to(metrics.dtype)
diff --git a/ultralytics/utils/tuner.py b/ultralytics/utils/tuner.py
index 5bccd6fe..a06f813d 100644
--- a/ultralytics/utils/tuner.py
+++ b/ultralytics/utils/tuner.py
@@ -45,6 +45,7 @@ def run_ray_tune(model,
try:
subprocess.run('pip install ray[tune]'.split(), check=True)
+ import ray
from ray import tune
from ray.air import RunConfig
from ray.air.integrations.wandb import WandbLoggerCallback
@@ -83,6 +84,10 @@ def run_ray_tune(model,
'mixup': tune.uniform(0.0, 1.0), # image mixup (probability)
'copy_paste': tune.uniform(0.0, 1.0)} # segment copy-paste (probability)
+ # Put the model in ray store
+ task = model.task
+ model_in_store = ray.put(model)
+
def _tune(config):
"""
Trains the YOLO model with the specified hyperparameters and additional arguments.
@@ -93,9 +98,10 @@ def run_ray_tune(model,
Returns:
None.
"""
- model.reset_callbacks()
+ model_to_train = ray.get(model_in_store) # get the model from ray store for tuning
+ model_to_train.reset_callbacks()
config.update(train_args)
- results = model.train(**config)
+ results = model_to_train.train(**config)
return results.results_dict
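
The `ray.put`/`ray.get` round-trip above keeps a single serialized model in the Ray object store and gives each trial its own deserialized copy; the pattern in isolation (assumes `ray` is installed; the payload is a stand-in for the YOLO model):

```python
import ray

ray.init()
payload = {'weights': list(range(10))}  # stand-in for the YOLO model
payload_ref = ray.put(payload)          # store once in the Ray object store

def tune_fn(config):
    obj = ray.get(payload_ref)          # fetch a fresh copy, as _tune() does
    return len(obj['weights']) + config['offset']

print(tune_fn({'offset': 0}))           # 10
ray.shutdown()
```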
# Get search space
@@ -104,7 +110,7 @@ def run_ray_tune(model,
LOGGER.warning('WARNING ⚠️ search space not provided, using default search space.')
# Get dataset
- data = train_args.get('data', TASK2DATA[model.task])
+ data = train_args.get('data', TASK2DATA[task])
space['data'] = data
if 'data' not in train_args:
LOGGER.warning(f'WARNING ⚠️ data not provided, using default "data={data}".')
@@ -114,7 +120,7 @@ def run_ray_tune(model,
# Define the ASHA scheduler for hyperparameter search
asha_scheduler = ASHAScheduler(time_attr='epoch',
- metric=TASK2METRIC[model.task],
+ metric=TASK2METRIC[task],
mode='max',
max_t=train_args.get('epochs') or DEFAULT_CFG_DICT['epochs'] or 100,
grace_period=grace_period,