From 12e3eef844b7b5e298647c5d9bf7e1cc41dcf8e0 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Fri, 13 Oct 2023 02:49:31 +0200 Subject: [PATCH] `ultralytics 8.0.197` save P, R, F1 curves to metrics (#5354) Signed-off-by: Glenn Jocher Co-authored-by: erminkev1 <83356055+erminkev1@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Andy <39454881+yermandy@users.noreply.github.com> --- .pre-commit-config.yaml | 9 +- docs/build_reference.py | 97 +++++---------- docs/reference/utils/callbacks/wb.md | 8 ++ .../yolov8_region_counter.py | 2 +- tests/test_cli.py | 4 +- tests/test_engine.py | 2 +- ultralytics/__init__.py | 2 +- ultralytics/data/augment.py | 4 +- ultralytics/data/converter.py | 25 ++-- ultralytics/data/dataset.py | 2 +- ultralytics/engine/trainer.py | 2 +- ultralytics/hub/session.py | 2 +- ultralytics/models/fastsam/prompt.py | 2 +- ultralytics/models/fastsam/utils.py | 10 +- ultralytics/models/rtdetr/val.py | 4 +- ultralytics/models/sam/modules/encoders.py | 6 +- .../models/sam/modules/tiny_encoder.py | 20 +-- ultralytics/models/sam/predict.py | 3 +- ultralytics/models/utils/loss.py | 4 +- ultralytics/models/utils/ops.py | 10 +- ultralytics/nn/modules/block.py | 2 +- ultralytics/nn/modules/head.py | 30 ++--- ultralytics/nn/modules/transformer.py | 4 +- ultralytics/nn/tasks.py | 4 +- ultralytics/utils/callbacks/mlflow.py | 2 +- ultralytics/utils/callbacks/wb.py | 93 +++++++++++++- ultralytics/utils/checks.py | 8 +- ultralytics/utils/files.py | 2 +- ultralytics/utils/instance.py | 2 +- ultralytics/utils/loss.py | 36 +++--- ultralytics/utils/metrics.py | 115 ++++++++++++++---- ultralytics/utils/tal.py | 2 +- ultralytics/utils/tuner.py | 14 ++- 33 files changed, 337 insertions(+), 195 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a004332b..a75167bc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,7 +11,7 @@ ci: repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: end-of-file-fixer - id: trailing-whitespace @@ -22,7 +22,7 @@ repos: - id: detect-private-key - repo: https://github.com/asottile/pyupgrade - rev: v3.14.0 + rev: v3.15.0 hooks: - id: pyupgrade name: Upgrade code @@ -77,3 +77,8 @@ repos: # rev: v1.5.0 # hooks: # - id: dead + +# - repo: https://github.com/ultralytics/pre-commit +# rev: bd60a414f80a53fb8f593d3bfed4701fc47e4b23 +# hooks: +# - id: capitalize-comments diff --git a/docs/build_reference.py b/docs/build_reference.py index a237ae06..e3adfe9b 100644 --- a/docs/build_reference.py +++ b/docs/build_reference.py @@ -6,7 +6,6 @@ reference section of *.md files composed of classes and functions, and also crea Note: Must be run from repository root directory. Do not run from docs directory. """ -import os import re from collections import defaultdict from pathlib import Path @@ -18,17 +17,9 @@ CODE_DIR = ROOT REFERENCE_DIR = ROOT.parent / 'docs/reference' -def extract_classes_and_functions(filepath: Path): - """ - Extracts class and function names from a given Python file. - - Args: - filepath (Path): The path to the Python file. - - Returns: - (tuple): A tuple containing lists of class and function names. 
- """ - content = Path(filepath).read_text() +def extract_classes_and_functions(filepath: Path) -> tuple: + """Extracts class and function names from a given Python file.""" + content = filepath.read_text() class_pattern = r'(?:^|\n)class\s(\w+)(?:\(|:)' func_pattern = r'(?:^|\n)def\s(\w+)\(' @@ -39,26 +30,17 @@ def extract_classes_and_functions(filepath: Path): def create_markdown(py_filepath: Path, module_path: str, classes: list, functions: list): - """ - Creates a Markdown file containing the API reference for the given Python module. - - Args: - py_filepath (Path): The path to the Python file. - module_path (str): The import path for the Python module. - classes (list): A list of class names within the module. - functions (list): A list of function names within the module. - """ + """Creates a Markdown file containing the API reference for the given Python module.""" md_filepath = py_filepath.with_suffix('.md') # Read existing content and keep header content between first two --- header_content = '' if md_filepath.exists(): - with open(md_filepath) as file: - existing_content = file.read() - header_parts = existing_content.split('---') - for part in header_parts: - if 'description:' in part or 'comments:' in part: - header_content += f'---{part}---\n\n' + existing_content = md_filepath.read_text() + header_parts = existing_content.split('---') + for part in header_parts: + if 'description:' in part or 'comments:' in part: + header_content += f'---{part}---\n\n' module_name = module_path.replace('.__init__', '') module_path = module_path.replace('.', '/') @@ -74,43 +56,24 @@ def create_markdown(py_filepath: Path, module_path: str, classes: list, function if not md_content.endswith('\n'): md_content += '\n' - os.makedirs(os.path.dirname(md_filepath), exist_ok=True) - with open(md_filepath, 'w') as file: - file.write(md_content) + md_filepath.parent.mkdir(parents=True, exist_ok=True) + md_filepath.write_text(md_content) return md_filepath.relative_to(NEW_YAML_DIR) -def nested_dict(): - """ - Creates and returns a nested defaultdict. - - Returns: - (defaultdict): A nested defaultdict object. - """ +def nested_dict() -> defaultdict: + """Creates and returns a nested defaultdict.""" return defaultdict(nested_dict) -def sort_nested_dict(d: dict): - """ - Sorts a nested dictionary recursively. - - Args: - d (dict): The dictionary to sort. - - Returns: - (dict): The sorted dictionary. - """ +def sort_nested_dict(d: dict) -> dict: + """Sorts a nested dictionary recursively.""" return {key: sort_nested_dict(value) if isinstance(value, dict) else value for key, value in sorted(d.items())} def create_nav_menu_yaml(nav_items: list): - """ - Creates a YAML file for the navigation menu based on the provided list of items. - - Args: - nav_items (list): A list of relative file paths to Markdown files for the navigation menu. 
- """ + """Creates a YAML file for the navigation menu based on the provided list of items.""" nav_tree = nested_dict() for item_str in nav_items: @@ -136,26 +99,26 @@ def create_nav_menu_yaml(nav_items: list): yaml_str += f"{indent}- {k}: {str(v).replace('docs/', '')}\n" return yaml_str - with open(NEW_YAML_DIR / 'nav_menu_updated.yml', 'w') as file: - yaml_str = _dict_to_yaml(nav_tree_sorted) - file.write(yaml_str) + # Print updated YAML reference section + print('Scan complete, new mkdocs.yaml reference section is:\n\n', _dict_to_yaml(nav_tree_sorted)) + + # Save new YAML reference section + # (NEW_YAML_DIR / 'nav_menu_updated.yml').write_text(_dict_to_yaml(nav_tree_sorted)) def main(): """Main function to extract class and function names, create Markdown files, and generate a YAML navigation menu.""" nav_items = [] - for root, _, files in os.walk(CODE_DIR): - for file in files: - if file.endswith('.py'): - py_filepath = Path(root) / file - classes, functions = extract_classes_and_functions(py_filepath) - if classes or functions: - py_filepath_rel = py_filepath.relative_to(CODE_DIR) - md_filepath = REFERENCE_DIR / py_filepath_rel - module_path = f"ultralytics.{py_filepath_rel.with_suffix('').as_posix().replace('/', '.')}" - md_rel_filepath = create_markdown(md_filepath, module_path, classes, functions) - nav_items.append(str(md_rel_filepath)) + for py_filepath in CODE_DIR.rglob('*.py'): + classes, functions = extract_classes_and_functions(py_filepath) + + if classes or functions: + py_filepath_rel = py_filepath.relative_to(CODE_DIR) + md_filepath = REFERENCE_DIR / py_filepath_rel + module_path = f"ultralytics.{py_filepath_rel.with_suffix('').as_posix().replace('/', '.')}" + md_rel_filepath = create_markdown(md_filepath, module_path, classes, functions) + nav_items.append(str(md_rel_filepath)) create_nav_menu_yaml(nav_items) diff --git a/docs/reference/utils/callbacks/wb.md b/docs/reference/utils/callbacks/wb.md index a5869a5e..74d1ef80 100644 --- a/docs/reference/utils/callbacks/wb.md +++ b/docs/reference/utils/callbacks/wb.md @@ -9,6 +9,14 @@ keywords: Ultralytics, callbacks, _log_plots, on_fit_epoch_end, on_train_end Full source code for this file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/wb.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/wb.py). Help us fix any issues you see by submitting a [Pull Request](https://docs.ultralytics.com/help/contributing/) 🛠️. Thank you 🙏! +--- +## ::: ultralytics.utils.callbacks.wb._custom_table +
+<br><br>
+ +--- +## ::: ultralytics.utils.callbacks.wb._plot_curve +
+<br><br>
+ --- ## ::: ultralytics.utils.callbacks.wb._log_plots
 <br><br>
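
For context, the first two entries above document new helper functions introduced by this PR. A minimal usage sketch of `_custom_table` (illustrative only: it assumes the W&B integration is enabled and a run is active; the points and class labels below are made up):

```python
import wandb as wb

from ultralytics.utils.callbacks.wb import _custom_table

wb.init(project='demo')  # an active run is required before logging
x = [0.0, 0.5, 1.0, 0.0, 0.5, 1.0]  # x-axis (e.g. recall) values, length N
y = [1.0, 0.8, 0.2, 0.9, 0.7, 0.1]  # y-axis (e.g. precision) values, length N
classes = ['person'] * 3 + ['car'] * 3  # class label per point, length N
wb.log({'curves/PR': _custom_table(x, y, classes)})  # log the area-under-curve table
wb.run.finish()
```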
diff --git a/examples/YOLOv8-Region-Counter/yolov8_region_counter.py b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py index dd0e476f..ada269ed 100644 --- a/examples/YOLOv8-Region-Counter/yolov8_region_counter.py +++ b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py @@ -11,7 +11,7 @@ from ultralytics import YOLO from ultralytics.utils.files import increment_path from ultralytics.utils.plotting import Annotator, colors -track_history = defaultdict(lambda: []) +track_history = defaultdict(list) current_region = None counting_regions = [ diff --git a/tests/test_cli.py b/tests/test_cli.py index 788651b4..7a3fd992 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -97,8 +97,8 @@ def test_fastsam(task='segment', model=WEIGHTS_DIR / 'FastSAM-s.pt', data='coco8 ann = prompt_process.text_prompt(text='a photo of a dog') # Point prompt - # points default [[0,0]] [[x1,y1],[x2,y2]] - # point_label default [0] [1,0] 0:background, 1:foreground + # Points default [[0,0]] [[x1,y1],[x2,y2]] + # Point_label default [0] [1,0] 0:background, 1:foreground ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1]) prompt_process.plot(annotations=ann, output='./') diff --git a/tests/test_engine.py b/tests/test_engine.py index 2d789583..ce328efc 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -70,7 +70,7 @@ def test_segment(): CFG.imgsz = 32 # YOLO(CFG_SEG).train(**overrides) # works - # trainer + # Trainer trainer = segment.SegmentationTrainer(overrides=overrides) trainer.add_callback('on_train_start', test_func) assert test_func in trainer.callbacks['on_train_start'], 'callback test failed' diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index e3948bd4..da166d88 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = '8.0.196' +__version__ = '8.0.197' from ultralytics.models import RTDETR, SAM, YOLO from ultralytics.models.fastsam import FastSAM diff --git a/ultralytics/data/augment.py b/ultralytics/data/augment.py index 7fb32477..f500aa03 100644 --- a/ultralytics/data/augment.py +++ b/ultralytics/data/augment.py @@ -491,7 +491,7 @@ class RandomPerspective: border = labels.pop('mosaic_border', self.border) self.size = img.shape[1] + border[1] * 2, img.shape[0] + border[0] * 2 # w, h # M is affine matrix - # scale for func:`box_candidates` + # Scale for func:`box_candidates` img, M, scale = self.affine_transform(img, border) bboxes = self.apply_bboxes(instances.bboxes, M) @@ -894,7 +894,7 @@ class Format: return labels def _format_img(self, img): - """Format the image for YOLOv5 from Numpy array to PyTorch tensor.""" + """Format the image for YOLO from Numpy array to PyTorch tensor.""" if len(img.shape) < 3: img = np.expand_dims(img, -1) img = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1]) diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py index 4451df17..27ceb578 100644 --- a/ultralytics/data/converter.py +++ b/ultralytics/data/converter.py @@ -1,14 +1,14 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license import json -import shutil from collections import defaultdict from pathlib import Path import cv2 import numpy as np -from ultralytics.utils import TQDM +from ultralytics.utils import LOGGER, TQDM +from ultralytics.utils.files import increment_path def coco91_to_coco80_class(): @@ -48,12 +48,12 @@ def coco80_to_coco91_class(): # def convert_coco(labels_dir='../coco/annotations/', - save_dir='.', + save_dir='coco_converted/', use_segments=False, 
use_keypoints=False, cls91to80=True): """ - Converts COCO dataset annotations to a format suitable for training YOLOv5 models. + Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models. Args: labels_dir (str, optional): Path to directory containing COCO dataset annotation files. @@ -74,9 +74,7 @@ def convert_coco(labels_dir='../coco/annotations/', """ # Create dataset directory - save_dir = Path(save_dir) - if save_dir.exists(): - shutil.rmtree(save_dir) # delete dir + save_dir = increment_path(save_dir) # increment if save directory already exists for p in save_dir / 'labels', save_dir / 'images': p.mkdir(parents=True, exist_ok=True) # make dir @@ -147,6 +145,8 @@ def convert_coco(labels_dir='../coco/annotations/', if use_segments and len(segments[i]) > 0 else bboxes[i]), # cls, box or segments file.write(('%g ' * len(line)).rstrip() % line + '\n') + LOGGER.info(f'COCO data converted successfully.\nResults saved to {save_dir.resolve()}') + def convert_dota_to_yolo_obb(dota_root_path: str): """ @@ -271,26 +271,25 @@ def merge_multi_segment(segments): segments = [np.array(i).reshape(-1, 2) for i in segments] idx_list = [[] for _ in range(len(segments))] - # record the indexes with min distance between each segment + # Record the indexes with min distance between each segment for i in range(1, len(segments)): idx1, idx2 = min_index(segments[i - 1], segments[i]) idx_list[i - 1].append(idx1) idx_list[i].append(idx2) - # use two round to connect all the segments + # Use two round to connect all the segments for k in range(2): - # forward connection + # Forward connection if k == 0: for i, idx in enumerate(idx_list): - # middle segments have two indexes - # reverse the index of middle segments + # Middle segments have two indexes, reverse the index of middle segments if len(idx) == 2 and idx[0] > idx[1]: idx = idx[::-1] segments[i] = segments[i][::-1, :] segments[i] = np.roll(segments[i], -idx[0], axis=0) segments[i] = np.concatenate([segments[i], segments[i][:1]]) - # deal with the first segment and the last one + # Deal with the first segment and the last one if i in [0, len(idx_list) - 1]: s.append(segments[i]) else: diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py index f0f431ea..068311ef 100644 --- a/ultralytics/data/dataset.py +++ b/ultralytics/data/dataset.py @@ -162,7 +162,7 @@ class YOLODataset(BaseDataset): def update_labels_info(self, label): """Custom your label format here.""" # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label - # we can make it also support classification and semantic segmentation by add or remove some dict keys there. + # We can make it also support classification and semantic segmentation by add or remove some dict keys there. 
bboxes = label.pop('bboxes') segments = label.pop('segments') keypoints = label.pop('keypoints', None) diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py index 6905847a..9b953080 100644 --- a/ultralytics/engine/trainer.py +++ b/ultralytics/engine/trainer.py @@ -526,7 +526,7 @@ class BaseTrainer: # TODO: may need to put these following functions into callback def plot_training_samples(self, batch, ni): - """Plots training samples during YOLOv5 training.""" + """Plots training samples during YOLO training.""" pass def plot_training_labels(self): diff --git a/ultralytics/hub/session.py b/ultralytics/hub/session.py index 9870ca16..d2fd89a9 100644 --- a/ultralytics/hub/session.py +++ b/ultralytics/hub/session.py @@ -23,7 +23,7 @@ class HUBTrainingSession: Attributes: agent_id (str): Identifier for the instance communicating with the server. - model_id (str): Identifier for the YOLOv5 model being trained. + model_id (str): Identifier for the YOLO model being trained. model_url (str): URL for the model in Ultralytics HUB. api_url (str): API URL for the model in Ultralytics HUB. auth_header (dict): Authentication header for the Ultralytics HUB API requests. diff --git a/ultralytics/models/fastsam/prompt.py b/ultralytics/models/fastsam/prompt.py index d381075f..72ea5ef8 100644 --- a/ultralytics/models/fastsam/prompt.py +++ b/ultralytics/models/fastsam/prompt.py @@ -120,7 +120,7 @@ class FastSAMPrompt: result_name = os.path.basename(ann.path) image = ann.orig_img[..., ::-1] # BGR to RGB original_h, original_w = ann.orig_shape - # for macOS only + # For macOS only # plt.switch_backend('TkAgg') plt.figure(figsize=(original_w / 100, original_h / 100)) # Add subplot with no margin. diff --git a/ultralytics/models/fastsam/utils.py b/ultralytics/models/fastsam/utils.py index e99fd623..480e9039 100644 --- a/ultralytics/models/fastsam/utils.py +++ b/ultralytics/models/fastsam/utils.py @@ -42,23 +42,23 @@ def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=Fals high_iou_indices (torch.Tensor): Indices of boxes with IoU > thres """ boxes = adjust_bboxes_to_image_border(boxes, image_shape) - # obtain coordinates for intersections + # Obtain coordinates for intersections x1 = torch.max(box1[0], boxes[:, 0]) y1 = torch.max(box1[1], boxes[:, 1]) x2 = torch.min(box1[2], boxes[:, 2]) y2 = torch.min(box1[3], boxes[:, 3]) - # compute the area of intersection + # Compute the area of intersection intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0) - # compute the area of both individual boxes + # Compute the area of both individual boxes box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1]) box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) - # compute the area of union + # Compute the area of union union = box1_area + box2_area - intersection - # compute the IoU + # Compute the IoU iou = intersection / union # Should be shape (n, ) if raw_output: return 0 if iou.numel() == 0 else iou diff --git a/ultralytics/models/rtdetr/val.py b/ultralytics/models/rtdetr/val.py index a6af67ab..468026be 100644 --- a/ultralytics/models/rtdetr/val.py +++ b/ultralytics/models/rtdetr/val.py @@ -99,10 +99,10 @@ class RTDETRValidator(DetectionValidator): for i, bbox in enumerate(bboxes): # (300, 4) bbox = ops.xywh2xyxy(bbox) score, cls = scores[i].max(-1) # (300, ) - # Do not need threshold for evaluation as only got 300 boxes here. 
+ # Do not need threshold for evaluation as only got 300 boxes here # idx = score > self.args.conf pred = torch.cat([bbox, score[..., None], cls[..., None]], dim=-1) # filter - # sort by confidence to correctly get internal metrics. + # Sort by confidence to correctly get internal metrics pred = pred[score.argsort(descending=True)] outputs[i] = pred # [idx] diff --git a/ultralytics/models/sam/modules/encoders.py b/ultralytics/models/sam/modules/encoders.py index 67b35970..30e6014e 100644 --- a/ultralytics/models/sam/modules/encoders.py +++ b/ultralytics/models/sam/modules/encoders.py @@ -304,11 +304,11 @@ class PositionEmbeddingRandom(nn.Module): def _pe_encoding(self, coords: torch.Tensor) -> torch.Tensor: """Positionally encode points that are normalized to [0,1].""" - # assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape + # Assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape coords = 2 * coords - 1 coords = coords @ self.positional_encoding_gaussian_matrix coords = 2 * np.pi * coords - # outputs d_1 x ... x d_n x C shape + # Outputs d_1 x ... x d_n x C shape return torch.cat([torch.sin(coords), torch.cos(coords)], dim=-1) def forward(self, size: Tuple[int, int]) -> torch.Tensor: @@ -429,7 +429,7 @@ class Attention(nn.Module): self.use_rel_pos = use_rel_pos if self.use_rel_pos: assert (input_size is not None), 'Input size must be provided if using relative positional encoding.' - # initialize relative positional embeddings + # Initialize relative positional embeddings self.rel_pos_h = nn.Parameter(torch.zeros(2 * input_size[0] - 1, head_dim)) self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim)) diff --git a/ultralytics/models/sam/modules/tiny_encoder.py b/ultralytics/models/sam/modules/tiny_encoder.py index 595286fe..9955a261 100644 --- a/ultralytics/models/sam/modules/tiny_encoder.py +++ b/ultralytics/models/sam/modules/tiny_encoder.py @@ -172,7 +172,7 @@ class ConvLayer(nn.Module): self.depth = depth self.use_checkpoint = use_checkpoint - # build blocks + # Build blocks self.blocks = nn.ModuleList([ MBConv( dim, @@ -182,7 +182,7 @@ class ConvLayer(nn.Module): drop_path[i] if isinstance(drop_path, list) else drop_path, ) for i in range(depth)]) - # patch merging layer + # Patch merging layer self.downsample = None if downsample is None else downsample( input_resolution, dim=dim, out_dim=out_dim, activation=activation) @@ -393,11 +393,11 @@ class TinyViTBlock(nn.Module): pH, pW = H + pad_b, W + pad_r nH = pH // self.window_size nW = pW // self.window_size - # window partition + # Window partition x = x.view(B, nH, self.window_size, nW, self.window_size, C).transpose(2, 3).reshape(B * nH * nW, self.window_size * self.window_size, C) x = self.attn(x) - # window reverse + # Window reverse x = x.view(B, nH, nW, self.window_size, self.window_size, C).transpose(2, 3).reshape(B, pH, pW, C) if padding: @@ -467,7 +467,7 @@ class BasicLayer(nn.Module): self.depth = depth self.use_checkpoint = use_checkpoint - # build blocks + # Build blocks self.blocks = nn.ModuleList([ TinyViTBlock( dim=dim, @@ -481,7 +481,7 @@ class BasicLayer(nn.Module): activation=activation, ) for i in range(depth)]) - # patch merging layer + # Patch merging layer self.downsample = None if downsample is None else downsample( input_resolution, dim=dim, out_dim=out_dim, activation=activation) @@ -593,10 +593,10 @@ class TinyViT(nn.Module): patches_resolution = self.patch_embed.patches_resolution self.patches_resolution = patches_resolution - # stochastic depth 
+ # Stochastic depth dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule - # build layers + # Build layers self.layers = nn.ModuleList() for i_layer in range(self.num_layers): kwargs = dict( @@ -628,7 +628,7 @@ class TinyViT(nn.Module): self.norm_head = nn.LayerNorm(embed_dims[-1]) self.head = nn.Linear(embed_dims[-1], num_classes) if num_classes > 0 else torch.nn.Identity() - # init weights + # Init weights self.apply(self._init_weights) self.set_layer_lr_decay(layer_lr_decay) self.neck = nn.Sequential( @@ -653,7 +653,7 @@ class TinyViT(nn.Module): """Sets the learning rate decay for each layer in the TinyViT model.""" decay_rate = layer_lr_decay - # layers -> blocks (depth) + # Layers -> blocks (depth) depth = sum(self.depths) lr_scales = [decay_rate ** (depth - i - 1) for i in range(depth)] diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py index 1a3b8dfb..26a49b54 100644 --- a/ultralytics/models/sam/predict.py +++ b/ultralytics/models/sam/predict.py @@ -414,8 +414,7 @@ class Predictor(BasePredictor): unchanged = unchanged and not changed new_masks.append(torch.as_tensor(mask).unsqueeze(0)) - # Give score=0 to changed masks and score=1 to unchanged masks - # so NMS will prefer ones that didn't need postprocessing + # Give score=0 to changed masks and 1 to unchanged masks so NMS prefers masks not needing postprocessing scores.append(float(unchanged)) # Recalculate boxes and remove any new duplicates diff --git a/ultralytics/models/utils/loss.py b/ultralytics/models/utils/loss.py index a61d20a9..abb54958 100644 --- a/ultralytics/models/utils/loss.py +++ b/ultralytics/models/utils/loss.py @@ -66,7 +66,7 @@ class DETRLoss(nn.Module): def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=''): """Computes the classification loss based on predictions, target values, and ground truth scores.""" - # logits: [b, query, num_classes], gt_class: list[[n, 1]] + # Logits: [b, query, num_classes], gt_class: list[[n, 1]] name_class = f'loss_class{postfix}' bs, nq = pred_scores.shape[:2] # one_hot = F.one_hot(targets, self.nc + 1)[..., :-1] # (bs, num_queries, num_classes) @@ -90,7 +90,7 @@ class DETRLoss(nn.Module): """Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding boxes. 
""" - # boxes: [b, query, 4], gt_bbox: list[[n, 4]] + # Boxes: [b, query, 4], gt_bbox: list[[n, 4]] name_bbox = f'loss_bbox{postfix}' name_giou = f'loss_giou{postfix}' diff --git a/ultralytics/models/utils/ops.py b/ultralytics/models/utils/ops.py index 99357d19..902756db 100644 --- a/ultralytics/models/utils/ops.py +++ b/ultralytics/models/utils/ops.py @@ -188,7 +188,7 @@ def get_cdn_group(batch, num_group = num_dn // max_nums num_group = 1 if num_group == 0 else num_group - # pad gt to max_num of a batch + # Pad gt to max_num of a batch bs = len(gt_groups) gt_cls = batch['cls'] # (bs*num, ) gt_bbox = batch['bboxes'] # bs*num, 4 @@ -204,10 +204,10 @@ def get_cdn_group(batch, neg_idx = torch.arange(total_num * num_group, dtype=torch.long, device=gt_bbox.device) + num_group * total_num if cls_noise_ratio > 0: - # half of bbox prob + # Half of bbox prob mask = torch.rand(dn_cls.shape) < (cls_noise_ratio * 0.5) idx = torch.nonzero(mask).squeeze(-1) - # randomly put a new one here + # Randomly put a new one here new_label = torch.randint_like(idx, 0, num_classes, dtype=dn_cls.dtype, device=dn_cls.device) dn_cls[idx] = new_label @@ -240,9 +240,9 @@ def get_cdn_group(batch, tgt_size = num_dn + num_queries attn_mask = torch.zeros([tgt_size, tgt_size], dtype=torch.bool) - # match query cannot see the reconstruct + # Match query cannot see the reconstruct attn_mask[num_dn:, :num_dn] = True - # reconstruct cannot see each other + # Reconstruct cannot see each other for i in range(num_group): if i == 0: attn_mask[max_nums * 2 * i:max_nums * 2 * (i + 1), max_nums * 2 * (i + 1):num_dn] = True diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py index 9a65395f..778dcec3 100644 --- a/ultralytics/nn/modules/block.py +++ b/ultralytics/nn/modules/block.py @@ -307,7 +307,7 @@ class Bottleneck(nn.Module): self.add = shortcut and c1 == c2 def forward(self, x): - """'forward()' applies the YOLOv5 FPN to input data.""" + """'forward()' applies the YOLO FPN to input data.""" return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py index 5ac4e73c..95c6457f 100644 --- a/ultralytics/nn/modules/head.py +++ b/ultralytics/nn/modules/head.py @@ -192,7 +192,7 @@ class RTDETRDecoder(nn.Module): dropout=0., act=nn.ReLU(), eval_idx=-1, - # training args + # Training args nd=100, # num denoising label_noise_ratio=0.5, box_noise_scale=1.0, @@ -225,7 +225,7 @@ class RTDETRDecoder(nn.Module): self.num_queries = nq self.num_decoder_layers = ndl - # backbone feature projection + # Backbone feature projection self.input_proj = nn.ModuleList(nn.Sequential(nn.Conv2d(x, hd, 1, bias=False), nn.BatchNorm2d(hd)) for x in ch) # NOTE: simplified version but it's not consistent with .pt weights. 
# self.input_proj = nn.ModuleList(Conv(x, hd, act=False) for x in ch) @@ -234,24 +234,24 @@ class RTDETRDecoder(nn.Module): decoder_layer = DeformableTransformerDecoderLayer(hd, nh, d_ffn, dropout, act, self.nl, ndp) self.decoder = DeformableTransformerDecoder(hd, decoder_layer, ndl, eval_idx) - # denoising part + # Denoising part self.denoising_class_embed = nn.Embedding(nc, hd) self.num_denoising = nd self.label_noise_ratio = label_noise_ratio self.box_noise_scale = box_noise_scale - # decoder embedding + # Decoder embedding self.learnt_init_query = learnt_init_query if learnt_init_query: self.tgt_embed = nn.Embedding(nq, hd) self.query_pos_head = MLP(4, 2 * hd, hd, num_layers=2) - # encoder head + # Encoder head self.enc_output = nn.Sequential(nn.Linear(hd, hd), nn.LayerNorm(hd)) self.enc_score_head = nn.Linear(hd, nc) self.enc_bbox_head = MLP(hd, hd, 4, num_layers=3) - # decoder head + # Decoder head self.dec_score_head = nn.ModuleList([nn.Linear(hd, nc) for _ in range(ndl)]) self.dec_bbox_head = nn.ModuleList([MLP(hd, hd, 4, num_layers=3) for _ in range(ndl)]) @@ -261,10 +261,10 @@ class RTDETRDecoder(nn.Module): """Runs the forward pass of the module, returning bounding box and classification scores for the input.""" from ultralytics.models.utils.ops import get_cdn_group - # input projection and embedding + # Input projection and embedding feats, shapes = self._get_encoder_input(x) - # prepare denoising training + # Prepare denoising training dn_embed, dn_bbox, attn_mask, dn_meta = \ get_cdn_group(batch, self.nc, @@ -278,7 +278,7 @@ class RTDETRDecoder(nn.Module): embed, refer_bbox, enc_bboxes, enc_scores = \ self._get_decoder_input(feats, shapes, dn_embed, dn_bbox) - # decoder + # Decoder dec_bboxes, dec_scores = self.decoder(embed, refer_bbox, feats, @@ -316,9 +316,9 @@ class RTDETRDecoder(nn.Module): def _get_encoder_input(self, x): """Processes and returns encoder inputs by getting projection features from input and concatenating them.""" - # get projection features + # Get projection features x = [self.input_proj[i](feat) for i, feat in enumerate(x)] - # get encoder inputs + # Get encoder inputs feats = [] shapes = [] for feat in x: @@ -335,13 +335,13 @@ class RTDETRDecoder(nn.Module): def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None): """Generates and prepares the input required for the decoder from the provided features and shapes.""" bs = len(feats) - # prepare input for decoder + # Prepare input for decoder anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device) features = self.enc_output(valid_mask * feats) # bs, h*w, 256 enc_outputs_scores = self.enc_score_head(features) # (bs, h*w, nc) - # query selection + # Query selection # (bs, num_queries) topk_ind = torch.topk(enc_outputs_scores.max(-1).values, self.num_queries, dim=1).indices.view(-1) # (bs, num_queries) @@ -352,7 +352,7 @@ class RTDETRDecoder(nn.Module): # (bs, num_queries, 4) top_k_anchors = anchors[:, topk_ind].view(bs, self.num_queries, -1) - # dynamic anchors + static content + # Dynamic anchors + static content refer_bbox = self.enc_bbox_head(top_k_features) + top_k_anchors enc_bboxes = refer_bbox.sigmoid() @@ -373,7 +373,7 @@ class RTDETRDecoder(nn.Module): # TODO def _reset_parameters(self): """Initializes or resets the parameters of the model's various components with predefined weights and biases.""" - # class and bbox head init + # Class and bbox head init bias_cls = bias_init_with_prob(0.01) / 80 * self.nc # NOTE: the weight initialization in 
`linear_init_` would cause NaN when training with custom datasets. # linear_init_(self.enc_score_head) diff --git a/ultralytics/nn/modules/transformer.py b/ultralytics/nn/modules/transformer.py index 4b7c0868..493ca3c6 100644 --- a/ultralytics/nn/modules/transformer.py +++ b/ultralytics/nn/modules/transformer.py @@ -81,7 +81,7 @@ class AIFI(TransformerEncoderLayer): """Forward pass for the AIFI transformer layer.""" c, h, w = x.shape[1:] pos_embed = self.build_2d_sincos_position_embedding(w, h, c) - # flatten [B, C, H, W] to [B, HxW, C] + # Flatten [B, C, H, W] to [B, HxW, C] x = super().forward(x.flatten(2).permute(0, 2, 1), pos=pos_embed.to(device=x.device, dtype=x.dtype)) return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous() @@ -213,7 +213,7 @@ class MSDeformAttn(nn.Module): if d_model % n_heads != 0: raise ValueError(f'd_model must be divisible by n_heads, but got {d_model} and {n_heads}') _d_per_head = d_model // n_heads - # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + # Better to set _d_per_head to a power of 2 which is more efficient in a CUDA implementation assert _d_per_head * n_heads == d_model, '`d_model` must be divisible by `n_heads`' self.im2col_step = 64 diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py index 7633a73c..55aa51a3 100644 --- a/ultralytics/nn/tasks.py +++ b/ultralytics/nn/tasks.py @@ -277,7 +277,7 @@ class DetectionModel(BaseModel): return torch.cat((x, y, wh, cls), dim) def _clip_augmented(self, y): - """Clip YOLOv5 augmented inference tails.""" + """Clip YOLO augmented inference tails.""" nl = self.model[-1].nl # number of detection layers (P3-P5) g = sum(4 ** x for x in range(nl)) # grid points e = 1 # exclude layer count @@ -491,7 +491,7 @@ class Ensemble(nn.ModuleList): super().__init__() def forward(self, x, augment=False, profile=False, visualize=False): - """Function generates the YOLOv5 network's final layer.""" + """Function generates the YOLO network's final layer.""" y = [module(x, augment, profile, visualize)[0] for module in self] # y = torch.stack(y).max(0)[0] # max ensemble # y = torch.stack(y).mean(0) # mean ensemble diff --git a/ultralytics/utils/callbacks/mlflow.py b/ultralytics/utils/callbacks/mlflow.py index efd580b3..3e8ddc33 100644 --- a/ultralytics/utils/callbacks/mlflow.py +++ b/ultralytics/utils/callbacks/mlflow.py @@ -38,7 +38,7 @@ def on_pretrain_routine_end(trainer): if not active_run: active_run = mlflow.start_run(experiment_id=experiment.experiment_id, run_name=run_name) LOGGER.info(f'{prefix}Using run_id({active_run.info.run_id}) at {mlflow_location}') - run.log_params(vars(trainer.model.args)) + run.log_params(trainer.args) except Exception as err: LOGGER.error(f'{prefix}Failing init - {repr(err)}') LOGGER.warning(f'{prefix}Continuing without Mlflow') diff --git a/ultralytics/utils/callbacks/wb.py b/ultralytics/utils/callbacks/wb.py index b901e3cc..32842a3d 100644 --- a/ultralytics/utils/callbacks/wb.py +++ b/ultralytics/utils/callbacks/wb.py @@ -8,7 +8,10 @@ try: assert SETTINGS['wandb'] is True # verify integration is enabled import wandb as wb - assert hasattr(wb, '__version__') + assert hasattr(wb, '__version__') # verify package is not directory + + import numpy as np + import pandas as pd _processed_plots = {} @@ -16,6 +19,83 @@ except (ImportError, AssertionError): wb = None +def _custom_table(x, y, classes, title='Precision Recall Curve', x_axis_title='Recall', y_axis_title='Precision'): + """ + Create and log a custom metric visualization to 
wandb.plot.pr_curve.
+
+    This function crafts a custom metric visualization that mimics the behavior of wandb's default precision-recall curve
+    while allowing for enhanced customization. The resulting visualization is useful for monitoring model performance across different classes.
+
+    Args:
+        x (List): Values for the x-axis; expected to have length N.
+        y (List): Corresponding values for the y-axis; also expected to have length N.
+        classes (List): Labels identifying the class of each point; length N.
+        title (str, optional): Title for the plot; defaults to 'Precision Recall Curve'.
+        x_axis_title (str, optional): Label for the x-axis; defaults to 'Recall'.
+        y_axis_title (str, optional): Label for the y-axis; defaults to 'Precision'.
+
+    Returns:
+        (wandb.Object): A wandb object suitable for logging, showcasing the crafted metric visualization.
+    """
+    df = pd.DataFrame({'class': classes, 'y': y, 'x': x}).round(3)
+    fields = {'x': 'x', 'y': 'y', 'class': 'class'}
+    string_fields = {'title': title, 'x-axis-title': x_axis_title, 'y-axis-title': y_axis_title}
+    return wb.plot_table('wandb/area-under-curve/v0',
+                         wb.Table(dataframe=df),
+                         fields=fields,
+                         string_fields=string_fields)
+
+
+def _plot_curve(x,
+                y,
+                names=None,
+                id='precision-recall',
+                title='Precision Recall Curve',
+                x_title='Recall',
+                y_title='Precision',
+                num_x=100,
+                only_mean=False):
+    """
+    Log a metric curve visualization.
+
+    This function generates a metric curve based on input data and logs the visualization to wandb.
+    The curve can represent aggregated data (mean) or individual class data, depending on the 'only_mean' flag.
+
+    Args:
+        x (np.ndarray): Data points for the x-axis with length N.
+        y (np.ndarray): Corresponding data points for the y-axis with shape CxN, where C represents the number of classes.
+        names (list, optional): Names of the classes corresponding to the y-axis data; length C. Defaults to an empty list.
+        id (str, optional): Unique identifier for the logged data in wandb. Defaults to 'precision-recall'.
+        title (str, optional): Title for the visualization plot. Defaults to 'Precision Recall Curve'.
+        x_title (str, optional): Label for the x-axis. Defaults to 'Recall'.
+        y_title (str, optional): Label for the y-axis. Defaults to 'Precision'.
+        num_x (int, optional): Number of interpolated data points for visualization. Defaults to 100.
+        only_mean (bool, optional): Flag to indicate if only the mean curve should be plotted. Defaults to False.
+
+    Note:
+        The function leverages the '_custom_table' function to generate the actual visualization.
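+
+    Example:
+        >>> # Illustrative sketch only; assumes an active wandb run and hypothetical class names
+        >>> x = np.linspace(0, 1, 100)
+        >>> y = np.random.rand(3, 100)  # C=3 classes, N=100 points per curve
+        >>> _plot_curve(x, y, names=['a', 'b', 'c'], id='curves/demo', only_mean=False)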
+ """ + # Create new x + if names is None: + names = [] + x_new = np.linspace(x[0], x[-1], num_x).round(5) + + # Create arrays for logging + x_log = x_new.tolist() + y_log = np.interp(x_new, x, np.mean(y, axis=0)).round(3).tolist() + + if only_mean: + table = wb.Table(data=list(zip(x_log, y_log)), columns=[x_title, y_title]) + wb.run.log({title: wb.plot.line(table, x_title, y_title, title=title)}) + else: + classes = ['mean'] * len(x_log) + for i, yi in enumerate(y): + x_log.extend(x_new) # add new x + y_log.extend(np.interp(x_new, x, yi)) # interpolate y to new x + classes.extend([names[i]] * len(x_new)) # add class names + wb.log({id: _custom_table(x_log, y_log, classes, title, x_title, y_title)}, commit=False) + + def _log_plots(plots, step): """Logs plots from the input dictionary if they haven't been logged already at the specified step.""" for name, params in plots.items(): @@ -55,6 +135,17 @@ def on_train_end(trainer): if trainer.best.exists(): art.add_file(trainer.best) wb.run.log_artifact(art, aliases=['best']) + for curve_name, curve_values in zip(trainer.validator.metrics.curves, trainer.validator.metrics.curves_results): + x, y, x_title, y_title = curve_values + _plot_curve( + x, + y, + names=list(trainer.validator.metrics.names.values()), + id=f'curves/{curve_name}', + title=curve_name, + x_title=x_title, + y_title=y_title, + ) wb.run.finish() # required or run continues on dashboard diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py index 184ce06e..a8c1608a 100644 --- a/ultralytics/utils/checks.py +++ b/ultralytics/utils/checks.py @@ -165,16 +165,16 @@ def check_version(current: str = '0.0.0', Example: ```python - # check if current version is exactly 22.04 + # Check if current version is exactly 22.04 check_version(current='22.04', required='==22.04') - # check if current version is greater than or equal to 22.04 + # Check if current version is greater than or equal to 22.04 check_version(current='22.10', required='22.04') # assumes '>=' inequality if none passed - # check if current version is less than or equal to 22.04 + # Check if current version is less than or equal to 22.04 check_version(current='22.04', required='<=22.04') - # check if current version is between 20.04 (inclusive) and 22.04 (exclusive) + # Check if current version is between 20.04 (inclusive) and 22.04 (exclusive) check_version(current='21.10', required='>20.04,<22.04') ``` """ diff --git a/ultralytics/utils/files.py b/ultralytics/utils/files.py index 82e8ebfe..dee298c7 100644 --- a/ultralytics/utils/files.py +++ b/ultralytics/utils/files.py @@ -45,7 +45,7 @@ def spaces_in_path(path): with ultralytics.utils.files import spaces_in_path with spaces_in_path('/path/with spaces') as new_path: - # your code here + # Your code here ``` """ diff --git a/ultralytics/utils/instance.py b/ultralytics/utils/instance.py index 3f57a094..7df1453d 100644 --- a/ultralytics/utils/instance.py +++ b/ultralytics/utils/instance.py @@ -219,7 +219,7 @@ class Instances: self.normalized = normalized if len(segments) > 0: - # list[np.array(1000, 2)] * num_samples + # List[np.array(1000, 2)] * num_samples segments = resample_segments(segments) # (N, 1000, 2) segments = np.stack(segments, axis=0) diff --git a/ultralytics/utils/loss.py b/ultralytics/utils/loss.py index f76960d6..916f91c5 100644 --- a/ultralytics/utils/loss.py +++ b/ultralytics/utils/loss.py @@ -176,13 +176,13 @@ class v8DetectionLoss: imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) 
anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) - # targets + # Targets targets = torch.cat((batch['batch_idx'].view(-1, 1), batch['cls'].view(-1, 1), batch['bboxes']), 1) targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) - # pboxes + # Pboxes pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) _, target_bboxes, target_scores, fg_mask, _ = self.assigner( @@ -191,11 +191,11 @@ class v8DetectionLoss: target_scores_sum = max(target_scores.sum(), 1) - # cls loss + # Cls loss # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE - # bbox loss + # Bbox loss if fg_mask.sum(): target_bboxes /= stride_tensor loss[0], loss[2] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, @@ -224,7 +224,7 @@ class v8SegmentationLoss(v8DetectionLoss): pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( (self.reg_max * 4, self.nc), 1) - # b, grids, .. + # B, grids, .. pred_scores = pred_scores.permute(0, 2, 1).contiguous() pred_distri = pred_distri.permute(0, 2, 1).contiguous() pred_masks = pred_masks.permute(0, 2, 1).contiguous() @@ -233,7 +233,7 @@ class v8SegmentationLoss(v8DetectionLoss): imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) - # targets + # Targets try: batch_idx = batch['batch_idx'].view(-1, 1) targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1) @@ -247,7 +247,7 @@ class v8SegmentationLoss(v8DetectionLoss): "correctly formatted 'segment' dataset using 'data=coco128-seg.yaml' " 'as an example.\nSee https://docs.ultralytics.com/tasks/segment/ for help.') from e - # pboxes + # Pboxes pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner( @@ -256,15 +256,15 @@ class v8SegmentationLoss(v8DetectionLoss): target_scores_sum = max(target_scores.sum(), 1) - # cls loss + # Cls loss # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way loss[2] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE if fg_mask.sum(): - # bbox loss + # Bbox loss loss[0], loss[3] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes / stride_tensor, target_scores, target_scores_sum, fg_mask) - # masks loss + # Masks loss masks = batch['masks'].to(self.device).float() if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0] @@ -344,13 +344,13 @@ class v8SegmentationLoss(v8DetectionLoss): _, _, mask_h, mask_w = proto.shape loss = 0 - # normalize to 0-1 + # Normalize to 0-1 target_bboxes_normalized = target_bboxes / imgsz[[1, 0, 1, 0]] - # areas of target bboxes + # Areas of target bboxes marea = xyxy2xywh(target_bboxes_normalized)[..., 2:].prod(2) - # normalize to mask size + # Normalize to mask size mxyxy = target_bboxes_normalized * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=proto.device) for i, single_i in enumerate(zip(fg_mask, target_gt_idx, pred_masks, proto, mxyxy, marea, 
masks)): @@ -393,7 +393,7 @@ class v8PoseLoss(v8DetectionLoss): pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( (self.reg_max * 4, self.nc), 1) - # b, grids, .. + # B, grids, .. pred_scores = pred_scores.permute(0, 2, 1).contiguous() pred_distri = pred_distri.permute(0, 2, 1).contiguous() pred_kpts = pred_kpts.permute(0, 2, 1).contiguous() @@ -402,7 +402,7 @@ class v8PoseLoss(v8DetectionLoss): imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) - # targets + # Targets batch_size = pred_scores.shape[0] batch_idx = batch['batch_idx'].view(-1, 1) targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1) @@ -410,7 +410,7 @@ class v8PoseLoss(v8DetectionLoss): gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) - # pboxes + # Pboxes pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) pred_kpts = self.kpts_decode(anchor_points, pred_kpts.view(batch_size, -1, *self.kpt_shape)) # (b, h*w, 17, 3) @@ -420,11 +420,11 @@ class v8PoseLoss(v8DetectionLoss): target_scores_sum = max(target_scores.sum(), 1) - # cls loss + # Cls loss # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way loss[3] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE - # bbox loss + # Bbox loss if fg_mask.sum(): target_bboxes /= stride_tensor loss[0], loss[4] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, diff --git a/ultralytics/utils/metrics.py b/ultralytics/utils/metrics.py index ad0168d5..16f8019f 100644 --- a/ultralytics/utils/metrics.py +++ b/ultralytics/utils/metrics.py @@ -36,7 +36,7 @@ def bbox_ioa(box1, box2, iou=False, eps=1e-7): inter_area = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * \ (np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)).clip(0) - # box2 area + # Box2 area area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) if iou: box1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1) @@ -440,13 +440,18 @@ def ap_per_class(tp, Returns: (tuple): A tuple of six arrays and one array of unique classes, where: - tp (np.ndarray): True positive counts for each class. - fp (np.ndarray): False positive counts for each class. - p (np.ndarray): Precision values at each confidence threshold. - r (np.ndarray): Recall values at each confidence threshold. - f1 (np.ndarray): F1-score values at each confidence threshold. - ap (np.ndarray): Average precision for each class at different IoU thresholds. - unique_classes (np.ndarray): An array of unique classes that have data. + tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class.Shape: (nc,). + fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,). + p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,). + r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,). + f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,). + ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10). + unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,). 
+ p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000). + r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000). + f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000). + x (np.ndarray): X-axis values for the curves. Shape: (1000,). + prec_values: Precision values at mAP@0.5 for each class. Shape: (nc, 1000). """ # Sort by objectness @@ -458,8 +463,10 @@ def ap_per_class(tp, nc = unique_classes.shape[0] # number of classes, number of detections # Create Precision-Recall curve and compute AP for each class - px, py = np.linspace(0, 1, 1000), [] # for plotting - ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) + x, prec_values = np.linspace(0, 1, 1000), [] + + # Average precision, precision and recall curves + ap, p_curve, r_curve = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) for ci, c in enumerate(unique_classes): i = pred_cls == c n_l = nt[ci] # number of labels @@ -473,33 +480,35 @@ def ap_per_class(tp, # Recall recall = tpc / (n_l + eps) # recall curve - r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases + r_curve[ci] = np.interp(-x, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases # Precision precision = tpc / (tpc + fpc) # precision curve - p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score + p_curve[ci] = np.interp(-x, -conf[i], precision[:, 0], left=1) # p at pr_score # AP from recall-precision curve for j in range(tp.shape[1]): ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) if plot and j == 0: - py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 + prec_values.append(np.interp(x, mrec, mpre)) # precision at mAP@0.5 + + prec_values = np.array(prec_values) # (nc, 1000) # Compute F1 (harmonic mean of precision and recall) - f1 = 2 * p * r / (p + r + eps) + f1_curve = 2 * p_curve * r_curve / (p_curve + r_curve + eps) names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data names = dict(enumerate(names)) # to dict if plot: - plot_pr_curve(px, py, ap, save_dir / f'{prefix}PR_curve.png', names, on_plot=on_plot) - plot_mc_curve(px, f1, save_dir / f'{prefix}F1_curve.png', names, ylabel='F1', on_plot=on_plot) - plot_mc_curve(px, p, save_dir / f'{prefix}P_curve.png', names, ylabel='Precision', on_plot=on_plot) - plot_mc_curve(px, r, save_dir / f'{prefix}R_curve.png', names, ylabel='Recall', on_plot=on_plot) + plot_pr_curve(x, prec_values, ap, save_dir / f'{prefix}PR_curve.png', names, on_plot=on_plot) + plot_mc_curve(x, f1_curve, save_dir / f'{prefix}F1_curve.png', names, ylabel='F1', on_plot=on_plot) + plot_mc_curve(x, p_curve, save_dir / f'{prefix}P_curve.png', names, ylabel='Precision', on_plot=on_plot) + plot_mc_curve(x, r_curve, save_dir / f'{prefix}R_curve.png', names, ylabel='Recall', on_plot=on_plot) - i = smooth(f1.mean(0), 0.1).argmax() # max F1 index - p, r, f1 = p[:, i], r[:, i], f1[:, i] + i = smooth(f1_curve.mean(0), 0.1).argmax() # max F1 index + p, r, f1 = p_curve[:, i], r_curve[:, i], f1_curve[:, i] # max-F1 precision, recall, F1 values tp = (r * nt).round() # true positives fp = (tp / (p + eps) - tp).round() # false positives - return tp, fp, p, r, f1, ap, unique_classes.astype(int) + return tp, fp, p, r, f1, ap, unique_classes.astype(int), p_curve, r_curve, f1_curve, x, prec_values class Metric(SimpleClass): @@ -645,7 +654,19 @@ class Metric(SimpleClass): Updates the class attributes `self.p`, `self.r`, 
`self.f1`, `self.all_ap`, and `self.ap_class_index` based on the values provided in the `results` tuple.
        """
-        self.p, self.r, self.f1, self.all_ap, self.ap_class_index = results
+        (self.p, self.r, self.f1, self.all_ap, self.ap_class_index, self.p_curve, self.r_curve, self.f1_curve, self.px,
+         self.prec_values) = results
+
+    @property
+    def curves(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return []
+
+    @property
+    def curves_results(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return [[self.px, self.prec_values, 'Recall', 'Precision'], [self.px, self.f1_curve, 'Confidence', 'F1'],
+                [self.px, self.p_curve, 'Confidence', 'Precision'], [self.px, self.r_curve, 'Confidence', 'Recall']]
 
 
 class DetMetrics(SimpleClass):
     """
@@ -676,6 +697,8 @@ class DetMetrics(SimpleClass):
         fitness: Computes the fitness score based on the computed detection metrics.
         ap_class_index: Returns a list of class indices sorted by their average precision (AP) values.
         results_dict: Returns a dictionary that maps detection metric keys to their computed values.
+        curves: Returns a list of curve names for plotting precision-recall and confidence curves.
+        curves_results: Returns a list of computed curves, each as [x values, y values, x title, y title].
     """
 
     def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
@@ -686,6 +709,7 @@ class DetMetrics(SimpleClass):
         self.names = names
         self.box = Metric()
         self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
+        self.task = 'detect'
 
     def process(self, tp, conf, pred_cls, target_cls):
         """Process predicted results for object detection and update metrics."""
@@ -733,6 +757,16 @@ class DetMetrics(SimpleClass):
         """Returns dictionary of computed performance metrics and statistics."""
         return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))
 
+    @property
+    def curves(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
+
+    @property
+    def curves_results(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return self.box.curves_results
+
 
 class SegmentMetrics(SimpleClass):
     """
@@ -772,6 +806,7 @@ class SegmentMetrics(SimpleClass):
         self.box = Metric()
         self.seg = Metric()
         self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
+        self.task = 'segment'
 
     def process(self, tp_b, tp_m, conf, pred_cls, target_cls):
         """
@@ -843,6 +878,18 @@ class SegmentMetrics(SimpleClass):
         """Returns results of object detection model for evaluation."""
         return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))
 
+    @property
+    def curves(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return [
+            'Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)',
+            'Precision-Recall(M)', 'F1-Confidence(M)', 'Precision-Confidence(M)', 'Recall-Confidence(M)']
+
+    @property
+    def curves_results(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return self.box.curves_results + self.seg.curves_results
+
 
 class PoseMetrics(SegmentMetrics):
     """
@@ -883,6 +930,7 @@ class PoseMetrics(SegmentMetrics):
         self.box = Metric()
         self.pose = Metric()
         self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
+        self.task = 'pose'
 
     def process(self, tp_b, tp_p, conf, pred_cls, target_cls):
         """
@@ -944,6 +992,18 @@ class PoseMetrics(SegmentMetrics):
         """Computes the fitness score for both pose and box metrics."""
         return self.pose.fitness() + self.box.fitness()
 
+    @property
+    def curves(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return [
+            'Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)',
+            'Precision-Recall(P)', 'F1-Confidence(P)', 'Precision-Confidence(P)', 'Recall-Confidence(P)']
+
+    @property
+    def curves_results(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return self.box.curves_results + self.pose.curves_results
+
 
 class ClassifyMetrics(SimpleClass):
     """
@@ -968,6 +1028,7 @@ class ClassifyMetrics(SimpleClass):
         self.top1 = 0
         self.top5 = 0
         self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
+        self.task = 'classify'
 
     def process(self, targets, pred):
         """Target classes and predicted classes."""
@@ -990,3 +1051,13 @@ class ClassifyMetrics(SimpleClass):
     def keys(self):
         """Returns a list of keys for the results_dict property."""
         return ['metrics/accuracy_top1', 'metrics/accuracy_top5']
+
+    @property
+    def curves(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return []
+
+    @property
+    def curves_results(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return []
diff --git a/ultralytics/utils/tal.py b/ultralytics/utils/tal.py
index 432e7a70..1d10b7aa 100644
--- a/ultralytics/utils/tal.py
+++ b/ultralytics/utils/tal.py
@@ -193,7 +193,7 @@ class TaskAlignedAssigner(nn.Module):
         # Expand topk_idxs for each value of k and add 1 at the specified positions
         count_tensor.scatter_add_(-1, topk_idxs[:, :, k:k + 1], ones)
         # count_tensor.scatter_add_(-1, topk_idxs, torch.ones_like(topk_idxs, dtype=torch.int8, device=topk_idxs.device))
-        # filter invalid bboxes
+        # Filter invalid bboxes
         count_tensor.masked_fill_(count_tensor > 1, 0)
         return count_tensor.to(metrics.dtype)
 
diff --git a/ultralytics/utils/tuner.py b/ultralytics/utils/tuner.py
index 5bccd6fe..a06f813d 100644
--- a/ultralytics/utils/tuner.py
+++ b/ultralytics/utils/tuner.py
@@ -45,6 +45,7 @@ def run_ray_tune(model,
     try:
         subprocess.run('pip install ray[tune]'.split(), check=True)
 
+        import ray
         from ray import tune
         from ray.air import RunConfig
         from ray.air.integrations.wandb import WandbLoggerCallback
@@ -83,6 +84,10 @@ def run_ray_tune(model,
         'mixup': tune.uniform(0.0, 1.0),  # image mixup (probability)
         'copy_paste': tune.uniform(0.0, 1.0)}  # segment copy-paste (probability)
 
+    # Put the model in ray store
+    task = model.task
+    model_in_store = ray.put(model)
+
     def _tune(config):
         """
         Trains the YOLO model with the specified hyperparameters and additional arguments.
 
@@ -93,9 +98,10 @@ def run_ray_tune(model,
         Args:
             config (dict): A dictionary of hyperparameters to use for training.
 
         Returns:
             None.
""" - model.reset_callbacks() + model_to_train = ray.get(model_in_store) # get the model from ray store for tuning + model_to_train.reset_callbacks() config.update(train_args) - results = model.train(**config) + results = model_to_train.train(**config) return results.results_dict # Get search space @@ -104,7 +110,7 @@ def run_ray_tune(model, LOGGER.warning('WARNING ⚠️ search space not provided, using default search space.') # Get dataset - data = train_args.get('data', TASK2DATA[model.task]) + data = train_args.get('data', TASK2DATA[task]) space['data'] = data if 'data' not in train_args: LOGGER.warning(f'WARNING ⚠️ data not provided, using default "data={data}".') @@ -114,7 +120,7 @@ def run_ray_tune(model, # Define the ASHA scheduler for hyperparameter search asha_scheduler = ASHAScheduler(time_attr='epoch', - metric=TASK2METRIC[model.task], + metric=TASK2METRIC[task], mode='max', max_t=train_args.get('epochs') or DEFAULT_CFG_DICT['epochs'] or 100, grace_period=grace_period,