ultralytics 8.0.197 save P, R, F1 curves to metrics (#5354)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: erminkev1 <83356055+erminkev1@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Andy <39454881+yermandy@users.noreply.github.com>
Glenn Jocher 2023-10-13 02:49:31 +02:00 committed by GitHub
parent 7fd5dcbd86
commit 12e3eef844
33 changed files with 337 additions and 195 deletions

View File

@@ -11,7 +11,7 @@ ci:
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
       - id: end-of-file-fixer
       - id: trailing-whitespace
@@ -22,7 +22,7 @@ repos:
       - id: detect-private-key
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.14.0
+    rev: v3.15.0
     hooks:
       - id: pyupgrade
         name: Upgrade code
@@ -77,3 +77,8 @@ repos:
  #     rev: v1.5.0
  #     hooks:
  #       - id: dead
+
+  # - repo: https://github.com/ultralytics/pre-commit
+  #   rev: bd60a414f80a53fb8f593d3bfed4701fc47e4b23
+  #   hooks:
+  #     - id: capitalize-comments

View File

@@ -6,7 +6,6 @@ reference section of *.md files composed of classes and functions, and also crea
 Note: Must be run from repository root directory. Do not run from docs directory.
 """

-import os
 import re
 from collections import defaultdict
 from pathlib import Path
@@ -18,17 +17,9 @@ CODE_DIR = ROOT
 REFERENCE_DIR = ROOT.parent / 'docs/reference'


-def extract_classes_and_functions(filepath: Path):
-    """
-    Extracts class and function names from a given Python file.
-
-    Args:
-        filepath (Path): The path to the Python file.
-
-    Returns:
-        (tuple): A tuple containing lists of class and function names.
-    """
-    content = Path(filepath).read_text()
+def extract_classes_and_functions(filepath: Path) -> tuple:
+    """Extracts class and function names from a given Python file."""
+    content = filepath.read_text()

     class_pattern = r'(?:^|\n)class\s(\w+)(?:\(|:)'
     func_pattern = r'(?:^|\n)def\s(\w+)\('
@@ -39,26 +30,17 @@ def extract_classes_and_functions(filepath: Path):

 def create_markdown(py_filepath: Path, module_path: str, classes: list, functions: list):
-    """
-    Creates a Markdown file containing the API reference for the given Python module.
-
-    Args:
-        py_filepath (Path): The path to the Python file.
-        module_path (str): The import path for the Python module.
-        classes (list): A list of class names within the module.
-        functions (list): A list of function names within the module.
-    """
+    """Creates a Markdown file containing the API reference for the given Python module."""
     md_filepath = py_filepath.with_suffix('.md')

     # Read existing content and keep header content between first two ---
     header_content = ''
     if md_filepath.exists():
-        with open(md_filepath) as file:
-            existing_content = file.read()
-            header_parts = existing_content.split('---')
-            for part in header_parts:
-                if 'description:' in part or 'comments:' in part:
-                    header_content += f'---{part}---\n\n'
+        existing_content = md_filepath.read_text()
+        header_parts = existing_content.split('---')
+        for part in header_parts:
+            if 'description:' in part or 'comments:' in part:
+                header_content += f'---{part}---\n\n'

     module_name = module_path.replace('.__init__', '')
     module_path = module_path.replace('.', '/')
@@ -74,43 +56,24 @@ def create_markdown(py_filepath: Path, module_path: str, classes: list, function
     if not md_content.endswith('\n'):
         md_content += '\n'

-    os.makedirs(os.path.dirname(md_filepath), exist_ok=True)
-    with open(md_filepath, 'w') as file:
-        file.write(md_content)
+    md_filepath.parent.mkdir(parents=True, exist_ok=True)
+    md_filepath.write_text(md_content)

     return md_filepath.relative_to(NEW_YAML_DIR)


-def nested_dict():
-    """
-    Creates and returns a nested defaultdict.
-
-    Returns:
-        (defaultdict): A nested defaultdict object.
-    """
+def nested_dict() -> defaultdict:
+    """Creates and returns a nested defaultdict."""
     return defaultdict(nested_dict)


-def sort_nested_dict(d: dict):
-    """
-    Sorts a nested dictionary recursively.
-
-    Args:
-        d (dict): The dictionary to sort.
-
-    Returns:
-        (dict): The sorted dictionary.
-    """
+def sort_nested_dict(d: dict) -> dict:
+    """Sorts a nested dictionary recursively."""
     return {key: sort_nested_dict(value) if isinstance(value, dict) else value for key, value in sorted(d.items())}


 def create_nav_menu_yaml(nav_items: list):
-    """
-    Creates a YAML file for the navigation menu based on the provided list of items.
-
-    Args:
-        nav_items (list): A list of relative file paths to Markdown files for the navigation menu.
-    """
+    """Creates a YAML file for the navigation menu based on the provided list of items."""
     nav_tree = nested_dict()

     for item_str in nav_items:
@@ -136,26 +99,26 @@ def create_nav_menu_yaml(nav_items: list):
             yaml_str += f"{indent}- {k}: {str(v).replace('docs/', '')}\n"
         return yaml_str

-    with open(NEW_YAML_DIR / 'nav_menu_updated.yml', 'w') as file:
-        yaml_str = _dict_to_yaml(nav_tree_sorted)
-        file.write(yaml_str)
+    # Print updated YAML reference section
+    print('Scan complete, new mkdocs.yaml reference section is:\n\n', _dict_to_yaml(nav_tree_sorted))
+
+    # Save new YAML reference section
+    # (NEW_YAML_DIR / 'nav_menu_updated.yml').write_text(_dict_to_yaml(nav_tree_sorted))


 def main():
     """Main function to extract class and function names, create Markdown files, and generate a YAML navigation menu."""
     nav_items = []
-    for root, _, files in os.walk(CODE_DIR):
-        for file in files:
-            if file.endswith('.py'):
-                py_filepath = Path(root) / file
-                classes, functions = extract_classes_and_functions(py_filepath)
-
-                if classes or functions:
-                    py_filepath_rel = py_filepath.relative_to(CODE_DIR)
-                    md_filepath = REFERENCE_DIR / py_filepath_rel
-                    module_path = f"ultralytics.{py_filepath_rel.with_suffix('').as_posix().replace('/', '.')}"
-                    md_rel_filepath = create_markdown(md_filepath, module_path, classes, functions)
-                    nav_items.append(str(md_rel_filepath))
+
+    for py_filepath in CODE_DIR.rglob('*.py'):
+        classes, functions = extract_classes_and_functions(py_filepath)
+
+        if classes or functions:
+            py_filepath_rel = py_filepath.relative_to(CODE_DIR)
+            md_filepath = REFERENCE_DIR / py_filepath_rel
+            module_path = f"ultralytics.{py_filepath_rel.with_suffix('').as_posix().replace('/', '.')}"
+            md_rel_filepath = create_markdown(md_filepath, module_path, classes, functions)
+            nav_items.append(str(md_rel_filepath))
+
     create_nav_menu_yaml(nav_items)
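For readers less familiar with pathlib, a minimal sketch of the traversal pattern the script now uses (the directory name below is a placeholder, not the script's actual configuration):

```python
from pathlib import Path

CODE_DIR = Path('ultralytics')  # placeholder root; the real script derives CODE_DIR from its own location

# Path.rglob('*.py') recursively yields every Python file under CODE_DIR, replacing the
# nested os.walk/endswith('.py') loop with Path objects that support .relative_to() directly
for py_filepath in sorted(CODE_DIR.rglob('*.py')):
    print(py_filepath.relative_to(CODE_DIR))
```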

View File

@@ -9,6 +9,14 @@ keywords: Ultralytics, callbacks, _log_plots, on_fit_epoch_end, on_train_end
 Full source code for this file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/wb.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/wb.py). Help us fix any issues you see by submitting a [Pull Request](https://docs.ultralytics.com/help/contributing/) 🛠️. Thank you 🙏!

+---
+## ::: ultralytics.utils.callbacks.wb._custom_table
+<br><br>
+
+---
+## ::: ultralytics.utils.callbacks.wb._plot_curve
+<br><br>
+
 ---
 ## ::: ultralytics.utils.callbacks.wb._log_plots
 <br><br>

View File

@@ -11,7 +11,7 @@ from ultralytics import YOLO
 from ultralytics.utils.files import increment_path
 from ultralytics.utils.plotting import Annotator, colors

-track_history = defaultdict(lambda: [])
+track_history = defaultdict(list)

 current_region = None
 counting_regions = [

View File

@@ -97,8 +97,8 @@ def test_fastsam(task='segment', model=WEIGHTS_DIR / 'FastSAM-s.pt', data='coco8
     ann = prompt_process.text_prompt(text='a photo of a dog')

     # Point prompt
-    # points default [[0,0]] [[x1,y1],[x2,y2]]
-    # point_label default [0] [1,0] 0:background, 1:foreground
+    # Points default [[0,0]] [[x1,y1],[x2,y2]]
+    # Point_label default [0] [1,0] 0:background, 1:foreground
     ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1])
     prompt_process.plot(annotations=ann, output='./')

View File

@@ -70,7 +70,7 @@ def test_segment():
     CFG.imgsz = 32
     # YOLO(CFG_SEG).train(**overrides)  # works

-    # trainer
+    # Trainer
     trainer = segment.SegmentationTrainer(overrides=overrides)
     trainer.add_callback('on_train_start', test_func)
     assert test_func in trainer.callbacks['on_train_start'], 'callback test failed'

View File

@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-__version__ = '8.0.196'
+__version__ = '8.0.197'

 from ultralytics.models import RTDETR, SAM, YOLO
 from ultralytics.models.fastsam import FastSAM

View File

@@ -491,7 +491,7 @@ class RandomPerspective:
         border = labels.pop('mosaic_border', self.border)
         self.size = img.shape[1] + border[1] * 2, img.shape[0] + border[0] * 2  # w, h
         # M is affine matrix
-        # scale for func:`box_candidates`
+        # Scale for func:`box_candidates`
         img, M, scale = self.affine_transform(img, border)

         bboxes = self.apply_bboxes(instances.bboxes, M)
@@ -894,7 +894,7 @@ class Format:
         return labels

     def _format_img(self, img):
-        """Format the image for YOLOv5 from Numpy array to PyTorch tensor."""
+        """Format the image for YOLO from Numpy array to PyTorch tensor."""
         if len(img.shape) < 3:
             img = np.expand_dims(img, -1)
         img = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1])

View File

@@ -1,14 +1,14 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

 import json
-import shutil
 from collections import defaultdict
 from pathlib import Path

 import cv2
 import numpy as np

-from ultralytics.utils import TQDM
+from ultralytics.utils import LOGGER, TQDM
+from ultralytics.utils.files import increment_path


 def coco91_to_coco80_class():
@@ -48,12 +48,12 @@ def coco80_to_coco91_class():  #
 def convert_coco(labels_dir='../coco/annotations/',
-                 save_dir='.',
+                 save_dir='coco_converted/',
                  use_segments=False,
                  use_keypoints=False,
                  cls91to80=True):
     """
-    Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
+    Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.

     Args:
         labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
@@ -74,9 +74,7 @@ def convert_coco(labels_dir='../coco/annotations/',
     """

     # Create dataset directory
-    save_dir = Path(save_dir)
-    if save_dir.exists():
-        shutil.rmtree(save_dir)  # delete dir
+    save_dir = increment_path(save_dir)  # increment if save directory already exists
     for p in save_dir / 'labels', save_dir / 'images':
         p.mkdir(parents=True, exist_ok=True)  # make dir
@@ -147,6 +145,8 @@ def convert_coco(labels_dir='../coco/annotations/',
                                 if use_segments and len(segments[i]) > 0 else bboxes[i]),  # cls, box or segments
                     file.write(('%g ' * len(line)).rstrip() % line + '\n')

+    LOGGER.info(f'COCO data converted successfully.\nResults saved to {save_dir.resolve()}')
+

 def convert_dota_to_yolo_obb(dota_root_path: str):
     """
@@ -271,26 +271,25 @@ def merge_multi_segment(segments):
     segments = [np.array(i).reshape(-1, 2) for i in segments]
     idx_list = [[] for _ in range(len(segments))]

-    # record the indexes with min distance between each segment
+    # Record the indexes with min distance between each segment
     for i in range(1, len(segments)):
         idx1, idx2 = min_index(segments[i - 1], segments[i])
         idx_list[i - 1].append(idx1)
         idx_list[i].append(idx2)

-    # use two round to connect all the segments
+    # Use two round to connect all the segments
     for k in range(2):
-        # forward connection
+        # Forward connection
         if k == 0:
             for i, idx in enumerate(idx_list):
-                # middle segments have two indexes
-                # reverse the index of middle segments
+                # Middle segments have two indexes, reverse the index of middle segments
                 if len(idx) == 2 and idx[0] > idx[1]:
                     idx = idx[::-1]
                     segments[i] = segments[i][::-1, :]

                 segments[i] = np.roll(segments[i], -idx[0], axis=0)
                 segments[i] = np.concatenate([segments[i], segments[i][:1]])
-                # deal with the first segment and the last one
+                # Deal with the first segment and the last one
                 if i in [0, len(idx_list) - 1]:
                     s.append(segments[i])
                 else:
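A hedged usage sketch of the converter after this change (the annotation path is a placeholder): because `save_dir` now runs through `increment_path`, repeating a conversion writes to a new sibling directory instead of wiping the previous output.

```python
from ultralytics.data.converter import convert_coco

# First run writes to coco_converted/, a repeat run to coco_converted2/, and so on,
# so earlier conversions are never deleted the way the old shutil.rmtree() call did
convert_coco(labels_dir='../coco/annotations/',  # placeholder path to COCO JSON annotations
             use_segments=True,
             cls91to80=True)
```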

View File

@@ -162,7 +162,7 @@ class YOLODataset(BaseDataset):
     def update_labels_info(self, label):
         """Custom your label format here."""
         # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
-        # we can make it also support classification and semantic segmentation by add or remove some dict keys there.
+        # We can make it also support classification and semantic segmentation by add or remove some dict keys there.
         bboxes = label.pop('bboxes')
         segments = label.pop('segments')
         keypoints = label.pop('keypoints', None)

View File

@@ -526,7 +526,7 @@ class BaseTrainer:
     # TODO: may need to put these following functions into callback
     def plot_training_samples(self, batch, ni):
-        """Plots training samples during YOLOv5 training."""
+        """Plots training samples during YOLO training."""
         pass

     def plot_training_labels(self):

View File

@@ -23,7 +23,7 @@ class HUBTrainingSession:
     Attributes:
         agent_id (str): Identifier for the instance communicating with the server.
-        model_id (str): Identifier for the YOLOv5 model being trained.
+        model_id (str): Identifier for the YOLO model being trained.
         model_url (str): URL for the model in Ultralytics HUB.
         api_url (str): API URL for the model in Ultralytics HUB.
         auth_header (dict): Authentication header for the Ultralytics HUB API requests.

View File

@@ -120,7 +120,7 @@ class FastSAMPrompt:
             result_name = os.path.basename(ann.path)
             image = ann.orig_img[..., ::-1]  # BGR to RGB
             original_h, original_w = ann.orig_shape
-            # for macOS only
+            # For macOS only
             # plt.switch_backend('TkAgg')
             plt.figure(figsize=(original_w / 100, original_h / 100))
             # Add subplot with no margin.

View File

@@ -42,23 +42,23 @@ def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=Fals
         high_iou_indices (torch.Tensor): Indices of boxes with IoU > thres
     """
     boxes = adjust_bboxes_to_image_border(boxes, image_shape)
-    # obtain coordinates for intersections
+    # Obtain coordinates for intersections
     x1 = torch.max(box1[0], boxes[:, 0])
     y1 = torch.max(box1[1], boxes[:, 1])
     x2 = torch.min(box1[2], boxes[:, 2])
     y2 = torch.min(box1[3], boxes[:, 3])

-    # compute the area of intersection
+    # Compute the area of intersection
     intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

-    # compute the area of both individual boxes
+    # Compute the area of both individual boxes
     box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
     box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

-    # compute the area of union
+    # Compute the area of union
     union = box1_area + box2_area - intersection

-    # compute the IoU
+    # Compute the IoU
     iou = intersection / union  # Should be shape (n, )
     if raw_output:
         return 0 if iou.numel() == 0 else iou

View File

@@ -99,10 +99,10 @@ class RTDETRValidator(DetectionValidator):
         for i, bbox in enumerate(bboxes):  # (300, 4)
             bbox = ops.xywh2xyxy(bbox)
             score, cls = scores[i].max(-1)  # (300, )
-            # Do not need threshold for evaluation as only got 300 boxes here.
+            # Do not need threshold for evaluation as only got 300 boxes here
             # idx = score > self.args.conf
             pred = torch.cat([bbox, score[..., None], cls[..., None]], dim=-1)  # filter
-            # sort by confidence to correctly get internal metrics.
+            # Sort by confidence to correctly get internal metrics
             pred = pred[score.argsort(descending=True)]
             outputs[i] = pred  # [idx]

View File

@@ -304,11 +304,11 @@ class PositionEmbeddingRandom(nn.Module):
     def _pe_encoding(self, coords: torch.Tensor) -> torch.Tensor:
         """Positionally encode points that are normalized to [0,1]."""
-        # assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape
+        # Assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape
         coords = 2 * coords - 1
         coords = coords @ self.positional_encoding_gaussian_matrix
         coords = 2 * np.pi * coords
-        # outputs d_1 x ... x d_n x C shape
+        # Outputs d_1 x ... x d_n x C shape
         return torch.cat([torch.sin(coords), torch.cos(coords)], dim=-1)

     def forward(self, size: Tuple[int, int]) -> torch.Tensor:
@@ -429,7 +429,7 @@ class Attention(nn.Module):
         self.use_rel_pos = use_rel_pos
         if self.use_rel_pos:
             assert (input_size is not None), 'Input size must be provided if using relative positional encoding.'
-            # initialize relative positional embeddings
+            # Initialize relative positional embeddings
             self.rel_pos_h = nn.Parameter(torch.zeros(2 * input_size[0] - 1, head_dim))
             self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))

View File

@@ -172,7 +172,7 @@ class ConvLayer(nn.Module):
         self.depth = depth
         self.use_checkpoint = use_checkpoint

-        # build blocks
+        # Build blocks
         self.blocks = nn.ModuleList([
             MBConv(
                 dim,
@@ -182,7 +182,7 @@ class ConvLayer(nn.Module):
                 drop_path[i] if isinstance(drop_path, list) else drop_path,
             ) for i in range(depth)])

-        # patch merging layer
+        # Patch merging layer
         self.downsample = None if downsample is None else downsample(
             input_resolution, dim=dim, out_dim=out_dim, activation=activation)
@@ -393,11 +393,11 @@ class TinyViTBlock(nn.Module):
             pH, pW = H + pad_b, W + pad_r
             nH = pH // self.window_size
             nW = pW // self.window_size
-            # window partition
+            # Window partition
             x = x.view(B, nH, self.window_size, nW, self.window_size,
                        C).transpose(2, 3).reshape(B * nH * nW, self.window_size * self.window_size, C)
             x = self.attn(x)
-            # window reverse
+            # Window reverse
             x = x.view(B, nH, nW, self.window_size, self.window_size, C).transpose(2, 3).reshape(B, pH, pW, C)

             if padding:
@@ -467,7 +467,7 @@ class BasicLayer(nn.Module):
         self.depth = depth
         self.use_checkpoint = use_checkpoint

-        # build blocks
+        # Build blocks
         self.blocks = nn.ModuleList([
             TinyViTBlock(
                 dim=dim,
@@ -481,7 +481,7 @@ class BasicLayer(nn.Module):
                 activation=activation,
             ) for i in range(depth)])

-        # patch merging layer
+        # Patch merging layer
         self.downsample = None if downsample is None else downsample(
             input_resolution, dim=dim, out_dim=out_dim, activation=activation)
@@ -593,10 +593,10 @@ class TinyViT(nn.Module):
         patches_resolution = self.patch_embed.patches_resolution
         self.patches_resolution = patches_resolution

-        # stochastic depth
+        # Stochastic depth
         dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule

-        # build layers
+        # Build layers
         self.layers = nn.ModuleList()
         for i_layer in range(self.num_layers):
             kwargs = dict(
@@ -628,7 +628,7 @@ class TinyViT(nn.Module):
         self.norm_head = nn.LayerNorm(embed_dims[-1])
         self.head = nn.Linear(embed_dims[-1], num_classes) if num_classes > 0 else torch.nn.Identity()

-        # init weights
+        # Init weights
         self.apply(self._init_weights)
         self.set_layer_lr_decay(layer_lr_decay)
         self.neck = nn.Sequential(
@@ -653,7 +653,7 @@ class TinyViT(nn.Module):
         """Sets the learning rate decay for each layer in the TinyViT model."""
         decay_rate = layer_lr_decay

-        # layers -> blocks (depth)
+        # Layers -> blocks (depth)
         depth = sum(self.depths)
         lr_scales = [decay_rate ** (depth - i - 1) for i in range(depth)]

View File

@@ -414,8 +414,7 @@ class Predictor(BasePredictor):
                 unchanged = unchanged and not changed
                 new_masks.append(torch.as_tensor(mask).unsqueeze(0))

-                # Give score=0 to changed masks and score=1 to unchanged masks
-                # so NMS will prefer ones that didn't need postprocessing
+                # Give score=0 to changed masks and 1 to unchanged masks so NMS prefers masks not needing postprocessing
                 scores.append(float(unchanged))

             # Recalculate boxes and remove any new duplicates

View File

@@ -66,7 +66,7 @@ class DETRLoss(nn.Module):
     def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=''):
         """Computes the classification loss based on predictions, target values, and ground truth scores."""
-        # logits: [b, query, num_classes], gt_class: list[[n, 1]]
+        # Logits: [b, query, num_classes], gt_class: list[[n, 1]]
         name_class = f'loss_class{postfix}'
         bs, nq = pred_scores.shape[:2]
         # one_hot = F.one_hot(targets, self.nc + 1)[..., :-1]  # (bs, num_queries, num_classes)
@@ -90,7 +90,7 @@ class DETRLoss(nn.Module):
         """Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding
         boxes.
         """
-        # boxes: [b, query, 4], gt_bbox: list[[n, 4]]
+        # Boxes: [b, query, 4], gt_bbox: list[[n, 4]]
         name_bbox = f'loss_bbox{postfix}'
         name_giou = f'loss_giou{postfix}'

View File

@@ -188,7 +188,7 @@ def get_cdn_group(batch,
     num_group = num_dn // max_nums
     num_group = 1 if num_group == 0 else num_group

-    # pad gt to max_num of a batch
+    # Pad gt to max_num of a batch
     bs = len(gt_groups)
     gt_cls = batch['cls']  # (bs*num, )
     gt_bbox = batch['bboxes']  # bs*num, 4
@@ -204,10 +204,10 @@ def get_cdn_group(batch,
     neg_idx = torch.arange(total_num * num_group, dtype=torch.long, device=gt_bbox.device) + num_group * total_num

     if cls_noise_ratio > 0:
-        # half of bbox prob
+        # Half of bbox prob
         mask = torch.rand(dn_cls.shape) < (cls_noise_ratio * 0.5)
         idx = torch.nonzero(mask).squeeze(-1)
-        # randomly put a new one here
+        # Randomly put a new one here
         new_label = torch.randint_like(idx, 0, num_classes, dtype=dn_cls.dtype, device=dn_cls.device)
         dn_cls[idx] = new_label
@@ -240,9 +240,9 @@ def get_cdn_group(batch,
     tgt_size = num_dn + num_queries
     attn_mask = torch.zeros([tgt_size, tgt_size], dtype=torch.bool)
-    # match query cannot see the reconstruct
+    # Match query cannot see the reconstruct
     attn_mask[num_dn:, :num_dn] = True
-    # reconstruct cannot see each other
+    # Reconstruct cannot see each other
     for i in range(num_group):
         if i == 0:
             attn_mask[max_nums * 2 * i:max_nums * 2 * (i + 1), max_nums * 2 * (i + 1):num_dn] = True

View File

@@ -307,7 +307,7 @@ class Bottleneck(nn.Module):
         self.add = shortcut and c1 == c2

     def forward(self, x):
-        """'forward()' applies the YOLOv5 FPN to input data."""
+        """'forward()' applies the YOLO FPN to input data."""
         return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))

View File

@@ -192,7 +192,7 @@ class RTDETRDecoder(nn.Module):
                  dropout=0.,
                  act=nn.ReLU(),
                  eval_idx=-1,
-                 # training args
+                 # Training args
                  nd=100,  # num denoising
                  label_noise_ratio=0.5,
                  box_noise_scale=1.0,
@@ -225,7 +225,7 @@ class RTDETRDecoder(nn.Module):
         self.num_queries = nq
         self.num_decoder_layers = ndl

-        # backbone feature projection
+        # Backbone feature projection
         self.input_proj = nn.ModuleList(nn.Sequential(nn.Conv2d(x, hd, 1, bias=False), nn.BatchNorm2d(hd)) for x in ch)
         # NOTE: simplified version but it's not consistent with .pt weights.
         # self.input_proj = nn.ModuleList(Conv(x, hd, act=False) for x in ch)
@@ -234,24 +234,24 @@ class RTDETRDecoder(nn.Module):
         decoder_layer = DeformableTransformerDecoderLayer(hd, nh, d_ffn, dropout, act, self.nl, ndp)
         self.decoder = DeformableTransformerDecoder(hd, decoder_layer, ndl, eval_idx)

-        # denoising part
+        # Denoising part
         self.denoising_class_embed = nn.Embedding(nc, hd)
         self.num_denoising = nd
         self.label_noise_ratio = label_noise_ratio
         self.box_noise_scale = box_noise_scale

-        # decoder embedding
+        # Decoder embedding
         self.learnt_init_query = learnt_init_query
         if learnt_init_query:
             self.tgt_embed = nn.Embedding(nq, hd)
         self.query_pos_head = MLP(4, 2 * hd, hd, num_layers=2)

-        # encoder head
+        # Encoder head
         self.enc_output = nn.Sequential(nn.Linear(hd, hd), nn.LayerNorm(hd))
         self.enc_score_head = nn.Linear(hd, nc)
         self.enc_bbox_head = MLP(hd, hd, 4, num_layers=3)

-        # decoder head
+        # Decoder head
         self.dec_score_head = nn.ModuleList([nn.Linear(hd, nc) for _ in range(ndl)])
         self.dec_bbox_head = nn.ModuleList([MLP(hd, hd, 4, num_layers=3) for _ in range(ndl)])
@@ -261,10 +261,10 @@ class RTDETRDecoder(nn.Module):
         """Runs the forward pass of the module, returning bounding box and classification scores for the input."""
         from ultralytics.models.utils.ops import get_cdn_group

-        # input projection and embedding
+        # Input projection and embedding
         feats, shapes = self._get_encoder_input(x)

-        # prepare denoising training
+        # Prepare denoising training
         dn_embed, dn_bbox, attn_mask, dn_meta = \
             get_cdn_group(batch,
                           self.nc,
@@ -278,7 +278,7 @@ class RTDETRDecoder(nn.Module):
         embed, refer_bbox, enc_bboxes, enc_scores = \
             self._get_decoder_input(feats, shapes, dn_embed, dn_bbox)

-        # decoder
+        # Decoder
         dec_bboxes, dec_scores = self.decoder(embed,
                                               refer_bbox,
                                               feats,
@@ -316,9 +316,9 @@ class RTDETRDecoder(nn.Module):
     def _get_encoder_input(self, x):
         """Processes and returns encoder inputs by getting projection features from input and concatenating them."""
-        # get projection features
+        # Get projection features
         x = [self.input_proj[i](feat) for i, feat in enumerate(x)]
-        # get encoder inputs
+        # Get encoder inputs
         feats = []
         shapes = []
         for feat in x:
@@ -335,13 +335,13 @@ class RTDETRDecoder(nn.Module):
     def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None):
         """Generates and prepares the input required for the decoder from the provided features and shapes."""
         bs = len(feats)
-        # prepare input for decoder
+        # Prepare input for decoder
         anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
         features = self.enc_output(valid_mask * feats)  # bs, h*w, 256

         enc_outputs_scores = self.enc_score_head(features)  # (bs, h*w, nc)

-        # query selection
+        # Query selection
         # (bs, num_queries)
         topk_ind = torch.topk(enc_outputs_scores.max(-1).values, self.num_queries, dim=1).indices.view(-1)
         # (bs, num_queries)
@@ -352,7 +352,7 @@ class RTDETRDecoder(nn.Module):
         # (bs, num_queries, 4)
         top_k_anchors = anchors[:, topk_ind].view(bs, self.num_queries, -1)

-        # dynamic anchors + static content
+        # Dynamic anchors + static content
         refer_bbox = self.enc_bbox_head(top_k_features) + top_k_anchors

         enc_bboxes = refer_bbox.sigmoid()
@@ -373,7 +373,7 @@ class RTDETRDecoder(nn.Module):
     # TODO
     def _reset_parameters(self):
         """Initializes or resets the parameters of the model's various components with predefined weights and biases."""
-        # class and bbox head init
+        # Class and bbox head init
         bias_cls = bias_init_with_prob(0.01) / 80 * self.nc
         # NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets.
         # linear_init_(self.enc_score_head)

View File

@@ -81,7 +81,7 @@ class AIFI(TransformerEncoderLayer):
         """Forward pass for the AIFI transformer layer."""
         c, h, w = x.shape[1:]
         pos_embed = self.build_2d_sincos_position_embedding(w, h, c)
-        # flatten [B, C, H, W] to [B, HxW, C]
+        # Flatten [B, C, H, W] to [B, HxW, C]
         x = super().forward(x.flatten(2).permute(0, 2, 1), pos=pos_embed.to(device=x.device, dtype=x.dtype))
         return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous()
@@ -213,7 +213,7 @@ class MSDeformAttn(nn.Module):
         if d_model % n_heads != 0:
             raise ValueError(f'd_model must be divisible by n_heads, but got {d_model} and {n_heads}')
         _d_per_head = d_model // n_heads
-        # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation
+        # Better to set _d_per_head to a power of 2 which is more efficient in a CUDA implementation
         assert _d_per_head * n_heads == d_model, '`d_model` must be divisible by `n_heads`'

         self.im2col_step = 64

View File

@@ -277,7 +277,7 @@ class DetectionModel(BaseModel):
         return torch.cat((x, y, wh, cls), dim)

     def _clip_augmented(self, y):
-        """Clip YOLOv5 augmented inference tails."""
+        """Clip YOLO augmented inference tails."""
         nl = self.model[-1].nl  # number of detection layers (P3-P5)
         g = sum(4 ** x for x in range(nl))  # grid points
         e = 1  # exclude layer count
@@ -491,7 +491,7 @@ class Ensemble(nn.ModuleList):
         super().__init__()

     def forward(self, x, augment=False, profile=False, visualize=False):
-        """Function generates the YOLOv5 network's final layer."""
+        """Function generates the YOLO network's final layer."""
         y = [module(x, augment, profile, visualize)[0] for module in self]
         # y = torch.stack(y).max(0)[0]  # max ensemble
         # y = torch.stack(y).mean(0)  # mean ensemble

View File

@@ -38,7 +38,7 @@ def on_pretrain_routine_end(trainer):
         if not active_run:
             active_run = mlflow.start_run(experiment_id=experiment.experiment_id, run_name=run_name)
             LOGGER.info(f'{prefix}Using run_id({active_run.info.run_id}) at {mlflow_location}')
-        run.log_params(vars(trainer.model.args))
+        run.log_params(trainer.args)
     except Exception as err:
         LOGGER.error(f'{prefix}Failing init - {repr(err)}')
         LOGGER.warning(f'{prefix}Continuing without Mlflow')

View File

@@ -8,7 +8,10 @@ try:
     assert SETTINGS['wandb'] is True  # verify integration is enabled
     import wandb as wb

-    assert hasattr(wb, '__version__')
+    assert hasattr(wb, '__version__')  # verify package is not directory
+
+    import numpy as np
+    import pandas as pd

     _processed_plots = {}
@@ -16,6 +19,83 @@ except (ImportError, AssertionError):
     wb = None


+def _custom_table(x, y, classes, title='Precision Recall Curve', x_axis_title='Recall', y_axis_title='Precision'):
+    """
+    Create and log a custom metric visualization to wandb.plot.pr_curve.
+
+    This function crafts a custom metric visualization that mimics the behavior of wandb's default precision-recall
+    curve while allowing for enhanced customization. The visual metric is useful for monitoring model performance
+    across different classes.
+
+    Args:
+        x (List): Values for the x-axis; expected to have length N.
+        y (List): Corresponding values for the y-axis; also expected to have length N.
+        classes (List): Labels identifying the class of each point; length N.
+        title (str, optional): Title for the plot; defaults to 'Precision Recall Curve'.
+        x_title (str, optional): Label for the x-axis; defaults to 'Recall'.
+        y_title (str, optional): Label for the y-axis; defaults to 'Precision'.
+
+    Returns:
+        (wandb.Object): A wandb object suitable for logging, showcasing the crafted metric visualization.
+    """
+    df = pd.DataFrame({'class': classes, 'y': y, 'x': x}).round(3)
+    fields = {'x': 'x', 'y': 'y', 'class': 'class'}
+    string_fields = {'title': title, 'x-axis-title': x_axis_title, 'y-axis-title': y_axis_title}
+    return wb.plot_table('wandb/area-under-curve/v0',
+                         wb.Table(dataframe=df),
+                         fields=fields,
+                         string_fields=string_fields)
+
+
+def _plot_curve(x,
+                y,
+                names=None,
+                id='precision-recall',
+                title='Precision Recall Curve',
+                x_title='Recall',
+                y_title='Precision',
+                num_x=100,
+                only_mean=False):
+    """
+    Log a metric curve visualization.
+
+    This function generates a metric curve based on input data and logs the visualization to wandb.
+    The curve can represent aggregated data (mean) or individual class data, depending on the 'only_mean' flag.
+
+    Args:
+        x (np.ndarray): Data points for the x-axis with length N.
+        y (np.ndarray): Corresponding data points for the y-axis with shape CxN, where C represents the number of classes.
+        names (list, optional): Names of the classes corresponding to the y-axis data; length C. Defaults to an empty list.
+        id (str, optional): Unique identifier for the logged data in wandb. Defaults to 'precision-recall'.
+        title (str, optional): Title for the visualization plot. Defaults to 'Precision Recall Curve'.
+        x_title (str, optional): Label for the x-axis. Defaults to 'Recall'.
+        y_title (str, optional): Label for the y-axis. Defaults to 'Precision'.
+        num_x (int, optional): Number of interpolated data points for visualization. Defaults to 100.
+        only_mean (bool, optional): Flag to indicate if only the mean curve should be plotted. Defaults to True.
+
+    Note:
+        The function leverages the '_custom_table' function to generate the actual visualization.
+    """
+    # Create new x
+    if names is None:
+        names = []
+    x_new = np.linspace(x[0], x[-1], num_x).round(5)
+
+    # Create arrays for logging
+    x_log = x_new.tolist()
+    y_log = np.interp(x_new, x, np.mean(y, axis=0)).round(3).tolist()
+
+    if only_mean:
+        table = wb.Table(data=list(zip(x_log, y_log)), columns=[x_title, y_title])
+        wb.run.log({title: wb.plot.line(table, x_title, y_title, title=title)})
+    else:
+        classes = ['mean'] * len(x_log)
+        for i, yi in enumerate(y):
+            x_log.extend(x_new)  # add new x
+            y_log.extend(np.interp(x_new, x, yi))  # interpolate y to new x
+            classes.extend([names[i]] * len(x_new))  # add class names
+        wb.log({id: _custom_table(x_log, y_log, classes, title, x_title, y_title)}, commit=False)
+
+
 def _log_plots(plots, step):
     """Logs plots from the input dictionary if they haven't been logged already at the specified step."""
     for name, params in plots.items():
@@ -55,6 +135,17 @@ def on_train_end(trainer):
     if trainer.best.exists():
         art.add_file(trainer.best)
         wb.run.log_artifact(art, aliases=['best'])
+    for curve_name, curve_values in zip(trainer.validator.metrics.curves, trainer.validator.metrics.curves_results):
+        x, y, x_title, y_title = curve_values
+        _plot_curve(
+            x,
+            y,
+            names=list(trainer.validator.metrics.names.values()),
+            id=f'curves/{curve_name}',
+            title=curve_name,
+            x_title=x_title,
+            y_title=y_title,
+        )
     wb.run.finish()  # required or run continues on dashboard
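As a rough, self-contained illustration of what the new on_train_end loop feeds into _plot_curve (synthetic arrays, an illustrative curve name, and an already-initialized wandb run are all assumptions here, not part of the commit):

```python
import numpy as np

# Fake per-class precision over 1000 confidence values for 3 classes, shaped (C, N)
x = np.linspace(0, 1, 1000)
y = np.stack([np.clip(1.0 - k * x, 0, 1) for k in (0.8, 0.9, 1.0)])
names = ['person', 'car', 'dog']

# Mirrors one iteration of the on_train_end loop above; requires wandb to be initialized
_plot_curve(x, y, names=names, id='curves/Precision-Confidence(B)',
            title='Precision-Confidence(B)', x_title='Confidence', y_title='Precision')
```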

View File

@@ -165,16 +165,16 @@ def check_version(current: str = '0.0.0',
     Example:
         ```python
-        # check if current version is exactly 22.04
+        # Check if current version is exactly 22.04
         check_version(current='22.04', required='==22.04')

-        # check if current version is greater than or equal to 22.04
+        # Check if current version is greater than or equal to 22.04
         check_version(current='22.10', required='22.04')  # assumes '>=' inequality if none passed

-        # check if current version is less than or equal to 22.04
+        # Check if current version is less than or equal to 22.04
         check_version(current='22.04', required='<=22.04')

-        # check if current version is between 20.04 (inclusive) and 22.04 (exclusive)
+        # Check if current version is between 20.04 (inclusive) and 22.04 (exclusive)
         check_version(current='21.10', required='>20.04,<22.04')
         ```
     """

View File

@@ -45,7 +45,7 @@ def spaces_in_path(path):
        with ultralytics.utils.files import spaces_in_path

        with spaces_in_path('/path/with spaces') as new_path:
-            # your code here
+            # Your code here
        ```
    """

View File

@@ -219,7 +219,7 @@ class Instances:
         self.normalized = normalized

         if len(segments) > 0:
-            # list[np.array(1000, 2)] * num_samples
+            # List[np.array(1000, 2)] * num_samples
             segments = resample_segments(segments)
             # (N, 1000, 2)
             segments = np.stack(segments, axis=0)

View File

@ -176,13 +176,13 @@ class v8DetectionLoss:
imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w)
anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)
# targets # Targets
targets = torch.cat((batch['batch_idx'].view(-1, 1), batch['cls'].view(-1, 1), batch['bboxes']), 1) targets = torch.cat((batch['batch_idx'].view(-1, 1), batch['cls'].view(-1, 1), batch['bboxes']), 1)
targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy
mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)
# pboxes # Pboxes
pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4)
_, target_bboxes, target_scores, fg_mask, _ = self.assigner( _, target_bboxes, target_scores, fg_mask, _ = self.assigner(
@ -191,11 +191,11 @@ class v8DetectionLoss:
target_scores_sum = max(target_scores.sum(), 1) target_scores_sum = max(target_scores.sum(), 1)
# cls loss # Cls loss
# loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way
loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE
# bbox loss # Bbox loss
if fg_mask.sum(): if fg_mask.sum():
target_bboxes /= stride_tensor target_bboxes /= stride_tensor
loss[0], loss[2] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, loss[0], loss[2] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores,
@ -224,7 +224,7 @@ class v8SegmentationLoss(v8DetectionLoss):
pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
(self.reg_max * 4, self.nc), 1) (self.reg_max * 4, self.nc), 1)
# b, grids, .. # B, grids, ..
pred_scores = pred_scores.permute(0, 2, 1).contiguous() pred_scores = pred_scores.permute(0, 2, 1).contiguous()
pred_distri = pred_distri.permute(0, 2, 1).contiguous() pred_distri = pred_distri.permute(0, 2, 1).contiguous()
pred_masks = pred_masks.permute(0, 2, 1).contiguous() pred_masks = pred_masks.permute(0, 2, 1).contiguous()
@ -233,7 +233,7 @@ class v8SegmentationLoss(v8DetectionLoss):
imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w)
anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)
# targets # Targets
try: try:
batch_idx = batch['batch_idx'].view(-1, 1) batch_idx = batch['batch_idx'].view(-1, 1)
targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1) targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1)
@ -247,7 +247,7 @@ class v8SegmentationLoss(v8DetectionLoss):
"correctly formatted 'segment' dataset using 'data=coco128-seg.yaml' " "correctly formatted 'segment' dataset using 'data=coco128-seg.yaml' "
'as an example.\nSee https://docs.ultralytics.com/tasks/segment/ for help.') from e 'as an example.\nSee https://docs.ultralytics.com/tasks/segment/ for help.') from e
# pboxes # Pboxes
pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4)
_, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner( _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
@ -256,15 +256,15 @@ class v8SegmentationLoss(v8DetectionLoss):
target_scores_sum = max(target_scores.sum(), 1) target_scores_sum = max(target_scores.sum(), 1)
# cls loss # Cls loss
# loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way
loss[2] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE loss[2] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE
if fg_mask.sum(): if fg_mask.sum():
# bbox loss # Bbox loss
loss[0], loss[3] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes / stride_tensor, loss[0], loss[3] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes / stride_tensor,
target_scores, target_scores_sum, fg_mask) target_scores, target_scores_sum, fg_mask)
# masks loss # Masks loss
masks = batch['masks'].to(self.device).float() masks = batch['masks'].to(self.device).float()
if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample
masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0] masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0]
@ -344,13 +344,13 @@ class v8SegmentationLoss(v8DetectionLoss):
_, _, mask_h, mask_w = proto.shape _, _, mask_h, mask_w = proto.shape
loss = 0 loss = 0
# normalize to 0-1 # Normalize to 0-1
target_bboxes_normalized = target_bboxes / imgsz[[1, 0, 1, 0]] target_bboxes_normalized = target_bboxes / imgsz[[1, 0, 1, 0]]
# areas of target bboxes # Areas of target bboxes
marea = xyxy2xywh(target_bboxes_normalized)[..., 2:].prod(2) marea = xyxy2xywh(target_bboxes_normalized)[..., 2:].prod(2)
# normalize to mask size # Normalize to mask size
mxyxy = target_bboxes_normalized * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=proto.device) mxyxy = target_bboxes_normalized * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=proto.device)
for i, single_i in enumerate(zip(fg_mask, target_gt_idx, pred_masks, proto, mxyxy, marea, masks)): for i, single_i in enumerate(zip(fg_mask, target_gt_idx, pred_masks, proto, mxyxy, marea, masks)):
@ -393,7 +393,7 @@ class v8PoseLoss(v8DetectionLoss):
pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
(self.reg_max * 4, self.nc), 1) (self.reg_max * 4, self.nc), 1)
# b, grids, .. # B, grids, ..
pred_scores = pred_scores.permute(0, 2, 1).contiguous() pred_scores = pred_scores.permute(0, 2, 1).contiguous()
pred_distri = pred_distri.permute(0, 2, 1).contiguous() pred_distri = pred_distri.permute(0, 2, 1).contiguous()
pred_kpts = pred_kpts.permute(0, 2, 1).contiguous() pred_kpts = pred_kpts.permute(0, 2, 1).contiguous()
@ -402,7 +402,7 @@ class v8PoseLoss(v8DetectionLoss):
imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w)
anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)
# targets # Targets
batch_size = pred_scores.shape[0] batch_size = pred_scores.shape[0]
batch_idx = batch['batch_idx'].view(-1, 1) batch_idx = batch['batch_idx'].view(-1, 1)
targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1) targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1)
@ -410,7 +410,7 @@ class v8PoseLoss(v8DetectionLoss):
gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy
mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)
# pboxes # Pboxes
pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4)
pred_kpts = self.kpts_decode(anchor_points, pred_kpts.view(batch_size, -1, *self.kpt_shape)) # (b, h*w, 17, 3) pred_kpts = self.kpts_decode(anchor_points, pred_kpts.view(batch_size, -1, *self.kpt_shape)) # (b, h*w, 17, 3)
@ -420,11 +420,11 @@ class v8PoseLoss(v8DetectionLoss):
target_scores_sum = max(target_scores.sum(), 1) target_scores_sum = max(target_scores.sum(), 1)
# cls loss # Cls loss
# loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way
loss[3] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE loss[3] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE
# bbox loss # Bbox loss
if fg_mask.sum(): if fg_mask.sum():
target_bboxes /= stride_tensor target_bboxes /= stride_tensor
loss[0], loss[4] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, loss[0], loss[4] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores,
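The segmentation branch above scales target boxes from image pixels to 0-1, computes their normalized areas, and rescales them to prototype-mask coordinates before mask cropping. A minimal standalone sketch of that transform with invented sizes (the real code uses `xyxy2xywh` for the area step):

import torch

# Illustrative only: toy values, not taken from the training loop.
imgsz = torch.tensor([640., 640.])                          # (h, w) of the network input
mask_h, mask_w = 160, 160                                   # prototype mask resolution
target_bboxes = torch.tensor([[100., 50., 300., 250.]])     # xyxy in image pixels

target_bboxes_normalized = target_bboxes / imgsz[[1, 0, 1, 0]]                             # xyxy scaled to 0-1
marea = (target_bboxes_normalized[..., 2:] - target_bboxes_normalized[..., :2]).prod(-1)   # normalized box areas
mxyxy = target_bboxes_normalized * torch.tensor([mask_w, mask_h, mask_w, mask_h])          # xyxy in mask pixels
print(mxyxy)   # tensor([[25.0000, 12.5000, 75.0000, 62.5000]])
print(marea)   # tensor([0.0977])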

View File

@ -36,7 +36,7 @@ def bbox_ioa(box1, box2, iou=False, eps=1e-7):
inter_area = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * \ inter_area = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * \
(np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)).clip(0) (np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)).clip(0)
# box2 area # Box2 area
area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
if iou: if iou:
box1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1) box1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
@ -440,13 +440,18 @@ def ap_per_class(tp,
Returns: Returns:
(tuple): A tuple of six arrays and one array of unique classes, where: (tuple): A tuple of six arrays and one array of unique classes, where:
tp (np.ndarray): True positive counts for each class. tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
fp (np.ndarray): False positive counts for each class. fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
p (np.ndarray): Precision values at each confidence threshold. p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,).
r (np.ndarray): Recall values at each confidence threshold. r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,).
f1 (np.ndarray): F1-score values at each confidence threshold. f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,).
ap (np.ndarray): Average precision for each class at different IoU thresholds. ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10).
unique_classes (np.ndarray): An array of unique classes that have data. unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,).
p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000).
r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000).
f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000).
x (np.ndarray): X-axis values for the curves. Shape: (1000,).
prec_values (np.ndarray): Precision values at mAP@0.5 for each class. Shape: (nc, 1000).
""" """
# Sort by objectness # Sort by objectness
@ -458,8 +463,10 @@ def ap_per_class(tp,
nc = unique_classes.shape[0] # number of classes, number of detections nc = unique_classes.shape[0] # number of classes, number of detections
# Create Precision-Recall curve and compute AP for each class # Create Precision-Recall curve and compute AP for each class
px, py = np.linspace(0, 1, 1000), [] # for plotting x, prec_values = np.linspace(0, 1, 1000), []
ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
# Average precision, precision and recall curves
ap, p_curve, r_curve = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
for ci, c in enumerate(unique_classes): for ci, c in enumerate(unique_classes):
i = pred_cls == c i = pred_cls == c
n_l = nt[ci] # number of labels n_l = nt[ci] # number of labels
@ -473,33 +480,35 @@ def ap_per_class(tp,
# Recall # Recall
recall = tpc / (n_l + eps) # recall curve recall = tpc / (n_l + eps) # recall curve
r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases r_curve[ci] = np.interp(-x, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases
# Precision # Precision
precision = tpc / (tpc + fpc) # precision curve precision = tpc / (tpc + fpc) # precision curve
p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score p_curve[ci] = np.interp(-x, -conf[i], precision[:, 0], left=1) # p at pr_score
# AP from recall-precision curve # AP from recall-precision curve
for j in range(tp.shape[1]): for j in range(tp.shape[1]):
ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
if plot and j == 0: if plot and j == 0:
py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 prec_values.append(np.interp(x, mrec, mpre)) # precision at mAP@0.5
prec_values = np.array(prec_values) # (nc, 1000)
# Compute F1 (harmonic mean of precision and recall) # Compute F1 (harmonic mean of precision and recall)
f1 = 2 * p * r / (p + r + eps) f1_curve = 2 * p_curve * r_curve / (p_curve + r_curve + eps)
names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data
names = dict(enumerate(names)) # to dict names = dict(enumerate(names)) # to dict
if plot: if plot:
plot_pr_curve(px, py, ap, save_dir / f'{prefix}PR_curve.png', names, on_plot=on_plot) plot_pr_curve(x, prec_values, ap, save_dir / f'{prefix}PR_curve.png', names, on_plot=on_plot)
plot_mc_curve(px, f1, save_dir / f'{prefix}F1_curve.png', names, ylabel='F1', on_plot=on_plot) plot_mc_curve(x, f1_curve, save_dir / f'{prefix}F1_curve.png', names, ylabel='F1', on_plot=on_plot)
plot_mc_curve(px, p, save_dir / f'{prefix}P_curve.png', names, ylabel='Precision', on_plot=on_plot) plot_mc_curve(x, p_curve, save_dir / f'{prefix}P_curve.png', names, ylabel='Precision', on_plot=on_plot)
plot_mc_curve(px, r, save_dir / f'{prefix}R_curve.png', names, ylabel='Recall', on_plot=on_plot) plot_mc_curve(x, r_curve, save_dir / f'{prefix}R_curve.png', names, ylabel='Recall', on_plot=on_plot)
i = smooth(f1.mean(0), 0.1).argmax() # max F1 index i = smooth(f1_curve.mean(0), 0.1).argmax() # max F1 index
p, r, f1 = p[:, i], r[:, i], f1[:, i] p, r, f1 = p_curve[:, i], r_curve[:, i], f1_curve[:, i] # max-F1 precision, recall, F1 values
tp = (r * nt).round() # true positives tp = (r * nt).round() # true positives
fp = (tp / (p + eps) - tp).round() # false positives fp = (tp / (p + eps) - tp).round() # false positives
return tp, fp, p, r, f1, ap, unique_classes.astype(int) return tp, fp, p, r, f1, ap, unique_classes.astype(int), p_curve, r_curve, f1_curve, x, prec_values
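For the curves now returned above, here is a self-contained toy sketch (invented values, single class) of how the recall, precision, and F1 curves are interpolated onto the shared 1000-point confidence axis returned as `x`:

import numpy as np

# Toy inputs for one class: predictions sorted by descending confidence.
conf = np.array([0.9, 0.8, 0.7, 0.4, 0.2])
tp = np.array([1, 1, 0, 1, 0])        # 1 = prediction matched a label
n_labels, eps = 4, 1e-16

tpc, fpc = tp.cumsum(), (1 - tp).cumsum()
recall = tpc / (n_labels + eps)
precision = tpc / (tpc + fpc)

x = np.linspace(0, 1, 1000)                       # shared confidence axis
r_curve = np.interp(-x, -conf, recall, left=0)    # negated so xp is increasing
p_curve = np.interp(-x, -conf, precision, left=1)
f1_curve = 2 * p_curve * r_curve / (p_curve + r_curve + eps)
print(x[f1_curve.argmax()])                       # confidence giving the best F1 for this toy class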
class Metric(SimpleClass): class Metric(SimpleClass):
@ -645,7 +654,19 @@ class Metric(SimpleClass):
Updates the class attributes `self.p`, `self.r`, `self.f1`, `self.all_ap`, and `self.ap_class_index` based Updates the class attributes `self.p`, `self.r`, `self.f1`, `self.all_ap`, and `self.ap_class_index` based
on the values provided in the `results` tuple. on the values provided in the `results` tuple.
""" """
self.p, self.r, self.f1, self.all_ap, self.ap_class_index = results (self.p, self.r, self.f1, self.all_ap, self.ap_class_index, self.p_curve, self.r_curve, self.f1_curve, self.px,
self.prec_values) = results
@property
def curves(self):
"""Returns a list of curves for accessing specific metrics curves."""
return []
@property
def curves_results(self):
"""Returns a list of curves for accessing specific metrics curves."""
return [[self.px, self.prec_values, 'Recall', 'Precision'], [self.px, self.f1_curve, 'Confidence', 'F1'],
[self.px, self.p_curve, 'Confidence', 'Precision'], [self.px, self.r_curve, 'Confidence', 'Recall']]
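Each `curves_results` entry packs one plot as `[x, y, xlabel, ylabel]`, with `y` shaped `(nc, 1000)`. A hedged plotting sketch using stand-in data in that layout (matplotlib assumed available; real values would come from a populated `Metric`):

import numpy as np
import matplotlib.pyplot as plt

# Stand-in data (3 classes, 1000-point confidence axis) in the curves_results layout.
x = np.linspace(0, 1, 1000)
y = np.clip(1 - x[None, :] * np.array([[0.8], [1.0], [1.2]]), 0, 1)
curves_results = [[x, y, 'Confidence', 'Precision']]

for cx, cy, xlabel, ylabel in curves_results:
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.plot(cx, cy.T, linewidth=1)        # one line per class
    ax.set(xlabel=xlabel, ylabel=ylabel)
    fig.savefig(f'{ylabel}_vs_{xlabel}.png')
    plt.close(fig)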
class DetMetrics(SimpleClass): class DetMetrics(SimpleClass):
@ -676,6 +697,8 @@ class DetMetrics(SimpleClass):
fitness: Computes the fitness score based on the computed detection metrics. fitness: Computes the fitness score based on the computed detection metrics.
ap_class_index: Returns a list of class indices sorted by their average precision (AP) values. ap_class_index: Returns a list of class indices sorted by their average precision (AP) values.
results_dict: Returns a dictionary that maps detection metric keys to their computed values. results_dict: Returns a dictionary that maps detection metric keys to their computed values.
curves: TODO
curves_results: TODO
""" """
def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None: def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
@ -686,6 +709,7 @@ class DetMetrics(SimpleClass):
self.names = names self.names = names
self.box = Metric() self.box = Metric()
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0} self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
self.task = 'detect'
def process(self, tp, conf, pred_cls, target_cls): def process(self, tp, conf, pred_cls, target_cls):
"""Process predicted results for object detection and update metrics.""" """Process predicted results for object detection and update metrics."""
@ -733,6 +757,16 @@ class DetMetrics(SimpleClass):
"""Returns dictionary of computed performance metrics and statistics.""" """Returns dictionary of computed performance metrics and statistics."""
return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness])) return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))
@property
def curves(self):
"""Returns a list of curves for accessing specific metrics curves."""
return ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
@property
def curves_results(self):
"""Returns dictionary of computed performance metrics and statistics."""
return self.box.curves_results
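A hedged end-to-end usage sketch, assuming this package version, a `yolov8n.pt` checkpoint, and `coco128.yaml` (both normally auto-download), and that `model.val()` returns the `DetMetrics` instance for detection models:

from ultralytics import YOLO

model = YOLO('yolov8n.pt')
metrics = model.val(data='coco128.yaml')      # expected to be a DetMetrics instance
for name, (cx, cy, xlabel, ylabel) in zip(metrics.curves, metrics.curves_results):
    print(f'{name}: {len(cx)} x-points, y shape {cy.shape} ({xlabel} vs {ylabel})')

`SegmentMetrics` and `PoseMetrics` expose the same `curves`/`curves_results` pairing, extending the list with the (M)- and (P)-suffixed curves shown below.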
class SegmentMetrics(SimpleClass): class SegmentMetrics(SimpleClass):
""" """
@ -772,6 +806,7 @@ class SegmentMetrics(SimpleClass):
self.box = Metric() self.box = Metric()
self.seg = Metric() self.seg = Metric()
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0} self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
self.task = 'segment'
def process(self, tp_b, tp_m, conf, pred_cls, target_cls): def process(self, tp_b, tp_m, conf, pred_cls, target_cls):
""" """
@ -843,6 +878,18 @@ class SegmentMetrics(SimpleClass):
"""Returns results of object detection model for evaluation.""" """Returns results of object detection model for evaluation."""
return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness])) return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))
@property
def curves(self):
"""Returns a list of curves for accessing specific metrics curves."""
return [
'Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)',
'Precision-Recall(M)', 'F1-Confidence(M)', 'Precision-Confidence(M)', 'Recall-Confidence(M)']
@property
def curves_results(self):
"""Returns dictionary of computed performance metrics and statistics."""
return self.box.curves_results + self.seg.curves_results
class PoseMetrics(SegmentMetrics): class PoseMetrics(SegmentMetrics):
""" """
@ -883,6 +930,7 @@ class PoseMetrics(SegmentMetrics):
self.box = Metric() self.box = Metric()
self.pose = Metric() self.pose = Metric()
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0} self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
self.task = 'pose'
def process(self, tp_b, tp_p, conf, pred_cls, target_cls): def process(self, tp_b, tp_p, conf, pred_cls, target_cls):
""" """
@ -944,6 +992,18 @@ class PoseMetrics(SegmentMetrics):
"""Computes classification metrics and speed using the `targets` and `pred` inputs.""" """Computes classification metrics and speed using the `targets` and `pred` inputs."""
return self.pose.fitness() + self.box.fitness() return self.pose.fitness() + self.box.fitness()
@property
def curves(self):
"""Returns a list of curves for accessing specific metrics curves."""
return [
'Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)',
'Precision-Recall(P)', 'F1-Confidence(P)', 'Precision-Confidence(P)', 'Recall-Confidence(P)']
@property
def curves_results(self):
"""Returns dictionary of computed performance metrics and statistics."""
return self.box.curves_results + self.pose.curves_results
class ClassifyMetrics(SimpleClass): class ClassifyMetrics(SimpleClass):
""" """
@ -968,6 +1028,7 @@ class ClassifyMetrics(SimpleClass):
self.top1 = 0 self.top1 = 0
self.top5 = 0 self.top5 = 0
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0} self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
self.task = 'classify'
def process(self, targets, pred): def process(self, targets, pred):
"""Target classes and predicted classes.""" """Target classes and predicted classes."""
@ -990,3 +1051,13 @@ class ClassifyMetrics(SimpleClass):
def keys(self): def keys(self):
"""Returns a list of keys for the results_dict property.""" """Returns a list of keys for the results_dict property."""
return ['metrics/accuracy_top1', 'metrics/accuracy_top5'] return ['metrics/accuracy_top1', 'metrics/accuracy_top5']
@property
def curves(self):
"""Returns a list of curves for accessing specific metrics curves."""
return []
@property
def curves_results(self):
"""Returns a list of curves for accessing specific metrics curves."""
return []

View File

@ -193,7 +193,7 @@ class TaskAlignedAssigner(nn.Module):
# Expand topk_idxs for each value of k and add 1 at the specified positions # Expand topk_idxs for each value of k and add 1 at the specified positions
count_tensor.scatter_add_(-1, topk_idxs[:, :, k:k + 1], ones) count_tensor.scatter_add_(-1, topk_idxs[:, :, k:k + 1], ones)
# count_tensor.scatter_add_(-1, topk_idxs, torch.ones_like(topk_idxs, dtype=torch.int8, device=topk_idxs.device)) # count_tensor.scatter_add_(-1, topk_idxs, torch.ones_like(topk_idxs, dtype=torch.int8, device=topk_idxs.device))
# filter invalid bboxes # Filter invalid bboxes
count_tensor.masked_fill_(count_tensor > 1, 0) count_tensor.masked_fill_(count_tensor > 1, 0)
return count_tensor.to(metrics.dtype) return count_tensor.to(metrics.dtype)

View File

@ -45,6 +45,7 @@ def run_ray_tune(model,
try: try:
subprocess.run('pip install ray[tune]'.split(), check=True) subprocess.run('pip install ray[tune]'.split(), check=True)
import ray
from ray import tune from ray import tune
from ray.air import RunConfig from ray.air import RunConfig
from ray.air.integrations.wandb import WandbLoggerCallback from ray.air.integrations.wandb import WandbLoggerCallback
@ -83,6 +84,10 @@ def run_ray_tune(model,
'mixup': tune.uniform(0.0, 1.0), # image mixup (probability) 'mixup': tune.uniform(0.0, 1.0), # image mixup (probability)
'copy_paste': tune.uniform(0.0, 1.0)} # segment copy-paste (probability) 'copy_paste': tune.uniform(0.0, 1.0)} # segment copy-paste (probability)
# Put the model in ray store
task = model.task
model_in_store = ray.put(model)
def _tune(config): def _tune(config):
""" """
Trains the YOLO model with the specified hyperparameters and additional arguments. Trains the YOLO model with the specified hyperparameters and additional arguments.
@ -93,9 +98,10 @@ def run_ray_tune(model,
Returns: Returns:
None. None.
""" """
model.reset_callbacks() model_to_train = ray.get(model_in_store) # get the model from ray store for tuning
model_to_train.reset_callbacks()
config.update(train_args) config.update(train_args)
results = model.train(**config) results = model_to_train.train(**config)
return results.results_dict return results.results_dict
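The change above places the model in the Ray object store once and has every trial fetch its own copy via `ray.get`, rather than capturing the live model in the closure. A minimal standalone sketch of that put/get pattern (the dict is a stand-in for the YOLO model; this is not the actual tuner code):

import ray

ray.init(ignore_reinit_error=True)
model_in_store = ray.put({'weights': 'yolov8n.pt'})   # stand-in object for the model

@ray.remote
def _toy_trial(lr):
    model = ray.get(model_in_store)                   # each worker fetches its own copy from the store
    return {'lr': lr, 'weights': model['weights']}

print(ray.get([_toy_trial.remote(lr) for lr in (0.001, 0.01)]))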
# Get search space # Get search space
@ -104,7 +110,7 @@ def run_ray_tune(model,
LOGGER.warning('WARNING ⚠️ search space not provided, using default search space.') LOGGER.warning('WARNING ⚠️ search space not provided, using default search space.')
# Get dataset # Get dataset
data = train_args.get('data', TASK2DATA[model.task]) data = train_args.get('data', TASK2DATA[task])
space['data'] = data space['data'] = data
if 'data' not in train_args: if 'data' not in train_args:
LOGGER.warning(f'WARNING ⚠️ data not provided, using default "data={data}".') LOGGER.warning(f'WARNING ⚠️ data not provided, using default "data={data}".')
@ -114,7 +120,7 @@ def run_ray_tune(model,
# Define the ASHA scheduler for hyperparameter search # Define the ASHA scheduler for hyperparameter search
asha_scheduler = ASHAScheduler(time_attr='epoch', asha_scheduler = ASHAScheduler(time_attr='epoch',
metric=TASK2METRIC[model.task], metric=TASK2METRIC[task],
mode='max', mode='max',
max_t=train_args.get('epochs') or DEFAULT_CFG_DICT['epochs'] or 100, max_t=train_args.get('epochs') or DEFAULT_CFG_DICT['epochs'] or 100,
grace_period=grace_period, grace_period=grace_period,