Mirror of https://github.com/THU-MIG/yolov10.git (synced 2025-05-23 13:34:23 +08:00)

commit 12e3eef844 (parent 7fd5dcbd86)
ultralytics 8.0.197: save P, R, F1 curves to metrics (#5354)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: erminkev1 <83356055+erminkev1@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Andy <39454881+yermandy@users.noreply.github.com>
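The headline change in this release is that validation now keeps the full P, R and F1 curve data on the metrics objects and logs them to Weights & Biases. A minimal sketch of how the new fields can be read back, assuming the standard `model.val()` workflow (`yolov8n.pt` and `coco8.yaml` are placeholder assets):

```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
metrics = model.val(data='coco8.yaml')  # returns a DetMetrics object

# New in 8.0.197: curve labels plus the raw (x, y, x_title, y_title) curve data
for label, (x, y, x_title, y_title) in zip(metrics.curves, metrics.curves_results):
    print(label, x_title, '->', y_title, 'y shape:', y.shape)
```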
@@ -11,7 +11,7 @@ ci:

 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
       - id: end-of-file-fixer
       - id: trailing-whitespace

@@ -22,7 +22,7 @@ repos:
       - id: detect-private-key

   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.14.0
+    rev: v3.15.0
     hooks:
       - id: pyupgrade
         name: Upgrade code

@@ -77,3 +77,8 @@ repos:
   #     rev: v1.5.0
   #     hooks:
   #       - id: dead
+
+  # - repo: https://github.com/ultralytics/pre-commit
+  #   rev: bd60a414f80a53fb8f593d3bfed4701fc47e4b23
+  #   hooks:
+  #     - id: capitalize-comments
@@ -6,7 +6,6 @@ reference section of *.md files composed of classes and functions, and also crea
 Note: Must be run from repository root directory. Do not run from docs directory.
 """

-import os
 import re
 from collections import defaultdict
 from pathlib import Path

@@ -18,17 +17,9 @@ CODE_DIR = ROOT
 REFERENCE_DIR = ROOT.parent / 'docs/reference'


-def extract_classes_and_functions(filepath: Path):
-    """
-    Extracts class and function names from a given Python file.
-
-    Args:
-        filepath (Path): The path to the Python file.
-
-    Returns:
-        (tuple): A tuple containing lists of class and function names.
-    """
-    content = Path(filepath).read_text()
+def extract_classes_and_functions(filepath: Path) -> tuple:
+    """Extracts class and function names from a given Python file."""
+    content = filepath.read_text()
     class_pattern = r'(?:^|\n)class\s(\w+)(?:\(|:)'
     func_pattern = r'(?:^|\n)def\s(\w+)\('

@@ -39,26 +30,17 @@ def extract_classes_and_functions(filepath: Path):


 def create_markdown(py_filepath: Path, module_path: str, classes: list, functions: list):
-    """
-    Creates a Markdown file containing the API reference for the given Python module.
-
-    Args:
-        py_filepath (Path): The path to the Python file.
-        module_path (str): The import path for the Python module.
-        classes (list): A list of class names within the module.
-        functions (list): A list of function names within the module.
-    """
+    """Creates a Markdown file containing the API reference for the given Python module."""
     md_filepath = py_filepath.with_suffix('.md')

     # Read existing content and keep header content between first two ---
     header_content = ''
     if md_filepath.exists():
-        with open(md_filepath) as file:
-            existing_content = file.read()
-        header_parts = existing_content.split('---')
-        for part in header_parts:
-            if 'description:' in part or 'comments:' in part:
-                header_content += f'---{part}---\n\n'
+        existing_content = md_filepath.read_text()
+        header_parts = existing_content.split('---')
+        for part in header_parts:
+            if 'description:' in part or 'comments:' in part:
+                header_content += f'---{part}---\n\n'

     module_name = module_path.replace('.__init__', '')
     module_path = module_path.replace('.', '/')

@@ -74,43 +56,24 @@ def create_markdown(py_filepath: Path, module_path: str, classes: list, function
     if not md_content.endswith('\n'):
         md_content += '\n'

-    os.makedirs(os.path.dirname(md_filepath), exist_ok=True)
-    with open(md_filepath, 'w') as file:
-        file.write(md_content)
+    md_filepath.parent.mkdir(parents=True, exist_ok=True)
+    md_filepath.write_text(md_content)

     return md_filepath.relative_to(NEW_YAML_DIR)


-def nested_dict():
-    """
-    Creates and returns a nested defaultdict.
-
-    Returns:
-        (defaultdict): A nested defaultdict object.
-    """
+def nested_dict() -> defaultdict:
+    """Creates and returns a nested defaultdict."""
     return defaultdict(nested_dict)


-def sort_nested_dict(d: dict):
-    """
-    Sorts a nested dictionary recursively.
-
-    Args:
-        d (dict): The dictionary to sort.
-
-    Returns:
-        (dict): The sorted dictionary.
-    """
+def sort_nested_dict(d: dict) -> dict:
+    """Sorts a nested dictionary recursively."""
     return {key: sort_nested_dict(value) if isinstance(value, dict) else value for key, value in sorted(d.items())}


 def create_nav_menu_yaml(nav_items: list):
-    """
-    Creates a YAML file for the navigation menu based on the provided list of items.
-
-    Args:
-        nav_items (list): A list of relative file paths to Markdown files for the navigation menu.
-    """
+    """Creates a YAML file for the navigation menu based on the provided list of items."""
     nav_tree = nested_dict()

     for item_str in nav_items:

@@ -136,26 +99,26 @@ def create_nav_menu_yaml(nav_items: list):
             yaml_str += f"{indent}- {k}: {str(v).replace('docs/', '')}\n"
         return yaml_str

-    with open(NEW_YAML_DIR / 'nav_menu_updated.yml', 'w') as file:
-        yaml_str = _dict_to_yaml(nav_tree_sorted)
-        file.write(yaml_str)
+    # Print updated YAML reference section
+    print('Scan complete, new mkdocs.yaml reference section is:\n\n', _dict_to_yaml(nav_tree_sorted))
+
+    # Save new YAML reference section
+    # (NEW_YAML_DIR / 'nav_menu_updated.yml').write_text(_dict_to_yaml(nav_tree_sorted))


 def main():
     """Main function to extract class and function names, create Markdown files, and generate a YAML navigation menu."""
     nav_items = []
-    for root, _, files in os.walk(CODE_DIR):
-        for file in files:
-            if file.endswith('.py'):
-                py_filepath = Path(root) / file
-                classes, functions = extract_classes_and_functions(py_filepath)
-
-                if classes or functions:
-                    py_filepath_rel = py_filepath.relative_to(CODE_DIR)
-                    md_filepath = REFERENCE_DIR / py_filepath_rel
-                    module_path = f"ultralytics.{py_filepath_rel.with_suffix('').as_posix().replace('/', '.')}"
-                    md_rel_filepath = create_markdown(md_filepath, module_path, classes, functions)
-                    nav_items.append(str(md_rel_filepath))
+    for py_filepath in CODE_DIR.rglob('*.py'):
+        classes, functions = extract_classes_and_functions(py_filepath)
+
+        if classes or functions:
+            py_filepath_rel = py_filepath.relative_to(CODE_DIR)
+            md_filepath = REFERENCE_DIR / py_filepath_rel
+            module_path = f"ultralytics.{py_filepath_rel.with_suffix('').as_posix().replace('/', '.')}"
+            md_rel_filepath = create_markdown(md_filepath, module_path, classes, functions)
+            nav_items.append(str(md_rel_filepath))

     create_nav_menu_yaml(nav_items)
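The `main()` rewrite above replaces the nested `os.walk` loop with a single `Path.rglob` call. A small equivalence sketch (the `ultralytics` directory name is illustrative):

```python
from pathlib import Path

code_dir = Path('ultralytics')  # hypothetical scan root

# Old pattern: os.walk with manual filename filtering and Path reassembly
# New pattern: one recursive glob that yields Path objects directly
py_files = sorted(code_dir.rglob('*.py'))
```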
@@ -9,6 +9,14 @@ keywords: Ultralytics, callbacks, _log_plots, on_fit_epoch_end, on_train_end

 Full source code for this file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/wb.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/wb.py). Help us fix any issues you see by submitting a [Pull Request](https://docs.ultralytics.com/help/contributing/) 🛠️. Thank you 🙏!

 ---
+## ::: ultralytics.utils.callbacks.wb._custom_table
+<br><br>
+
+---
+## ::: ultralytics.utils.callbacks.wb._plot_curve
+<br><br>
+
+---
 ## ::: ultralytics.utils.callbacks.wb._log_plots
 <br><br>
@@ -11,7 +11,7 @@ from ultralytics import YOLO
 from ultralytics.utils.files import increment_path
 from ultralytics.utils.plotting import Annotator, colors

-track_history = defaultdict(lambda: [])
+track_history = defaultdict(list)

 current_region = None
 counting_regions = [
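`defaultdict(list)` behaves identically to `defaultdict(lambda: [])` but drops the needless lambda and, unlike the lambda form, remains picklable. A quick check:

```python
import pickle
from collections import defaultdict

d = defaultdict(list)
d['track_1'].append((10, 20))
assert pickle.loads(pickle.dumps(d)) == d  # a lambda default_factory would raise here
```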
@@ -97,8 +97,8 @@ def test_fastsam(task='segment', model=WEIGHTS_DIR / 'FastSAM-s.pt', data='coco8
     ann = prompt_process.text_prompt(text='a photo of a dog')

     # Point prompt
-    # points default [[0,0]] [[x1,y1],[x2,y2]]
-    # point_label default [0] [1,0] 0:background, 1:foreground
+    # Points default [[0,0]] [[x1,y1],[x2,y2]]
+    # Point_label default [0] [1,0] 0:background, 1:foreground
     ann = prompt_process.point_prompt(points=[[200, 200]], pointlabel=[1])
     prompt_process.plot(annotations=ann, output='./')
@@ -70,7 +70,7 @@ def test_segment():
     CFG.imgsz = 32
     # YOLO(CFG_SEG).train(**overrides)  # works

-    # trainer
+    # Trainer
     trainer = segment.SegmentationTrainer(overrides=overrides)
     trainer.add_callback('on_train_start', test_func)
     assert test_func in trainer.callbacks['on_train_start'], 'callback test failed'
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-__version__ = '8.0.196'
+__version__ = '8.0.197'

 from ultralytics.models import RTDETR, SAM, YOLO
 from ultralytics.models.fastsam import FastSAM
@@ -491,7 +491,7 @@ class RandomPerspective:
         border = labels.pop('mosaic_border', self.border)
         self.size = img.shape[1] + border[1] * 2, img.shape[0] + border[0] * 2  # w, h
         # M is affine matrix
-        # scale for func:`box_candidates`
+        # Scale for func:`box_candidates`
         img, M, scale = self.affine_transform(img, border)

         bboxes = self.apply_bboxes(instances.bboxes, M)
@@ -894,7 +894,7 @@ class Format:
         return labels

     def _format_img(self, img):
-        """Format the image for YOLOv5 from Numpy array to PyTorch tensor."""
+        """Format the image for YOLO from Numpy array to PyTorch tensor."""
         if len(img.shape) < 3:
             img = np.expand_dims(img, -1)
         img = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1])
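For reference, the `_format_img` line being touched here packs three steps: an HWC-to-CHW transpose, a BGR-to-RGB flip of the new leading channel axis, and a contiguous copy for `torch.from_numpy`. A small sanity sketch of the same NumPy idiom:

```python
import numpy as np

img = np.zeros((4, 4, 3), dtype=np.uint8)  # H, W, C in BGR order
img[..., 0] = 255  # fill the blue channel

chw = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1])  # C, H, W in RGB order
assert chw.shape == (3, 4, 4) and chw[2].max() == 255  # blue is now the last channel
```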
@@ -1,14 +1,14 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

 import json
-import shutil
 from collections import defaultdict
 from pathlib import Path

 import cv2
 import numpy as np

-from ultralytics.utils import TQDM
+from ultralytics.utils import LOGGER, TQDM
+from ultralytics.utils.files import increment_path


 def coco91_to_coco80_class():

@@ -48,12 +48,12 @@ def coco80_to_coco91_class():  #


 def convert_coco(labels_dir='../coco/annotations/',
-                 save_dir='.',
+                 save_dir='coco_converted/',
                  use_segments=False,
                  use_keypoints=False,
                  cls91to80=True):
     """
-    Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
+    Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.

     Args:
         labels_dir (str, optional): Path to directory containing COCO dataset annotation files.

@@ -74,9 +74,7 @@ def convert_coco(labels_dir='../coco/annotations/',
     """

     # Create dataset directory
-    save_dir = Path(save_dir)
-    if save_dir.exists():
-        shutil.rmtree(save_dir)  # delete dir
+    save_dir = increment_path(save_dir)  # increment if save directory already exists
     for p in save_dir / 'labels', save_dir / 'images':
         p.mkdir(parents=True, exist_ok=True)  # make dir

@@ -147,6 +145,8 @@ def convert_coco(labels_dir='../coco/annotations/',
                              if use_segments and len(segments[i]) > 0 else bboxes[i]),  # cls, box or segments
                 file.write(('%g ' * len(line)).rstrip() % line + '\n')

+    LOGGER.info(f'COCO data converted successfully.\nResults saved to {save_dir.resolve()}')
+

 def convert_dota_to_yolo_obb(dota_root_path: str):
     """

@@ -271,26 +271,25 @@ def merge_multi_segment(segments):
     segments = [np.array(i).reshape(-1, 2) for i in segments]
     idx_list = [[] for _ in range(len(segments))]

-    # record the indexes with min distance between each segment
+    # Record the indexes with min distance between each segment
     for i in range(1, len(segments)):
         idx1, idx2 = min_index(segments[i - 1], segments[i])
         idx_list[i - 1].append(idx1)
         idx_list[i].append(idx2)

-    # use two round to connect all the segments
+    # Use two round to connect all the segments
     for k in range(2):
-        # forward connection
+        # Forward connection
         if k == 0:
             for i, idx in enumerate(idx_list):
-                # middle segments have two indexes
-                # reverse the index of middle segments
+                # Middle segments have two indexes, reverse the index of middle segments
                 if len(idx) == 2 and idx[0] > idx[1]:
                     idx = idx[::-1]
                     segments[i] = segments[i][::-1, :]

                 segments[i] = np.roll(segments[i], -idx[0], axis=0)
                 segments[i] = np.concatenate([segments[i], segments[i][:1]])
-                # deal with the first segment and the last one
+                # Deal with the first segment and the last one
                 if i in [0, len(idx_list) - 1]:
                     s.append(segments[i])
                 else:
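With this change a second run of `convert_coco` no longer wipes an existing output directory; `increment_path` yields `coco_converted/`, `coco_converted2/`, and so on. A usage sketch, assuming the module path `ultralytics.data.converter` and a placeholder annotations directory:

```python
from ultralytics.data.converter import convert_coco

# Writes YOLO-format labels; the output directory auto-increments instead of being deleted
convert_coco(labels_dir='../datasets/coco/annotations/', use_segments=True)
```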
@@ -162,7 +162,7 @@ class YOLODataset(BaseDataset):
     def update_labels_info(self, label):
         """Custom your label format here."""
         # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
-        # we can make it also support classification and semantic segmentation by add or remove some dict keys there.
+        # We can make it also support classification and semantic segmentation by add or remove some dict keys there.
         bboxes = label.pop('bboxes')
         segments = label.pop('segments')
         keypoints = label.pop('keypoints', None)
@@ -526,7 +526,7 @@ class BaseTrainer:

     # TODO: may need to put these following functions into callback
     def plot_training_samples(self, batch, ni):
-        """Plots training samples during YOLOv5 training."""
+        """Plots training samples during YOLO training."""
         pass

     def plot_training_labels(self):
@@ -23,7 +23,7 @@ class HUBTrainingSession:

     Attributes:
         agent_id (str): Identifier for the instance communicating with the server.
-        model_id (str): Identifier for the YOLOv5 model being trained.
+        model_id (str): Identifier for the YOLO model being trained.
         model_url (str): URL for the model in Ultralytics HUB.
         api_url (str): API URL for the model in Ultralytics HUB.
         auth_header (dict): Authentication header for the Ultralytics HUB API requests.
@@ -120,7 +120,7 @@ class FastSAMPrompt:
         result_name = os.path.basename(ann.path)
         image = ann.orig_img[..., ::-1]  # BGR to RGB
         original_h, original_w = ann.orig_shape
-        # for macOS only
+        # For macOS only
         # plt.switch_backend('TkAgg')
         plt.figure(figsize=(original_w / 100, original_h / 100))
         # Add subplot with no margin.
@@ -42,23 +42,23 @@ def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=Fals
         high_iou_indices (torch.Tensor): Indices of boxes with IoU > thres
     """
     boxes = adjust_bboxes_to_image_border(boxes, image_shape)
-    # obtain coordinates for intersections
+    # Obtain coordinates for intersections
     x1 = torch.max(box1[0], boxes[:, 0])
     y1 = torch.max(box1[1], boxes[:, 1])
     x2 = torch.min(box1[2], boxes[:, 2])
     y2 = torch.min(box1[3], boxes[:, 3])

-    # compute the area of intersection
+    # Compute the area of intersection
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

-    # compute the area of both individual boxes
+    # Compute the area of both individual boxes
     box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
     box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

-    # compute the area of union
+    # Compute the area of union
     union = box1_area + box2_area - intersection

-    # compute the IoU
+    # Compute the IoU
     iou = intersection / union  # Should be shape (n, )
     if raw_output:
         return 0 if iou.numel() == 0 else iou
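A worked single-box example of the intersection/union arithmetic above:

```python
import torch

box1 = torch.tensor([0., 0., 10., 10.])          # x1, y1, x2, y2
boxes = torch.tensor([[5., 5., 15., 15.]])

x1, y1 = torch.max(box1[0], boxes[:, 0]), torch.max(box1[1], boxes[:, 1])
x2, y2 = torch.min(box1[2], boxes[:, 2]), torch.min(box1[3], boxes[:, 3])
inter = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)  # 5 x 5 overlap = 25
union = 100 + 100 - inter                        # 175
print((inter / union).item())                    # ~0.1429
```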
@@ -99,10 +99,10 @@ class RTDETRValidator(DetectionValidator):
         for i, bbox in enumerate(bboxes):  # (300, 4)
             bbox = ops.xywh2xyxy(bbox)
             score, cls = scores[i].max(-1)  # (300, )
-            # Do not need threshold for evaluation as only got 300 boxes here.
+            # Do not need threshold for evaluation as only got 300 boxes here
             # idx = score > self.args.conf
             pred = torch.cat([bbox, score[..., None], cls[..., None]], dim=-1)  # filter
-            # sort by confidence to correctly get internal metrics.
+            # Sort by confidence to correctly get internal metrics
             pred = pred[score.argsort(descending=True)]
             outputs[i] = pred  # [idx]
@@ -304,11 +304,11 @@ class PositionEmbeddingRandom(nn.Module):

     def _pe_encoding(self, coords: torch.Tensor) -> torch.Tensor:
         """Positionally encode points that are normalized to [0,1]."""
-        # assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape
+        # Assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape
         coords = 2 * coords - 1
         coords = coords @ self.positional_encoding_gaussian_matrix
         coords = 2 * np.pi * coords
-        # outputs d_1 x ... x d_n x C shape
+        # Outputs d_1 x ... x d_n x C shape
         return torch.cat([torch.sin(coords), torch.cos(coords)], dim=-1)

     def forward(self, size: Tuple[int, int]) -> torch.Tensor:

@@ -429,7 +429,7 @@ class Attention(nn.Module):
         self.use_rel_pos = use_rel_pos
         if self.use_rel_pos:
             assert (input_size is not None), 'Input size must be provided if using relative positional encoding.'
-            # initialize relative positional embeddings
+            # Initialize relative positional embeddings
             self.rel_pos_h = nn.Parameter(torch.zeros(2 * input_size[0] - 1, head_dim))
             self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))
@@ -172,7 +172,7 @@ class ConvLayer(nn.Module):
         self.depth = depth
         self.use_checkpoint = use_checkpoint

-        # build blocks
+        # Build blocks
         self.blocks = nn.ModuleList([
             MBConv(
                 dim,

@@ -182,7 +182,7 @@ class ConvLayer(nn.Module):
                 drop_path[i] if isinstance(drop_path, list) else drop_path,
             ) for i in range(depth)])

-        # patch merging layer
+        # Patch merging layer
         self.downsample = None if downsample is None else downsample(
             input_resolution, dim=dim, out_dim=out_dim, activation=activation)

@@ -393,11 +393,11 @@ class TinyViTBlock(nn.Module):
             pH, pW = H + pad_b, W + pad_r
             nH = pH // self.window_size
             nW = pW // self.window_size
-            # window partition
+            # Window partition
             x = x.view(B, nH, self.window_size, nW, self.window_size,
                        C).transpose(2, 3).reshape(B * nH * nW, self.window_size * self.window_size, C)
             x = self.attn(x)
-            # window reverse
+            # Window reverse
             x = x.view(B, nH, nW, self.window_size, self.window_size, C).transpose(2, 3).reshape(B, pH, pW, C)

             if padding:

@@ -467,7 +467,7 @@ class BasicLayer(nn.Module):
         self.depth = depth
         self.use_checkpoint = use_checkpoint

-        # build blocks
+        # Build blocks
         self.blocks = nn.ModuleList([
             TinyViTBlock(
                 dim=dim,

@@ -481,7 +481,7 @@ class BasicLayer(nn.Module):
                 activation=activation,
             ) for i in range(depth)])

-        # patch merging layer
+        # Patch merging layer
         self.downsample = None if downsample is None else downsample(
             input_resolution, dim=dim, out_dim=out_dim, activation=activation)

@@ -593,10 +593,10 @@ class TinyViT(nn.Module):
         patches_resolution = self.patch_embed.patches_resolution
         self.patches_resolution = patches_resolution

-        # stochastic depth
+        # Stochastic depth
         dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule

-        # build layers
+        # Build layers
         self.layers = nn.ModuleList()
         for i_layer in range(self.num_layers):
             kwargs = dict(

@@ -628,7 +628,7 @@ class TinyViT(nn.Module):
         self.norm_head = nn.LayerNorm(embed_dims[-1])
         self.head = nn.Linear(embed_dims[-1], num_classes) if num_classes > 0 else torch.nn.Identity()

-        # init weights
+        # Init weights
         self.apply(self._init_weights)
         self.set_layer_lr_decay(layer_lr_decay)
         self.neck = nn.Sequential(

@@ -653,7 +653,7 @@ class TinyViT(nn.Module):
         """Sets the learning rate decay for each layer in the TinyViT model."""
         decay_rate = layer_lr_decay

-        # layers -> blocks (depth)
+        # Layers -> blocks (depth)
         depth = sum(self.depths)
         lr_scales = [decay_rate ** (depth - i - 1) for i in range(depth)]
@@ -414,8 +414,7 @@ class Predictor(BasePredictor):
                 unchanged = unchanged and not changed

             new_masks.append(torch.as_tensor(mask).unsqueeze(0))
-            # Give score=0 to changed masks and score=1 to unchanged masks
-            # so NMS will prefer ones that didn't need postprocessing
+            # Give score=0 to changed masks and 1 to unchanged masks so NMS prefers masks not needing postprocessing
             scores.append(float(unchanged))

         # Recalculate boxes and remove any new duplicates
@@ -66,7 +66,7 @@ class DETRLoss(nn.Module):

     def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=''):
         """Computes the classification loss based on predictions, target values, and ground truth scores."""
-        # logits: [b, query, num_classes], gt_class: list[[n, 1]]
+        # Logits: [b, query, num_classes], gt_class: list[[n, 1]]
         name_class = f'loss_class{postfix}'
         bs, nq = pred_scores.shape[:2]
         # one_hot = F.one_hot(targets, self.nc + 1)[..., :-1]  # (bs, num_queries, num_classes)

@@ -90,7 +90,7 @@ class DETRLoss(nn.Module):
         """Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding
         boxes.
         """
-        # boxes: [b, query, 4], gt_bbox: list[[n, 4]]
+        # Boxes: [b, query, 4], gt_bbox: list[[n, 4]]
         name_bbox = f'loss_bbox{postfix}'
         name_giou = f'loss_giou{postfix}'
@@ -188,7 +188,7 @@ def get_cdn_group(batch,

     num_group = num_dn // max_nums
     num_group = 1 if num_group == 0 else num_group
-    # pad gt to max_num of a batch
+    # Pad gt to max_num of a batch
     bs = len(gt_groups)
     gt_cls = batch['cls']  # (bs*num, )
     gt_bbox = batch['bboxes']  # bs*num, 4

@@ -204,10 +204,10 @@ def get_cdn_group(batch,
     neg_idx = torch.arange(total_num * num_group, dtype=torch.long, device=gt_bbox.device) + num_group * total_num

     if cls_noise_ratio > 0:
-        # half of bbox prob
+        # Half of bbox prob
         mask = torch.rand(dn_cls.shape) < (cls_noise_ratio * 0.5)
         idx = torch.nonzero(mask).squeeze(-1)
-        # randomly put a new one here
+        # Randomly put a new one here
         new_label = torch.randint_like(idx, 0, num_classes, dtype=dn_cls.dtype, device=dn_cls.device)
         dn_cls[idx] = new_label

@@ -240,9 +240,9 @@ def get_cdn_group(batch,

     tgt_size = num_dn + num_queries
     attn_mask = torch.zeros([tgt_size, tgt_size], dtype=torch.bool)
-    # match query cannot see the reconstruct
+    # Match query cannot see the reconstruct
     attn_mask[num_dn:, :num_dn] = True
-    # reconstruct cannot see each other
+    # Reconstruct cannot see each other
     for i in range(num_group):
         if i == 0:
             attn_mask[max_nums * 2 * i:max_nums * 2 * (i + 1), max_nums * 2 * (i + 1):num_dn] = True
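The mask logic above is the usual DN-DETR group masking: matching queries must not attend to the denoising (reconstruction) queries, and denoising groups must not see each other. A toy sketch with invented sizes (not the function's exact group layout):

```python
import torch

num_dn, num_queries, num_group = 8, 4, 2           # toy sizes, illustration only
group = num_dn // num_group                         # denoising queries per group
tgt_size = num_dn + num_queries
attn_mask = torch.zeros(tgt_size, tgt_size, dtype=torch.bool)  # True = attention blocked

attn_mask[num_dn:, :num_dn] = True                  # matching queries can't see denoising queries
for i in range(num_group):                          # denoising groups can't see each other
    s, e = i * group, (i + 1) * group
    attn_mask[s:e, :s] = True
    attn_mask[s:e, e:num_dn] = True
```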
@@ -307,7 +307,7 @@ class Bottleneck(nn.Module):
         self.add = shortcut and c1 == c2

     def forward(self, x):
-        """'forward()' applies the YOLOv5 FPN to input data."""
+        """'forward()' applies the YOLO FPN to input data."""
         return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
@@ -192,7 +192,7 @@ class RTDETRDecoder(nn.Module):
                  dropout=0.,
                  act=nn.ReLU(),
                  eval_idx=-1,
-                 # training args
+                 # Training args
                  nd=100,  # num denoising
                  label_noise_ratio=0.5,
                  box_noise_scale=1.0,

@@ -225,7 +225,7 @@ class RTDETRDecoder(nn.Module):
         self.num_queries = nq
         self.num_decoder_layers = ndl

-        # backbone feature projection
+        # Backbone feature projection
         self.input_proj = nn.ModuleList(nn.Sequential(nn.Conv2d(x, hd, 1, bias=False), nn.BatchNorm2d(hd)) for x in ch)
         # NOTE: simplified version but it's not consistent with .pt weights.
         # self.input_proj = nn.ModuleList(Conv(x, hd, act=False) for x in ch)

@@ -234,24 +234,24 @@ class RTDETRDecoder(nn.Module):
         decoder_layer = DeformableTransformerDecoderLayer(hd, nh, d_ffn, dropout, act, self.nl, ndp)
         self.decoder = DeformableTransformerDecoder(hd, decoder_layer, ndl, eval_idx)

-        # denoising part
+        # Denoising part
         self.denoising_class_embed = nn.Embedding(nc, hd)
         self.num_denoising = nd
         self.label_noise_ratio = label_noise_ratio
         self.box_noise_scale = box_noise_scale

-        # decoder embedding
+        # Decoder embedding
         self.learnt_init_query = learnt_init_query
         if learnt_init_query:
             self.tgt_embed = nn.Embedding(nq, hd)
         self.query_pos_head = MLP(4, 2 * hd, hd, num_layers=2)

-        # encoder head
+        # Encoder head
         self.enc_output = nn.Sequential(nn.Linear(hd, hd), nn.LayerNorm(hd))
         self.enc_score_head = nn.Linear(hd, nc)
         self.enc_bbox_head = MLP(hd, hd, 4, num_layers=3)

-        # decoder head
+        # Decoder head
         self.dec_score_head = nn.ModuleList([nn.Linear(hd, nc) for _ in range(ndl)])
         self.dec_bbox_head = nn.ModuleList([MLP(hd, hd, 4, num_layers=3) for _ in range(ndl)])

@@ -261,10 +261,10 @@ class RTDETRDecoder(nn.Module):
         """Runs the forward pass of the module, returning bounding box and classification scores for the input."""
         from ultralytics.models.utils.ops import get_cdn_group

-        # input projection and embedding
+        # Input projection and embedding
         feats, shapes = self._get_encoder_input(x)

-        # prepare denoising training
+        # Prepare denoising training
         dn_embed, dn_bbox, attn_mask, dn_meta = \
             get_cdn_group(batch,
                           self.nc,

@@ -278,7 +278,7 @@ class RTDETRDecoder(nn.Module):
         embed, refer_bbox, enc_bboxes, enc_scores = \
             self._get_decoder_input(feats, shapes, dn_embed, dn_bbox)

-        # decoder
+        # Decoder
         dec_bboxes, dec_scores = self.decoder(embed,
                                               refer_bbox,
                                               feats,

@@ -316,9 +316,9 @@ class RTDETRDecoder(nn.Module):

     def _get_encoder_input(self, x):
         """Processes and returns encoder inputs by getting projection features from input and concatenating them."""
-        # get projection features
+        # Get projection features
         x = [self.input_proj[i](feat) for i, feat in enumerate(x)]
-        # get encoder inputs
+        # Get encoder inputs
         feats = []
         shapes = []
         for feat in x:

@@ -335,13 +335,13 @@ class RTDETRDecoder(nn.Module):
     def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None):
         """Generates and prepares the input required for the decoder from the provided features and shapes."""
         bs = len(feats)
-        # prepare input for decoder
+        # Prepare input for decoder
         anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
         features = self.enc_output(valid_mask * feats)  # bs, h*w, 256

         enc_outputs_scores = self.enc_score_head(features)  # (bs, h*w, nc)

-        # query selection
+        # Query selection
         # (bs, num_queries)
         topk_ind = torch.topk(enc_outputs_scores.max(-1).values, self.num_queries, dim=1).indices.view(-1)
         # (bs, num_queries)

@@ -352,7 +352,7 @@ class RTDETRDecoder(nn.Module):
         # (bs, num_queries, 4)
         top_k_anchors = anchors[:, topk_ind].view(bs, self.num_queries, -1)

-        # dynamic anchors + static content
+        # Dynamic anchors + static content
         refer_bbox = self.enc_bbox_head(top_k_features) + top_k_anchors

         enc_bboxes = refer_bbox.sigmoid()

@@ -373,7 +373,7 @@ class RTDETRDecoder(nn.Module):
     # TODO
     def _reset_parameters(self):
         """Initializes or resets the parameters of the model's various components with predefined weights and biases."""
-        # class and bbox head init
+        # Class and bbox head init
         bias_cls = bias_init_with_prob(0.01) / 80 * self.nc
         # NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets.
         # linear_init_(self.enc_score_head)
@@ -81,7 +81,7 @@ class AIFI(TransformerEncoderLayer):
         """Forward pass for the AIFI transformer layer."""
         c, h, w = x.shape[1:]
         pos_embed = self.build_2d_sincos_position_embedding(w, h, c)
-        # flatten [B, C, H, W] to [B, HxW, C]
+        # Flatten [B, C, H, W] to [B, HxW, C]
         x = super().forward(x.flatten(2).permute(0, 2, 1), pos=pos_embed.to(device=x.device, dtype=x.dtype))
         return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous()
@@ -213,7 +213,7 @@ class MSDeformAttn(nn.Module):
         if d_model % n_heads != 0:
             raise ValueError(f'd_model must be divisible by n_heads, but got {d_model} and {n_heads}')
         _d_per_head = d_model // n_heads
-        # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation
+        # Better to set _d_per_head to a power of 2 which is more efficient in a CUDA implementation
         assert _d_per_head * n_heads == d_model, '`d_model` must be divisible by `n_heads`'

         self.im2col_step = 64
@@ -277,7 +277,7 @@ class DetectionModel(BaseModel):
         return torch.cat((x, y, wh, cls), dim)

     def _clip_augmented(self, y):
-        """Clip YOLOv5 augmented inference tails."""
+        """Clip YOLO augmented inference tails."""
         nl = self.model[-1].nl  # number of detection layers (P3-P5)
         g = sum(4 ** x for x in range(nl))  # grid points
         e = 1  # exclude layer count

@@ -491,7 +491,7 @@ class Ensemble(nn.ModuleList):
         super().__init__()

     def forward(self, x, augment=False, profile=False, visualize=False):
-        """Function generates the YOLOv5 network's final layer."""
+        """Function generates the YOLO network's final layer."""
         y = [module(x, augment, profile, visualize)[0] for module in self]
         # y = torch.stack(y).max(0)[0]  # max ensemble
         # y = torch.stack(y).mean(0)  # mean ensemble
@@ -38,7 +38,7 @@ def on_pretrain_routine_end(trainer):
         if not active_run:
             active_run = mlflow.start_run(experiment_id=experiment.experiment_id, run_name=run_name)
             LOGGER.info(f'{prefix}Using run_id({active_run.info.run_id}) at {mlflow_location}')
-        run.log_params(vars(trainer.model.args))
+        run.log_params(trainer.args)
     except Exception as err:
         LOGGER.error(f'{prefix}Failing init - {repr(err)}')
         LOGGER.warning(f'{prefix}Continuing without Mlflow')
@@ -8,7 +8,10 @@ try:
     assert SETTINGS['wandb'] is True  # verify integration is enabled
     import wandb as wb

-    assert hasattr(wb, '__version__')
+    assert hasattr(wb, '__version__')  # verify package is not directory
+
+    import numpy as np
+    import pandas as pd

     _processed_plots = {}

@@ -16,6 +19,83 @@ except (ImportError, AssertionError):
     wb = None


+def _custom_table(x, y, classes, title='Precision Recall Curve', x_axis_title='Recall', y_axis_title='Precision'):
+    """
+    Create and log a custom metric visualization to wandb.plot.pr_curve.
+
+    This function crafts a custom metric visualization that mimics the behavior of wandb's default precision-recall curve
+    while allowing for enhanced customization. The visual metric is useful for monitoring model performance across different classes.
+
+    Args:
+        x (List): Values for the x-axis; expected to have length N.
+        y (List): Corresponding values for the y-axis; also expected to have length N.
+        classes (List): Labels identifying the class of each point; length N.
+        title (str, optional): Title for the plot; defaults to 'Precision Recall Curve'.
+        x_title (str, optional): Label for the x-axis; defaults to 'Recall'.
+        y_title (str, optional): Label for the y-axis; defaults to 'Precision'.
+
+    Returns:
+        (wandb.Object): A wandb object suitable for logging, showcasing the crafted metric visualization.
+    """
+    df = pd.DataFrame({'class': classes, 'y': y, 'x': x}).round(3)
+    fields = {'x': 'x', 'y': 'y', 'class': 'class'}
+    string_fields = {'title': title, 'x-axis-title': x_axis_title, 'y-axis-title': y_axis_title}
+    return wb.plot_table('wandb/area-under-curve/v0',
+                         wb.Table(dataframe=df),
+                         fields=fields,
+                         string_fields=string_fields)
+
+
+def _plot_curve(x,
+                y,
+                names=None,
+                id='precision-recall',
+                title='Precision Recall Curve',
+                x_title='Recall',
+                y_title='Precision',
+                num_x=100,
+                only_mean=False):
+    """
+    Log a metric curve visualization.
+
+    This function generates a metric curve based on input data and logs the visualization to wandb.
+    The curve can represent aggregated data (mean) or individual class data, depending on the 'only_mean' flag.
+
+    Args:
+        x (np.ndarray): Data points for the x-axis with length N.
+        y (np.ndarray): Corresponding data points for the y-axis with shape CxN, where C represents the number of classes.
+        names (list, optional): Names of the classes corresponding to the y-axis data; length C. Defaults to an empty list.
+        id (str, optional): Unique identifier for the logged data in wandb. Defaults to 'precision-recall'.
+        title (str, optional): Title for the visualization plot. Defaults to 'Precision Recall Curve'.
+        x_title (str, optional): Label for the x-axis. Defaults to 'Recall'.
+        y_title (str, optional): Label for the y-axis. Defaults to 'Precision'.
+        num_x (int, optional): Number of interpolated data points for visualization. Defaults to 100.
+        only_mean (bool, optional): Flag to indicate if only the mean curve should be plotted. Defaults to True.
+
+    Note:
+        The function leverages the '_custom_table' function to generate the actual visualization.
+    """
+    # Create new x
+    if names is None:
+        names = []
+    x_new = np.linspace(x[0], x[-1], num_x).round(5)
+
+    # Create arrays for logging
+    x_log = x_new.tolist()
+    y_log = np.interp(x_new, x, np.mean(y, axis=0)).round(3).tolist()
+
+    if only_mean:
+        table = wb.Table(data=list(zip(x_log, y_log)), columns=[x_title, y_title])
+        wb.run.log({title: wb.plot.line(table, x_title, y_title, title=title)})
+    else:
+        classes = ['mean'] * len(x_log)
+        for i, yi in enumerate(y):
+            x_log.extend(x_new)  # add new x
+            y_log.extend(np.interp(x_new, x, yi))  # interpolate y to new x
+            classes.extend([names[i]] * len(x_new))  # add class names
+        wb.log({id: _custom_table(x_log, y_log, classes, title, x_title, y_title)}, commit=False)
+
+
 def _log_plots(plots, step):
     """Logs plots from the input dictionary if they haven't been logged already at the specified step."""
     for name, params in plots.items():

@@ -55,6 +135,17 @@ def on_train_end(trainer):
     if trainer.best.exists():
         art.add_file(trainer.best)
         wb.run.log_artifact(art, aliases=['best'])
+    for curve_name, curve_values in zip(trainer.validator.metrics.curves, trainer.validator.metrics.curves_results):
+        x, y, x_title, y_title = curve_values
+        _plot_curve(
+            x,
+            y,
+            names=list(trainer.validator.metrics.names.values()),
+            id=f'curves/{curve_name}',
+            title=curve_name,
+            x_title=x_title,
+            y_title=y_title,
+        )
     wb.run.finish()  # required or run continues on dashboard
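The new `_custom_table` is a thin wrapper over wandb's `plot_table` with the `'wandb/area-under-curve/v0'` Vega preset, and `_plot_curve` resamples each class curve onto a shared x grid before logging. A minimal standalone sketch of the same pattern (project name invented; offline mode avoids needing a login):

```python
import numpy as np
import pandas as pd
import wandb as wb

run = wb.init(project='curve-demo', mode='offline')  # hypothetical project

x = np.linspace(0, 1, 100)
df = pd.DataFrame({'class': ['mean'] * len(x), 'x': x, 'y': np.sqrt(x)}).round(3)
table = wb.plot_table('wandb/area-under-curve/v0',
                      wb.Table(dataframe=df),
                      fields={'x': 'x', 'y': 'y', 'class': 'class'},
                      string_fields={'title': 'Demo Curve', 'x-axis-title': 'x', 'y-axis-title': 'y'})
run.log({'curves/demo': table})
run.finish()
```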
@@ -165,16 +165,16 @@ def check_version(current: str = '0.0.0',

     Example:
         ```python
-        # check if current version is exactly 22.04
+        # Check if current version is exactly 22.04
         check_version(current='22.04', required='==22.04')

-        # check if current version is greater than or equal to 22.04
+        # Check if current version is greater than or equal to 22.04
         check_version(current='22.10', required='22.04')  # assumes '>=' inequality if none passed

-        # check if current version is less than or equal to 22.04
+        # Check if current version is less than or equal to 22.04
         check_version(current='22.04', required='<=22.04')

-        # check if current version is between 20.04 (inclusive) and 22.04 (exclusive)
+        # Check if current version is between 20.04 (inclusive) and 22.04 (exclusive)
         check_version(current='21.10', required='>20.04,<22.04')
         ```
     """
@@ -45,7 +45,7 @@ def spaces_in_path(path):
        with ultralytics.utils.files import spaces_in_path

        with spaces_in_path('/path/with spaces') as new_path:
-            # your code here
+            # Your code here
        ```
    """
@@ -219,7 +219,7 @@ class Instances:
         self.normalized = normalized

         if len(segments) > 0:
-            # list[np.array(1000, 2)] * num_samples
+            # List[np.array(1000, 2)] * num_samples
             segments = resample_segments(segments)
             # (N, 1000, 2)
             segments = np.stack(segments, axis=0)
@@ -176,13 +176,13 @@ class v8DetectionLoss:
         imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
         anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

-        # targets
+        # Targets
         targets = torch.cat((batch['batch_idx'].view(-1, 1), batch['cls'].view(-1, 1), batch['bboxes']), 1)
         targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
         gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
         mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)

-        # pboxes
+        # Pboxes
         pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)

         _, target_bboxes, target_scores, fg_mask, _ = self.assigner(

@@ -191,11 +191,11 @@ class v8DetectionLoss:

         target_scores_sum = max(target_scores.sum(), 1)

-        # cls loss
+        # Cls loss
         # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
         loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

-        # bbox loss
+        # Bbox loss
         if fg_mask.sum():
             target_bboxes /= stride_tensor
             loss[0], loss[2] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores,

@@ -224,7 +224,7 @@ class v8SegmentationLoss(v8DetectionLoss):
         pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
             (self.reg_max * 4, self.nc), 1)

-        # b, grids, ..
+        # B, grids, ..
         pred_scores = pred_scores.permute(0, 2, 1).contiguous()
         pred_distri = pred_distri.permute(0, 2, 1).contiguous()
         pred_masks = pred_masks.permute(0, 2, 1).contiguous()

@@ -233,7 +233,7 @@ class v8SegmentationLoss(v8DetectionLoss):
         imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
         anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

-        # targets
+        # Targets
         try:
             batch_idx = batch['batch_idx'].view(-1, 1)
             targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1)

@@ -247,7 +247,7 @@ class v8SegmentationLoss(v8DetectionLoss):
                              "correctly formatted 'segment' dataset using 'data=coco128-seg.yaml' "
                              'as an example.\nSee https://docs.ultralytics.com/tasks/segment/ for help.') from e

-        # pboxes
+        # Pboxes
         pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)

         _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(

@@ -256,15 +256,15 @@ class v8SegmentationLoss(v8DetectionLoss):

         target_scores_sum = max(target_scores.sum(), 1)

-        # cls loss
+        # Cls loss
         # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
         loss[2] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

         if fg_mask.sum():
-            # bbox loss
+            # Bbox loss
             loss[0], loss[3] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes / stride_tensor,
                                               target_scores, target_scores_sum, fg_mask)
-            # masks loss
+            # Masks loss
             masks = batch['masks'].to(self.device).float()
             if tuple(masks.shape[-2:]) != (mask_h, mask_w):  # downsample
                 masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0]

@@ -344,13 +344,13 @@ class v8SegmentationLoss(v8DetectionLoss):
         _, _, mask_h, mask_w = proto.shape
         loss = 0

-        # normalize to 0-1
+        # Normalize to 0-1
         target_bboxes_normalized = target_bboxes / imgsz[[1, 0, 1, 0]]

-        # areas of target bboxes
+        # Areas of target bboxes
         marea = xyxy2xywh(target_bboxes_normalized)[..., 2:].prod(2)

-        # normalize to mask size
+        # Normalize to mask size
         mxyxy = target_bboxes_normalized * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=proto.device)

         for i, single_i in enumerate(zip(fg_mask, target_gt_idx, pred_masks, proto, mxyxy, marea, masks)):

@@ -393,7 +393,7 @@ class v8PoseLoss(v8DetectionLoss):
         pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
             (self.reg_max * 4, self.nc), 1)

-        # b, grids, ..
+        # B, grids, ..
         pred_scores = pred_scores.permute(0, 2, 1).contiguous()
         pred_distri = pred_distri.permute(0, 2, 1).contiguous()
         pred_kpts = pred_kpts.permute(0, 2, 1).contiguous()

@@ -402,7 +402,7 @@ class v8PoseLoss(v8DetectionLoss):
         imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
         anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

-        # targets
+        # Targets
         batch_size = pred_scores.shape[0]
         batch_idx = batch['batch_idx'].view(-1, 1)
         targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1)

@@ -410,7 +410,7 @@ class v8PoseLoss(v8DetectionLoss):
         gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
         mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)

-        # pboxes
+        # Pboxes
         pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)
         pred_kpts = self.kpts_decode(anchor_points, pred_kpts.view(batch_size, -1, *self.kpt_shape))  # (b, h*w, 17, 3)

@@ -420,11 +420,11 @@ class v8PoseLoss(v8DetectionLoss):

         target_scores_sum = max(target_scores.sum(), 1)

-        # cls loss
+        # Cls loss
         # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
         loss[3] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

-        # bbox loss
+        # Bbox loss
         if fg_mask.sum():
             target_bboxes /= stride_tensor
             loss[0], loss[4] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores,
@@ -36,7 +36,7 @@ def bbox_ioa(box1, box2, iou=False, eps=1e-7):
     inter_area = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * \
                  (np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)).clip(0)

-    # box2 area
+    # Box2 area
     area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
     if iou:
         box1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)

@@ -440,13 +440,18 @@ def ap_per_class(tp,

     Returns:
         (tuple): A tuple of six arrays and one array of unique classes, where:
-            tp (np.ndarray): True positive counts for each class.
-            fp (np.ndarray): False positive counts for each class.
-            p (np.ndarray): Precision values at each confidence threshold.
-            r (np.ndarray): Recall values at each confidence threshold.
-            f1 (np.ndarray): F1-score values at each confidence threshold.
-            ap (np.ndarray): Average precision for each class at different IoU thresholds.
-            unique_classes (np.ndarray): An array of unique classes that have data.
+            tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
+            fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,).
+            p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,).
+            r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,).
+            f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,).
+            ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10).
+            unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,).
+            p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000).
+            r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000).
+            f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000).
+            x (np.ndarray): X-axis values for the curves. Shape: (1000,).
+            prec_values: Precision values at mAP@0.5 for each class. Shape: (nc, 1000).
     """

     # Sort by objectness

@@ -458,8 +463,10 @@ def ap_per_class(tp,
     nc = unique_classes.shape[0]  # number of classes, number of detections

     # Create Precision-Recall curve and compute AP for each class
-    px, py = np.linspace(0, 1, 1000), []  # for plotting
-    ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
+    x, prec_values = np.linspace(0, 1, 1000), []
+
+    # Average precision, precision and recall curves
+    ap, p_curve, r_curve = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
     for ci, c in enumerate(unique_classes):
         i = pred_cls == c
         n_l = nt[ci]  # number of labels

@@ -473,33 +480,35 @@ def ap_per_class(tp,

         # Recall
         recall = tpc / (n_l + eps)  # recall curve
-        r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0)  # negative x, xp because xp decreases
+        r_curve[ci] = np.interp(-x, -conf[i], recall[:, 0], left=0)  # negative x, xp because xp decreases

         # Precision
         precision = tpc / (tpc + fpc)  # precision curve
-        p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1)  # p at pr_score
+        p_curve[ci] = np.interp(-x, -conf[i], precision[:, 0], left=1)  # p at pr_score

         # AP from recall-precision curve
         for j in range(tp.shape[1]):
             ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
             if plot and j == 0:
-                py.append(np.interp(px, mrec, mpre))  # precision at mAP@0.5
+                prec_values.append(np.interp(x, mrec, mpre))  # precision at mAP@0.5
+
+    prec_values = np.array(prec_values)  # (nc, 1000)

     # Compute F1 (harmonic mean of precision and recall)
-    f1 = 2 * p * r / (p + r + eps)
+    f1_curve = 2 * p_curve * r_curve / (p_curve + r_curve + eps)
     names = [v for k, v in names.items() if k in unique_classes]  # list: only classes that have data
     names = dict(enumerate(names))  # to dict
     if plot:
-        plot_pr_curve(px, py, ap, save_dir / f'{prefix}PR_curve.png', names, on_plot=on_plot)
-        plot_mc_curve(px, f1, save_dir / f'{prefix}F1_curve.png', names, ylabel='F1', on_plot=on_plot)
-        plot_mc_curve(px, p, save_dir / f'{prefix}P_curve.png', names, ylabel='Precision', on_plot=on_plot)
-        plot_mc_curve(px, r, save_dir / f'{prefix}R_curve.png', names, ylabel='Recall', on_plot=on_plot)
+        plot_pr_curve(x, prec_values, ap, save_dir / f'{prefix}PR_curve.png', names, on_plot=on_plot)
+        plot_mc_curve(x, f1_curve, save_dir / f'{prefix}F1_curve.png', names, ylabel='F1', on_plot=on_plot)
+        plot_mc_curve(x, p_curve, save_dir / f'{prefix}P_curve.png', names, ylabel='Precision', on_plot=on_plot)
+        plot_mc_curve(x, r_curve, save_dir / f'{prefix}R_curve.png', names, ylabel='Recall', on_plot=on_plot)

-    i = smooth(f1.mean(0), 0.1).argmax()  # max F1 index
-    p, r, f1 = p[:, i], r[:, i], f1[:, i]
+    i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index
+    p, r, f1 = p_curve[:, i], r_curve[:, i], f1_curve[:, i]  # max-F1 precision, recall, F1 values
     tp = (r * nt).round()  # true positives
     fp = (tp / (p + eps) - tp).round()  # false positives
-    return tp, fp, p, r, f1, ap, unique_classes.astype(int)
+    return tp, fp, p, r, f1, ap, unique_classes.astype(int), p_curve, r_curve, f1_curve, x, prec_values


 class Metric(SimpleClass):

@@ -645,7 +654,19 @@ class Metric(SimpleClass):
         Updates the class attributes `self.p`, `self.r`, `self.f1`, `self.all_ap`, and `self.ap_class_index` based
         on the values provided in the `results` tuple.
         """
-        self.p, self.r, self.f1, self.all_ap, self.ap_class_index = results
+        (self.p, self.r, self.f1, self.all_ap, self.ap_class_index, self.p_curve, self.r_curve, self.f1_curve, self.px,
+         self.prec_values) = results
+
+    @property
+    def curves(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return []
+
+    @property
+    def curves_results(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return [[self.px, self.prec_values, 'Recall', 'Precision'], [self.px, self.f1_curve, 'Confidence', 'F1'],
+                [self.px, self.p_curve, 'Confidence', 'Precision'], [self.px, self.r_curve, 'Confidence', 'Recall']]


 class DetMetrics(SimpleClass):

@@ -676,6 +697,8 @@ class DetMetrics(SimpleClass):
         fitness: Computes the fitness score based on the computed detection metrics.
         ap_class_index: Returns a list of class indices sorted by their average precision (AP) values.
         results_dict: Returns a dictionary that maps detection metric keys to their computed values.
+        curves: TODO
+        curves_results: TODO
     """

     def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:

@@ -686,6 +709,7 @@ class DetMetrics(SimpleClass):
         self.names = names
         self.box = Metric()
         self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
+        self.task = 'detect'

     def process(self, tp, conf, pred_cls, target_cls):
         """Process predicted results for object detection and update metrics."""

@@ -733,6 +757,16 @@ class DetMetrics(SimpleClass):
         """Returns dictionary of computed performance metrics and statistics."""
         return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))

+    @property
+    def curves(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
+
+    @property
+    def curves_results(self):
+        """Returns dictionary of computed performance metrics and statistics."""
+        return self.box.curves_results
+

 class SegmentMetrics(SimpleClass):
     """

@@ -772,6 +806,7 @@ class SegmentMetrics(SimpleClass):
         self.box = Metric()
         self.seg = Metric()
         self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
+        self.task = 'segment'

     def process(self, tp_b, tp_m, conf, pred_cls, target_cls):
         """

@@ -843,6 +878,18 @@ class SegmentMetrics(SimpleClass):
         """Returns results of object detection model for evaluation."""
         return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))

+    @property
+    def curves(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return [
+            'Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)',
+            'Precision-Recall(M)', 'F1-Confidence(M)', 'Precision-Confidence(M)', 'Recall-Confidence(M)']
+
+    @property
+    def curves_results(self):
+        """Returns dictionary of computed performance metrics and statistics."""
+        return self.box.curves_results + self.seg.curves_results
+

 class PoseMetrics(SegmentMetrics):
     """

@@ -883,6 +930,7 @@ class PoseMetrics(SegmentMetrics):
         self.box = Metric()
         self.pose = Metric()
         self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
+        self.task = 'pose'

     def process(self, tp_b, tp_p, conf, pred_cls, target_cls):
         """

@@ -944,6 +992,18 @@ class PoseMetrics(SegmentMetrics):
         """Computes classification metrics and speed using the `targets` and `pred` inputs."""
         return self.pose.fitness() + self.box.fitness()

+    @property
+    def curves(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return [
+            'Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)',
+            'Precision-Recall(P)', 'F1-Confidence(P)', 'Precision-Confidence(P)', 'Recall-Confidence(P)']
+
+    @property
+    def curves_results(self):
+        """Returns dictionary of computed performance metrics and statistics."""
+        return self.box.curves_results + self.pose.curves_results
+

 class ClassifyMetrics(SimpleClass):
     """

@@ -968,6 +1028,7 @@ class ClassifyMetrics(SimpleClass):
         self.top1 = 0
         self.top5 = 0
         self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
+        self.task = 'classify'

     def process(self, targets, pred):
         """Target classes and predicted classes."""

@@ -990,3 +1051,13 @@ class ClassifyMetrics(SimpleClass):
     def keys(self):
         """Returns a list of keys for the results_dict property."""
         return ['metrics/accuracy_top1', 'metrics/accuracy_top5']
+
+    @property
+    def curves(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return []
+
+    @property
+    def curves_results(self):
+        """Returns a list of curves for accessing specific metrics curves."""
+        return []
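Callers of `ap_per_class` now unpack twelve values instead of seven; inside `DetMetrics.process` the first two are dropped so `Metric.update` receives the ten-element tuple shown above. A toy call with made-up detections (three predictions, one class, ten IoU thresholds, mirroring the validator's shapes):

```python
import numpy as np
from ultralytics.utils.metrics import ap_per_class

tp = np.array([[1] * 10, [1] * 10, [0] * 10], dtype=bool)  # (n_preds, n_iou_thresholds)
conf = np.array([0.9, 0.6, 0.3])
pred_cls = np.zeros(3)
target_cls = np.zeros(2)

out = ap_per_class(tp, conf, pred_cls, target_cls, names={0: 'person'})
print(len(out))  # 12 values after this commit (was 7)
```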
@@ -193,7 +193,7 @@ class TaskAlignedAssigner(nn.Module):
             # Expand topk_idxs for each value of k and add 1 at the specified positions
             count_tensor.scatter_add_(-1, topk_idxs[:, :, k:k + 1], ones)
         # count_tensor.scatter_add_(-1, topk_idxs, torch.ones_like(topk_idxs, dtype=torch.int8, device=topk_idxs.device))
-        # filter invalid bboxes
+        # Filter invalid bboxes
        count_tensor.masked_fill_(count_tensor > 1, 0)

        return count_tensor.to(metrics.dtype)
@@ -45,6 +45,7 @@ def run_ray_tune(model,
     try:
         subprocess.run('pip install ray[tune]'.split(), check=True)
+
         import ray
         from ray import tune
         from ray.air import RunConfig
         from ray.air.integrations.wandb import WandbLoggerCallback

@@ -83,6 +84,10 @@ def run_ray_tune(model,
         'mixup': tune.uniform(0.0, 1.0),  # image mixup (probability)
         'copy_paste': tune.uniform(0.0, 1.0)}  # segment copy-paste (probability)

+    # Put the model in ray store
+    task = model.task
+    model_in_store = ray.put(model)
+
     def _tune(config):
         """
         Trains the YOLO model with the specified hyperparameters and additional arguments.

@@ -93,9 +98,10 @@ def run_ray_tune(model,
         Returns:
             None.
         """
-        model.reset_callbacks()
+        model_to_train = ray.get(model_in_store)  # get the model from ray store for tuning
+        model_to_train.reset_callbacks()
         config.update(train_args)
-        results = model.train(**config)
+        results = model_to_train.train(**config)
         return results.results_dict

     # Get search space

@@ -104,7 +110,7 @@ def run_ray_tune(model,
         LOGGER.warning('WARNING ⚠️ search space not provided, using default search space.')

     # Get dataset
-    data = train_args.get('data', TASK2DATA[model.task])
+    data = train_args.get('data', TASK2DATA[task])
     space['data'] = data
     if 'data' not in train_args:
         LOGGER.warning(f'WARNING ⚠️ data not provided, using default "data={data}".')

@@ -114,7 +120,7 @@ def run_ray_tune(model,

     # Define the ASHA scheduler for hyperparameter search
     asha_scheduler = ASHAScheduler(time_attr='epoch',
-                                   metric=TASK2METRIC[model.task],
+                                   metric=TASK2METRIC[task],
                                    mode='max',
                                    max_t=train_args.get('epochs') or DEFAULT_CFG_DICT['epochs'] or 100,
                                    grace_period=grace_period,
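Putting the model into Ray's object store once (`ray.put`) and materializing it inside each trial (`ray.get`) avoids re-pickling the model into every trial closure; caching `task` up front keeps it available after the handoff. The pattern in isolation, with a stand-in payload:

```python
import ray

ray.init(ignore_reinit_error=True)

payload = {'weights': [1, 2, 3]}   # stand-in for the model object
handle = ray.put(payload)          # serialize once into the object store

@ray.remote
def trial():
    local = ray.get(handle)        # each worker materializes its own copy
    return sum(local['weights'])

print(ray.get(trial.remote()))     # 6
```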