mirror of https://github.com/THU-MIG/yolov10.git, synced 2025-05-23 05:24:22 +08:00

Add docformatter to pre-commit (#5279)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com>

parent c7aa83da31
commit 7517667a33
.pre-commit-config.yaml
@@ -62,6 +62,11 @@ repos:
         args:
           - --ignore-words-list=crate,nd,strack,dota,ane,segway,fo

+  - repo: https://github.com/PyCQA/docformatter
+    rev: v1.7.5
+    hooks:
+      - id: docformatter
+
   # - repo: https://github.com/asottile/yesqa
   #   rev: v1.4.0
   #   hooks:
@@ -18,7 +18,15 @@ CODE_DIR = ROOT
 REFERENCE_DIR = ROOT.parent / 'docs/reference'


-def extract_classes_and_functions(filepath):
+def extract_classes_and_functions(filepath: Path):
+    """Extracts class and function names from a given Python file.
+
+    Args:
+        filepath (Path): The path to the Python file.
+
+    Returns:
+        (tuple): A tuple containing lists of class and function names.
+    """
     with open(filepath, 'r') as file:
         content = file.read()

@@ -31,7 +39,15 @@ def extract_classes_and_functions(filepath):
     return classes, functions


-def create_markdown(py_filepath, module_path, classes, functions):
+def create_markdown(py_filepath: Path, module_path: str, classes: list, functions: list):
+    """Creates a Markdown file containing the API reference for the given Python module.
+
+    Args:
+        py_filepath (Path): The path to the Python file.
+        module_path (str): The import path for the Python module.
+        classes (list): A list of class names within the module.
+        functions (list): A list of function names within the module.
+    """
     md_filepath = py_filepath.with_suffix('.md')

     # Read existing content and keep header content between first two ---

@@ -64,17 +80,35 @@ def create_markdown(py_filepath, module_path, classes, functions):


 def nested_dict():
+    """Creates and returns a nested defaultdict.
+
+    Returns:
+        (defaultdict): A nested defaultdict object.
+    """
     return defaultdict(nested_dict)


-def sort_nested_dict(d):
+def sort_nested_dict(d: dict):
+    """Sorts a nested dictionary recursively.
+
+    Args:
+        d (dict): The dictionary to sort.
+
+    Returns:
+        (dict): The sorted dictionary.
+    """
     return {
         key: sort_nested_dict(value) if isinstance(value, dict) else value
         for key, value in sorted(d.items())
     }


-def create_nav_menu_yaml(nav_items):
+def create_nav_menu_yaml(nav_items: list):
+    """Creates a YAML file for the navigation menu based on the provided list of items.
+
+    Args:
+        nav_items (list): A list of relative file paths to Markdown files for the navigation menu.
+    """
     nav_tree = nested_dict()

     for item_str in nav_items:

@@ -90,6 +124,7 @@ def create_nav_menu_yaml(nav_items):
     nav_tree_sorted = sort_nested_dict(nav_tree)

     def _dict_to_yaml(d, level=0):
+        """Converts a nested dictionary to a YAML-formatted string with indentation."""
         yaml_str = ''
         indent = ' ' * level
         for k, v in d.items():

@@ -105,6 +140,7 @@ def create_nav_menu_yaml(nav_items):


 def main():
+    """Main function to extract class and function names, create Markdown files, and generate a YAML navigation menu."""
     nav_items = []
     for root, _, files in os.walk(CODE_DIR):
         for file in files:
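The hunks above elide the parsing body of extract_classes_and_functions; as a point of reference, a minimal self-contained sketch using the standard ast module (an assumption, not necessarily the repository's actual approach):

import ast
from pathlib import Path


def extract_classes_and_functions_sketch(filepath: Path):
    """Extract top-level class and function names from a Python file (illustrative sketch)."""
    tree = ast.parse(filepath.read_text(encoding='utf-8'))
    classes = [node.name for node in tree.body if isinstance(node, ast.ClassDef)]
    functions = [node.name for node in tree.body if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))]
    return classes, functions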
@@ -16,7 +16,3 @@ keywords: Ultralytics, YOLO, HungarianMatcher, inverse_sigmoid, detection models
 ---
-## ::: ultralytics.models.utils.ops.get_cdn_group
-<br><br>
-
----
 ## ::: ultralytics.models.utils.ops.inverse_sigmoid
 <br><br>
@@ -9,11 +9,12 @@ from ultralytics.utils import ASSETS, yaml_load
 from ultralytics.utils.checks import check_requirements, check_yaml


-class Yolov8:
+class YOLOv8:
+    """YOLOv8 object detection model class for handling inference and visualization."""

     def __init__(self, onnx_model, input_image, confidence_thres, iou_thres):
         """
-        Initializes an instance of the Yolov8 class.
+        Initializes an instance of the YOLOv8 class.

         Args:
             onnx_model: Path to the ONNX model.

@@ -213,8 +214,8 @@ if __name__ == '__main__':
     # Check the requirements and select the appropriate backend (CPU or GPU)
     check_requirements('onnxruntime-gpu' if torch.cuda.is_available() else 'onnxruntime')

-    # Create an instance of the Yolov8 class with the specified arguments
-    detection = Yolov8(args.model, args.img, args.conf_thres, args.iou_thres)
+    # Create an instance of the YOLOv8 class with the specified arguments
+    detection = YOLOv8(args.model, args.img, args.conf_thres, args.iou_thres)

     # Perform object detection and obtain the output image
     output_image = detection.main()
@@ -7,11 +7,22 @@ from ultralytics.utils import ASSETS, yaml_load
 from ultralytics.utils.checks import check_yaml

 CLASSES = yaml_load(check_yaml('coco128.yaml'))['names']

 colors = np.random.uniform(0, 255, size=(len(CLASSES), 3))


 def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
+    """
+    Draws bounding boxes on the input image based on the provided arguments.
+
+    Args:
+        img (numpy.ndarray): The input image to draw the bounding box on.
+        class_id (int): Class ID of the detected object.
+        confidence (float): Confidence score of the detected object.
+        x (int): X-coordinate of the top-left corner of the bounding box.
+        y (int): Y-coordinate of the top-left corner of the bounding box.
+        x_plus_w (int): X-coordinate of the bottom-right corner of the bounding box.
+        y_plus_h (int): Y-coordinate of the bottom-right corner of the bounding box.
+    """
     label = f'{CLASSES[class_id]} ({confidence:.2f})'
     color = colors[class_id]
     cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)

@@ -19,18 +30,39 @@ def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):


 def main(onnx_model, input_image):
+    """
+    Main function to load ONNX model, perform inference, draw bounding boxes, and display the output image.
+
+    Args:
+        onnx_model (str): Path to the ONNX model.
+        input_image (str): Path to the input image.
+
+    Returns:
+        list: List of dictionaries containing detection information such as class_id, class_name, confidence, etc.
+    """
     # Load the ONNX model
     model: cv2.dnn.Net = cv2.dnn.readNetFromONNX(onnx_model)

     # Read the input image
     original_image: np.ndarray = cv2.imread(input_image)
     [height, width, _] = original_image.shape

     # Prepare a square image for inference
     length = max((height, width))
     image = np.zeros((length, length, 3), np.uint8)
     image[0:height, 0:width] = original_image

     # Calculate scale factor
     scale = length / 640

     # Preprocess the image and prepare blob for model
     blob = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(640, 640), swapRB=True)
     model.setInput(blob)

     # Perform inference
     outputs = model.forward()

     # Prepare output array
     outputs = np.array([cv2.transpose(outputs[0])])
     rows = outputs.shape[1]

@@ -38,6 +70,7 @@ def main(onnx_model, input_image):
     scores = []
     class_ids = []

+    # Iterate through output to collect bounding boxes, confidence scores, and class IDs
     for i in range(rows):
         classes_scores = outputs[0][i][4:]
         (minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores)

@@ -49,9 +82,12 @@ def main(onnx_model, input_image):
         scores.append(maxScore)
         class_ids.append(maxClassIndex)

     # Apply NMS (Non-maximum suppression)
     result_boxes = cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45, 0.5)

     detections = []

     # Iterate through NMS results to draw bounding boxes and labels
     for i in range(len(result_boxes)):
         index = result_boxes[i]
         box = boxes[index]

@@ -65,6 +101,7 @@ def main(onnx_model, input_image):
         draw_bounding_box(original_image, class_ids[index], scores[index], round(box[0] * scale), round(box[1] * scale),
                           round((box[0] + box[2]) * scale), round((box[1] + box[3]) * scale))

+    # Display the image with bounding boxes
     cv2.imshow('image', original_image)
     cv2.waitKey(0)
     cv2.destroyAllWindows()

@@ -74,7 +111,7 @@ def main(onnx_model, input_image):

 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--model', default='yolov8n.onnx', help='Input your onnx model.')
+    parser.add_argument('--model', default='yolov8n.onnx', help='Input your ONNX model.')
     parser.add_argument('--img', default=str(ASSETS / 'bus.jpg'), help='Path to input image.')
     args = parser.parse_args()
     main(args.model, args.img)
@@ -33,10 +33,6 @@ counting_regions = [
 }, ]


-def is_inside_polygon(point, polygon):
-    return polygon.contains(Point(point))
-
-
 def mouse_callback(event, x, y, flags, param):
     """Mouse call back event."""
     global current_region

@@ -44,7 +40,7 @@ def mouse_callback(event, x, y, flags, param):
     # Mouse left button down event
     if event == cv2.EVENT_LBUTTONDOWN:
         for region in counting_regions:
-            if is_inside_polygon((x, y), region['polygon']):
+            if region['polygon'].contains(Point((x, y))):
                 current_region = region
                 current_region['dragging'] = True
                 current_region['offset_x'] = x

@@ -150,7 +146,7 @@ def run(

         # Check if detection inside region
         for region in counting_regions:
-            if is_inside_polygon((x, y), region['polygon']):
+            if region['polygon'].contains(Point((x, y))):
                 region['counts'] += 1

         # Draw regions (Polygons/Rectangles)
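The removed is_inside_polygon helper was a one-line wrapper over Shapely; the direct call that replaces it can be exercised standalone (assumes shapely is installed; the region coordinates below are made up for illustration):

from shapely.geometry import Point, Polygon

region = Polygon([(50, 80), (250, 20), (450, 80), (400, 350), (100, 350)])  # hypothetical counting region
print(region.contains(Point((300, 300))))  # True if the point lies inside the region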
setup.cfg
@@ -60,3 +60,12 @@ SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET = True
 SPLIT_BEFORE_CLOSING_BRACKET = False
 SPLIT_BEFORE_FIRST_ARGUMENT = False
 # EACH_DICT_ENTRY_ON_SEPARATE_LINE = False
+
+[docformatter]
+wrap-summaries = 120
+wrap-descriptions = 120
+in-place = true
+make-summary-multi-line = false
+pre-summary-newline = true
+force-wrap = false
+close-quotes-on-newline = true
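The [docformatter] section above configures the hook added to .pre-commit-config.yaml; the equivalent one-off run from Python, mirroring those settings as CLI flags (a sketch; normally pre-commit invokes the tool for you):

import subprocess

subprocess.run([
    'docformatter', '--in-place', '--recursive', '--wrap-summaries', '120', '--wrap-descriptions', '120',
    '--pre-summary-newline', '--close-quotes-on-newline', 'ultralytics'], check=True)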
setup.py
@@ -12,6 +12,12 @@ README = (PARENT / 'README.md').read_text(encoding='utf-8')


 def get_version():
+    """
+    Retrieve the version number from the 'ultralytics/__init__.py' file.
+
+    Returns:
+        (str): The version number extracted from the '__version__' attribute in the 'ultralytics/__init__.py' file.
+    """
     file = PARENT / 'ultralytics/__init__.py'
     return re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', file.read_text(encoding='utf-8'), re.M)[1]

@@ -24,7 +30,7 @@ def parse_requirements(file_path: Path):
         file_path (str | Path): Path to the requirements.txt file.

     Returns:
-        List[str]: List of parsed requirements.
+        (List[str]): List of parsed requirements.
     """

     requirements = []
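A quick self-contained check of the version-extraction regex used in get_version, on an assumed sample version string:

import re

sample = "# Ultralytics YOLO\n__version__ = '8.0.200'\n"  # stand-in for ultralytics/__init__.py content
version = re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', sample, re.M)[1]
assert version == '8.0.200'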
@@ -9,7 +9,8 @@ TMP = Path(__file__).resolve().parent / 'tmp'  # temp directory for test files


 def pytest_addoption(parser):
-    """Add custom command-line options to pytest.
+    """
+    Add custom command-line options to pytest.

     Args:
         parser (pytest.config.Parser): The pytest parser object.

@@ -18,7 +19,8 @@ def pytest_addoption(parser):


 def pytest_configure(config):
-    """Register custom markers to avoid pytest warnings.
+    """
+    Register custom markers to avoid pytest warnings.

     Args:
         config (pytest.config.Config): The pytest config object.

@@ -27,7 +29,8 @@ def pytest_configure(config):


 def pytest_runtest_setup(item):
-    """Setup hook to skip tests marked as slow if the --slow option is not provided.
+    """
+    Setup hook to skip tests marked as slow if the --slow option is not provided.

     Args:
         item (pytest.Item): The test item object.
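The hook bodies are elided by the hunks; a minimal sketch of how this --slow plumbing is typically wired in pytest (an assumption based on the docstrings above, not the repository's exact code):

import pytest


def pytest_addoption(parser):
    """Register a --slow flag so slow tests are opt-in (sketch)."""
    parser.addoption('--slow', action='store_true', default=False, help='Run slow tests')


def pytest_configure(config):
    """Declare the 'slow' marker to avoid unknown-marker warnings (sketch)."""
    config.addinivalue_line('markers', 'slow: mark test as slow to run')


def pytest_runtest_setup(item):
    """Skip tests marked as slow unless --slow was passed (sketch)."""
    if 'slow' in item.keywords and not item.config.getoption('--slow'):
        pytest.skip('skipping slow test, pass --slow to run')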
@@ -22,11 +22,12 @@ EXPORT_ARGS = [


 def run(cmd):
-    # Run a subprocess command with check=True
+    """Execute a shell command using subprocess."""
     subprocess.run(cmd.split(), check=True)


 def test_special_modes():
+    """Test various special command modes of YOLO."""
     run('yolo help')
     run('yolo checks')
     run('yolo version')

@@ -36,31 +37,37 @@ def test_special_modes():

 @pytest.mark.parametrize('task,model,data', TASK_ARGS)
 def test_train(task, model, data):
+    """Test YOLO training for a given task, model, and data."""
     run(f'yolo train {task} model={model}.yaml data={data} imgsz=32 epochs=1 cache=disk')


 @pytest.mark.parametrize('task,model,data', TASK_ARGS)
 def test_val(task, model, data):
+    """Test YOLO validation for a given task, model, and data."""
     run(f'yolo val {task} model={WEIGHTS_DIR / model}.pt data={data} imgsz=32 save_txt save_json')


 @pytest.mark.parametrize('task,model,data', TASK_ARGS)
 def test_predict(task, model, data):
+    """Test YOLO prediction on sample assets for a given task and model."""
     run(f'yolo predict model={WEIGHTS_DIR / model}.pt source={ASSETS} imgsz=32 save save_crop save_txt')


 @pytest.mark.parametrize('model,format', EXPORT_ARGS)
 def test_export(model, format):
+    """Test exporting a YOLO model to different formats."""
     run(f'yolo export model={WEIGHTS_DIR / model}.pt format={format} imgsz=32')


 def test_rtdetr(task='detect', model='yolov8n-rtdetr.yaml', data='coco8.yaml'):
+    """Test the RTDETR functionality with the Ultralytics framework."""
     # Warning: MUST use imgsz=640
     run(f'yolo train {task} model={model} data={data} --imgsz= 640 epochs =1, cache = disk')  # add coma, spaces to args
     run(f"yolo predict {task} model={model} source={ASSETS / 'bus.jpg'} imgsz=640 save save_crop save_txt")


 def test_fastsam(task='segment', model=WEIGHTS_DIR / 'FastSAM-s.pt', data='coco8-seg.yaml'):
+    """Test FastSAM segmentation functionality within Ultralytics."""
     source = ASSETS / 'bus.jpg'

     run(f'yolo segment val {task} model={model} data={data} imgsz=32')

@@ -97,6 +104,7 @@ def test_fastsam(task='segment', model=WEIGHTS_DIR / 'FastSAM-s.pt', data='coco8


 def test_mobilesam():
+    """Test MobileSAM segmentation functionality using Ultralytics."""
     from ultralytics import SAM

     # Load the model

@@ -121,5 +129,6 @@ def test_mobilesam():
 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
 @pytest.mark.skipif(CUDA_DEVICE_COUNT < 2, reason='DDP is not available')
 def test_train_gpu(task, model, data):
+    """Test YOLO training on GPU(s) for various tasks and models."""
     run(f'yolo train {task} model={model}.yaml data={data} imgsz=32 epochs=1 device=0')  # single GPU
     run(f'yolo train {task} model={model}.pt data={data} imgsz=32 epochs=1 device=0,1')  # multi GPU
@@ -1,4 +1,5 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

+import contextlib

 import pytest

@@ -17,18 +18,21 @@ BUS = ASSETS / 'bus.jpg'


 def test_checks():
+    """Validate CUDA settings against torch CUDA functions."""
     assert torch.cuda.is_available() == CUDA_IS_AVAILABLE
     assert torch.cuda.device_count() == CUDA_DEVICE_COUNT


 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
 def test_train():
+    """Test model training on a minimal dataset."""
     device = 0 if CUDA_DEVICE_COUNT == 1 else [0, 1]
     YOLO(MODEL).train(data=DATA, imgsz=64, epochs=1, device=device)  # requires imgsz>=64


 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
 def test_predict_multiple_devices():
+    """Validate model prediction on multiple devices."""
     model = YOLO('yolov8n.pt')
     model = model.cpu()
     assert str(model.device) == 'cpu'

@@ -53,6 +57,7 @@ def test_predict_multiple_devices():

 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
 def test_autobatch():
+    """Check batch size for YOLO model using autobatch."""
     from ultralytics.utils.autobatch import check_train_batch_size

     check_train_batch_size(YOLO(MODEL).model.cuda(), imgsz=128, amp=True)

@@ -60,6 +65,7 @@ def test_autobatch():

 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
 def test_utils_benchmarks():
+    """Profile YOLO models for performance benchmarks."""
     from ultralytics.utils.benchmarks import ProfileModels

     # Pre-export a dynamic engine model to use dynamic inference

@@ -69,6 +75,7 @@ def test_utils_benchmarks():

 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
 def test_predict_sam():
+    """Test SAM model prediction with various prompts."""
     from ultralytics import SAM
     from ultralytics.models.sam import Predictor as SAMPredictor

@@ -102,6 +109,7 @@ def test_predict_sam():

 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
 def test_model_ray_tune():
+    """Tune YOLO model with Ray optimization library."""
     with contextlib.suppress(RuntimeError):  # RuntimeError may be caused by out-of-memory
         YOLO('yolov8n-cls.yaml').tune(use_ray=True,
                                       data='imagenet10',

@@ -115,12 +123,14 @@ def test_model_ray_tune():

 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
 def test_model_tune():
+    """Tune YOLO model for performance."""
     YOLO('yolov8n-pose.pt').tune(data='coco8-pose.yaml', plots=False, imgsz=32, epochs=1, iterations=2, device='cpu')
     YOLO('yolov8n-cls.pt').tune(data='imagenet10', plots=False, imgsz=32, epochs=1, iterations=2, device='cpu')


 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
 def test_pycocotools():
+    """Validate model predictions using pycocotools."""
     from ultralytics.models.yolo.detect import DetectionValidator
     from ultralytics.models.yolo.pose import PoseValidator
     from ultralytics.models.yolo.segment import SegmentationValidator
@@ -14,10 +14,12 @@ MODEL = WEIGHTS_DIR / 'yolov8n'


 def test_func(*args):  # noqa
+    """Test function callback."""
     print('callback test passed')


 def test_export():
+    """Test model exporting functionality."""
     exporter = Exporter()
     exporter.add_callback('on_export_start', test_func)
     assert test_func in exporter.callbacks['on_export_start'], 'callback test failed'

@@ -26,6 +28,7 @@ def test_export():


 def test_detect():
+    """Test object detection functionality."""
     overrides = {'data': 'coco8.yaml', 'model': CFG_DET, 'imgsz': 32, 'epochs': 1, 'save': False}
     CFG.data = 'coco8.yaml'
     CFG.imgsz = 32

@@ -61,6 +64,7 @@ def test_detect():


 def test_segment():
+    """Test image segmentation functionality."""
     overrides = {'data': 'coco8-seg.yaml', 'model': CFG_SEG, 'imgsz': 32, 'epochs': 1, 'save': False}
     CFG.data = 'coco8-seg.yaml'
     CFG.imgsz = 32

@@ -98,6 +102,7 @@ def test_segment():


 def test_classify():
+    """Test image classification functionality."""
     overrides = {'data': 'imagenet10', 'model': CFG_CLS, 'imgsz': 32, 'epochs': 1, 'save': False}
     CFG.data = 'imagenet10'
     CFG.imgsz = 32
@@ -27,11 +27,13 @@ IS_TMP_WRITEABLE = is_dir_writeable(TMP)


 def test_model_forward():
+    """Test the forward pass of the YOLO model."""
     model = YOLO(CFG)
     model(source=None, imgsz=32, augment=True)  # also test no source and augment


 def test_model_methods():
+    """Test various methods and properties of the YOLO model."""
     model = YOLO(MODEL)

     # Model methods

@@ -51,7 +53,7 @@ def test_model_methods():


 def test_model_profile():
-    # Test profile=True model argument
+    """Test profiling of the YOLO model with 'profile=True' argument."""
     from ultralytics.nn.tasks import DetectionModel

     model = DetectionModel()  # build model

@@ -61,7 +63,7 @@ def test_model_profile():

 @pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable')
 def test_predict_txt():
-    # Write a list of sources (file, dir, glob, recursive glob) to a txt file
+    """Test YOLO predictions with sources (file, dir, glob, recursive glob) specified in a text file."""
     txt_file = TMP / 'sources.txt'
     with open(txt_file, 'w') as f:
         for x in [ASSETS / 'bus.jpg', ASSETS, ASSETS / '*', ASSETS / '**/*.jpg']:

@@ -70,6 +72,7 @@ def test_predict_txt():


 def test_predict_img():
+    """Test YOLO prediction on various types of image sources."""
     model = YOLO(MODEL)
     seg_model = YOLO(WEIGHTS_DIR / 'yolov8n-seg.pt')
     cls_model = YOLO(WEIGHTS_DIR / 'yolov8n-cls.pt')

@@ -105,7 +108,7 @@ def test_predict_img():


 def test_predict_grey_and_4ch():
-    # Convert SOURCE to greyscale and 4-ch
+    """Test YOLO prediction on SOURCE converted to greyscale and 4-channel images."""
     im = Image.open(SOURCE)
     directory = TMP / 'im4'
     directory.mkdir(parents=True, exist_ok=True)

@@ -132,8 +135,11 @@ def test_predict_grey_and_4ch():
 @pytest.mark.skipif(not ONLINE, reason='environment is offline')
 @pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable')
 def test_track_stream():
-    # Test YouTube streaming inference (short 10 frame video) with non-default ByteTrack tracker
-    # imgsz=160 required for tracking for higher confidence and better matches
+    """
+    Test YouTube streaming tracking (short 10 frame video) with non-default ByteTrack tracker.
+
+    Note imgsz=160 required for tracking for higher confidence and better matches
+    """
     import yaml

     model = YOLO(MODEL)

@@ -153,37 +159,44 @@ def test_track_stream():


 def test_val():
+    """Test the validation mode of the YOLO model."""
     YOLO(MODEL).val(data='coco8.yaml', imgsz=32, save_hybrid=True)


 def test_train_scratch():
+    """Test training the YOLO model from scratch."""
     model = YOLO(CFG)
     model.train(data='coco8.yaml', epochs=2, imgsz=32, cache='disk', batch=-1, close_mosaic=1, name='model')
     model(SOURCE)


 def test_train_pretrained():
+    """Test training the YOLO model from a pre-trained state."""
     model = YOLO(WEIGHTS_DIR / 'yolov8n-seg.pt')
     model.train(data='coco8-seg.yaml', epochs=1, imgsz=32, cache='ram', copy_paste=0.5, mixup=0.5, name=0)
     model(SOURCE)


 def test_export_torchscript():
+    """Test exporting the YOLO model to TorchScript format."""
     f = YOLO(MODEL).export(format='torchscript', optimize=False)
     YOLO(f)(SOURCE)  # exported model inference


 def test_export_onnx():
+    """Test exporting the YOLO model to ONNX format."""
     f = YOLO(MODEL).export(format='onnx', dynamic=True)
     YOLO(f)(SOURCE)  # exported model inference


 def test_export_openvino():
+    """Test exporting the YOLO model to OpenVINO format."""
     f = YOLO(MODEL).export(format='openvino')
     YOLO(f)(SOURCE)  # exported model inference


 def test_export_coreml():
+    """Test exporting the YOLO model to CoreML format."""
     if not WINDOWS:  # RuntimeError: BlobWriter not loaded with coremltools 7.0 on windows
         if MACOS:
             f = YOLO(MODEL).export(format='coreml')

@@ -193,7 +206,11 @@ def test_export_coreml():


 def test_export_tflite(enabled=False):
-    # TF suffers from install conflicts on Windows and macOS
+    """
+    Test exporting the YOLO model to TFLite format.
+
+    Note TF suffers from install conflicts on Windows and macOS.
+    """
     if enabled and LINUX:
         model = YOLO(MODEL)
         f = model.export(format='tflite')

@@ -201,7 +218,11 @@ def test_export_tflite(enabled=False):


 def test_export_pb(enabled=False):
-    # TF suffers from install conflicts on Windows and macOS
+    """
+    Test exporting the YOLO model to *.pb format.
+
+    Note TF suffers from install conflicts on Windows and macOS.
+    """
     if enabled and LINUX:
         model = YOLO(MODEL)
         f = model.export(format='pb')

@@ -209,18 +230,24 @@ def test_export_pb(enabled=False):


 def test_export_paddle(enabled=False):
-    # Paddle protobuf requirements conflicting with onnx protobuf requirements
+    """
+    Test exporting the YOLO model to Paddle format.
+
+    Note Paddle protobuf requirements conflicting with onnx protobuf requirements.
+    """
     if enabled:
         YOLO(MODEL).export(format='paddle')


 @pytest.mark.slow
 def test_export_ncnn():
+    """Test exporting the YOLO model to NCNN format."""
     f = YOLO(MODEL).export(format='ncnn')
     YOLO(f)(SOURCE)  # exported model inference


 def test_all_model_yamls():
+    """Test YOLO model creation for all available YAML configurations."""
     for m in (ROOT / 'cfg' / 'models').rglob('*.yaml'):
         if 'rtdetr' in m.name:
             if TORCH_1_9:  # torch<=1.8 issue - TypeError: __init__() got an unexpected keyword argument 'batch_first'

@@ -230,6 +257,7 @@ def test_all_model_yamls():


 def test_workflow():
+    """Test the complete workflow including training, validation, prediction, and exporting."""
     model = YOLO(MODEL)
     model.train(data='coco8.yaml', epochs=1, imgsz=32, optimizer='SGD')
     model.val(imgsz=32)

@@ -238,12 +266,14 @@ def test_workflow():


 def test_predict_callback_and_setup():
-    # Test callback addition for prediction
-    def on_predict_batch_end(predictor):  # results -> List[batch_size]
+    """Test callback functionality during YOLO prediction."""

+    def on_predict_batch_end(predictor):
+        """Callback function that handles operations at the end of a prediction batch."""
         path, im0s, _, _ = predictor.batch
         im0s = im0s if isinstance(im0s, list) else [im0s]
         bs = [predictor.dataset.bs for _ in range(len(path))]
-        predictor.results = zip(predictor.results, im0s, bs)
+        predictor.results = zip(predictor.results, im0s, bs)  # results is List[batch_size]

     model = YOLO(MODEL)
     model.add_callback('on_predict_batch_end', on_predict_batch_end)

@@ -259,6 +289,7 @@ def test_predict_callback_and_setup():


 def test_results():
+    """Test various result formats for the YOLO model."""
     for m in 'yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt', 'yolov8n-cls.pt':
         results = YOLO(WEIGHTS_DIR / m)([SOURCE, SOURCE], imgsz=160)
         for r in results:

@@ -274,7 +305,7 @@ def test_results():

 @pytest.mark.skipif(not ONLINE, reason='environment is offline')
 def test_data_utils():
-    # Test functions in ultralytics/data/utils.py
+    """Test utility functions in ultralytics/data/utils.py."""
     from ultralytics.data.utils import HUBDatasetStats, autosplit
     from ultralytics.utils.downloads import zip_directory

@@ -294,7 +325,7 @@ def test_data_utils():

 @pytest.mark.skipif(not ONLINE, reason='environment is offline')
 def test_data_converter():
-    # Test dataset converters
+    """Test dataset converters."""
     from ultralytics.data.converter import coco80_to_coco91_class, convert_coco

     file = 'instances_val2017.json'

@@ -304,6 +335,7 @@ def test_data_converter():


 def test_data_annotator():
+    """Test automatic data annotation."""
     from ultralytics.data.annotator import auto_annotate

     auto_annotate(ASSETS,

@@ -313,7 +345,7 @@ def test_data_annotator():


 def test_events():
-    # Test event sending
+    """Test event sending functionality."""
     from ultralytics.hub.utils import Events

     events = Events()

@@ -324,6 +356,7 @@ def test_events():


 def test_cfg_init():
+    """Test configuration initialization utilities."""
     from ultralytics.cfg import check_dict_alignment, copy_default_cfg, smart_value

     with contextlib.suppress(SyntaxError):

@@ -334,6 +367,7 @@ def test_cfg_init():


 def test_utils_init():
+    """Test initialization utilities."""
     from ultralytics.utils import get_git_branch, get_git_origin_url, get_ubuntu_version, is_github_actions_ci

     get_ubuntu_version()

@@ -343,6 +377,7 @@ def test_utils_init():


 def test_utils_checks():
+    """Test various utility checks."""
     checks.check_yolov5u_filename('yolov5n.pt')
     checks.git_describe(ROOT)
     checks.check_requirements()  # check requirements.txt

@@ -354,12 +389,14 @@ def test_utils_checks():


 def test_utils_benchmarks():
+    """Test model benchmarking."""
     from ultralytics.utils.benchmarks import ProfileModels

     ProfileModels(['yolov8n.yaml'], imgsz=32, min_time=1, num_timed_runs=3, num_warmup_runs=1).profile()


 def test_utils_torchutils():
+    """Test Torch utility functions."""
     from ultralytics.nn.modules.conv import Conv
     from ultralytics.utils.torch_utils import get_flops_with_torch_profiler, profile, time_sync

@@ -373,12 +410,14 @@ def test_utils_torchutils():

 @pytest.mark.skipif(not ONLINE, reason='environment is offline')
 def test_utils_downloads():
+    """Test file download utilities."""
     from ultralytics.utils.downloads import get_google_drive_file_info

     get_google_drive_file_info('https://drive.google.com/file/d/1cqT-cJgANNrhIHCrEufUYhQ4RqiWG_lJ/view?usp=drive_link')


 def test_utils_ops():
+    """Test various operations utilities."""
     from ultralytics.utils.ops import (ltwh2xywh, ltwh2xyxy, make_divisible, xywh2ltwh, xywh2xyxy, xywhn2xyxy,
                                        xywhr2xyxyxyxy, xyxy2ltwh, xyxy2xywh, xyxy2xywhn, xyxyxyxy2xywhr)

@@ -396,6 +435,7 @@ def test_utils_ops():


 def test_utils_files():
+    """Test file handling utilities."""
     from ultralytics.utils.files import file_age, file_date, get_latest_run, spaces_in_path

     file_age(SOURCE)

@@ -409,6 +449,7 @@ def test_utils_files():


 def test_nn_modules_conv():
+    """Test Convolutional Neural Network modules."""
     from ultralytics.nn.modules.conv import CBAM, Conv2, ConvTranspose, DWConvTranspose2d, Focus

     c1, c2 = 8, 16  # input and output channels

@@ -427,6 +468,7 @@ def test_nn_modules_conv():


 def test_nn_modules_block():
+    """Test Neural Network block modules."""
     from ultralytics.nn.modules.block import C1, C3TR, BottleneckCSP, C3Ghost, C3x

     c1, c2 = 8, 16  # input and output channels

@@ -442,6 +484,7 @@ def test_nn_modules_block():

 @pytest.mark.skipif(not ONLINE, reason='environment is offline')
 def test_hub():
+    """Test Ultralytics HUB functionalities."""
     from ultralytics.hub import export_fmts_hub, logout
     from ultralytics.hub.utils import smart_request

@@ -453,6 +496,7 @@ def test_hub():

 @pytest.mark.slow
 @pytest.mark.skipif(not ONLINE, reason='environment is offline')
 def test_triton():
+    """Test NVIDIA Triton Server functionalities."""
     checks.check_requirements('tritonclient[all]')
     import subprocess
     import time
@@ -180,8 +180,8 @@ def _handle_deprecation(custom):

 def check_dict_alignment(base: Dict, custom: Dict, e=None):
     """
-    This function checks for any mismatched keys between a custom configuration list and a base configuration list.
-    If any mismatched keys are found, the function prints out similar keys from the base list and exits the program.
+    This function checks for any mismatched keys between a custom configuration list and a base configuration list. If
+    any mismatched keys are found, the function prints out similar keys from the base list and exits the program.

     Args:
         custom (dict): a dictionary of custom configuration options

@@ -205,9 +205,8 @@ def check_dict_alignment(base: Dict, custom: Dict, e=None):

 def merge_equals_args(args: List[str]) -> List[str]:
     """
-    Merges arguments around isolated '=' args in a list of strings.
-    The function considers cases where the first argument ends with '=' or the second starts with '=',
-    as well as when the middle one is an equals sign.
+    Merges arguments around isolated '=' args in a list of strings. The function considers cases where the first
+    argument ends with '=' or the second starts with '=', as well as when the middle one is an equals sign.

     Args:
         args (List[str]): A list of strings where each element is an argument.
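A runnable sketch of the merge behavior that merge_equals_args describes, covering the three cases named in the docstring (illustrative; the library's exact implementation may differ):

from typing import List


def merge_equals_args_sketch(args: List[str]) -> List[str]:
    """Merge ['k', '=', 'v'], ['k=', 'v'] and ['k', '=v'] into ['k=v'] (sketch)."""
    new_args = []
    for i, arg in enumerate(args):
        if arg == '=' and 0 < i < len(args) - 1:  # isolated '=': merge with both neighbors
            new_args[-1] += f'={args[i + 1]}'
            del args[i + 1]
        elif arg.endswith('=') and i < len(args) - 1 and '=' not in args[i + 1]:  # 'k=' followed by value
            new_args.append(f'{arg}{args[i + 1]}')
            del args[i + 1]
        elif arg.startswith('=') and i > 0:  # '=v' preceded by key
            new_args[-1] += arg
        else:
            new_args.append(arg)
    return new_args


print(merge_equals_args_sketch(['imgsz', '=', '640']))  # ['imgsz=640']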
@@ -20,16 +20,30 @@ from .utils import polygons2masks, polygons2masks_overlap

 # TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
 class BaseTransform:
+    """
+    Base class for image transformations.
+
+    This is a generic transformation class that can be extended for specific image processing needs.
+    The class is designed to be compatible with both classification and semantic segmentation tasks.
+
+    Methods:
+        __init__: Initializes the BaseTransform object.
+        apply_image: Applies image transformation to labels.
+        apply_instances: Applies transformations to object instances in labels.
+        apply_semantic: Applies semantic segmentation to an image.
+        __call__: Applies all label transformations to an image, instances, and semantic masks.
+    """

     def __init__(self) -> None:
         """Initializes the BaseTransform object."""
         pass

     def apply_image(self, labels):
-        """Applies image transformation to labels."""
+        """Applies image transformations to labels."""
         pass

     def apply_instances(self, labels):
-        """Applies transformations to input 'labels' and returns object instances."""
+        """Applies transformations to object instances in labels."""
         pass

     def apply_semantic(self, labels):

@@ -37,13 +51,14 @@ class BaseTransform:
         pass

     def __call__(self, labels):
-        """Applies label transformations to an image, instances and semantic masks."""
+        """Applies all label transformations to an image, instances, and semantic masks."""
         self.apply_image(labels)
         self.apply_instances(labels)
         self.apply_semantic(labels)


 class Compose:
+    """Class for composing multiple image transformations."""

     def __init__(self, transforms):
         """Initializes the Compose object with a list of transforms."""

@@ -60,18 +75,23 @@ class Compose:
         self.transforms.append(transform)

     def tolist(self):
-        """Converts list of transforms to a standard Python list."""
+        """Converts the list of transforms to a standard Python list."""
         return self.transforms

     def __repr__(self):
-        """Return string representation of object."""
+        """Returns a string representation of the object."""
         return f"{self.__class__.__name__}({', '.join([f'{t}' for t in self.transforms])})"


 class BaseMixTransform:
-    """This implementation is from mmyolo."""
+    """
+    Class for base mix (MixUp/Mosaic) transformations.
+
+    This implementation is from mmyolo.
+    """

     def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
+        """Initializes the BaseMixTransform object with dataset, pre_transform, and probability."""
         self.dataset = dataset
         self.pre_transform = pre_transform
         self.p = p

@@ -262,8 +282,10 @@ class Mosaic(BaseMixTransform):


 class MixUp(BaseMixTransform):
+    """Class for applying MixUp augmentation to the dataset."""

     def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
+        """Initializes MixUp object with dataset, pre_transform, and probability of applying MixUp."""
         super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)

     def get_indexes(self):

@@ -271,7 +293,7 @@ class MixUp(BaseMixTransform):
         return random.randint(0, len(self.dataset) - 1)

     def _mix_transform(self, labels):
-        """Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf."""
+        """Applies MixUp augmentation as per https://arxiv.org/pdf/1710.09412.pdf."""
         r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
         labels2 = labels['mix_labels'][0]
         labels['img'] = (labels['img'] * r + labels2['img'] * (1 - r)).astype(np.uint8)
@@ -281,6 +303,28 @@ class MixUp(BaseMixTransform):


 class RandomPerspective:
+    """
+    Implements random perspective and affine transformations on images and corresponding bounding boxes, segments, and
+    keypoints. These transformations include rotation, translation, scaling, and shearing. The class also offers the
+    option to apply these transformations conditionally with a specified probability.
+
+    Attributes:
+        degrees (float): Degree range for random rotations.
+        translate (float): Fraction of total width and height for random translation.
+        scale (float): Scaling factor interval, e.g., a scale factor of 0.1 allows a resize between 90%-110%.
+        shear (float): Shear intensity (angle in degrees).
+        perspective (float): Perspective distortion factor.
+        border (tuple): Tuple specifying mosaic border.
+        pre_transform (callable): A function/transform to apply to the image before starting the random transformation.
+
+    Methods:
+        affine_transform(img, border): Applies a series of affine transformations to the image.
+        apply_bboxes(bboxes, M): Transforms bounding boxes using the calculated affine matrix.
+        apply_segments(segments, M): Transforms segments and generates new bounding boxes.
+        apply_keypoints(keypoints, M): Transforms keypoints.
+        __call__(labels): Main method to apply transformations to both images and their corresponding annotations.
+        box_candidates(box1, box2): Filters out bounding boxes that don't meet certain criteria post-transformation.
+    """

     def __init__(self,
                  degrees=0.0,

@@ -290,17 +334,31 @@ class RandomPerspective:
                  perspective=0.0,
                  border=(0, 0),
                  pre_transform=None):
+        """Initializes RandomPerspective object with transformation parameters."""

         self.degrees = degrees
         self.translate = translate
         self.scale = scale
         self.shear = shear
         self.perspective = perspective
-        # Mosaic border
-        self.border = border
+        self.border = border  # mosaic border
         self.pre_transform = pre_transform

     def affine_transform(self, img, border):
-        """Center."""
+        """
+        Applies a sequence of affine transformations centered around the image center.
+
+        Args:
+            img (ndarray): Input image.
+            border (tuple): Border dimensions.
+
+        Returns:
+            img (ndarray): Transformed image.
+            M (ndarray): Transformation matrix.
+            s (float): Scale factor.
+        """

+        # Center
         C = np.eye(3, dtype=np.float32)

         C[0, 2] = -img.shape[1] / 2  # x translation (pixels)

@@ -462,8 +520,22 @@ class RandomPerspective:
         labels['resized_shape'] = img.shape[:2]
         return labels

-    def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
-        # Compute box candidates: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
+    def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
+        """
+        Compute box candidates based on a set of thresholds. This method compares the characteristics of the boxes
+        before and after augmentation to decide whether a box is a candidate for further processing.
+
+        Args:
+            box1 (numpy.ndarray): The 4,n bounding box before augmentation, represented as [x1, y1, x2, y2].
+            box2 (numpy.ndarray): The 4,n bounding box after augmentation, represented as [x1, y1, x2, y2].
+            wh_thr (float, optional): The width and height threshold in pixels. Default is 2.
+            ar_thr (float, optional): The aspect ratio threshold. Default is 100.
+            area_thr (float, optional): The area ratio threshold. Default is 0.1.
+            eps (float, optional): A small epsilon value to prevent division by zero. Default is 1e-16.
+
+        Returns:
+            (numpy.ndarray): A boolean array indicating which boxes are candidates based on the given thresholds.
+        """
         w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
         w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
         ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
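The hunk ends before box_candidates returns; for orientation, a hedged sketch of the typical final filter implied by the docstring (an assumption; the repository's exact expression is elided):

import numpy as np


def box_candidates_sketch(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
    """Filter augmented boxes by width/height, area ratio, and aspect ratio thresholds (sketch)."""
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)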
@@ -471,14 +543,32 @@ class RandomPerspective:


 class RandomHSV:
+    """
+    This class is responsible for performing random adjustments to the Hue, Saturation, and Value (HSV) channels of an
+    image.
+
+    The adjustments are random but within limits set by hgain, sgain, and vgain.
+    """

     def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:
+        """
+        Initialize RandomHSV class with gains for each HSV channel.
+
+        Args:
+            hgain (float, optional): Maximum variation for hue. Default is 0.5.
+            sgain (float, optional): Maximum variation for saturation. Default is 0.5.
+            vgain (float, optional): Maximum variation for value. Default is 0.5.
+        """
         self.hgain = hgain
         self.sgain = sgain
         self.vgain = vgain

     def __call__(self, labels):
-        """Applies image HSV augmentation"""
+        """
+        Applies random HSV augmentation to an image within the predefined limits.
+
+        The modified image replaces the original image in the input 'labels' dict.
+        """
         img = labels['img']
         if self.hgain or self.sgain or self.vgain:
             r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1  # random gains

@@ -496,9 +586,22 @@ class RandomHSV:


 class RandomFlip:
-    """Applies random horizontal or vertical flip to an image with a given probability."""
+    """
+    Applies a random horizontal or vertical flip to an image with a given probability.
+
+    Also updates any instances (bounding boxes, keypoints, etc.) accordingly.
+    """

     def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
+        """
+        Initializes the RandomFlip class with probability and direction.
+
+        Args:
+            p (float, optional): The probability of applying the flip. Must be between 0 and 1. Default is 0.5.
+            direction (str, optional): The direction to apply the flip. Must be 'horizontal' or 'vertical'.
+                Default is 'horizontal'.
+            flip_idx (array-like, optional): Index mapping for flipping keypoints, if any.
+        """
         assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
         assert 0 <= p <= 1.0

@@ -507,7 +610,16 @@ class RandomFlip:
         self.flip_idx = flip_idx

     def __call__(self, labels):
-        """Resize image and padding for detection, instance segmentation, pose."""
+        """
+        Applies random flip to an image and updates any instances like bounding boxes or keypoints accordingly.
+
+        Args:
+            labels (dict): A dictionary containing the keys 'img' and 'instances'. 'img' is the image to be flipped.
+                'instances' is an object containing bounding boxes and optionally keypoints.
+
+        Returns:
+            (dict): The same dict with the flipped image and updated instances under the 'img' and 'instances' keys.
+        """
         img = labels['img']
         instances = labels.pop('instances')
         instances.convert_bbox(format='xywh')

@@ -599,12 +711,38 @@ class LetterBox:


 class CopyPaste:
+    """
+    Implements the Copy-Paste augmentation as described in the paper https://arxiv.org/abs/2012.07177. This class is
+    responsible for applying the Copy-Paste augmentation on images and their corresponding instances.
+    """

     def __init__(self, p=0.5) -> None:
+        """
+        Initializes the CopyPaste class with a given probability.
+
+        Args:
+            p (float, optional): The probability of applying the Copy-Paste augmentation. Must be between 0 and 1.
+                Default is 0.5.
+        """
         self.p = p

     def __call__(self, labels):
-        """Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)."""
+        """
+        Applies the Copy-Paste augmentation to the given image and instances.
+
+        Args:
+            labels (dict): A dictionary containing:
+                - 'img': The image to augment.
+                - 'cls': Class labels associated with the instances.
+                - 'instances': Object containing bounding boxes, and optionally, keypoints and segments.
+
+        Returns:
+            (dict): Dict with augmented image and updated instances under the 'img', 'cls', and 'instances' keys.
+
+        Notes:
+            1. Instances are expected to have 'segments' as one of their attributes for this augmentation to work.
+            2. This method modifies the input dictionary 'labels' in place.
+        """
         im = labels['img']
         cls = labels['cls']
         h, w = im.shape[:2]

@@ -639,9 +777,13 @@ class CopyPaste:
 class Albumentations:
-    """Albumentations transformations. Optional, uninstall package to disable.
-    Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive Histogram Equalization,
-    random change of brightness and contrast, RandomGamma and lowering of image quality by compression."""
+    """
+    Albumentations transformations.
+
+    Optional, uninstall package to disable. Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive
+    Histogram Equalization, random change of brightness and contrast, RandomGamma and lowering of image quality by
+    compression.
+    """

     def __init__(self, p=1.0):
         """Initialize the transform object for YOLO bbox formatted params."""

@@ -690,6 +832,19 @@ class Albumentations:

 # TODO: technically this is not an augmentation, maybe we should put this to another files
 class Format:
+    """
+    Formats image annotations for object detection, instance segmentation, and pose estimation tasks. The class
+    standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader.
+
+    Attributes:
+        bbox_format (str): Format for bounding boxes. Default is 'xywh'.
+        normalize (bool): Whether to normalize bounding boxes. Default is True.
+        return_mask (bool): Return instance masks for segmentation. Default is False.
+        return_keypoint (bool): Return keypoints for pose estimation. Default is False.
+        mask_ratio (int): Downsample ratio for masks. Default is 4.
+        mask_overlap (bool): Whether to overlap masks. Default is True.
+        batch_idx (bool): Keep batch indexes. Default is True.
+    """

     def __init__(self,
                  bbox_format='xywh',

@@ -699,6 +854,7 @@ class Format:
                  mask_ratio=4,
                  mask_overlap=True,
                  batch_idx=True):
+        """Initializes the Format class with given parameters."""
         self.bbox_format = bbox_format
         self.normalize = normalize
         self.return_mask = return_mask  # set False when training detection only

@@ -746,7 +902,7 @@ class Format:
         return img

     def _format_segments(self, instances, cls, w, h):
-        """convert polygon points to bitmap."""
+        """Convert polygon points to bitmap."""
         segments = instances.segments
         if self.mask_overlap:
             masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=self.mask_ratio)

@@ -851,35 +1007,75 @@ def classify_albumentations(
 class ClassifyLetterBox:
-    """YOLOv8 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])"""
+    """
+    YOLOv8 LetterBox class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
+    T.Compose([LetterBox(size), ToTensor()]).
+
+    Attributes:
+        h (int): Target height of the image.
+        w (int): Target width of the image.
+        auto (bool): If True, automatically solves for short side using stride.
+        stride (int): The stride value, used when 'auto' is True.
+    """

     def __init__(self, size=(640, 640), auto=False, stride=32):
-        """Resizes image and crops it to center with max dimensions 'h' and 'w'."""
+        """
+        Initializes the ClassifyLetterBox class with a target size, auto-flag, and stride.
+
+        Args:
+            size (Union[int, Tuple[int, int]]): The target dimensions (height, width) for the letterbox.
+            auto (bool): If True, automatically calculates the short side based on stride.
+            stride (int): The stride value, used when 'auto' is True.
+        """
         super().__init__()
         self.h, self.w = (size, size) if isinstance(size, int) else size
         self.auto = auto  # pass max size integer, automatically solve for short side using stride
         self.stride = stride  # used with auto

-    def __call__(self, im):  # im = np.array HWC
+    def __call__(self, im):
+        """
+        Resizes the image and pads it with a letterbox method.
+
+        Args:
+            im (numpy.ndarray): The input image as a numpy array of shape HWC.
+
+        Returns:
+            (numpy.ndarray): The letterboxed and resized image as a numpy array.
+        """
         imh, imw = im.shape[:2]
-        r = min(self.h / imh, self.w / imw)  # ratio of new/old
-        h, w = round(imh * r), round(imw * r)  # resized image
+        r = min(self.h / imh, self.w / imw)  # ratio of new/old dimensions
+        h, w = round(imh * r), round(imw * r)  # resized image dimensions

+        # Calculate padding dimensions
         hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w)
         top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)

+        # Create padded image
         im_out = np.full((hs, ws, 3), 114, dtype=im.dtype)
         im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
         return im_out


 class CenterCrop:
-    """YOLOv8 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])"""
+    """YOLOv8 CenterCrop class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
+    T.Compose([CenterCrop(size), ToTensor()]).
+    """

     def __init__(self, size=640):
         """Converts an image from numpy array to PyTorch tensor."""
         super().__init__()
         self.h, self.w = (size, size) if isinstance(size, int) else size

-    def __call__(self, im):  # im = np.array HWC
+    def __call__(self, im):
+        """
+        Resizes and crops the center of the image using a letterbox method.
+
+        Args:
+            im (numpy.ndarray): The input image as a numpy array of shape HWC.
+
+        Returns:
+            (numpy.ndarray): The center-cropped and resized image as a numpy array.
+        """
         imh, imw = im.shape[:2]
         m = min(imh, imw)  # min dimension
         top, left = (imh - m) // 2, (imw - m) // 2

@@ -887,14 +1083,23 @@ class CenterCrop:


 class ToTensor:
-    """YOLOv8 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])."""
+    """YOLOv8 ToTensor class for image preprocessing, i.e., T.Compose([LetterBox(size), ToTensor()])."""

     def __init__(self, half=False):
         """Initialize YOLOv8 ToTensor object with optional half-precision support."""
         super().__init__()
         self.half = half

-    def __call__(self, im):  # im = np.array HWC in BGR order
+    def __call__(self, im):
+        """
+        Transforms an image from a numpy array to a PyTorch tensor, applying optional half-precision and normalization.
+
+        Args:
+            im (numpy.ndarray): Input image as a numpy array with shape (H, W, C) in BGR order.
+
+        Returns:
+            (torch.Tensor): The transformed image as a PyTorch tensor in float32 or float16, normalized to [0, 1].
+        """
         im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1])  # HWC to CHW -> BGR to RGB -> contiguous
         im = torch.from_numpy(im)  # to torch
         im = im.half() if self.half else im.float()  # uint8 to fp16/32
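The docstrings above reference a torchvision-style pipeline; a minimal usage sketch under that assumption:

import numpy as np
import torchvision.transforms as T

from ultralytics.data.augment import CenterCrop, ClassifyLetterBox, ToTensor

transform = T.Compose([ClassifyLetterBox(size=224), CenterCrop(size=224), ToTensor()])
im = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy HWC BGR image
tensor = transform(im)  # CHW RGB float tensor normalized to [0, 1]
print(tensor.shape)  # torch.Size([3, 224, 224])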
@@ -62,6 +62,7 @@ class BaseDataset(Dataset):
                  classes=None,
                  fraction=1.0):
         super().__init__()
+        """Initialize BaseDataset with given configuration and options."""
         self.img_path = img_path
         self.imgsz = imgsz
         self.augment = augment

@@ -256,7 +257,7 @@ class BaseDataset(Dataset):
         return len(self.labels)

     def update_labels_info(self, label):
-        """custom your label format here."""
+        """Custom your label format here."""
         return label

     def build_transforms(self, hyp=None):
@@ -20,7 +20,11 @@ from .utils import PIN_MEMORY


 class InfiniteDataLoader(dataloader.DataLoader):
-    """Dataloader that reuses workers. Uses same syntax as vanilla DataLoader."""
+    """
+    Dataloader that reuses workers.
+
+    Uses same syntax as vanilla DataLoader.
+    """

     def __init__(self, *args, **kwargs):
         """Dataloader that infinitely recycles workers, inherits from DataLoader."""

@@ -38,7 +42,9 @@ class InfiniteDataLoader(dataloader.DataLoader):
             yield next(self.iterator)

     def reset(self):
-        """Reset iterator.
+        """
+        Reset iterator.

         This is useful when we want to modify settings of dataset while training.
         """
         self.iterator = self._get_iterator()

@@ -70,7 +76,7 @@ def seed_worker(worker_id):  # noqa


 def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=False, stride=32):
-    """Build YOLO Dataset"""
+    """Build YOLO Dataset."""
     return YOLODataset(
         img_path=img_path,
         imgsz=cfg.imgsz,
@ -12,7 +12,8 @@ from ultralytics.utils import TQDM
|
||||
|
||||
|
||||
def coco91_to_coco80_class():
|
||||
"""Converts 91-index COCO class IDs to 80-index COCO class IDs.
|
||||
"""
|
||||
Converts 91-index COCO class IDs to 80-index COCO class IDs.
|
||||
|
||||
Returns:
|
||||
(list): A list of 91 class IDs where the index represents the 80-index class ID and the value is the
|
||||
@ -51,7 +52,8 @@ def convert_coco(labels_dir='../coco/annotations/',
|
||||
use_segments=False,
|
||||
use_keypoints=False,
|
||||
cls91to80=True):
|
||||
"""Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
|
||||
"""
|
||||
Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
|
||||
|
||||
Args:
|
||||
labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
|
||||
@ -203,6 +205,7 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
|
||||
'helipad': 17}
|
||||
|
||||
def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
|
||||
"""Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory."""
|
||||
orig_label_path = orig_label_dir / f'{image_name}.txt'
|
||||
save_path = save_dir / f'{image_name}.txt'
|
||||
|
||||
|
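As context for the coco91_to_coco80_class() docstring change above, the remapping it documents is a plain lookup list; a truncated sketch follows (the real table has 91 entries):

```python
# COCO's 91 category IDs contain gaps; the lookup maps each 0-based 91-index
# position to a contiguous 80-class index, with None marking unused IDs.
coco91_to_80 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, None]  # entry 12 (index 11) is a gap in COCO

category_id = 3  # 1-based COCO category ID from an annotation
cls = coco91_to_80[category_id - 1]
print(cls)  # 2 -> the 80-class index written to YOLO labels
```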
@@ -33,6 +33,7 @@ class YOLODataset(BaseDataset):
     """

     def __init__(self, *args, data=None, use_segments=False, use_keypoints=False, **kwargs):
+        """Initializes the YOLODataset with optional configurations for segments and keypoints."""
         self.use_segments = use_segments
         self.use_keypoints = use_keypoints
         self.data = data
@@ -40,7 +41,9 @@ class YOLODataset(BaseDataset):
         super().__init__(*args, **kwargs)

     def cache_labels(self, path=Path('./labels.cache')):
-        """Cache dataset labels, check images and read shapes.
+        """
+        Cache dataset labels, check images and read shapes.
+
         Args:
             path (Path): path where to save the cache file (default: Path('./labels.cache')).
         Returns:
@@ -157,7 +160,7 @@ class YOLODataset(BaseDataset):
         self.transforms = self.build_transforms(hyp)

     def update_labels_info(self, label):
-        """custom your label format here."""
+        """Custom your label format here."""
         # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
         # we can make it also support classification and semantic segmentation by add or remove some dict keys there.
         bboxes = label.pop('bboxes')
@@ -254,6 +257,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
         return {'img': sample, 'cls': j}

     def __len__(self) -> int:
+        """Return the total number of samples in the dataset."""
         return len(self.samples)

     def verify_images(self):
@@ -320,6 +324,16 @@ def save_dataset_cache_file(prefix, path, x):


 # TODO: support semantic segmentation
 class SemanticDataset(BaseDataset):
+    """
+    Semantic Segmentation Dataset.
+
+    This class is responsible for handling datasets used for semantic segmentation tasks. It inherits functionalities
+    from the BaseDataset class.
+
+    Note:
+        This class is currently a placeholder and needs to be populated with methods and attributes for supporting
+        semantic segmentation tasks.
+    """

     def __init__(self):
         """Initialize a SemanticDataset object."""
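The two update_labels_info() docstring fixes above ("Custom your label format here.") mark the intended override hook; a hedged sketch of using it follows. The import path, field names, and simplified return value are assumptions for illustration, not a drop-in replacement:

```python
# Sketch only: subclass hook for reshaping each label dict into a custom format.
from ultralytics.data.dataset import YOLODataset


class MyDataset(YOLODataset):

    def update_labels_info(self, label):
        """Keep only boxes and classes from the stock label dict."""
        return {'bboxes': label.pop('bboxes'), 'cls': label.pop('cls')}
```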
@@ -22,6 +22,7 @@ from ultralytics.utils.checks import check_requirements

 @dataclass
 class SourceTypes:
+    """Class to represent various types of input sources for predictions."""
     webcam: bool = False
     screenshot: bool = False
     from_img: bool = False
@@ -29,7 +30,34 @@ class SourceTypes:


 class LoadStreams:
-    """Stream Loader, i.e. `yolo predict source='rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP, TCP streams`."""
+    """
+    Stream Loader for various types of video streams.
+
+    Suitable for use with `yolo predict source='rtsp://example.com/media.mp4'`, supports RTSP, RTMP, HTTP, and TCP streams.
+
+    Attributes:
+        sources (str): The source input paths or URLs for the video streams.
+        imgsz (int): The image size for processing, defaults to 640.
+        vid_stride (int): Video frame-rate stride, defaults to 1.
+        buffer (bool): Whether to buffer input streams, defaults to False.
+        running (bool): Flag to indicate if the streaming thread is running.
+        mode (str): Set to 'stream' indicating real-time capture.
+        imgs (list): List of image frames for each stream.
+        fps (list): List of FPS for each stream.
+        frames (list): List of total frames for each stream.
+        threads (list): List of threads for each stream.
+        shape (list): List of shapes for each stream.
+        caps (list): List of cv2.VideoCapture objects for each stream.
+        bs (int): Batch size for processing.
+
+    Methods:
+        __init__: Initialize the stream loader.
+        update: Read stream frames in daemon thread.
+        close: Close stream loader and release resources.
+        __iter__: Returns an iterator object for the class.
+        __next__: Returns source paths, transformed, and original images for processing.
+        __len__: Return the length of the sources object.
+    """

     def __init__(self, sources='file.streams', imgsz=640, vid_stride=1, buffer=False):
         """Initialize instance variables and check for consistent input stream shapes."""
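The new LoadStreams docstring above describes a daemon-thread reader per source; the core pattern, sketched with assumed names:

```python
# Sketch: one background thread keeps the newest frame available per stream.
import threading

import cv2


class TinyStreamReader:

    def __init__(self, source=0):
        self.cap = cv2.VideoCapture(source)
        self.frame = None
        self.running = True
        self.thread = threading.Thread(target=self._update, daemon=True)
        self.thread.start()

    def _update(self):
        """Read frames in the daemon thread, keeping only the most recent one."""
        while self.running and self.cap.isOpened():
            ok, frame = self.cap.read()
            if ok:
                self.frame = frame

    def close(self):
        """Stop the reader thread and release the capture object."""
        self.running = False
        self.thread.join(timeout=5)
        self.cap.release()
```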
@@ -149,10 +177,33 @@ class LoadStreams:


 class LoadScreenshots:
-    """YOLOv8 screenshot dataloader, i.e. `yolo predict source=screen`."""
+    """
+    YOLOv8 screenshot dataloader.
+
+    This class manages the loading of screenshot images for processing with YOLOv8.
+    Suitable for use with `yolo predict source=screen`.
+
+    Attributes:
+        source (str): The source input indicating which screen to capture.
+        imgsz (int): The image size for processing, defaults to 640.
+        screen (int): The screen number to capture.
+        left (int): The left coordinate for screen capture area.
+        top (int): The top coordinate for screen capture area.
+        width (int): The width of the screen capture area.
+        height (int): The height of the screen capture area.
+        mode (str): Set to 'stream' indicating real-time capture.
+        frame (int): Counter for captured frames.
+        sct (mss.mss): Screen capture object from `mss` library.
+        bs (int): Batch size, set to 1.
+        monitor (dict): Monitor configuration details.
+
+    Methods:
+        __iter__: Returns an iterator object.
+        __next__: Captures the next screenshot and returns it.
+    """

     def __init__(self, source, imgsz=640):
-        """source = [screen_number left top width height] (pixels)."""
+        """Source = [screen_number left top width height] (pixels)."""
         check_requirements('mss')
         import mss  # noqa
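For the LoadScreenshots attributes documented above, the monitor dict comes straight from the `mss` API; a short usage sketch (screen index and box size are examples):

```python
import mss
import numpy as np

with mss.mss() as sct:
    mon = sct.monitors[1]  # monitor 1; index 0 is the combined virtual screen
    box = {'left': mon['left'], 'top': mon['top'], 'width': 640, 'height': 640}
    im = np.asarray(sct.grab(box))[:, :, :3]  # BGRA -> BGR channels
    print(im.shape)  # (640, 640, 3)
```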
@@ -192,7 +243,28 @@ class LoadScreenshots:


 class LoadImages:
-    """YOLOv8 image/video dataloader, i.e. `yolo predict source=image.jpg/vid.mp4`."""
+    """
+    YOLOv8 image/video dataloader.
+
+    This class manages the loading and pre-processing of image and video data for YOLOv8. It supports loading from
+    various formats, including single image files, video files, and lists of image and video paths.
+
+    Attributes:
+        imgsz (int): Image size, defaults to 640.
+        files (list): List of image and video file paths.
+        nf (int): Total number of files (images and videos).
+        video_flag (list): Flags indicating whether a file is a video (True) or an image (False).
+        mode (str): Current mode, 'image' or 'video'.
+        vid_stride (int): Stride for video frame-rate, defaults to 1.
+        bs (int): Batch size, set to 1 for this class.
+        cap (cv2.VideoCapture): Video capture object for OpenCV.
+        frame (int): Frame counter for video.
+        frames (int): Total number of frames in the video.
+        count (int): Counter for iteration, initialized at 0 during `__iter__()`.
+
+    Methods:
+        _new_video(path): Create a new cv2.VideoCapture object for a given video path.
+    """

     def __init__(self, path, imgsz=640, vid_stride=1):
         """Initialize the Dataloader and raise FileNotFoundError if file not found."""
@@ -285,6 +357,24 @@ class LoadImages:


 class LoadPilAndNumpy:
+    """
+    Load images from PIL and Numpy arrays for batch processing.
+
+    This class is designed to manage loading and pre-processing of image data from both PIL and Numpy formats.
+    It performs basic validation and format conversion to ensure that the images are in the required format for
+    downstream processing.
+
+    Attributes:
+        paths (list): List of image paths or autogenerated filenames.
+        im0 (list): List of images stored as Numpy arrays.
+        imgsz (int): Image size, defaults to 640.
+        mode (str): Type of data being processed, defaults to 'image'.
+        bs (int): Batch size, equivalent to the length of `im0`.
+        count (int): Counter for iteration, initialized at 0 during `__iter__()`.
+
+    Methods:
+        _single_check(im): Validate and format a single image to a Numpy array.
+    """

     def __init__(self, im0, imgsz=640):
         """Initialize PIL and Numpy Dataloader."""
@@ -326,8 +416,24 @@ class LoadPilAndNumpy:


 class LoadTensor:
+    """
+    Load images from torch.Tensor data.
+
+    This class manages the loading and pre-processing of image data from PyTorch tensors for further processing.
+
+    Attributes:
+        im0 (torch.Tensor): The input tensor containing the image(s).
+        bs (int): Batch size, inferred from the shape of `im0`.
+        mode (str): Current mode, set to 'image'.
+        paths (list): List of image paths or filenames.
+        count (int): Counter for iteration, initialized at 0 during `__iter__()`.
+
+    Methods:
+        _single_check(im, stride): Validate and possibly modify the input tensor.
+    """

     def __init__(self, im0) -> None:
         """Initialize Tensor Dataloader."""
         self.im0 = self._single_check(im0)
         self.bs = self.im0.shape[0]
         self.mode = 'image'
@@ -370,9 +476,7 @@ class LoadTensor:


 def autocast_list(source):
-    """
-    Merges a list of source of different types into a list of numpy arrays or PIL images
-    """
+    """Merges a list of source of different types into a list of numpy arrays or PIL images."""
     files = []
     for im in source:
         if isinstance(im, (str, Path)):  # filename or uri
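The autocast_list() one-liner above condenses its behavior; sketched out, the normalization it performs looks roughly like this (the requests-based URI branch is an assumption of how remote paths are handled):

```python
from pathlib import Path

import numpy as np
import requests
from PIL import Image


def to_image_list(source):
    """Merge mixed sources into a list of PIL images or numpy arrays."""
    files = []
    for im in source:
        if isinstance(im, (str, Path)):  # filename or URI
            files.append(Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im))
        elif isinstance(im, (Image.Image, np.ndarray)):  # already usable as-is
            files.append(im)
        else:
            raise TypeError(f'Unsupported image type {type(im)}')
    return files
```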
@@ -547,9 +547,9 @@ class HUBDatasetStats:


 def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
     """
-    Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the
-    Python Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will
-    not be resized.
+    Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the Python
+    Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will not be
+    resized.

     Args:
         f (str): The path to the input image file.

@@ -986,9 +986,7 @@ class Exporter:
         return model

     def add_callback(self, event: str, callback):
-        """
-        Appends the given callback.
-        """
+        """Appends the given callback."""
         self.callbacks[event].append(callback)

     def run_callbacks(self, event: str):

@@ -159,9 +159,7 @@ class Model(nn.Module):
         self.overrides['task'] = self.task

     def _check_is_pytorch_model(self):
-        """
-        Raises TypeError is model is not a PyTorch model
-        """
+        """Raises TypeError is model is not a PyTorch model."""
         pt_str = isinstance(self.model, (str, Path)) and Path(self.model).suffix == '.pt'
         pt_module = isinstance(self.model, nn.Module)
         if not (pt_module or pt_str):
@@ -173,9 +171,7 @@ class Model(nn.Module):
             f"argument directly in your inference command, i.e. 'model.predict(source=..., device=0)'")

     def reset_weights(self):
-        """
-        Resets the model modules parameters to randomly initialized values, losing all training information.
-        """
+        """Resets the model modules parameters to randomly initialized values, losing all training information."""
         self._check_is_pytorch_model()
         for m in self.model.modules():
             if hasattr(m, 'reset_parameters'):
@@ -185,9 +181,7 @@ class Model(nn.Module):
         return self

     def load(self, weights='yolov8n.pt'):
-        """
-        Transfers parameters with matching names and shapes from 'weights' to model.
-        """
+        """Transfers parameters with matching names and shapes from 'weights' to model."""
         self._check_is_pytorch_model()
         if isinstance(weights, (str, Path)):
             weights, self.ckpt = attempt_load_one_weight(weights)
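The Model.load() docstring above ("transfers parameters with matching names and shapes") corresponds to the usual intersect-state-dict idiom; a hedged standalone sketch:

```python
import torch.nn as nn


def load_matching(model: nn.Module, ckpt_state: dict) -> int:
    """Copy only parameters whose name and shape match; return the number transferred."""
    own = model.state_dict()
    matched = {k: v for k, v in ckpt_state.items() if k in own and v.shape == own[k].shape}
    own.update(matched)
    model.load_state_dict(own)
    return len(matched)
```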
@@ -58,7 +58,7 @@ Example:

 class BasePredictor:
     """
-    BasePredictor
+    BasePredictor.

     A base class for creating predictors.

@@ -109,7 +109,8 @@ class BasePredictor:
         callbacks.add_integration_callbacks(self)

     def preprocess(self, im):
-        """Prepares input image before inference.
+        """
+        Prepares input image before inference.

         Args:
             im (torch.Tensor | List(np.ndarray)): BCHW for tensor, [(HWC) x B] for list.
@@ -128,6 +129,7 @@ class BasePredictor:
         return im

     def inference(self, im, *args, **kwargs):
+        """Runs inference on a given image using the specified model and arguments."""
         visualize = increment_path(self.save_dir / Path(self.batch[0][0]).stem,
                                    mkdir=True) if self.args.visualize and (not self.source_type.tensor) else False
         return self.model(im, augment=self.args.augment, visualize=visualize)
@@ -194,7 +196,11 @@ class BasePredictor:
         return list(self.stream_inference(source, model, *args, **kwargs))  # merge list of Result into one

     def predict_cli(self, source=None, model=None):
-        """Method used for CLI prediction. It uses always generator as outputs as not required by CLI mode."""
+        """
+        Method used for CLI prediction.
+
+        It uses always generator as outputs as not required by CLI mode.
+        """
         gen = self.stream_inference(source, model)
         for _ in gen:  # running CLI inference without accumulating any outputs (do not modify)
             pass
@@ -352,7 +358,5 @@ class BasePredictor:
             callback(self)

     def add_callback(self, event: str, func):
-        """
-        Add callback
-        """
+        """Add callback."""
         self.callbacks[event].append(func)
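The add_callback() docstrings tightened above are the public hook surface; a usage sketch (the event name 'on_predict_batch_end' is believed correct for this codebase, but treat it as an assumption):

```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
model.add_callback('on_predict_batch_end', lambda predictor: print(predictor.batch[0]))
results = model('https://ultralytics.com/images/bus.jpg')
```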
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 """
-Ultralytics Results, Boxes and Masks classes for handling inference results
+Ultralytics Results, Boxes and Masks classes for handling inference results.

 Usage: See https://docs.ultralytics.com/modes/predict/
 """
@@ -19,12 +19,11 @@ from ultralytics.utils.torch_utils import smart_inference_mode


 class BaseTensor(SimpleClass):
-    """
-    Base tensor class with additional methods for easy manipulation and device handling.
-    """
+    """Base tensor class with additional methods for easy manipulation and device handling."""

     def __init__(self, data, orig_shape) -> None:
-        """Initialize BaseTensor with data and original shape.
+        """
+        Initialize BaseTensor with data and original shape.

         Args:
             data (torch.Tensor | np.ndarray): Predictions, such as bboxes, masks and keypoints.
@@ -126,6 +125,18 @@ class Results(SimpleClass):
         self.probs = probs

     def _apply(self, fn, *args, **kwargs):
+        """
+        Applies a function to all non-empty attributes and returns a new Results object with modified attributes. This
+        function is internally called by methods like .to(), .cuda(), .cpu(), etc.
+
+        Args:
+            fn (str): The name of the function to apply.
+            *args: Variable length argument list to pass to the function.
+            **kwargs: Arbitrary keyword arguments to pass to the function.
+
+        Returns:
+            Results: A new Results object with attributes modified by the applied function.
+        """
         r = self.new()
         for k in self._keys:
             v = getattr(self, k)
@@ -250,9 +261,7 @@ class Results(SimpleClass):
         return annotator.result()

     def verbose(self):
-        """
-        Return log string for each task.
-        """
+        """Return log string for each task."""
         log_string = ''
         probs = self.probs
         boxes = self.boxes
@@ -537,6 +546,7 @@ class Probs(BaseTensor):
     """

     def __init__(self, probs, orig_shape=None) -> None:
+        """Initialize the Probs class with classification probabilities and optional original shape of the image."""
         super().__init__(probs, orig_shape)

     @property
|
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
||||
"""
|
||||
Train a model on a dataset
|
||||
Train a model on a dataset.
|
||||
|
||||
Usage:
|
||||
$ yolo mode=train model=yolov8n.pt data=coco128.yaml imgsz=640 epochs=100 batch=16
|
||||
@ -37,7 +37,7 @@ from ultralytics.utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel,
|
||||
|
||||
class BaseTrainer:
|
||||
"""
|
||||
BaseTrainer
|
||||
BaseTrainer.
|
||||
|
||||
A base class for creating trainers.
|
||||
|
||||
@ -143,15 +143,11 @@ class BaseTrainer:
|
||||
callbacks.add_integration_callbacks(self)
|
||||
|
||||
def add_callback(self, event: str, callback):
|
||||
"""
|
||||
Appends the given callback.
|
||||
"""
|
||||
"""Appends the given callback."""
|
||||
self.callbacks[event].append(callback)
|
||||
|
||||
def set_callback(self, event: str, callback):
|
||||
"""
|
||||
Overrides the existing callbacks with the given callback.
|
||||
"""
|
||||
"""Overrides the existing callbacks with the given callback."""
|
||||
self.callbacks[event] = [callback]
|
||||
|
||||
def run_callbacks(self, event: str):
|
||||
@ -207,9 +203,7 @@ class BaseTrainer:
|
||||
world_size=world_size)
|
||||
|
||||
def _setup_train(self, world_size):
|
||||
"""
|
||||
Builds dataloaders and optimizer on correct rank process.
|
||||
"""
|
||||
"""Builds dataloaders and optimizer on correct rank process."""
|
||||
|
||||
# Model
|
||||
self.run_callbacks('on_pretrain_routine_start')
|
||||
@ -450,14 +444,14 @@ class BaseTrainer:
|
||||
@staticmethod
|
||||
def get_dataset(data):
|
||||
"""
|
||||
Get train, val path from data dict if it exists. Returns None if data format is not recognized.
|
||||
Get train, val path from data dict if it exists.
|
||||
|
||||
Returns None if data format is not recognized.
|
||||
"""
|
||||
return data['train'], data.get('val') or data.get('test')
|
||||
|
||||
def setup_model(self):
|
||||
"""
|
||||
load/create/download model for any task.
|
||||
"""
|
||||
"""Load/create/download model for any task."""
|
||||
if isinstance(self.model, torch.nn.Module): # if model is loaded beforehand. No setup needed
|
||||
return
|
||||
|
||||
@ -482,14 +476,14 @@ class BaseTrainer:
|
||||
self.ema.update(self.model)
|
||||
|
||||
def preprocess_batch(self, batch):
|
||||
"""
|
||||
Allows custom preprocessing model inputs and ground truths depending on task type.
|
||||
"""
|
||||
"""Allows custom preprocessing model inputs and ground truths depending on task type."""
|
||||
return batch
|
||||
|
||||
def validate(self):
|
||||
"""
|
||||
Runs validation on test set using self.validator. The returned dict is expected to contain "fitness" key.
|
||||
Runs validation on test set using self.validator.
|
||||
|
||||
The returned dict is expected to contain "fitness" key.
|
||||
"""
|
||||
metrics = self.validator(self)
|
||||
fitness = metrics.pop('fitness', -self.loss.detach().cpu().numpy()) # use loss as fitness measure if not found
|
||||
@ -506,26 +500,20 @@ class BaseTrainer:
|
||||
raise NotImplementedError('get_validator function not implemented in trainer')
|
||||
|
||||
def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
|
||||
"""
|
||||
Returns dataloader derived from torch.data.Dataloader.
|
||||
"""
|
||||
"""Returns dataloader derived from torch.data.Dataloader."""
|
||||
raise NotImplementedError('get_dataloader function not implemented in trainer')
|
||||
|
||||
def build_dataset(self, img_path, mode='train', batch=None):
|
||||
"""Build dataset"""
|
||||
"""Build dataset."""
|
||||
raise NotImplementedError('build_dataset function not implemented in trainer')
|
||||
|
||||
def label_loss_items(self, loss_items=None, prefix='train'):
|
||||
"""
|
||||
Returns a loss dict with labelled training loss items tensor
|
||||
"""
|
||||
"""Returns a loss dict with labelled training loss items tensor."""
|
||||
# Not needed for classification but necessary for segmentation & detection
|
||||
return {'loss': loss_items} if loss_items is not None else ['loss']
|
||||
|
||||
def set_model_attributes(self):
|
||||
"""
|
||||
To set or update model parameters before training.
|
||||
"""
|
||||
"""To set or update model parameters before training."""
|
||||
self.model.names = self.data['names']
|
||||
|
||||
def build_targets(self, preds, targets):
|
||||
@ -632,8 +620,8 @@ class BaseTrainer:
|
||||
|
||||
def build_optimizer(self, model, name='auto', lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5):
|
||||
"""
|
||||
Constructs an optimizer for the given model, based on the specified optimizer name, learning rate,
|
||||
momentum, weight decay, and number of iterations.
|
||||
Constructs an optimizer for the given model, based on the specified optimizer name, learning rate, momentum,
|
||||
weight decay, and number of iterations.
|
||||
|
||||
Args:
|
||||
model (torch.nn.Module): The model for which to build an optimizer.
|
||||
|
@ -31,32 +31,32 @@ from ultralytics.utils.plotting import plot_tune_results
|
||||
|
||||
class Tuner:
|
||||
"""
|
||||
Class responsible for hyperparameter tuning of YOLO models.
|
||||
Class responsible for hyperparameter tuning of YOLO models.
|
||||
|
||||
The class evolves YOLO model hyperparameters over a given number of iterations
|
||||
by mutating them according to the search space and retraining the model to evaluate their performance.
|
||||
The class evolves YOLO model hyperparameters over a given number of iterations
|
||||
by mutating them according to the search space and retraining the model to evaluate their performance.
|
||||
|
||||
Attributes:
|
||||
space (dict): Hyperparameter search space containing bounds and scaling factors for mutation.
|
||||
tune_dir (Path): Directory where evolution logs and results will be saved.
|
||||
tune_csv (Path): Path to the CSV file where evolution logs are saved.
|
||||
Attributes:
|
||||
space (dict): Hyperparameter search space containing bounds and scaling factors for mutation.
|
||||
tune_dir (Path): Directory where evolution logs and results will be saved.
|
||||
tune_csv (Path): Path to the CSV file where evolution logs are saved.
|
||||
|
||||
Methods:
|
||||
_mutate(hyp: dict) -> dict:
|
||||
Mutates the given hyperparameters within the bounds specified in `self.space`.
|
||||
Methods:
|
||||
_mutate(hyp: dict) -> dict:
|
||||
Mutates the given hyperparameters within the bounds specified in `self.space`.
|
||||
|
||||
__call__():
|
||||
Executes the hyperparameter evolution across multiple iterations.
|
||||
__call__():
|
||||
Executes the hyperparameter evolution across multiple iterations.
|
||||
|
||||
Example:
|
||||
Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
|
||||
```python
|
||||
from ultralytics import YOLO
|
||||
Example:
|
||||
Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
|
||||
```python
|
||||
from ultralytics import YOLO
|
||||
|
||||
model = YOLO('yolov8n.pt')
|
||||
model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False)
|
||||
```
|
||||
"""
|
||||
model = YOLO('yolov8n.pt')
|
||||
model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False)
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(self, args=DEFAULT_CFG, _callbacks=None):
|
||||
"""
|
||||
|
@@ -36,7 +36,7 @@ from ultralytics.utils.torch_utils import de_parallel, select_device, smart_infe

 class BaseValidator:
     """
-    BaseValidator
+    BaseValidator.

     A base class for creating validators.

@@ -102,8 +102,7 @@ class BaseValidator:

     @smart_inference_mode()
     def __call__(self, trainer=None, model=None):
-        """
-        Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer
+        """Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer
         gets priority).
         """
         self.training = trainer is not None
@@ -260,7 +259,7 @@ class BaseValidator:
         raise NotImplementedError('get_dataloader function not implemented for this validator')

     def build_dataset(self, img_path):
-        """Build dataset"""
+        """Build dataset."""
         raise NotImplementedError('build_dataset function not implemented in validator')

     def preprocess(self, batch):

@@ -80,8 +80,8 @@ def get_export(model_id='', format='torchscript'):

 def check_dataset(path='', task='detect'):
     """
-    Function for error-checking HUB dataset Zip file before upload. It checks a dataset for errors before it is
-    uploaded to the HUB. Usage examples are given below.
+    Function for error-checking HUB dataset Zip file before upload. It checks a dataset for errors before it is uploaded
+    to the HUB. Usage examples are given below.

     Args:
         path (str, optional): Path to data.zip (with data.yaml inside data.zip). Defaults to ''.
|
||||
|
||||
|
||||
class Auth:
|
||||
"""
|
||||
Manages authentication processes including API key handling, cookie-based authentication, and header generation.
|
||||
|
||||
The class supports different methods of authentication:
|
||||
1. Directly using an API key.
|
||||
2. Authenticating using browser cookies (specifically in Google Colab).
|
||||
3. Prompting the user to enter an API key.
|
||||
|
||||
Attributes:
|
||||
id_token (str or bool): Token used for identity verification, initialized as False.
|
||||
api_key (str or bool): API key for authentication, initialized as False.
|
||||
model_key (bool): Placeholder for model key, initialized as False.
|
||||
"""
|
||||
id_token = api_key = model_key = False
|
||||
|
||||
def __init__(self, api_key='', verbose=False):
|
||||
@ -54,7 +67,9 @@ class Auth:
|
||||
|
||||
def request_api_key(self, max_attempts=3):
|
||||
"""
|
||||
Prompt the user to input their API key. Returns the model ID.
|
||||
Prompt the user to input their API key.
|
||||
|
||||
Returns the model ID.
|
||||
"""
|
||||
import getpass
|
||||
for attempts in range(max_attempts):
|
||||
@ -86,8 +101,8 @@ class Auth:
|
||||
|
||||
def auth_with_cookies(self) -> bool:
|
||||
"""
|
||||
Attempt to fetch authentication via cookies and set id_token.
|
||||
User must be logged in to HUB and running in a supported browser.
|
||||
Attempt to fetch authentication via cookies and set id_token. User must be logged in to HUB and running in a
|
||||
supported browser.
|
||||
|
||||
Returns:
|
||||
bool: True if authentication is successful, False otherwise.
|
||||
|
@@ -84,6 +84,7 @@ class HUBTrainingSession:
     def _handle_signal(self, signum, frame):
         """
         Handle kill signals and prevent heartbeats from being sent on Colab after termination.
+
         This method does not use frame, it is included as it is passed by signal.
         """
         if self.alive is True:

@@ -161,9 +161,7 @@ class Events:
     url = 'https://www.google-analytics.com/mp/collect?measurement_id=G-X8NCJYTQXM&api_secret=QLQrATrNSwGRFRLE-cbHJw'

     def __init__(self):
-        """
-        Initializes the Events object with default values for events, rate_limit, and metadata.
-        """
+        """Initializes the Events object with default values for events, rate_limit, and metadata."""
         self.events = []  # events list
         self.rate_limit = 60.0  # rate limit (seconds)
         self.t = 0.0  # rate limit timer (seconds)

@@ -22,7 +22,7 @@ class FastSAM(Model):
     """

     def __init__(self, model='FastSAM-x.pt'):
-        """Call the __init__ method of the parent class (YOLO) with the updated default model"""
+        """Call the __init__ method of the parent class (YOLO) with the updated default model."""
         if str(model) == 'FastSAM.pt':
             model = 'FastSAM-x.pt'
         assert Path(model).suffix not in ('.yaml', '.yml'), 'FastSAM models only support pre-trained models.'
@@ -30,4 +30,5 @@ class FastSAM(Model):

     @property
     def task_map(self):
+        """Returns a dictionary mapping segment task to corresponding predictor and validator classes."""
         return {'segment': {'predictor': FastSAMPredictor, 'validator': FastSAMValidator}}

@@ -11,10 +11,12 @@ from ultralytics.utils import DEFAULT_CFG, ops

 class FastSAMPredictor(DetectionPredictor):

     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+        """Initializes FastSAMPredictor class by inheriting from DetectionPredictor and setting task to 'segment'."""
         super().__init__(cfg, overrides, _callbacks)
         self.args.task = 'segment'

     def postprocess(self, preds, img, orig_imgs):
+        """Postprocesses the predictions, applies non-max suppression, scales the boxes, and returns the results."""
         p = ops.non_max_suppression(
             preds[0],
             self.args.conf,
@@ -15,6 +15,7 @@ from ultralytics.utils import TQDM

 class FastSAMPrompt:

     def __init__(self, source, results, device='cuda') -> None:
+        """Initializes FastSAMPrompt with given source, results and device, and assigns clip for linear assignment."""
         self.device = device
         self.results = results
         self.source = source
@@ -30,6 +31,7 @@ class FastSAMPrompt:

     @staticmethod
     def _segment_image(image, bbox):
+        """Segments the given image according to the provided bounding box coordinates."""
         image_array = np.array(image)
         segmented_image_array = np.zeros_like(image_array)
         x1, y1, x2, y2 = bbox
@@ -45,6 +47,9 @@ class FastSAMPrompt:

     @staticmethod
     def _format_results(result, filter=0):
+        """Formats detection results into list of annotations each containing ID, segmentation, bounding box, score and
+        area.
+        """
         annotations = []
         n = len(result.masks.data) if result.masks is not None else 0
         for i in range(n):
@@ -61,6 +66,9 @@ class FastSAMPrompt:

     @staticmethod
     def _get_bbox_from_mask(mask):
+        """Applies morphological transformations to the mask, displays it, and if with_contours is True, draws
+        contours.
+        """
         mask = mask.astype(np.uint8)
         contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
         x1, y1, w, h = cv2.boundingRect(contours[0])
@@ -195,6 +203,7 @@ class FastSAMPrompt:

     @torch.no_grad()
     def retrieve(self, model, preprocess, elements, search_text: str, device) -> int:
+        """Processes images and text with a model, calculates similarity, and returns softmax score."""
         preprocessed_images = [preprocess(image).to(device) for image in elements]
         tokenized_text = self.clip.tokenize([search_text]).to(device)
         stacked_images = torch.stack(preprocessed_images)
@@ -206,6 +215,7 @@ class FastSAMPrompt:
         return probs[:, 0].softmax(dim=0)

     def _crop_image(self, format_results):
+        """Crops an image based on provided annotation format and returns cropped images and related data."""
         if os.path.isdir(self.source):
             raise ValueError(f"'{self.source}' is a directory, not a valid source for this function.")
         image = Image.fromarray(cv2.cvtColor(self.results[0].orig_img, cv2.COLOR_BGR2RGB))
@@ -229,6 +239,7 @@ class FastSAMPrompt:
         return cropped_boxes, cropped_images, not_crop, filter_id, annotations

     def box_prompt(self, bbox):
+        """Modifies the bounding box properties and calculates IoU between masks and bounding box."""
         if self.results[0].masks is not None:
             assert (bbox[2] != 0 and bbox[3] != 0)
             if os.path.isdir(self.source):
@@ -261,7 +272,8 @@ class FastSAMPrompt:
             self.results[0].masks.data = torch.tensor(np.array([masks[max_iou_index].cpu().numpy()]))
         return self.results

-    def point_prompt(self, points, pointlabel):  # numpy 处理
+    def point_prompt(self, points, pointlabel):  # numpy
+        """Adjusts points on detected masks based on user input and returns the modified results."""
         if self.results[0].masks is not None:
             if os.path.isdir(self.source):
                 raise ValueError(f"'{self.source}' is a directory, not a valid source for this function.")
@@ -284,6 +296,7 @@ class FastSAMPrompt:
         return self.results

     def text_prompt(self, text):
+        """Processes a text prompt, applies it to existing results and returns the updated results."""
         if self.results[0].masks is not None:
             format_results = self._format_results(self.results[0], 0)
             cropped_boxes, cropped_images, not_crop, filter_id, annotations = self._crop_image(format_results)
@@ -296,4 +309,5 @@ class FastSAMPrompt:
         return self.results

     def everything_prompt(self):
+        """Returns the processed results from the previous methods in the class."""
         return self.results
@@ -25,12 +25,13 @@ from .val import NASValidator

 class NAS(Model):

     def __init__(self, model='yolo_nas_s.pt') -> None:
+        """Initializes the NAS model with the provided or default 'yolo_nas_s.pt' model."""
         assert Path(model).suffix not in ('.yaml', '.yml'), 'YOLO-NAS models only support pre-trained models.'
         super().__init__(model, task='detect')

     @smart_inference_mode()
     def _load(self, weights: str, task: str):
-        # Load or create new NAS model
+        """Loads an existing NAS model weights or creates a new NAS model with pretrained weights if not provided."""
         import super_gradients
         suffix = Path(weights).suffix
         if suffix == '.pt':
@@ -58,4 +59,5 @@ class NAS(Model):

     @property
     def task_map(self):
+        """Returns a dictionary mapping tasks to respective predictor and validator classes."""
         return {'detect': {'predictor': NASPredictor, 'validator': NASValidator}}
@@ -1,7 +1,5 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-RT-DETR model interface
-"""
+"""RT-DETR model interface."""
 from ultralytics.engine.model import Model
 from ultralytics.nn.tasks import RTDETRDetectionModel

@@ -11,17 +9,17 @@ from .val import RTDETRValidator


 class RTDETR(Model):
-    """
-    RTDETR model interface.
-    """
+    """RTDETR model interface."""

     def __init__(self, model='rtdetr-l.pt') -> None:
+        """Initializes the RTDETR model with the given model file, defaulting to 'rtdetr-l.pt'."""
         if model and model.split('.')[-1] not in ('pt', 'yaml', 'yml'):
             raise NotImplementedError('RT-DETR only supports creating from *.pt file or *.yaml file.')
         super().__init__(model=model, task='detect')

     @property
     def task_map(self):
+        """Returns a dictionary mapping task names to corresponding Ultralytics task classes for RTDETR model."""
         return {
             'detect': {
                 'predictor': RTDETRPredictor,
@@ -48,7 +48,8 @@ class RTDETRPredictor(BasePredictor):
         return results

     def pre_transform(self, im):
-        """Pre-transform input image before inference.
+        """
+        Pre-transform input image before inference.

         Args:
             im (List(np.ndarray)): (N, 3, h, w) for tensor, [(h, w, 3) x N] for list.

@@ -37,7 +37,8 @@ class RTDETRTrainer(DetectionTrainer):
         return model

     def build_dataset(self, img_path, mode='val', batch=None):
-        """Build RTDETR Dataset
+        """
+        Build RTDETR Dataset.

         Args:
             img_path (str): Path to the folder containing images.

@@ -16,6 +16,7 @@ __all__ = 'RTDETRValidator',  # tuple or list

 class RTDETRDataset(YOLODataset):

     def __init__(self, *args, data=None, **kwargs):
+        """Initialize the RTDETRDataset class by inheriting from the YOLODataset class."""
         super().__init__(*args, data=data, use_segments=False, use_keypoints=False, **kwargs)

     # NOTE: add stretch version load_image for rtdetr mosaic
@@ -32,9 +32,10 @@ def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:

 def calculate_stability_score(masks: torch.Tensor, mask_threshold: float, threshold_offset: float) -> torch.Tensor:
     """
-    Computes the stability score for a batch of masks. The stability
-    score is the IoU between the binary masks obtained by thresholding
-    the predicted mask logits at high and low values.
+    Computes the stability score for a batch of masks.
+
+    The stability score is the IoU between the binary masks obtained by thresholding the predicted mask logits at high
+    and low values.
     """
     # One mask is always contained inside the other.
     # Save memory by preventing unnecessary cast to torch.int64
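The reflowed calculate_stability_score() docstring above has a compact closed form: because the high-threshold mask is contained in the low-threshold mask, IoU reduces to a ratio of areas. A minimal sketch:

```python
import torch


def stability_score(mask_logits: torch.Tensor, mask_threshold: float = 0.0, offset: float = 1.0) -> torch.Tensor:
    """IoU between masks thresholded at (t + offset) and (t - offset); containment makes IoU = |high| / |low|."""
    high = (mask_logits > (mask_threshold + offset)).sum(dim=(-1, -2)).float()
    low = (mask_logits > (mask_threshold - offset)).sum(dim=(-1, -2)).float()
    return high / low  # caller should guard against empty low-threshold masks
```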
@@ -60,7 +61,11 @@ def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer:

 def generate_crop_boxes(im_size: Tuple[int, ...], n_layers: int,
                         overlap_ratio: float) -> Tuple[List[List[int]], List[int]]:
-    """Generates a list of crop boxes of different sizes. Each layer has (2**i)**2 boxes for the ith layer."""
+    """
+    Generates a list of crop boxes of different sizes.
+
+    Each layer has (2**i)**2 boxes for the ith layer.
+    """
     crop_boxes, layer_idxs = [], []
     im_h, im_w = im_size
     short_side = min(im_h, im_w)
@@ -145,8 +150,9 @@ def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> Tup

 def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor:
     """
-    Calculates boxes in XYXY format around masks. Return [0,0,0,0] for
-    an empty mask. For input shape C1xC2x...xHxW, the output shape is C1xC2x...x4.
+    Calculates boxes in XYXY format around masks.
+
+    Return [0,0,0,0] for an empty mask. For input shape C1xC2x...xHxW, the output shape is C1xC2x...x4.
     """
     # torch.max below raises an error on empty inputs, just skip in this case
     if torch.numel(masks) == 0:
@@ -1,7 +1,5 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-SAM model interface
-"""
+"""SAM model interface."""

 from pathlib import Path

@@ -13,16 +11,16 @@ from .predict import Predictor


 class SAM(Model):
-    """
-    SAM model interface.
-    """
+    """SAM model interface."""

     def __init__(self, model='sam_b.pt') -> None:
+        """Initializes the SAM model instance with the specified pre-trained model file."""
         if model and Path(model).suffix not in ('.pt', '.pth'):
             raise NotImplementedError('SAM prediction requires pre-trained *.pt or *.pth model.')
         super().__init__(model=model, task='segment')

     def _load(self, weights: str, task=None):
+        """Loads the provided weights into the SAM model."""
         self.model = build_sam(weights)

     def predict(self, source, stream=False, bboxes=None, points=None, labels=None, **kwargs):
@@ -48,4 +46,5 @@ class SAM(Model):

     @property
     def task_map(self):
+        """Returns a dictionary mapping the 'segment' task to its corresponding 'Predictor'."""
         return {'segment': {'predictor': Predictor}}
@@ -98,7 +98,11 @@ class MaskDecoder(nn.Module):
         sparse_prompt_embeddings: torch.Tensor,
         dense_prompt_embeddings: torch.Tensor,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
-        """Predicts masks. See 'forward' for more details."""
+        """
+        Predicts masks.
+
+        See 'forward' for more details.
+        """
         # Concatenate output tokens
         output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0)
         output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.size(0), -1, -1)
@@ -100,6 +100,9 @@ class ImageEncoderViT(nn.Module):
         )

     def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Processes input through patch embedding, applies positional embedding if present, and passes through blocks
+        and neck.
+        """
         x = self.patch_embed(x)
         if self.pos_embed is not None:
             x = x + self.pos_embed
@@ -157,8 +160,8 @@ class PromptEncoder(nn.Module):

     def get_dense_pe(self) -> torch.Tensor:
         """
-        Returns the positional encoding used to encode point prompts,
-        applied to a dense set of points the shape of the image encoding.
+        Returns the positional encoding used to encode point prompts, applied to a dense set of points the shape of the
+        image encoding.

         Returns:
             torch.Tensor: Positional encoding with shape 1x(embed_dim)x(embedding_h)x(embedding_w)
@@ -204,9 +207,7 @@ class PromptEncoder(nn.Module):
         boxes: Optional[torch.Tensor],
         masks: Optional[torch.Tensor],
     ) -> int:
-        """
-        Gets the batch size of the output given the batch size of the input prompts.
-        """
+        """Gets the batch size of the output given the batch size of the input prompts."""
         if points is not None:
             return points[0].shape[0]
         elif boxes is not None:
@@ -217,6 +218,7 @@ class PromptEncoder(nn.Module):
             return 1

     def _get_device(self) -> torch.device:
+        """Returns the device of the first point embedding's weight tensor."""
         return self.point_embeddings[0].weight.device

     def forward(
@@ -259,11 +261,10 @@ class PromptEncoder(nn.Module):


 class PositionEmbeddingRandom(nn.Module):
-    """
-    Positional encoding using random spatial frequencies.
-    """
+    """Positional encoding using random spatial frequencies."""

     def __init__(self, num_pos_feats: int = 64, scale: Optional[float] = None) -> None:
+        """Initializes a position embedding using random spatial frequencies."""
         super().__init__()
         if scale is None or scale <= 0.0:
             scale = 1.0
@@ -304,7 +305,7 @@ class PositionEmbeddingRandom(nn.Module):


 class Block(nn.Module):
-    """Transformer blocks with support of window attention and residual propagation blocks"""
+    """Transformer blocks with support of window attention and residual propagation blocks."""

     def __init__(
         self,
@@ -351,6 +352,7 @@ class Block(nn.Module):
         self.window_size = window_size

     def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Executes a forward pass through the transformer block with window attention and non-overlapping windows."""
         shortcut = x
         x = self.norm1(x)
         # Window partition
@@ -404,6 +406,7 @@ class Attention(nn.Module):
             self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))

     def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Applies the forward operation including attention, normalization, MLP, and indexing within window limits."""
         B, H, W, _ = x.shape
         # qkv with shape (3, B, nHead, H * W, C)
         qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
@@ -448,6 +451,7 @@ def window_unpartition(windows: torch.Tensor, window_size: int, pad_hw: Tuple[in
                        hw: Tuple[int, int]) -> torch.Tensor:
     """
     Window unpartition into original sequences and removing padding.
+
     Args:
         windows (tensor): input tokens with [B * num_windows, window_size, window_size, C].
         window_size (int): window size.
@@ -540,9 +544,7 @@ def add_decomposed_rel_pos(


 class PatchEmbed(nn.Module):
-    """
-    Image to Patch Embedding.
-    """
+    """Image to Patch Embedding."""

     def __init__(
         self,
@@ -565,4 +567,5 @@ class PatchEmbed(nn.Module):
         self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=kernel_size, stride=stride, padding=padding)

     def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Computes patch embedding by applying convolution and transposing resulting tensor."""
         return self.proj(x).permute(0, 2, 3, 1)  # B C H W -> B H W C
@@ -23,6 +23,9 @@ from ultralytics.utils.instance import to_2tuple

 class Conv2d_BN(torch.nn.Sequential):

     def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, groups=1, bn_weight_init=1):
+        """Initializes the MBConv model with given input channels, output channels, expansion ratio, activation, and
+        drop path.
+        """
         super().__init__()
         self.add_module('c', torch.nn.Conv2d(a, b, ks, stride, pad, dilation, groups, bias=False))
         bn = torch.nn.BatchNorm2d(b)
@@ -34,6 +37,9 @@ class Conv2d_BN(torch.nn.Sequential):

 class PatchEmbed(nn.Module):

     def __init__(self, in_chans, embed_dim, resolution, activation):
+        """Initialize the PatchMerging class with specified input, output dimensions, resolution and activation
+        function.
+        """
         super().__init__()
         img_size: Tuple[int, int] = to_2tuple(resolution)
         self.patches_resolution = (img_size[0] // 4, img_size[1] // 4)
@@ -48,12 +54,16 @@ class PatchEmbed(nn.Module):
         )

     def forward(self, x):
+        """Runs input tensor 'x' through the PatchMerging model's sequence of operations."""
         return self.seq(x)


 class MBConv(nn.Module):

     def __init__(self, in_chans, out_chans, expand_ratio, activation, drop_path):
+        """Initializes a convolutional layer with specified dimensions, input resolution, depth, and activation
+        function.
+        """
         super().__init__()
         self.in_chans = in_chans
         self.hidden_chans = int(in_chans * expand_ratio)
@@ -73,6 +83,7 @@ class MBConv(nn.Module):
             self.drop_path = nn.Identity()

     def forward(self, x):
+        """Implements the forward pass for the model architecture."""
         shortcut = x
         x = self.conv1(x)
         x = self.act1(x)
@@ -87,6 +98,9 @@ class MBConv(nn.Module):

 class PatchMerging(nn.Module):

     def __init__(self, input_resolution, dim, out_dim, activation):
+        """Initializes the ConvLayer with specific dimension, input resolution, depth, activation, drop path, and other
+        optional parameters.
+        """
         super().__init__()

         self.input_resolution = input_resolution
@@ -99,6 +113,7 @@ class PatchMerging(nn.Module):
         self.conv3 = Conv2d_BN(out_dim, out_dim, 1, 1, 0)

     def forward(self, x):
+        """Applies forward pass on the input utilizing convolution and activation layers, and returns the result."""
         if x.ndim == 3:
             H, W = self.input_resolution
             B = len(x)
@@ -149,6 +164,7 @@ class ConvLayer(nn.Module):
             input_resolution, dim=dim, out_dim=out_dim, activation=activation)

     def forward(self, x):
+        """Processes the input through a series of convolutional layers and returns the activated output."""
         for blk in self.blocks:
             x = checkpoint.checkpoint(blk, x) if self.use_checkpoint else blk(x)
         return x if self.downsample is None else self.downsample(x)
@@ -157,6 +173,7 @@ class ConvLayer(nn.Module):
 class Mlp(nn.Module):

     def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+        """Initializes Attention module with the given parameters including dimension, key_dim, number of heads, etc."""
         super().__init__()
         out_features = out_features or in_features
         hidden_features = hidden_features or in_features
@@ -167,6 +184,7 @@ class Mlp(nn.Module):
         self.drop = nn.Dropout(drop)

     def forward(self, x):
+        """Applies operations on input x and returns modified x, runs downsample if not None."""
         x = self.norm(x)
         x = self.fc1(x)
         x = self.act(x)
@@ -216,6 +234,7 @@ class Attention(torch.nn.Module):

     @torch.no_grad()
     def train(self, mode=True):
+        """Sets the module in training mode and handles attribute 'ab' based on the mode."""
         super().train(mode)
         if mode and hasattr(self, 'ab'):
             del self.ab
@@ -298,6 +317,9 @@ class TinyViTBlock(nn.Module):
         self.local_conv = Conv2d_BN(dim, dim, ks=local_conv_size, stride=1, pad=pad, groups=dim)

     def forward(self, x):
+        """Applies attention-based transformation or padding to input 'x' before passing it through a local
+        convolution.
+        """
         H, W = self.input_resolution
         B, L, C = x.shape
         assert L == H * W, 'input feature has wrong size'
@@ -337,6 +359,9 @@ class TinyViTBlock(nn.Module):
         return x + self.drop_path(self.mlp(x))

     def extra_repr(self) -> str:
+        """Returns a formatted string representing the TinyViTBlock's parameters: dimension, input resolution, number of
+        attentions heads, window size, and MLP ratio.
+        """
         return f'dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, ' \
                f'window_size={self.window_size}, mlp_ratio={self.mlp_ratio}'

@@ -402,23 +427,28 @@ class BasicLayer(nn.Module):
             input_resolution, dim=dim, out_dim=out_dim, activation=activation)

     def forward(self, x):
+        """Performs forward propagation on the input tensor and returns a normalized tensor."""
         for blk in self.blocks:
             x = checkpoint.checkpoint(blk, x) if self.use_checkpoint else blk(x)
         return x if self.downsample is None else self.downsample(x)

     def extra_repr(self) -> str:
+        """Returns a string representation of the extra_repr function with the layer's parameters."""
         return f'dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}'


 class LayerNorm2d(nn.Module):
+    """A PyTorch implementation of Layer Normalization in 2D."""

     def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
+        """Initialize LayerNorm2d with the number of channels and an optional epsilon."""
         super().__init__()
         self.weight = nn.Parameter(torch.ones(num_channels))
         self.bias = nn.Parameter(torch.zeros(num_channels))
         self.eps = eps

     def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Perform a forward pass, normalizing the input tensor."""
         u = x.mean(1, keepdim=True)
         s = (x - u).pow(2).mean(1, keepdim=True)
         x = (x - u) / torch.sqrt(s + self.eps)
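LayerNorm2d, as documented above, normalizes over the channel axis of an NCHW tensor; a quick illustrative check of the math shown in the hunk (the affine weight/bias applied afterwards is omitted here):

```python
import torch

x = torch.randn(2, 8, 4, 4)
u = x.mean(1, keepdim=True)
s = (x - u).pow(2).mean(1, keepdim=True)
y = (x - u) / torch.sqrt(s + 1e-6)
print(y.shape, bool(y.mean(1).abs().max() < 1e-4))  # shape preserved, per-pixel channel mean ~0
```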
@@ -518,6 +548,7 @@ class TinyViT(nn.Module):
         )

     def set_layer_lr_decay(self, layer_lr_decay):
+        """Sets the learning rate decay for each layer in the TinyViT model."""
         decay_rate = layer_lr_decay

         # layers -> blocks (depth)
@@ -525,6 +556,7 @@ class TinyViT(nn.Module):
         lr_scales = [decay_rate ** (depth - i - 1) for i in range(depth)]

         def _set_lr_scale(m, scale):
+            """Sets the learning rate scale for each layer in the model based on the layer's depth."""
             for p in m.parameters():
                 p.lr_scale = scale

@@ -544,12 +576,14 @@ class TinyViT(nn.Module):
             p.param_name = k

         def _check_lr_scale(m):
+            """Checks if the learning rate scale attribute is present in module's parameters."""
             for p in m.parameters():
                 assert hasattr(p, 'lr_scale'), p.param_name

         self.apply(_check_lr_scale)

     def _init_weights(self, m):
+        """Initializes weights for linear layers and layer normalization in the given module."""
         if isinstance(m, nn.Linear):
             # NOTE: This initialization is needed only for training.
             # trunc_normal_(m.weight, std=.02)
@@ -561,11 +595,12 @@ class TinyViT(nn.Module):

     @torch.jit.ignore
     def no_weight_decay_keywords(self):
+        """Returns a dictionary of parameter names where weight decay should not be applied."""
         return {'attention_biases'}

     def forward_features(self, x):
-        # x: (N, C, H, W)
-        x = self.patch_embed(x)
+        """Runs the input through the model layers and returns the transformed output."""
+        x = self.patch_embed(x)  # x input is (N, C, H, W)

         x = self.layers[0](x)
         start_i = 1
@@ -579,4 +614,5 @@ class TinyViT(nn.Module):
         return self.neck(x)

     def forward(self, x):
+        """Executes a forward pass on the input tensor through the constructed model layers."""
         return self.forward_features(x)
@@ -21,8 +21,7 @@ class TwoWayTransformer(nn.Module):
         attention_downsample_rate: int = 2,
     ) -> None:
         """
-        A transformer decoder that attends to an input image using
-        queries whose positional embedding is supplied.
+        A transformer decoder that attends to an input image using queries whose positional embedding is supplied.

         Args:
             depth (int): number of layers in the transformer
@@ -171,8 +170,7 @@ class TwoWayAttentionBlock(nn.Module):


 class Attention(nn.Module):
-    """
-    An attention layer that allows for downscaling the size of the embedding after projection to queries, keys, and
+    """An attention layer that allows for downscaling the size of the embedding after projection to queries, keys, and
     values.
     """
@ -19,6 +19,7 @@ from .build import build_sam
class Predictor(BasePredictor):

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """Initializes the Predictor class with default or provided configuration, overrides, and callbacks."""
        if overrides is None:
            overrides = {}
        overrides.update(dict(task='segment', mode='predict', imgsz=1024))
@ -34,7 +35,8 @@ class Predictor(BasePredictor):
        self.segment_all = False

    def preprocess(self, im):
        """Prepares input image before inference.
        """
        Prepares input image before inference.

        Args:
            im (torch.Tensor | List(np.ndarray)): BCHW for tensor, [(HWC) x B] for list.
@ -189,7 +191,8 @@ class Predictor(BasePredictor):
                 stability_score_thresh=0.95,
                 stability_score_offset=0.95,
                 crop_nms_thresh=0.7):
        """Segment the whole image.
        """
        Segment the whole image.

        Args:
            im (torch.Tensor): The preprocessed image, (N, C, H, W).
@ -360,14 +363,15 @@ class Predictor(BasePredictor):
        self.prompts = prompts

    def reset_image(self):
        """Resets the image and its features to None."""
        self.im = None
        self.features = None

    @staticmethod
    def remove_small_regions(masks, min_area=0, nms_thresh=0.7):
        """
        Removes small disconnected regions and holes in masks, then reruns
        box NMS to remove any new duplicates. Requires open-cv as a dependency.
        Removes small disconnected regions and holes in masks, then reruns box NMS to remove any new duplicates.
        Requires open-cv as a dependency.

        Args:
            masks (torch.Tensor): Masks, (N, H, W).
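`remove_small_regions` relies on OpenCV connected components, as its docstring notes. A hedged sketch of that idea on a single binary mask (`remove_small_islands` is a hypothetical helper, not the SAM implementation):

```python
import cv2
import numpy as np

def remove_small_islands(mask: np.ndarray, min_area: int) -> np.ndarray:
    """Drop connected foreground regions smaller than min_area pixels."""
    mask = mask.astype(np.uint8)
    n, labels, stats, _ = cv2.connectedComponentsWithStats(mask, connectivity=8)
    keep = [i for i in range(1, n) if stats[i, cv2.CC_STAT_AREA] >= min_area]  # label 0 is background
    return np.isin(labels, keep).astype(np.uint8)
```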
@ -47,6 +47,7 @@ class DETRLoss(nn.Module):
        self.device = None

    def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=''):
        """Computes the classification loss based on predictions, target values, and ground truth scores."""
        # logits: [b, query, num_classes], gt_class: list[[n, 1]]
        name_class = f'loss_class{postfix}'
        bs, nq = pred_scores.shape[:2]
@ -68,6 +69,9 @@ class DETRLoss(nn.Module):
        return {name_class: loss_cls.squeeze() * self.loss_gain['class']}

    def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=''):
        """Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding
        boxes.
        """
        # boxes: [b, query, 4], gt_bbox: list[[n, 4]]
        name_bbox = f'loss_bbox{postfix}'
        name_giou = f'loss_giou{postfix}'
@ -125,7 +129,7 @@ class DETRLoss(nn.Module):
                          postfix='',
                          masks=None,
                          gt_mask=None):
        """Get auxiliary losses"""
        """Get auxiliary losses."""
        # NOTE: loss class, bbox, giou, mask, dice
        loss = torch.zeros(5 if masks is not None else 3, device=pred_bboxes.device)
        if match_indices is None and self.use_uni_match:
@ -166,12 +170,14 @@ class DETRLoss(nn.Module):

    @staticmethod
    def _get_index(match_indices):
        """Returns batch indices, source indices, and destination indices from provided match indices."""
        batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(match_indices)])
        src_idx = torch.cat([src for (src, _) in match_indices])
        dst_idx = torch.cat([dst for (_, dst) in match_indices])
        return (batch_idx, src_idx), dst_idx

    def _get_assigned_bboxes(self, pred_bboxes, gt_bboxes, match_indices):
        """Assigns predicted bounding boxes to ground truth bounding boxes based on the match indices."""
        pred_assigned = torch.cat([
            t[I] if len(I) > 0 else torch.zeros(0, t.shape[-1], device=self.device)
            for t, (I, _) in zip(pred_bboxes, match_indices)])
@ -190,7 +196,7 @@ class DETRLoss(nn.Module):
                  gt_mask=None,
                  postfix='',
                  match_indices=None):
        """Get losses"""
        """Get losses."""
        if match_indices is None:
            match_indices = self.matcher(pred_bboxes,
                                         pred_scores,
@ -250,22 +256,43 @@ class DETRLoss(nn.Module):


class RTDETRDetectionLoss(DETRLoss):
    """
    Real-Time DeepTracker (RT-DETR) Detection Loss class that extends the DETRLoss.

    This class computes the detection loss for the RT-DETR model, which includes the standard detection loss as well as
    an additional denoising training loss when provided with denoising metadata.
    """

    def forward(self, preds, batch, dn_bboxes=None, dn_scores=None, dn_meta=None):
        """
        Forward pass to compute the detection loss.

        Args:
            preds (tuple): Predicted bounding boxes and scores.
            batch (dict): Batch data containing ground truth information.
            dn_bboxes (torch.Tensor, optional): Denoising bounding boxes. Default is None.
            dn_scores (torch.Tensor, optional): Denoising scores. Default is None.
            dn_meta (dict, optional): Metadata for denoising. Default is None.

        Returns:
            (dict): Dictionary containing the total loss and, if applicable, the denoising loss.
        """
        pred_bboxes, pred_scores = preds
        total_loss = super().forward(pred_bboxes, pred_scores, batch)

        # Check for denoising metadata to compute denoising training loss
        if dn_meta is not None:
            dn_pos_idx, dn_num_group = dn_meta['dn_pos_idx'], dn_meta['dn_num_group']
            assert len(batch['gt_groups']) == len(dn_pos_idx)

            # Denoising match indices
            # Get the match indices for denoising
            match_indices = self.get_dn_match_indices(dn_pos_idx, dn_num_group, batch['gt_groups'])

            # Compute denoising training loss
            # Compute the denoising training loss
            dn_loss = super().forward(dn_bboxes, dn_scores, batch, postfix='_dn', match_indices=match_indices)
            total_loss.update(dn_loss)
        else:
            # If no denoising metadata is provided, set denoising loss to zero
            total_loss.update({f'{k}_dn': torch.tensor(0., device=self.device) for k in total_loss.keys()})

        return total_loss
@ -276,12 +303,12 @@ class RTDETRDetectionLoss(DETRLoss):
        Get the match indices for denoising.

        Args:
            dn_pos_idx (List[torch.Tensor]): A list includes positive indices of denoising.
            dn_num_group (int): The number of groups of denoising.
            gt_groups (List(int)): a list of batch size length includes the number of gts of each image.
            dn_pos_idx (List[torch.Tensor]): List of tensors containing positive indices for denoising.
            dn_num_group (int): Number of denoising groups.
            gt_groups (List[int]): List of integers representing the number of ground truths for each image.

        Returns:
            dn_match_indices (List(tuple)): Matched indices.
            (List[tuple]): List of tuples containing matched indices for denoising.
        """
        dn_match_indices = []
        idx_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)
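`get_dn_match_indices` builds per-image index tensors offset by the cumulative ground-truth counts, repeated once per denoising group. A small worked example of that layout (values are illustrative):

```python
import torch

gt_groups = [2, 3]  # GTs per image in the batch
dn_num_group = 2
idx_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)  # tensor([0, 2])

for i, num_gt in enumerate(gt_groups):
    gt_idx = (torch.arange(num_gt) + idx_groups[i]).repeat(dn_num_group)
    print(i, gt_idx)  # image 0 -> [0, 1, 0, 1]; image 1 -> [2, 3, 4, 2, 3, 4]
```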
@ -11,8 +11,8 @@ from ultralytics.utils.ops import xywh2xyxy, xyxy2xywh

class HungarianMatcher(nn.Module):
    """
    A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in
    an end-to-end fashion.
    A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in an
    end-to-end fashion.

    HungarianMatcher performs optimal assignment over the predicted and ground truth bounding boxes using a cost
    function that considers classification scores, bounding box coordinates, and optionally, mask predictions.
@ -32,6 +32,9 @@ class HungarianMatcher(nn.Module):
    """

    def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_sample_points=12544, alpha=0.25, gamma=2.0):
        """Initializes HungarianMatcher with cost coefficients, Focal Loss, mask prediction, sample points, and alpha
        gamma factors.
        """
        super().__init__()
        if cost_gain is None:
            cost_gain = {'class': 1, 'bbox': 5, 'giou': 2, 'mask': 1, 'dice': 1}
@ -45,8 +48,8 @@ class HungarianMatcher(nn.Module):
    def forward(self, pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=None, gt_mask=None):
        """
        Forward pass for HungarianMatcher. This function computes costs based on prediction and ground truth
        (classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching
        between predictions and ground truth based on these costs.
        (classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching between
        predictions and ground truth based on these costs.

        Args:
            pred_bboxes (Tensor): Predicted bounding boxes with shape [batch_size, num_queries, 4].
@ -153,9 +156,9 @@ def get_cdn_group(batch,
                  box_noise_scale=1.0,
                  training=False):
    """
    Get contrastive denoising training group. This function creates a contrastive denoising training group with
    positive and negative samples from the ground truths (gt). It applies noise to the class labels and bounding
    box coordinates, and returns the modified labels, bounding boxes, attention mask and meta information.
    Get contrastive denoising training group. This function creates a contrastive denoising training group with positive
    and negative samples from the ground truths (gt). It applies noise to the class labels and bounding box coordinates,
    and returns the modified labels, bounding boxes, attention mask and meta information.

    Args:
        batch (dict): A dict that includes 'gt_cls' (torch.Tensor with shape [num_gts, ]), 'gt_bboxes'
@ -191,12 +194,12 @@ def get_cdn_group(batch,
    gt_bbox = batch['bboxes']  # bs*num, 4
    b_idx = batch['batch_idx']

    # each group has positive and negative queries.
    # Each group has positive and negative queries.
    dn_cls = gt_cls.repeat(2 * num_group)  # (2*num_group*bs*num, )
    dn_bbox = gt_bbox.repeat(2 * num_group, 1)  # 2*num_group*bs*num, 4
    dn_b_idx = b_idx.repeat(2 * num_group).view(-1)  # (2*num_group*bs*num, )

    # positive and negative mask
    # Positive and negative mask
    # (bs*num*num_group, ), the second total_num*num_group part as negative samples
    neg_idx = torch.arange(total_num * num_group, dtype=torch.long, device=gt_bbox.device) + num_group * total_num

@ -220,10 +223,9 @@ def get_cdn_group(batch,
        known_bbox += rand_part * diff
        known_bbox.clip_(min=0.0, max=1.0)
        dn_bbox = xyxy2xywh(known_bbox)
        dn_bbox = inverse_sigmoid(dn_bbox)
        dn_bbox = torch.logit(dn_bbox, eps=1e-6)  # inverse sigmoid

    # total denoising queries
    num_dn = int(max_nums * 2 * num_group)
    num_dn = int(max_nums * 2 * num_group)  # total denoising queries
    # class_embed = torch.cat([class_embed, torch.zeros([1, class_embed.shape[-1]], device=class_embed.device)])
    dn_cls_embed = class_embed[dn_cls]  # bs*num * 2 * num_group, 256
    padding_cls = torch.zeros(bs, num_dn, dn_cls_embed.shape[-1], device=gt_cls.device)
@ -256,9 +258,3 @@ def get_cdn_group(batch,

    return padding_cls.to(class_embed.device), padding_bbox.to(class_embed.device), attn_mask.to(
        class_embed.device), dn_meta


def inverse_sigmoid(x, eps=1e-6):
    """Inverse sigmoid function."""
    x = x.clip(min=0., max=1.)
    return torch.log(x / (1 - x + eps) + eps)
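The hunk above replaces the hand-rolled `inverse_sigmoid` with `torch.logit`. A quick check that the two formulations agree away from the clipping boundaries:

```python
import torch

x = torch.rand(4, 4).clamp(0.1, 0.9)
a = torch.log(x / (1 - x + 1e-6) + 1e-6)  # removed formulation
b = torch.logit(x, eps=1e-6)              # replacement
print(torch.allclose(a, b, atol=1e-4))    # True for interior values
```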
@ -26,6 +26,7 @@ class ClassificationPredictor(BasePredictor):
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """Initializes ClassificationPredictor setting the task to 'classify'."""
        super().__init__(cfg, overrides, _callbacks)
        self.args.task = 'classify'

@ -79,6 +79,7 @@ class ClassificationTrainer(BaseTrainer):
        return ckpt

    def build_dataset(self, img_path, mode='train', batch=None):
        """Creates a ClassificationDataset instance given an image path, and mode (train/test etc.)."""
        return ClassificationDataset(root=img_path, args=self.args, augment=mode == 'train', prefix=mode)

    def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
@ -113,8 +114,9 @@ class ClassificationTrainer(BaseTrainer):

    def label_loss_items(self, loss_items=None, prefix='train'):
        """
        Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for
        segmentation & detection
        Returns a loss dict with labelled training loss items tensor.

        Not needed for classification but necessary for segmentation & detection
        """
        keys = [f'{prefix}/{x}' for x in self.loss_names]
        if loss_items is None:
@ -78,6 +78,7 @@ class ClassificationValidator(BaseValidator):
        return self.metrics.results_dict

    def build_dataset(self, img_path):
        """Creates and returns a ClassificationDataset instance using given image path and preprocessing parameters."""
        return ClassificationDataset(root=img_path, args=self.args, augment=False, prefix=self.args.split)

    def get_dataloader(self, dataset_path, batch_size):
@ -57,7 +57,7 @@ class DetectionTrainer(BaseTrainer):
        return batch

    def set_model_attributes(self):
        """nl = de_parallel(self.model).model[-1].nl  # number of detection layers (to scale hyps)."""
        """Nl = de_parallel(self.model).model[-1].nl  # number of detection layers (to scale hyps)."""
        # self.args.box *= 3 / nl  # scale to layers
        # self.args.cls *= self.data["nc"] / 80 * 3 / nl  # scale to classes and layers
        # self.args.cls *= (self.args.imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
@ -80,8 +80,9 @@ class DetectionTrainer(BaseTrainer):

    def label_loss_items(self, loss_items=None, prefix='train'):
        """
        Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for
        segmentation & detection
        Returns a loss dict with labelled training loss items tensor.

        Not needed for classification but necessary for segmentation & detection
        """
        keys = [f'{prefix}/{x}' for x in self.loss_names]
        if loss_items is not None:
@ -6,13 +6,11 @@ from ultralytics.nn.tasks import ClassificationModel, DetectionModel, PoseModel,


class YOLO(Model):
    """
    YOLO (You Only Look Once) object detection model.
    """
    """YOLO (You Only Look Once) object detection model."""

    @property
    def task_map(self):
        """Map head to model, trainer, validator, and predictor classes"""
        """Map head to model, trainer, validator, and predictor classes."""
        return {
            'classify': {
                'model': ClassificationModel,
@ -21,6 +21,7 @@ class PosePredictor(DetectionPredictor):
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """Initializes PosePredictor, sets task to 'pose' and logs a warning for using 'mps' as device."""
        super().__init__(cfg, overrides, _callbacks)
        self.args.task = 'pose'
        if isinstance(self.args.device, str) and self.args.device.lower() == 'mps':
@ -21,10 +21,12 @@ class SegmentationPredictor(DetectionPredictor):
    """

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """Initializes the SegmentationPredictor with the provided configuration, overrides, and callbacks."""
        super().__init__(cfg, overrides, _callbacks)
        self.args.task = 'segment'

    def postprocess(self, preds, img, orig_imgs):
        """Applies non-max suppression and processes detections for each image in an input batch."""
        p = ops.non_max_suppression(preds[0],
                                    self.args.conf,
                                    self.args.iou,
@ -144,7 +144,7 @@ class SegmentationValidator(DetectionValidator):

    def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False):
        """
        Return correct prediction matrix
        Return correct prediction matrix.

        Args:
            detections (array[N, 6]), x1, y1, x2, y2, conf, class
@ -20,7 +20,11 @@ from ultralytics.utils.downloads import attempt_download_asset, is_url


def check_class_names(names):
    """Check class names. Map imagenet class codes to human-readable names if required. Convert lists to dicts."""
    """
    Check class names.

    Map imagenet class codes to human-readable names if required. Convert lists to dicts.
    """
    if isinstance(names, list):  # names is a list
        names = dict(enumerate(names))  # convert to dict
    if isinstance(names, dict):
@ -37,6 +41,32 @@ def check_class_names(names):


class AutoBackend(nn.Module):
    """
    Handles dynamic backend selection for running inference using Ultralytics YOLO models.

    The AutoBackend class is designed to provide an abstraction layer for various inference engines. It supports a wide
    range of formats, each with specific naming conventions as outlined below:

        Supported Formats and Naming Conventions:
            | Format                | File Suffix      |
            |-----------------------|------------------|
            | PyTorch               | *.pt             |
            | TorchScript           | *.torchscript    |
            | ONNX Runtime          | *.onnx           |
            | ONNX OpenCV DNN       | *.onnx (dnn=True)|
            | OpenVINO              | *openvino_model/ |
            | CoreML                | *.mlpackage      |
            | TensorRT              | *.engine         |
            | TensorFlow SavedModel | *_saved_model    |
            | TensorFlow GraphDef   | *.pb             |
            | TensorFlow Lite       | *.tflite         |
            | TensorFlow Edge TPU   | *_edgetpu.tflite |
            | PaddlePaddle          | *_paddle_model   |
            | ncnn                  | *_ncnn_model     |

    This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy
    models across various platforms.
    """

    @torch.no_grad()
    def __init__(self,
@ -48,33 +78,16 @@ class AutoBackend(nn.Module):
                 fuse=True,
                 verbose=True):
        """
        MultiBackend class for python inference on various platforms using Ultralytics YOLO.
        Initialize the AutoBackend for inference.

        Args:
            weights (str): The path to the weights file. Default: 'yolov8n.pt'
            device (torch.device): The device to run the model on.
            dnn (bool): Use OpenCV DNN module for inference if True, defaults to False.
            data (str | Path | optional): Additional data.yaml file for class names.
            fp16 (bool): If True, use half precision. Default: False
            fuse (bool): Whether to fuse the model or not. Default: True
            verbose (bool): Whether to run in verbose mode or not. Default: True

        Supported formats and their naming conventions:
            | Format                | Suffix           |
            |-----------------------|------------------|
            | PyTorch               | *.pt             |
            | TorchScript           | *.torchscript    |
            | ONNX Runtime          | *.onnx           |
            | ONNX OpenCV DNN       | *.onnx dnn=True  |
            | OpenVINO              | *.xml            |
            | CoreML                | *.mlpackage      |
            | TensorRT              | *.engine         |
            | TensorFlow SavedModel | *_saved_model    |
            | TensorFlow GraphDef   | *.pb             |
            | TensorFlow Lite       | *.tflite         |
            | TensorFlow Edge TPU   | *_edgetpu.tflite |
            | PaddlePaddle          | *_paddle_model   |
            | ncnn                  | *_ncnn_model     |
            weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'.
            device (torch.device): Device to run the model on. Defaults to CPU.
            dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False.
            data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional.
            fp16 (bool): Enable half-precision inference. Supported only on specific backends. Defaults to False.
            fuse (bool): Fuse Conv2D + BatchNorm layers for optimization. Defaults to True.
            verbose (bool): Enable verbose logging. Defaults to True.
        """
        super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)
@ -440,14 +453,14 @@ class AutoBackend(nn.Module):

    def from_numpy(self, x):
        """
        Convert a numpy array to a tensor.
        Convert a numpy array to a tensor.

        Args:
            x (np.ndarray): The array to be converted.
        Args:
            x (np.ndarray): The array to be converted.

        Returns:
            (torch.Tensor): The converted tensor
        """
        Returns:
            (torch.Tensor): The converted tensor
        """
        return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x

    def warmup(self, imgsz=(1, 3, 640, 640)):
@ -476,7 +489,7 @@ class AutoBackend(nn.Module):
    @staticmethod
    def _model_type(p='path/to/model.pt'):
        """
        This function takes a path to a model file and returns the model type
        This function takes a path to a model file and returns the model type.

        Args:
            p: path to the model file. Defaults to path/to/model.pt
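AutoBackend's dispatch keys off the weight file's suffix, as the table above documents. A minimal sketch of suffix-based dispatch in that spirit (the mapping and the `guess_backend` helper are illustrative, not the `_model_type` implementation):

```python
from pathlib import Path

SUFFIX_TO_BACKEND = {
    '.pt': 'pytorch',
    '.torchscript': 'torchscript',
    '.onnx': 'onnx',
    '.engine': 'tensorrt',
    '.mlpackage': 'coreml',
    '.pb': 'tf_graphdef',
    '.tflite': 'tflite',
}

def guess_backend(weights: str) -> str:
    """Guess an inference backend from the weights file suffix."""
    return SUFFIX_TO_BACKEND.get(Path(weights).suffix.lower(), 'unknown')

print(guess_backend('yolov8n.onnx'))  # onnx
```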
@ -1,16 +1,20 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Ultralytics modules. Visualize with:
Ultralytics modules.

from ultralytics.nn.modules import *
import torch
import os
Example:
    Visualize a module with Netron.
    ```python
    from ultralytics.nn.modules import *
    import torch
    import os

x = torch.ones(1, 128, 40, 40)
m = Conv(128, 128)
f = f'{m._get_name()}.onnx'
torch.onnx.export(m, x, f)
os.system(f'onnxsim {f} {f} && open {f}')
    x = torch.ones(1, 128, 40, 40)
    m = Conv(128, 128)
    f = f'{m._get_name()}.onnx'
    torch.onnx.export(m, x, f)
    os.system(f'onnxsim {f} {f} && open {f}')
    ```
"""

from .block import (C1, C2, C3, C3TR, DFL, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, GhostBottleneck,
@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Block modules
"""
"""Block modules."""

import torch
import torch.nn as nn
@ -17,6 +15,7 @@ __all__ = ('DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3', 'C2f', '
class DFL(nn.Module):
    """
    Integral module of Distribution Focal Loss (DFL).

    Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
    """

@ -51,11 +50,14 @@ class Proto(nn.Module):


class HGStem(nn.Module):
    """StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
    """
    StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.

    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
    """

    def __init__(self, c1, cm, c2):
        """Initialize the SPP layer with input/output channels and specified kernel sizes for max pooling."""
        super().__init__()
        self.stem1 = Conv(c1, cm, 3, 2, act=nn.ReLU())
        self.stem2a = Conv(cm, cm // 2, 2, 1, 0, act=nn.ReLU())
@ -79,11 +81,14 @@ class HGStem(nn.Module):


class HGBlock(nn.Module):
    """HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
    """
    HG_Block of PPHGNetV2 with 2 convolutions and LightConv.

    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
    """

    def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU()):
        """Initializes a CSP Bottleneck with 1 convolution using specified input and output channels."""
        super().__init__()
        block = LightConv if lightconv else Conv
        self.m = nn.ModuleList(block(c1 if i == 0 else cm, cm, k=k, act=act) for i in range(n))
@ -218,6 +223,7 @@ class RepC3(nn.Module):
    """Rep C3."""

    def __init__(self, c1, c2, n=3, e=1.0):
        """Initialize CSP Bottleneck with a single convolution using input channels, output channels, and number."""
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c2, 1, 1)
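The DFL module above decodes box offsets as the expectation of a discrete distribution over bins (Generalized Focal Loss). A compact sketch of that decoding step (the `reg_max` value is illustrative):

```python
import torch

reg_max = 16
logits = torch.randn(2, reg_max)                   # per-side distribution logits
bins = torch.arange(reg_max, dtype=torch.float32)  # bin indices 0..reg_max-1
offsets = (logits.softmax(dim=-1) * bins).sum(dim=-1)
print(offsets.shape)  # torch.Size([2]) -> one continuous offset per row
```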
@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Convolution modules
"""
"""Convolution modules."""

import math

@ -69,7 +67,9 @@ class Conv2(Conv):


class LightConv(nn.Module):
    """Light convolution with args(ch_in, ch_out, kernel).
    """
    Light convolution with args(ch_in, ch_out, kernel).

    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
    """

@ -148,12 +148,15 @@ class GhostConv(nn.Module):

class RepConv(nn.Module):
    """
    RepConv is a basic rep-style block, including training and deploy status. This module is used in RT-DETR.
    RepConv is a basic rep-style block, including training and deploy status.

    This module is used in RT-DETR.
    Based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
    """
    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
        """Initializes Light Convolution layer with inputs, outputs & optional activation function."""
        super().__init__()
        assert k == 3 and p == 1
        self.g = g
@ -166,27 +169,30 @@ class RepConv(nn.Module):
        self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)

    def forward_fuse(self, x):
        """Forward process"""
        """Forward process."""
        return self.act(self.conv(x))

    def forward(self, x):
        """Forward process"""
        """Forward process."""
        id_out = 0 if self.bn is None else self.bn(x)
        return self.act(self.conv1(x) + self.conv2(x) + id_out)

    def get_equivalent_kernel_bias(self):
        """Returns equivalent kernel and bias by adding 3x3 kernel, 1x1 kernel and identity kernel with their biases."""
        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
        kernelid, biasid = self._fuse_bn_tensor(self.bn)
        return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid

    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
        """Pads a 1x1 tensor to a 3x3 tensor."""
        if kernel1x1 is None:
            return 0
        else:
            return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])

    def _fuse_bn_tensor(self, branch):
        """Generates appropriate kernels and biases for convolution by fusing branches of the neural network."""
        if branch is None:
            return 0, 0
        if isinstance(branch, Conv):
@ -214,6 +220,7 @@ class RepConv(nn.Module):
        return kernel * t, beta - running_mean * gamma / std

    def fuse_convs(self):
        """Combines two convolution layers into a single layer and removes unused attributes from the class."""
        if hasattr(self, 'conv'):
            return
        kernel, bias = self.get_equivalent_kernel_bias()
@ -243,12 +250,14 @@ class ChannelAttention(nn.Module):
    """Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet."""

    def __init__(self, channels: int) -> None:
        """Initializes the class and sets the basic configurations and instance variables required."""
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
        self.act = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Applies forward pass using activation on convolutions of the input, optionally using batch normalization."""
        return x * self.act(self.fc(self.pool(x)))

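`RepConv.get_equivalent_kernel_bias` folds the 1x1 branch into the 3x3 branch by zero-padding the kernel, as `_pad_1x1_to_3x3_tensor` shows. A quick check that the padded kernel is output-equivalent (shapes are illustrative):

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 4, 8, 8)
k1 = torch.randn(4, 4, 1, 1)    # 1x1 kernel
k3 = F.pad(k1, [1, 1, 1, 1])    # zero-padded to 3x3

y1 = F.conv2d(x, k1, padding=0)
y3 = F.conv2d(x, k3, padding=1)  # extra padding keeps the receptive field aligned
print(torch.allclose(y1, y3, atol=1e-5))  # True
```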
@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Model head modules
"""
"""Model head modules."""

import math

@ -229,6 +227,7 @@ class RTDETRDecoder(nn.Module):
        self._reset_parameters()

    def forward(self, x, batch=None):
        """Runs the forward pass of the module, returning bounding box and classification scores for the input."""
        from ultralytics.models.utils.ops import get_cdn_group

        # input projection and embedding
@ -265,6 +264,7 @@ class RTDETRDecoder(nn.Module):
        return y if self.export else (y, x)

    def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2):
        """Generates anchor bounding boxes for given shapes with specific grid size and validates them."""
        anchors = []
        for i, (h, w) in enumerate(shapes):
            sy = torch.arange(end=h, dtype=dtype, device=device)
@ -284,6 +284,7 @@ class RTDETRDecoder(nn.Module):
        return anchors, valid_mask

    def _get_encoder_input(self, x):
        """Processes and returns encoder inputs by getting projection features from input and concatenating them."""
        # get projection features
        x = [self.input_proj[i](feat) for i, feat in enumerate(x)]
        # get encoder inputs
@ -301,6 +302,7 @@ class RTDETRDecoder(nn.Module):
        return feats, shapes

    def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None):
        """Generates and prepares the input required for the decoder from the provided features and shapes."""
        bs = len(feats)
        # prepare input for decoder
        anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
@ -339,6 +341,7 @@ class RTDETRDecoder(nn.Module):

    # TODO
    def _reset_parameters(self):
        """Initializes or resets the parameters of the model's various components with predefined weights and biases."""
        # class and bbox head init
        bias_cls = bias_init_with_prob(0.01) / 80 * self.nc
        # NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets.
@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Transformer modules
"""
"""Transformer modules."""

import math

@ -18,9 +16,10 @@ __all__ = ('TransformerEncoderLayer', 'TransformerLayer', 'TransformerBlock', 'M


class TransformerEncoderLayer(nn.Module):
    """Transformer Encoder."""
    """Defines a single layer of the transformer encoder."""

    def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False):
        """Initialize the TransformerEncoderLayer with specified parameters."""
        super().__init__()
        from ...utils.torch_utils import TORCH_1_9
        if not TORCH_1_9:
@ -41,10 +40,11 @@ class TransformerEncoderLayer(nn.Module):
        self.normalize_before = normalize_before

    def with_pos_embed(self, tensor, pos=None):
        """Add position embeddings if given."""
        """Add position embeddings to the tensor if provided."""
        return tensor if pos is None else tensor + pos

    def forward_post(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
        """Performs forward pass with post-normalization."""
        q = k = self.with_pos_embed(src, pos)
        src2 = self.ma(q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
        src = src + self.dropout1(src2)
@ -54,6 +54,7 @@ class TransformerEncoderLayer(nn.Module):
        return self.norm2(src)

    def forward_pre(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
        """Performs forward pass with pre-normalization."""
        src2 = self.norm1(src)
        q = k = self.with_pos_embed(src2, pos)
        src2 = self.ma(q, k, value=src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
@ -70,11 +71,14 @@ class TransformerEncoderLayer(nn.Module):


class AIFI(TransformerEncoderLayer):
    """Defines the AIFI transformer layer."""

    def __init__(self, c1, cm=2048, num_heads=8, dropout=0, act=nn.GELU(), normalize_before=False):
        """Initialize the AIFI instance with specified parameters."""
        super().__init__(c1, cm, num_heads, dropout, act, normalize_before)

    def forward(self, x):
        """Forward pass for the AIFI transformer layer."""
        c, h, w = x.shape[1:]
        pos_embed = self.build_2d_sincos_position_embedding(w, h, c)
        # flatten [B, C, H, W] to [B, HxW, C]
@ -82,7 +86,8 @@ class AIFI(TransformerEncoderLayer):
        return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous()

    @staticmethod
    def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.):
    def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.0):
        """Builds 2D sine-cosine position embedding."""
        grid_w = torch.arange(int(w), dtype=torch.float32)
        grid_h = torch.arange(int(h), dtype=torch.float32)
        grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing='ij')
@ -140,27 +145,32 @@ class TransformerBlock(nn.Module):


class MLPBlock(nn.Module):
    """Implements a single block of a multi-layer perceptron."""

    def __init__(self, embedding_dim, mlp_dim, act=nn.GELU):
        """Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function."""
        super().__init__()
        self.lin1 = nn.Linear(embedding_dim, mlp_dim)
        self.lin2 = nn.Linear(mlp_dim, embedding_dim)
        self.act = act()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass for the MLPBlock."""
        return self.lin2(self.act(self.lin1(x)))


class MLP(nn.Module):
    """ Very simple multi-layer perceptron (also called FFN)"""
    """Implements a simple multi-layer perceptron (also called FFN)."""

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        """Initialize the MLP with specified input, hidden, output dimensions and number of layers."""
        super().__init__()
        self.num_layers = num_layers
        h = [hidden_dim] * (num_layers - 1)
        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))

    def forward(self, x):
        """Forward pass for the entire MLP."""
        for i, layer in enumerate(self.layers):
            x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
        return x
@ -168,17 +178,22 @@ class MLP(nn.Module):

class LayerNorm2d(nn.Module):
    """
    LayerNorm2d module from https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
    2D Layer Normalization module inspired by Detectron2 and ConvNeXt implementations.

    Original implementation at
    https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
    https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119
    """

    def __init__(self, num_channels, eps=1e-6):
        """Initialize LayerNorm2d with the given parameters."""
        super().__init__()
        self.weight = nn.Parameter(torch.ones(num_channels))
        self.bias = nn.Parameter(torch.zeros(num_channels))
        self.eps = eps

    def forward(self, x):
        """Perform forward pass for 2D layer normalization."""
        u = x.mean(1, keepdim=True)
        s = (x - u).pow(2).mean(1, keepdim=True)
        x = (x - u) / torch.sqrt(s + self.eps)
@ -187,11 +202,13 @@ class LayerNorm2d(nn.Module):

class MSDeformAttn(nn.Module):
    """
    Original Multi-Scale Deformable Attention Module.
    Multi-Scale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations.

    https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/ops/modules/ms_deform_attn.py
    """

    def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4):
        """Initialize MSDeformAttn with the given parameters."""
        super().__init__()
        if d_model % n_heads != 0:
            raise ValueError(f'd_model must be divisible by n_heads, but got {d_model} and {n_heads}')
@ -214,6 +231,7 @@ class MSDeformAttn(nn.Module):
        self._reset_parameters()

    def _reset_parameters(self):
        """Reset module parameters."""
        constant_(self.sampling_offsets.weight.data, 0.)
        thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads)
        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
@ -232,7 +250,10 @@ class MSDeformAttn(nn.Module):

    def forward(self, query, refer_bbox, value, value_shapes, value_mask=None):
        """
        Perform forward pass for multi-scale deformable attention.

        https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py

        Args:
            query (torch.Tensor): [bs, query_length, C]
            refer_bbox (torch.Tensor): [bs, query_length, n_levels, 2], range in [0, 1], top-left (0,0),
@ -272,24 +293,27 @@ class MSDeformAttn(nn.Module):

class DeformableTransformerDecoderLayer(nn.Module):
    """
    Deformable Transformer Decoder Layer inspired by PaddleDetection and Deformable-DETR implementations.

    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
    https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/deformable_transformer.py
    """

    def __init__(self, d_model=256, n_heads=8, d_ffn=1024, dropout=0., act=nn.ReLU(), n_levels=4, n_points=4):
        """Initialize the DeformableTransformerDecoderLayer with the given parameters."""
        super().__init__()

        # self attention
        # Self attention
        self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
        self.dropout1 = nn.Dropout(dropout)
        self.norm1 = nn.LayerNorm(d_model)

        # cross attention
        # Cross attention
        self.cross_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points)
        self.dropout2 = nn.Dropout(dropout)
        self.norm2 = nn.LayerNorm(d_model)

        # ffn
        # FFN
        self.linear1 = nn.Linear(d_model, d_ffn)
        self.act = act
        self.dropout3 = nn.Dropout(dropout)
@ -299,37 +323,44 @@ class DeformableTransformerDecoderLayer(nn.Module):

    @staticmethod
    def with_pos_embed(tensor, pos):
        """Add positional embeddings to the input tensor, if provided."""
        return tensor if pos is None else tensor + pos

    def forward_ffn(self, tgt):
        """Perform forward pass through the Feed-Forward Network part of the layer."""
        tgt2 = self.linear2(self.dropout3(self.act(self.linear1(tgt))))
        tgt = tgt + self.dropout4(tgt2)
        return self.norm3(tgt)

    def forward(self, embed, refer_bbox, feats, shapes, padding_mask=None, attn_mask=None, query_pos=None):
        # self attention
        """Perform the forward pass through the entire decoder layer."""

        # Self attention
        q = k = self.with_pos_embed(embed, query_pos)
        tgt = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), embed.transpose(0, 1),
                             attn_mask=attn_mask)[0].transpose(0, 1)
        embed = embed + self.dropout1(tgt)
        embed = self.norm1(embed)

        # cross attention
        # Cross attention
        tgt = self.cross_attn(self.with_pos_embed(embed, query_pos), refer_bbox.unsqueeze(2), feats, shapes,
                              padding_mask)
        embed = embed + self.dropout2(tgt)
        embed = self.norm2(embed)

        # ffn
        # FFN
        return self.forward_ffn(embed)


class DeformableTransformerDecoder(nn.Module):
    """
    Implementation of Deformable Transformer Decoder based on PaddleDetection.

    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
    """

    def __init__(self, hidden_dim, decoder_layer, num_layers, eval_idx=-1):
        """Initialize the DeformableTransformerDecoder with the given parameters."""
        super().__init__()
        self.layers = _get_clones(decoder_layer, num_layers)
        self.num_layers = num_layers
@ -347,6 +378,7 @@ class DeformableTransformerDecoder(nn.Module):
                pos_mlp,
                attn_mask=None,
                padding_mask=None):
        """Perform the forward pass through the entire decoder."""
        output = embed
        dec_bboxes = []
        dec_cls = []
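`AIFI.build_2d_sincos_position_embedding` above combines per-axis sine and cosine terms over a frequency ladder. A compact sketch of the same construction (dimensions are illustrative):

```python
import torch

w, h, embed_dim, temperature = 4, 4, 8, 10000.0
grid_w, grid_h = torch.meshgrid(torch.arange(w, dtype=torch.float32),
                                torch.arange(h, dtype=torch.float32), indexing='ij')

pos_dim = embed_dim // 4
omega = 1.0 / temperature ** (torch.arange(pos_dim, dtype=torch.float32) / pos_dim)
out_w = grid_w.flatten()[..., None] @ omega[None]
out_h = grid_h.flatten()[..., None] @ omega[None]
pe = torch.cat([out_w.sin(), out_w.cos(), out_h.sin(), out_h.cos()], dim=1)
print(pe.shape)  # torch.Size([16, 8]) -> one embedding per (x, y) location
```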
@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Module utils
"""
"""Module utils."""

import copy
import math
@ -16,15 +14,17 @@ __all__ = 'multi_scale_deformable_attn_pytorch', 'inverse_sigmoid'


def _get_clones(module, n):
    """Create a list of cloned modules from the given module."""
    return nn.ModuleList([copy.deepcopy(module) for _ in range(n)])


def bias_init_with_prob(prior_prob=0.01):
    """initialize conv/fc bias value according to a given probability value."""
    """Initialize conv/fc bias value according to a given probability value."""
    return float(-np.log((1 - prior_prob) / prior_prob))  # return bias_init


def linear_init_(module):
    """Initialize the weights and biases of a linear module."""
    bound = 1 / math.sqrt(module.weight.shape[0])
    uniform_(module.weight, -bound, bound)
    if hasattr(module, 'bias') and module.bias is not None:
@ -32,6 +32,7 @@ def linear_init_(module):


def inverse_sigmoid(x, eps=1e-5):
    """Calculate the inverse sigmoid function for a tensor."""
    x = x.clamp(min=0, max=1)
    x1 = x.clamp(min=eps)
    x2 = (1 - x).clamp(min=eps)
@ -43,6 +44,7 @@ def multi_scale_deformable_attn_pytorch(value: torch.Tensor, value_spatial_shape
                                        attention_weights: torch.Tensor) -> torch.Tensor:
    """
    Multi-scale deformable attention.

    https://github.com/IDEA-Research/detrex/blob/main/detrex/layers/multi_scale_deform_attn.py
    """

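A numeric check of `bias_init_with_prob` above: with bias `b = -log((1 - p) / p)` and zero weights, a sigmoid output starts at the prior probability `p`:

```python
import numpy as np

p = 0.01
b = float(-np.log((1 - p) / p))
print(b)                      # ~ -4.595
print(1 / (1 + np.exp(-b)))   # ~ 0.01, recovering the prior
```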
@ -25,14 +25,11 @@ except ImportError:


class BaseModel(nn.Module):
    """
    The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family.
    """
    """The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family."""

    def forward(self, x, *args, **kwargs):
        """
        Forward pass of the model on a single scale.
        Wrapper for `_forward_once` method.
        Forward pass of the model on a single scale. Wrapper for `_forward_once` method.

        Args:
            x (torch.Tensor | dict): The input image tensor or a dict including image tensor and gt labels.
@ -93,8 +90,8 @@ class BaseModel(nn.Module):

    def _profile_one_layer(self, m, x, dt):
        """
        Profile the computation time and FLOPs of a single layer of the model on a given input.
        Appends the results to the provided list.
        Profile the computation time and FLOPs of a single layer of the model on a given input. Appends the results to
        the provided list.

        Args:
            m (nn.Module): The layer to be profiled.
@ -158,7 +155,7 @@ class BaseModel(nn.Module):

    def info(self, detailed=False, verbose=True, imgsz=640):
        """
        Prints model information
        Prints model information.

        Args:
            detailed (bool): if True, prints out detailed information about the model. Defaults to False
@ -175,7 +172,7 @@ class BaseModel(nn.Module):
            fn (function): the function to apply to the model

        Returns:
            A model that is a Detect() object.
            (BaseModel): An updated BaseModel object.
        """
        self = super()._apply(fn)
        m = self.model[-1]  # Detect()
@ -202,7 +199,7 @@ class BaseModel(nn.Module):

    def loss(self, batch, preds=None):
        """
        Compute loss
        Compute loss.

        Args:
            batch (dict): Batch to compute loss on
@ -215,6 +212,7 @@ class BaseModel(nn.Module):
        return self.criterion(preds, batch)

    def init_criterion(self):
        """Initialize the loss criterion for the BaseModel."""
        raise NotImplementedError('compute_loss() needs to be implemented by task heads')


@ -222,6 +220,7 @@ class DetectionModel(BaseModel):
    """YOLOv8 detection model."""

    def __init__(self, cfg='yolov8n.yaml', ch=3, nc=None, verbose=True):  # model, input channels, number of classes
        """Initialize the YOLOv8 detection model with the given config and parameters."""
        super().__init__()
        self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg)  # cfg dict

@ -289,6 +288,7 @@ class DetectionModel(BaseModel):
        return y

    def init_criterion(self):
        """Initialize the loss criterion for the DetectionModel."""
        return v8DetectionLoss(self)


@ -300,6 +300,7 @@ class SegmentationModel(DetectionModel):
        super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)

    def init_criterion(self):
        """Initialize the loss criterion for the SegmentationModel."""
        return v8SegmentationLoss(self)


@ -316,6 +317,7 @@ class PoseModel(DetectionModel):
        super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)

    def init_criterion(self):
        """Initialize the loss criterion for the PoseModel."""
        return v8PoseLoss(self)


@ -365,22 +367,59 @@ class ClassificationModel(BaseModel):
            m[i] = nn.Conv2d(m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None)

    def init_criterion(self):
        """Compute the classification loss between predictions and true labels."""
        """Initialize the loss criterion for the ClassificationModel."""
        return v8ClassificationLoss()


class RTDETRDetectionModel(DetectionModel):
    """
    RTDETR (Real-time DEtection and Tracking using Transformers) Detection Model class.

    This class is responsible for constructing the RTDETR architecture, defining loss functions, and
    facilitating both the training and inference processes. RTDETR is an object detection and tracking model
    that extends from the DetectionModel base class.

    Attributes:
        cfg (str): The configuration file path or preset string. Default is 'rtdetr-l.yaml'.
        ch (int): Number of input channels. Default is 3 (RGB).
        nc (int, optional): Number of classes for object detection. Default is None.
        verbose (bool): Specifies if summary statistics are shown during initialization. Default is True.

    Methods:
        init_criterion: Initializes the criterion used for loss calculation.
        loss: Computes and returns the loss during training.
        predict: Performs a forward pass through the network and returns the output.
    """

    def __init__(self, cfg='rtdetr-l.yaml', ch=3, nc=None, verbose=True):
        """
        Initialize the RTDETRDetectionModel.

        Args:
            cfg (str): Configuration file name or path.
            ch (int): Number of input channels.
            nc (int, optional): Number of classes. Defaults to None.
            verbose (bool, optional): Print additional information during initialization. Defaults to True.
        """
        super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)

    def init_criterion(self):
        """Compute the classification loss between predictions and true labels."""
        """Initialize the loss criterion for the RTDETRDetectionModel."""
        from ultralytics.models.utils.loss import RTDETRDetectionLoss

        return RTDETRDetectionLoss(nc=self.nc, use_vfl=True)

    def loss(self, batch, preds=None):
        """
        Compute the loss for the given batch of data.

        Args:
            batch (dict): Dictionary containing image and label data.
            preds (torch.Tensor, optional): Precomputed model predictions. Defaults to None.

        Returns:
            tuple: A tuple containing the total loss and main three losses in a tensor.
        """
        if not hasattr(self, 'criterion'):
            self.criterion = self.init_criterion()

@ -417,16 +456,17 @@ class RTDETRDetectionModel(DetectionModel):

    def predict(self, x, profile=False, visualize=False, batch=None, augment=False):
        """
        Perform a forward pass through the network.
        Perform a forward pass through the model.

        Args:
            x (torch.Tensor): The input tensor to the model
            profile (bool): Print the computation time of each layer if True, defaults to False.
            visualize (bool): Save the feature maps of the model if True, defaults to False
            batch (dict): A dict including gt boxes and labels from dataloader.
            x (torch.Tensor): The input tensor.
            profile (bool, optional): If True, profile the computation time for each layer. Defaults to False.
            visualize (bool, optional): If True, save feature maps for visualization. Defaults to False.
            batch (dict, optional): Ground truth data for evaluation. Defaults to None.
            augment (bool, optional): If True, perform data augmentation during inference. Defaults to False.

        Returns:
            (torch.Tensor): The last output of the model.
            torch.Tensor: Model's output tensor.
        """
        y, dt = [], []  # outputs
        for m in self.model[:-1]:  # except the head part
@ -708,9 +748,9 @@ def yaml_model_load(path):

def guess_model_scale(model_path):
    """
    Takes a path to a YOLO model's YAML file as input and extracts the size character of the model's scale.
    The function uses regular expression matching to find the pattern of the model scale in the YAML file name,
    which is denoted by n, s, m, l, or x. The function returns the size character of the model scale as a string.
    Takes a path to a YOLO model's YAML file as input and extracts the size character of the model's scale. The function
    uses regular expression matching to find the pattern of the model scale in the YAML file name, which is denoted by
    n, s, m, l, or x. The function returns the size character of the model scale as a string.

    Args:
        model_path (str | Path): The path to the YOLO model's YAML file.
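`guess_model_scale` pulls the scale character out of the YAML file name with a regular expression. A hedged sketch of that matching (the exact pattern in the source may differ):

```python
import re
from pathlib import Path

def scale_from_name(model_path: str) -> str:
    """Return the scale character (n, s, m, l or x) from a YOLO YAML name."""
    m = re.search(r'yolov\d+([nslmx])', Path(model_path).stem)
    return m.group(1) if m else ''

print(scale_from_name('yolov8n.yaml'))  # n
```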
@ -12,6 +12,33 @@ from .utils.kalman_filter import KalmanFilterXYWH


class BOTrack(STrack):
    """
    An extended version of the STrack class for YOLOv8, adding object tracking features.

    Attributes:
        shared_kalman (KalmanFilterXYWH): A shared Kalman filter for all instances of BOTrack.
        smooth_feat (np.ndarray): Smoothed feature vector.
        curr_feat (np.ndarray): Current feature vector.
        features (deque): A deque to store feature vectors with a maximum length defined by `feat_history`.
        alpha (float): Smoothing factor for the exponential moving average of features.
        mean (np.ndarray): The mean state of the Kalman filter.
        covariance (np.ndarray): The covariance matrix of the Kalman filter.

    Methods:
        update_features(feat): Update features vector and smooth it using exponential moving average.
        predict(): Predicts the mean and covariance using Kalman filter.
        re_activate(new_track, frame_id, new_id): Reactivates a track with updated features and optionally new ID.
        update(new_track, frame_id): Update the YOLOv8 instance with new track and frame ID.
        tlwh: Property that gets the current position in tlwh format `(top left x, top left y, width, height)`.
        multi_predict(stracks): Predicts the mean and covariance of multiple object tracks using shared Kalman filter.
        convert_coords(tlwh): Converts tlwh bounding box coordinates to xywh format.
        tlwh_to_xywh(tlwh): Convert bounding box to xywh format `(center x, center y, width, height)`.

    Usage:
        bo_track = BOTrack(tlwh, score, cls, feat)
        bo_track.predict()
        bo_track.update(new_track, frame_id)
    """
    shared_kalman = KalmanFilterXYWH()

    def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
@ -59,9 +86,7 @@ class BOTrack(STrack):

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        """Get current position in bounding box format `(top left x, top left y, width, height)`."""
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
@ -90,15 +115,37 @@ class BOTrack(STrack):

    @staticmethod
    def tlwh_to_xywh(tlwh):
        """Convert bounding box to format `(center x, center y, width,
        height)`.
        """
        """Convert bounding box to format `(center x, center y, width, height)`."""
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        return ret


class BOTSORT(BYTETracker):
    """
    An extended version of the BYTETracker class for YOLOv8, designed for object tracking with ReID and GMC algorithm.

    Attributes:
        proximity_thresh (float): Threshold for spatial proximity (IoU) between tracks and detections.
        appearance_thresh (float): Threshold for appearance similarity (ReID embeddings) between tracks and detections.
        encoder (object): Object to handle ReID embeddings, set to None if ReID is not enabled.
        gmc (GMC): An instance of the GMC algorithm for data association.
        args (object): Parsed command-line arguments containing tracking parameters.

    Methods:
        get_kalmanfilter(): Returns an instance of KalmanFilterXYWH for object tracking.
        init_track(dets, scores, cls, img): Initialize track with detections, scores, and classes.
        get_dists(tracks, detections): Get distances between tracks and detections using IoU and (optionally) ReID.
        multi_predict(tracks): Predict and track multiple objects with YOLOv8 model.

    Usage:
        bot_sort = BOTSORT(args, frame_rate)
        bot_sort.init_track(dets, scores, cls, img)
        bot_sort.multi_predict(tracks)

    Note:
        The class is designed to work with the YOLOv8 object detection model and supports ReID only if enabled via args.
    """

    def __init__(self, args, frame_rate=30):
        """Initialize YOLOv8 object with ReID module and GMC algorithm."""
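`BOTrack.update_features` smooths ReID embeddings with an exponential moving average controlled by `alpha`, per the attributes above. A minimal sketch of that smoothing (the alpha value here is illustrative):

```python
import numpy as np

alpha = 0.9
smooth_feat = None
for feat in np.random.rand(5, 128).astype(np.float32):  # stream of ReID features
    feat = feat / np.linalg.norm(feat)                   # L2-normalize each feature
    smooth_feat = feat if smooth_feat is None else alpha * smooth_feat + (1 - alpha) * feat
print(smooth_feat.shape)  # (128,)
```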
@@ -8,10 +8,43 @@ from .utils.kalman_filter import KalmanFilterXYAH


class STrack(BaseTrack):
    """
    Single object tracking representation that uses Kalman filtering for state estimation.

    This class is responsible for storing all the information regarding individual tracklets and performs state updates
    and predictions based on Kalman filter.

    Attributes:
        shared_kalman (KalmanFilterXYAH): Shared Kalman filter that is used across all STrack instances for prediction.
        _tlwh (np.ndarray): Private attribute to store top-left corner coordinates and width and height of bounding box.
        kalman_filter (KalmanFilterXYAH): Instance of Kalman filter used for this particular object track.
        mean (np.ndarray): Mean state estimate vector.
        covariance (np.ndarray): Covariance of state estimate.
        is_activated (bool): Boolean flag indicating if the track has been activated.
        score (float): Confidence score of the track.
        tracklet_len (int): Length of the tracklet.
        cls (any): Class label for the object.
        idx (int): Index or identifier for the object.
        frame_id (int): Current frame ID.
        start_frame (int): Frame where the object was first detected.

    Methods:
        predict(): Predict the next state of the object using Kalman filter.
        multi_predict(stracks): Predict the next states for multiple tracks.
        multi_gmc(stracks, H): Update multiple track states using a homography matrix.
        activate(kalman_filter, frame_id): Activate a new tracklet.
        re_activate(new_track, frame_id, new_id): Reactivate a previously lost tracklet.
        update(new_track, frame_id): Update the state of a matched track.
        convert_coords(tlwh): Convert bounding box to x-y-angle-height format.
        tlwh_to_xyah(tlwh): Convert tlwh bounding box to xyah format.
        tlbr_to_tlwh(tlbr): Convert tlbr bounding box to tlwh format.
        tlwh_to_tlbr(tlwh): Convert tlwh bounding box to tlbr format.
    """

    shared_kalman = KalmanFilterXYAH()

    def __init__(self, tlwh, score, cls):
        """wait activate."""
        """Initialize new STrack instance."""
        self._tlwh = np.asarray(self.tlbr_to_tlwh(tlwh[:-1]), dtype=np.float32)
        self.kalman_filter = None
        self.mean, self.covariance = None, None
@@ -92,10 +125,11 @@ class STrack(BaseTrack):

    def update(self, new_track, frame_id):
        """
        Update a matched track
        :type new_track: STrack
        :type frame_id: int
        :return:
        Update the state of a matched track.

        Args:
            new_track (STrack): The new track containing updated information.
            frame_id (int): The ID of the current frame.
        """
        self.frame_id = frame_id
        self.tracklet_len += 1
@@ -116,9 +150,7 @@ class STrack(BaseTrack):

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.
        """
        """Get current position in bounding box format (top left x, top left y, width, height)."""
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
@@ -128,17 +160,15 @@ class STrack(BaseTrack):

    @property
    def tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        """Convert bounding box to format (min x, min y, max x, max y), i.e., (top left, bottom right)."""
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @staticmethod
    def tlwh_to_xyah(tlwh):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """Convert bounding box to format (center x, center y, aspect ratio, height), where the aspect ratio is width /
        height.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
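
The tlwh/xyah conversions above are plain numpy arithmetic; a self-contained sketch of the same math, independent of the class and for illustration only:

import numpy as np

def tlwh_to_xyah(tlwh):
    # (top-left x, top-left y, w, h) -> (center x, center y, aspect ratio w/h, h)
    ret = np.asarray(tlwh, dtype=np.float32).copy()
    ret[:2] += ret[2:] / 2  # shift the top-left corner to the box center
    ret[2] /= ret[3]        # width becomes the aspect ratio
    return ret

print(tlwh_to_xyah([10, 20, 40, 80]))  # -> [30. 60. 0.5 80.]
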
@@ -165,6 +195,33 @@ class STrack(BaseTrack):


class BYTETracker:
    """
    BYTETracker: A tracking algorithm built on top of YOLOv8 for object detection and tracking.

    The class is responsible for initializing, updating, and managing the tracks for detected objects in a video
    sequence. It maintains the state of tracked, lost, and removed tracks over frames, utilizes Kalman filtering for
    predicting the new object locations, and performs data association.

    Attributes:
        tracked_stracks (list[STrack]): List of successfully activated tracks.
        lost_stracks (list[STrack]): List of lost tracks.
        removed_stracks (list[STrack]): List of removed tracks.
        frame_id (int): The current frame ID.
        args (namespace): Command-line arguments.
        max_time_lost (int): The maximum frames for a track to be considered as 'lost'.
        kalman_filter (object): Kalman Filter object.

    Methods:
        update(results, img=None): Updates object tracker with new detections.
        get_kalmanfilter(): Returns a Kalman filter object for tracking bounding boxes.
        init_track(dets, scores, cls, img=None): Initialize object tracking with detections.
        get_dists(tracks, detections): Calculates the distance between tracks and detections.
        multi_predict(tracks): Predicts the location of tracks.
        reset_id(): Resets the ID counter of STrack.
        joint_stracks(tlista, tlistb): Combines two lists of stracks.
        sub_stracks(tlista, tlistb): Filters out the stracks present in the second list from the first list.
        remove_duplicate_stracks(stracksa, stracksb): Removes duplicate stracks based on IOU.
    """

    def __init__(self, args, frame_rate=30):
        """Initialize a YOLOv8 object to track objects with given arguments and frame rate."""
@@ -234,8 +291,7 @@ class BYTETracker:
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)
        # Step 3: Second association, with low score detection boxes
        # association the untrack to the low score detections
        # Step 3: Second association, with low score detection boxes association the untrack to the low score detections
        detections_second = self.init_track(dets_second, scores_second, cls_second, img)
        r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
        # TODO
@@ -60,7 +60,6 @@ def register_tracker(model, persist):
    Args:
        model (object): The model object to register tracking callbacks for.
        persist (bool): Whether to persist the trackers if they already exist.

    """
    model.add_callback('on_predict_start', partial(on_predict_start, persist=persist))
    model.add_callback('on_predict_postprocess_end', on_predict_postprocess_end)
@@ -9,6 +9,29 @@ from ultralytics.utils import LOGGER


class GMC:
    """
    Generalized Motion Compensation (GMC) class for tracking and object detection in video frames.

    This class provides methods for tracking and detecting objects based on several tracking algorithms including ORB,
    SIFT, ECC, and Sparse Optical Flow. It also supports downscaling of frames for computational efficiency.

    Attributes:
        method (str): The method used for tracking. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.
        downscale (int): Factor by which to downscale the frames for processing.
        prevFrame (np.array): Stores the previous frame for tracking.
        prevKeyPoints (list): Stores the keypoints from the previous frame.
        prevDescriptors (np.array): Stores the descriptors from the previous frame.
        initializedFirstFrame (bool): Flag to indicate if the first frame has been processed.

    Methods:
        __init__(self, method='sparseOptFlow', downscale=2): Initializes a GMC object with the specified method
            and downscale factor.
        apply(self, raw_frame, detections=None): Applies the chosen method to a raw frame and optionally uses
            provided detections.
        applyEcc(self, raw_frame, detections=None): Applies the ECC algorithm to a raw frame.
        applyFeatures(self, raw_frame, detections=None): Applies feature-based methods like ORB or SIFT to a raw frame.
        applySparseOptFlow(self, raw_frame, detections=None): Applies the Sparse Optical Flow method to a raw frame.
    """

    def __init__(self, method='sparseOptFlow', downscale=2):
        """Initialize a video tracker with specified parameters."""
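
A sketch of how a GMC instance might be driven frame by frame. The video path and capture loop are illustrative, and the import path is assumed from this diff's file layout:

import cv2
from ultralytics.trackers.utils.gmc import GMC  # module path assumed

gmc = GMC(method='sparseOptFlow', downscale=2)
cap = cv2.VideoCapture('video.mp4')  # placeholder source
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    H = gmc.apply(frame)  # warp matrix compensating camera motion between frames
cap.release()
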
@@ -8,8 +8,8 @@ class KalmanFilterXYAH:
    """
    For bytetrack. A simple Kalman filter for tracking bounding boxes in image space.

    The 8-dimensional state space (x, y, a, h, vx, vy, va, vh) contains the bounding box center position (x, y),
    aspect ratio a, height h, and their respective velocities.
    The 8-dimensional state space (x, y, a, h, vx, vy, va, vh) contains the bounding box center position (x, y), aspect
    ratio a, height h, and their respective velocities.

    Object motion follows a constant velocity model. The bounding box location (x, y, a, h) is taken as direct
    observation of the state space (linear observation model).
@@ -182,8 +182,8 @@ class KalmanFilterXYAH:
    def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'):
        """
        Compute gating distance between state distribution and measurements. A suitable distance threshold can be
        obtained from `chi2inv95`. If `only_position` is False, the chi-square distribution has 4 degrees of
        freedom, otherwise 2.
        obtained from `chi2inv95`. If `only_position` is False, the chi-square distribution has 4 degrees of freedom,
        otherwise 2.

        Parameters
        ----------
@@ -223,8 +223,8 @@ class KalmanFilterXYWH(KalmanFilterXYAH):
    """
    For BoT-SORT. A simple Kalman filter for tracking bounding boxes in image space.

    The 8-dimensional state space (x, y, w, h, vx, vy, vw, vh) contains the bounding box center position (x, y),
    width w, height h, and their respective velocities.
    The 8-dimensional state space (x, y, w, h, vx, vy, vw, vh) contains the bounding box center position (x, y), width
    w, height h, and their respective velocities.

    Object motion follows a constant velocity model. The bounding box location (x, y, w, h) is taken as direct
    observation of the state space (linear observation model).
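
The gating step boils down to a squared Mahalanobis distance checked against a chi-square quantile. A standalone sketch of that idea with toy numbers (scipy is used here only to derive the `chi2inv95` threshold mentioned above):

import numpy as np
from scipy.stats import chi2

chi2inv95_4dof = chi2.ppf(0.95, df=4)  # ~9.4877, the gate for a 4-dof measurement

mean = np.array([0.0, 0.0, 1.0, 1.0])         # predicted (x, y, a, h)
covariance = np.eye(4) * 0.1                  # toy innovation covariance
measurement = np.array([0.2, -0.1, 1.05, 1.1])

d = measurement - mean
maha_sq = d @ np.linalg.inv(covariance) @ d   # squared Mahalanobis distance
print(maha_sq < chi2inv95_4dof)               # True -> the association passes the gate
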
@@ -117,6 +117,7 @@ class TQDM(tqdm_original):
    """

    def __init__(self, *args, **kwargs):
        """Initialize custom Ultralytics tqdm class with different default arguments."""
        # Set new default values (these can still be overridden when calling TQDM)
        kwargs['disable'] = not VERBOSE or kwargs.get('disable', False)  # logical 'and' with default value if passed
        kwargs.setdefault('bar_format', TQDM_BAR_FORMAT)  # override default value if passed
@@ -124,8 +125,7 @@ class TQDM(tqdm_original):


class SimpleClass:
    """
    Ultralytics SimpleClass is a base class providing helpful string representation, error reporting, and attribute
    """Ultralytics SimpleClass is a base class providing helpful string representation, error reporting, and attribute
    access methods for easier debugging and usage.
    """

@@ -154,8 +154,7 @@ class SimpleClass:


class IterableSimpleNamespace(SimpleNamespace):
    """
    Ultralytics IterableSimpleNamespace is an extension class of SimpleNamespace that adds iterable functionality and
    """Ultralytics IterableSimpleNamespace is an extension class of SimpleNamespace that adds iterable functionality and
    enables usage with dict() and for loops.
    """

@@ -256,8 +255,8 @@ class EmojiFilter(logging.Filter):
    """
    A custom logging filter class for removing emojis in log messages.

    This filter is particularly useful for ensuring compatibility with Windows terminals
    that may not support the display of emojis in log messages.
    This filter is particularly useful for ensuring compatibility with Windows terminals that may not support the
    display of emojis in log messages.
    """

    def filter(self, record):
@@ -275,9 +274,9 @@ if WINDOWS:  # emoji-safe logging

class ThreadingLocked:
    """
    A decorator class for ensuring thread-safe execution of a function or method.
    This class can be used as a decorator to make sure that if the decorated function
    is called from multiple threads, only one thread at a time will be able to execute the function.
    A decorator class for ensuring thread-safe execution of a function or method. This class can be used as a decorator
    to make sure that if the decorated function is called from multiple threads, only one thread at a time will be able
    to execute the function.

    Attributes:
        lock (threading.Lock): A lock object used to manage access to the decorated function.
@@ -294,13 +293,16 @@ class ThreadingLocked:
    """

    def __init__(self):
        """Initializes the decorator class for thread-safe execution of a function or method."""
        self.lock = threading.Lock()

    def __call__(self, f):
        """Run thread-safe execution of function or method."""
        from functools import wraps

        @wraps(f)
        def decorated(*args, **kwargs):
            """Applies thread-safety to the decorated function or method."""
            with self.lock:
                return f(*args, **kwargs)
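
Usage of the decorator is straightforward; a minimal sketch, with the counter and sleep purely illustrative and the import path assumed from this diff:

import threading
import time
from ultralytics.utils import ThreadingLocked  # import path assumed

@ThreadingLocked()
def bump(state):
    """Increment a shared counter without interleaving between threads."""
    value = state['n']
    time.sleep(0.001)  # widen the race window to show the lock matters
    state['n'] = value + 1

state = {'n': 0}
threads = [threading.Thread(target=bump, args=(state,)) for _ in range(50)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(state['n'])  # 50 with the lock; typically less without it
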
@@ -424,8 +426,7 @@ def is_kaggle():

def is_jupyter():
    """
    Check if the current script is running inside a Jupyter Notebook.
    Verified on Colab, Jupyterlab, Kaggle, Paperspace.
    Check if the current script is running inside a Jupyter Notebook. Verified on Colab, Jupyterlab, Kaggle, Paperspace.

    Returns:
        (bool): True if running inside a Jupyter Notebook, False otherwise.
@@ -529,8 +530,8 @@ def is_github_actions_ci() -> bool:

def is_git_dir():
    """
    Determines whether the current file is part of a git repository.
    If the current file is not part of a git repository, returns None.
    Determines whether the current file is part of a git repository. If the current file is not part of a git
    repository, returns None.

    Returns:
        (bool): True if current file is part of a git repository.
@@ -540,8 +541,8 @@ def is_git_dir():

def get_git_dir():
    """
    Determines whether the current file is part of a git repository and if so, returns the repository root directory.
    If the current file is not part of a git repository, returns None.
    Determines whether the current file is part of a git repository and if so, returns the repository root directory. If
    the current file is not part of a git repository, returns None.

    Returns:
        (Path | None): Git root directory if found or None if not found.
@@ -578,7 +579,8 @@ def get_git_branch():


def get_default_args(func):
    """Returns a dictionary of default arguments for a function.
    """
    Returns a dictionary of default arguments for a function.

    Args:
        func (callable): The function to inspect.
@@ -710,7 +712,11 @@ def remove_colorstr(input_string):


class TryExcept(contextlib.ContextDecorator):
    """YOLOv8 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager."""
    """
    YOLOv8 TryExcept class.

    Use as @TryExcept() decorator or 'with TryExcept():' context manager.
    """

    def __init__(self, msg='', verbose=True):
        """Initialize TryExcept class with optional message and verbosity settings."""
@@ -729,7 +735,11 @@ class TryExcept(contextlib.ContextDecorator):


def threaded(func):
    """Multi-threads a target function and returns thread. Usage: @threaded decorator."""
    """
    Multi-threads a target function and returns thread.

    Use as @threaded decorator.
    """

    def wrapper(*args, **kwargs):
        """Multi-threads a given function and returns the thread."""
@@ -824,6 +834,9 @@ class SettingsManager(dict):
    """

    def __init__(self, file=SETTINGS_YAML, version='0.0.4'):
        """Initialize the SettingsManager with default settings, load and validate current settings from the YAML
        file.
        """
        import copy
        import hashlib
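
Both helpers are small ergonomic wrappers; a sketch of typical use, with placeholder function bodies and import paths assumed from this diff:

from ultralytics.utils import TryExcept, threaded  # import paths assumed

@TryExcept(msg='flaky step failed')
def flaky_step():
    raise ValueError('boom')  # caught and logged by TryExcept instead of raising

@threaded
def background_job(n):
    return sum(range(n))  # runs in a worker thread

flaky_step()                   # returns normally despite the exception
t = background_job(1_000_000)  # returns a started threading.Thread
t.join()
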
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch.
"""
"""Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch."""

from copy import deepcopy

@@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Benchmark a YOLO model formats for speed and accuracy
Benchmark a YOLO model formats for speed and accuracy.

Usage:
    from ultralytics.utils.benchmarks import ProfileModels, benchmark
@@ -194,6 +194,7 @@ class ProfileModels:
        self.device = device or torch.device(0 if torch.cuda.is_available() else 'cpu')

    def profile(self):
        """Logs the benchmarking results of a model, checks metrics against floor and returns the results."""
        files = self.get_files()

        if not files:
@@ -235,6 +236,7 @@ class ProfileModels:
        return output

    def get_files(self):
        """Returns a list of paths for all relevant model files given by the user."""
        files = []
        for path in self.paths:
            path = Path(path)
@@ -250,10 +252,14 @@ class ProfileModels:
        return [Path(file) for file in sorted(files)]

    def get_onnx_model_info(self, onnx_file: str):
        """Retrieves the information including number of layers, parameters, gradients and FLOPs for an ONNX model
        file.
        """
        # return (num_layers, num_params, num_gradients, num_flops)
        return 0.0, 0.0, 0.0, 0.0

    def iterative_sigma_clipping(self, data, sigma=2, max_iters=3):
        """Applies an iterative sigma clipping algorithm to the given data times number of iterations."""
        data = np.array(data)
        for _ in range(max_iters):
            mean, std = np.mean(data), np.std(data)
@@ -264,6 +270,7 @@ class ProfileModels:
        return data

    def profile_tensorrt_model(self, engine_file: str, eps: float = 1e-3):
        """Profiles the TensorRT model, measuring average run time and standard deviation among runs."""
        if not self.trt or not Path(engine_file).is_file():
            return 0.0, 0.0

@@ -292,6 +299,9 @@ class ProfileModels:
        return np.mean(run_times), np.std(run_times)

    def profile_onnx_model(self, onnx_file: str, eps: float = 1e-3):
        """Profiles an ONNX model by executing it multiple times and returns the mean and standard deviation of run
        times.
        """
        check_requirements('onnxruntime')
        import onnxruntime as ort

@@ -344,10 +354,12 @@ class ProfileModels:
        return np.mean(run_times), np.std(run_times)

    def generate_table_row(self, model_name, t_onnx, t_engine, model_info):
        """Generates a formatted string for a table row that includes model performance and metric details."""
        layers, params, gradients, flops = model_info
        return f'| {model_name:18s} | {self.imgsz} | - | {t_onnx[0]:.2f} ± {t_onnx[1]:.2f} ms | {t_engine[0]:.2f} ± {t_engine[1]:.2f} ms | {params / 1e6:.1f} | {flops:.1f} |'

    def generate_results_dict(self, model_name, t_onnx, t_engine, model_info):
        """Generates a dictionary of model details including name, parameters, GFLOPS and speed metrics."""
        layers, params, gradients, flops = model_info
        return {
            'model/name': model_name,
@@ -357,6 +369,7 @@ class ProfileModels:
            'model/speed_TensorRT(ms)': round(t_engine[0], 3)}

    def print_table(self, table_rows):
        """Formats and prints a comparison table for different models with given statistics and performance data."""
        gpu = torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'GPU'
        header = f'| Model | size<br><sup>(pixels) | mAP<sup>val<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>{gpu} TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |'
        separator = '|-------------|---------------------|--------------------|------------------------------|-----------------------------------|------------------|-----------------|'
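
Iterative sigma clipping just re-estimates the mean and standard deviation and drops outliers each pass; a standalone sketch of the same loop (the data values are made up):

import numpy as np

def iterative_sigma_clipping(data, sigma=2, max_iters=3):
    """Repeatedly drop points more than `sigma` standard deviations from the mean."""
    data = np.array(data, dtype=float)
    for _ in range(max_iters):
        mean, std = np.mean(data), np.std(data)
        clipped = data[np.abs(data - mean) <= sigma * std]
        if len(clipped) == len(data):
            break  # converged, nothing left to clip
        data = clipped
    return data

runs = [9.9, 10.0, 10.1, 10.0, 9.8, 10.2, 10.0, 9.9, 10.1, 42.0]
print(iterative_sigma_clipping(runs))  # the 42.0 outlier is removed on the first pass
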
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Base callbacks
"""
"""Base callbacks."""

from collections import defaultdict
from copy import deepcopy

@@ -26,31 +26,38 @@ except (ImportError, AssertionError):


def _get_comet_mode():
    """Returns the mode of comet set in the environment variables, defaults to 'online' if not set."""
    return os.getenv('COMET_MODE', 'online')


def _get_comet_model_name():
    """Returns the model name for Comet from the environment variable 'COMET_MODEL_NAME' or defaults to 'YOLOv8'."""
    return os.getenv('COMET_MODEL_NAME', 'YOLOv8')


def _get_eval_batch_logging_interval():
    """Get the evaluation batch logging interval from environment variable or use default value 1."""
    return int(os.getenv('COMET_EVAL_BATCH_LOGGING_INTERVAL', 1))


def _get_max_image_predictions_to_log():
    """Get the maximum number of image predictions to log from the environment variables."""
    return int(os.getenv('COMET_MAX_IMAGE_PREDICTIONS', 100))


def _scale_confidence_score(score):
    """Scales the given confidence score by a factor specified in an environment variable."""
    scale = float(os.getenv('COMET_MAX_CONFIDENCE_SCORE', 100.0))
    return score * scale


def _should_log_confusion_matrix():
    """Determines if the confusion matrix should be logged based on the environment variable settings."""
    return os.getenv('COMET_EVAL_LOG_CONFUSION_MATRIX', 'false').lower() == 'true'


def _should_log_image_predictions():
    """Determines whether to log image predictions based on a specified environment variable."""
    return os.getenv('COMET_EVAL_LOG_IMAGE_PREDICTIONS', 'true').lower() == 'true'


@@ -104,9 +111,10 @@ def _fetch_trainer_metadata(trainer):


def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad):
    """YOLOv8 resizes images during training and the label values
    are normalized based on this resized shape. This function rescales the
    bounding box labels to the original image shape.
    """
    YOLOv8 resizes images during training and the label values are normalized based on this resized shape.

    This function rescales the bounding box labels to the original image shape.
    """

    resized_image_height, resized_image_width = resized_image_shape
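
Because these helpers read plain environment variables, the Comet integration is configured entirely before training starts. A sketch using the variables named above, with illustrative values:

import os

# Configure the Comet callbacks via environment, as the helpers above read them
os.environ['COMET_MODE'] = 'offline'                    # or 'online' (the default)
os.environ['COMET_MODEL_NAME'] = 'my-yolov8-run'        # defaults to 'YOLOv8'
os.environ['COMET_EVAL_BATCH_LOGGING_INTERVAL'] = '2'   # log every 2nd eval batch
os.environ['COMET_MAX_IMAGE_PREDICTIONS'] = '50'        # cap logged prediction images
os.environ['COMET_EVAL_LOG_CONFUSION_MATRIX'] = 'true'  # off by default
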
@@ -25,6 +25,7 @@ except (ImportError, AssertionError, TypeError):


def _log_images(path, prefix=''):
    """Logs images at specified path with an optional prefix using DVCLive."""
    if live:
        name = path.name

@@ -38,6 +39,7 @@ def _log_images(path, prefix=''):


def _log_plots(plots, prefix=''):
    """Logs plot images for training progress if they have not been previously processed."""
    for name, params in plots.items():
        timestamp = params['timestamp']
        if _processed_plots.get(name) != timestamp:
@@ -46,6 +48,7 @@ def _log_plots(plots, prefix=''):


def _log_confusion_matrix(validator):
    """Logs the confusion matrix for the given validator using DVCLive."""
    targets = []
    preds = []
    matrix = validator.confusion_matrix.matrix
@@ -62,6 +65,7 @@ def _log_confusion_matrix(validator):


def on_pretrain_routine_start(trainer):
    """Initializes DVCLive logger for training metadata during pre-training routine."""
    try:
        global live
        live = dvclive.Live(save_dvc_exp=True, cache_images=True)
@@ -71,20 +75,24 @@ def on_pretrain_routine_start(trainer):


def on_pretrain_routine_end(trainer):
    """Logs plots related to the training process at the end of the pretraining routine."""
    _log_plots(trainer.plots, 'train')


def on_train_start(trainer):
    """Logs the training parameters if DVCLive logging is active."""
    if live:
        live.log_params(trainer.args)


def on_train_epoch_start(trainer):
    """Sets the global variable _training_epoch value to True at the start of training each epoch."""
    global _training_epoch
    _training_epoch = True


def on_fit_epoch_end(trainer):
    """Logs training metrics and model info, and advances to next step on the end of each fit epoch."""
    global _training_epoch
    if live and _training_epoch:
        all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr}
@@ -104,6 +112,7 @@ def on_fit_epoch_end(trainer):


def on_train_end(trainer):
    """Logs the best metrics, plots, and confusion matrix at the end of training if DVCLive is active."""
    if live:
        # At the end log the best metrics. It runs validator on the best model internally.
        all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr}
@@ -31,14 +31,13 @@ def _log_images(imgs_dict, group=''):


def _log_plot(title, plot_path):
    """Log plots to the NeptuneAI experiment logger."""
    """
    Log image as plot in the plot section of NeptuneAI
    Log plots to the NeptuneAI experiment logger.

    arguments:
        title (str) Title of the plot
        plot_path (PosixPath or str) Path to the saved image file
    """
    Args:
        title (str): Title of the plot.
        plot_path (PosixPath | str): Path to the saved image file.
    """
    import matplotlib.image as mpimg
    import matplotlib.pyplot as plt

@@ -17,6 +17,7 @@ except (ImportError, AssertionError):


def _log_plots(plots, step):
    """Logs plots from the input dictionary if they haven't been logged already at the specified step."""
    for name, params in plots.items():
        timestamp = params['timestamp']
        if _processed_plots.get(name) != timestamp:
@@ -64,8 +64,8 @@ def parse_requirements(file_path=ROOT.parent / 'requirements.txt', package=''):

def parse_version(version='0.0.0') -> tuple:
    """
    Convert a version string to a tuple of integers, ignoring any extra non-numeric string attached to the version.
    This function replaces deprecated 'pkg_resources.parse_version(v)'
    Convert a version string to a tuple of integers, ignoring any extra non-numeric string attached to the version. This
    function replaces deprecated 'pkg_resources.parse_version(v)'.

    Args:
        version (str): Version string, i.e. '2.0.1+cpu'
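
A regex-based sketch of what such a parser does; this illustrates the behavior described in the docstring, not necessarily the exact implementation in the diff:

import re

def parse_version(version='0.0.0') -> tuple:
    """Return the leading numeric components of a version string as ints."""
    return tuple(int(x) for x in re.findall(r'\d+', version)[:3])

assert parse_version('2.0.1+cpu') == (2, 0, 1)
assert parse_version('1.13') == (1, 13)
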
@@ -372,8 +372,10 @@ def check_torchvision():
    Checks the installed versions of PyTorch and Torchvision to ensure they're compatible.

    This function checks the installed versions of PyTorch and Torchvision, and warns if they're incompatible according
    to the provided compatibility table based on https://github.com/pytorch/vision#installation. The
    compatibility table is a dictionary where the keys are PyTorch versions and the values are lists of compatible
    to the provided compatibility table based on:
    https://github.com/pytorch/vision#installation.

    The compatibility table is a dictionary where the keys are PyTorch versions and the values are lists of compatible
    Torchvision versions.
    """

@@ -527,9 +529,9 @@ def collect_system_info():

def check_amp(model):
    """
    This function checks the PyTorch Automatic Mixed Precision (AMP) functionality of a YOLOv8 model.
    If the checks fail, it means there are anomalies with AMP on the system that may cause NaN losses or zero-mAP
    results, so AMP will be disabled during training.
    This function checks the PyTorch Automatic Mixed Precision (AMP) functionality of a YOLOv8 model. If the checks
    fail, it means there are anomalies with AMP on the system that may cause NaN losses or zero-mAP results, so AMP will
    be disabled during training.

    Args:
        model (nn.Module): A YOLOv8 model instance.
@@ -606,7 +608,8 @@ def print_args(args: Optional[dict] = None, show_file=True, show_func=False):


def cuda_device_count() -> int:
    """Get the number of NVIDIA GPUs available in the environment.
    """
    Get the number of NVIDIA GPUs available in the environment.

    Returns:
        (int): The number of NVIDIA GPUs available.
@@ -626,7 +629,8 @@ def cuda_device_count() -> int:


def cuda_is_available() -> bool:
    """Check if CUDA is available in the environment.
    """
    Check if CUDA is available in the environment.

    Returns:
        (bool): True if one or more NVIDIA GPUs are available, False otherwise.
@@ -13,7 +13,8 @@ from .torch_utils import TORCH_1_9


def find_free_network_port() -> int:
    """Finds a free port on localhost.
    """
    Finds a free port on localhost.

    It is useful in single-node training when we don't want to connect to a real main node but have to set the
    `MASTER_PORT` environment variable.
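
The standard trick for this is to bind a socket to port 0 and let the OS pick; a minimal self-contained sketch:

import os
import socket

def find_free_network_port() -> int:
    """Ask the OS for an ephemeral port by binding to port 0."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(('127.0.0.1', 0))
        return s.getsockname()[1]  # the port the OS assigned

os.environ['MASTER_PORT'] = str(find_free_network_port())
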
@@ -69,8 +69,8 @@ def delete_dsstore(path, files_to_delete=('.DS_Store', '__MACOSX')):

def zip_directory(directory, compress=True, exclude=('.DS_Store', '__MACOSX'), progress=True):
    """
    Zips the contents of a directory, excluding files containing strings in the exclude list.
    The resulting zip file is named after the directory and placed alongside it.
    Zips the contents of a directory, excluding files containing strings in the exclude list. The resulting zip file is
    named after the directory and placed alongside it.

    Args:
        directory (str | Path): The path to the directory to be zipped.
@@ -341,7 +341,11 @@ def get_github_assets(repo='ultralytics/assets', version='latest', retry=False):


def attempt_download_asset(file, repo='ultralytics/assets', release='v0.0.0'):
    """Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v6.2', etc."""
    """
    Attempt file download from GitHub release assets if not found locally.

    release = 'latest', 'v6.2', etc.
    """
    from ultralytics.utils import SETTINGS  # scoped for circular import

    # YOLOv3/5u updates
@@ -30,9 +30,9 @@ class WorkingDirectory(contextlib.ContextDecorator):
@contextmanager
def spaces_in_path(path):
    """
    Context manager to handle paths with spaces in their names.
    If a path contains spaces, it replaces them with underscores, copies the file/directory to the new path,
    executes the context code block, then copies the file/directory back to its original location.
    Context manager to handle paths with spaces in their names. If a path contains spaces, it replaces them with
    underscores, copies the file/directory to the new path, executes the context code block, then copies the
    file/directory back to its original location.

    Args:
        path (str | Path): The original path.
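
Typical use wraps a tool that chokes on spaces in paths; a sketch, with a hypothetical path and the import location assumed from this diff:

from pathlib import Path
from ultralytics.utils.files import spaces_in_path  # import path assumed

original = Path('/data/my dataset/images')  # hypothetical path containing spaces
with spaces_in_path(original) as safe_path:
    # safe_path has underscores instead of spaces; changes are copied back on exit
    print(safe_path)
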
@@ -32,9 +32,14 @@ __all__ = 'Bboxes',  # tuple or list


class Bboxes:
    """Bounding Boxes class. Only numpy variables are supported."""
    """
    Bounding Boxes class.

    Only numpy variables are supported.
    """

    def __init__(self, bboxes, format='xyxy') -> None:
        """Initializes the Bboxes class with bounding box data in a specified format."""
        assert format in _formats, f'Invalid bounding box format: {format}, format must be one of {_formats}'
        bboxes = bboxes[None, :] if bboxes.ndim == 1 else bboxes
        assert bboxes.ndim == 2
@@ -194,7 +199,7 @@ class Instances:
        return self._bboxes.areas()

    def scale(self, scale_w, scale_h, bbox_only=False):
        """this might be similar with denormalize func but without normalized sign."""
        """This might be similar with denormalize func but without normalized sign."""
        self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h))
        if bbox_only:
            return
@@ -307,7 +312,11 @@ class Instances:
        self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h)

    def remove_zero_area_boxes(self):
        """Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height. This removes them."""
        """
        Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height.

        This removes them.
        """
        good = self.bbox_areas > 0
        if not all(good):
            self._bboxes = self._bboxes[good]
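
A small sketch of constructing a Bboxes container and converting formats; the import path and convert() method are assumed from this module's conventions:

import numpy as np
from ultralytics.utils.instance import Bboxes  # import path assumed

boxes = Bboxes(np.array([[10., 20., 50., 100.]]), format='xyxy')
boxes.convert('xywh')  # in-place format conversion
print(boxes.bboxes)    # -> [[30. 60. 40. 80.]] (center x, center y, w, h)
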
@@ -13,7 +13,11 @@ from .tal import bbox2dist


class VarifocalLoss(nn.Module):
    """Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367."""
    """
    Varifocal loss by Zhang et al.

    https://arxiv.org/abs/2008.13367.
    """

    def __init__(self):
        """Initialize the VarifocalLoss class."""
@@ -33,6 +37,7 @@ class FocalLoss(nn.Module):
    """Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""

    def __init__(self):
        """Initializer for FocalLoss class with no parameters."""
        super().__init__()

    @staticmethod
@@ -93,6 +98,7 @@ class KeypointLoss(nn.Module):
    """Criterion class for computing training losses."""

    def __init__(self, sigmas) -> None:
        """Initialize the KeypointLoss class."""
        super().__init__()
        self.sigmas = sigmas

@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Model validation metrics
"""
"""Model validation metrics."""

import math
import warnings
@@ -195,7 +193,7 @@ class ConfusionMatrix:

    def process_cls_preds(self, preds, targets):
        """
        Update confusion matrix for classification task
        Update confusion matrix for classification task.

        Args:
            preds (Array[N, min(nc,5)]): Predicted class labels.
@@ -308,9 +306,7 @@ class ConfusionMatrix:
            on_plot(plot_fname)

    def print(self):
        """
        Print the confusion matrix to the console.
        """
        """Print the confusion matrix to the console."""
        for i in range(self.nc + 1):
            LOGGER.info(' '.join(map(str, self.matrix[i])))

@@ -440,7 +436,6 @@ def ap_per_class(tp,
        f1 (np.ndarray): F1-score values at each confidence threshold.
        ap (np.ndarray): Average precision for each class at different IoU thresholds.
        unique_classes (np.ndarray): An array of unique classes that have data.

    """

    # Sort by objectness
@@ -498,32 +493,33 @@ def ap_per_class(tp,

class Metric(SimpleClass):
    """
    Class for computing evaluation metrics for YOLOv8 model.

    Attributes:
        p (list): Precision for each class. Shape: (nc,).
        r (list): Recall for each class. Shape: (nc,).
        f1 (list): F1 score for each class. Shape: (nc,).
        all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10).
        ap_class_index (list): Index of class for each AP score. Shape: (nc,).
        nc (int): Number of classes.

    Methods:
        ap50(): AP at IoU threshold of 0.5 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
        ap(): AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
        mp(): Mean precision of all classes. Returns: Float.
        mr(): Mean recall of all classes. Returns: Float.
        map50(): Mean AP at IoU threshold of 0.5 for all classes. Returns: Float.
        map75(): Mean AP at IoU threshold of 0.75 for all classes. Returns: Float.
        map(): Mean AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: Float.
        mean_results(): Mean of results, returns mp, mr, map50, map.
        class_result(i): Class-aware result, returns p[i], r[i], ap50[i], ap[i].
        maps(): mAP of each class. Returns: Array of mAP scores, shape: (nc,).
        fitness(): Model fitness as a weighted combination of metrics. Returns: Float.
        update(results): Update metric attributes with new evaluation results.
    """

    def __init__(self) -> None:
        """Initializes a Metric instance for computing evaluation metrics for the YOLOv8 model."""
        self.p = []  # (nc, )
        self.r = []  # (nc, )
        self.f1 = []  # (nc, )
@@ -606,12 +602,12 @@ class Metric(SimpleClass):
        return [self.mp, self.mr, self.map50, self.map]

    def class_result(self, i):
        """class-aware result, return p[i], r[i], ap50[i], ap[i]."""
        """Class-aware result, return p[i], r[i], ap50[i], ap[i]."""
        return self.p[i], self.r[i], self.ap50[i], self.ap[i]

    @property
    def maps(self):
        """mAP of each class."""
        """MAP of each class."""
        maps = np.zeros(self.nc) + self.map
        for i, c in enumerate(self.ap_class_index):
            maps[c] = self.ap[i]
@@ -672,6 +668,7 @@ class DetMetrics(SimpleClass):
    """

    def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
        """Initialize a DetMetrics instance with a save directory, plot flag, callback function, and class names."""
        self.save_dir = save_dir
        self.plot = plot
        self.on_plot = on_plot
@@ -756,6 +753,7 @@ class SegmentMetrics(SimpleClass):
    """

    def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
        """Initialize a SegmentMetrics instance with a save directory, plot flag, callback function, and class names."""
        self.save_dir = save_dir
        self.plot = plot
        self.on_plot = on_plot
@@ -865,6 +863,7 @@ class PoseMetrics(SegmentMetrics):
    """

    def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
        """Initialize the PoseMetrics class with directory path, class names, and plotting options."""
        super().__init__(save_dir, plot, names)
        self.save_dir = save_dir
        self.plot = plot
@@ -954,6 +953,7 @@ class ClassifyMetrics(SimpleClass):
    """

    def __init__(self) -> None:
        """Initialize a ClassifyMetrics instance."""
        self.top1 = 0
        self.top5 = 0
        self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
@@ -50,6 +50,7 @@ class Profile(contextlib.ContextDecorator):
        self.t += self.dt  # accumulate dt

    def __str__(self):
        """Returns a human-readable string representing the accumulated elapsed time in the profiler."""
        return f'Elapsed time is {self.t} s'

    def time(self):
@@ -303,7 +304,7 @@ def clip_coords(coords, shape):

def scale_image(masks, im0_shape, ratio_pad=None):
    """
    Takes a mask, and resizes it to the original image size
    Takes a mask, and resizes it to the original image size.

    Args:
        masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
@@ -403,8 +404,8 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):

def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
    """
    Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format.
    x, y, width and height are normalized to image dimensions
    Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format. x, y,
    width and height are normalized to image dimensions.

    Args:
        x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.
@@ -445,7 +446,7 @@ def xywh2ltwh(x):

def xyxy2ltwh(x):
    """
    Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right
    Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right.

    Args:
        x (np.ndarray | torch.Tensor): The input tensor with the bounding boxes coordinates in the xyxy format
@@ -461,7 +462,7 @@ def xyxy2ltwh(x):

def ltwh2xywh(x):
    """
    Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center
    Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center.

    Args:
        x (torch.Tensor): the input tensor
@@ -544,7 +545,7 @@ def xywhr2xyxyxyxy(center):

def ltwh2xyxy(x):
    """
    It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.

    Args:
        x (np.ndarray | torch.Tensor): the input image
@@ -616,8 +617,8 @@ def crop_mask(masks, boxes):

def process_mask_upsample(protos, masks_in, bboxes, shape):
    """
    Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher
    quality but is slower.
    Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher quality
    but is slower.

    Args:
        protos (torch.Tensor): [mask_dim, mask_h, mask_w]
@@ -713,7 +714,7 @@ def scale_masks(masks, shape, padding=True):

def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False, padding=True):
    """
    Rescale segment coordinates (xy) from img1_shape to img0_shape
    Rescale segment coordinates (xy) from img1_shape to img0_shape.

    Args:
        img1_shape (tuple): The shape of the image that the coords are from.
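
The box-format helpers above are all linear rearrangements, so they compose into round trips. A quick standalone numpy sketch over one box, for illustration:

import numpy as np

xyxy = np.array([[10., 20., 50., 100.]])  # x1, y1, x2, y2

ltwh = xyxy.copy()
ltwh[:, 2:] -= ltwh[:, :2]                # xyxy -> ltwh: w = x2 - x1, h = y2 - y1

xywh = ltwh.copy()
xywh[:, :2] += xywh[:, 2:] / 2            # ltwh -> xywh: top-left -> center

back = xywh.copy()
back[:, :2] -= back[:, 2:] / 2            # xywh -> ltwh
back[:, 2:] += back[:, :2]                # ltwh -> xyxy
assert np.allclose(back, xyxy)            # round trip recovers the original box
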
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Monkey patches to update/extend functionality of existing functions
"""
"""Monkey patches to update/extend functionality of existing functions."""

from pathlib import Path

@@ -14,7 +12,8 @@ _imshow = cv2.imshow  # copy to avoid recursion errors


def imread(filename: str, flags: int = cv2.IMREAD_COLOR):
    """Read an image from a file.
    """
    Read an image from a file.

    Args:
        filename (str): Path to the file to read.
@@ -27,7 +26,8 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR):


def imwrite(filename: str, img: np.ndarray, params=None):
    """Write an image to a file.
    """
    Write an image to a file.

    Args:
        filename (str): Path to the file to write.
@@ -45,7 +45,8 @@ def imwrite(filename: str, img: np.ndarray, params=None):


def imshow(winname: str, mat: np.ndarray):
    """Displays an image in the specified window.
    """
    Displays an image in the specified window.

    Args:
        winname (str): Name of the window.
@@ -59,7 +60,8 @@ _torch_save = torch.save  # copy to avoid recursion errors


def torch_save(*args, **kwargs):
    """Use dill (if exists) to serialize the lambda functions where pickle does not do this.
    """
    Use dill (if exists) to serialize the lambda functions where pickle does not do this.

    Args:
        *args (tuple): Positional arguments to pass to torch.save.
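
The point of these patches is to route around cv2's poor handling of non-ASCII paths. A sketch of the common encode/decode pattern; this mirrors the usual approach, not necessarily the exact bodies in the diff:

from pathlib import Path

import cv2
import numpy as np

def imread(filename: str, flags: int = cv2.IMREAD_COLOR):
    # np.fromfile + imdecode handles non-ASCII paths that cv2.imread may reject
    return cv2.imdecode(np.fromfile(filename, np.uint8), flags)

def imwrite(filename: str, img: np.ndarray, params=None):
    try:
        # imencode + tofile avoids cv2.imwrite's path-encoding issues
        cv2.imencode(Path(filename).suffix, img, params or [])[1].tofile(filename)
        return True
    except Exception:
        return False
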
@@ -316,7 +316,8 @@ def plot_labels(boxes, cls, names=(), save_dir=Path(''), on_plot=None):


def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True):
    """Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop.
    """
    Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop.

    This function takes a bounding box and an image, and then saves a cropped portion of the image according
    to the bounding box. Optionally, the crop can be squared, and the function allows for gain and padding
@@ -205,7 +205,11 @@ def fuse_deconv_and_bn(deconv, bn):


def model_info(model, detailed=False, verbose=True, imgsz=640):
    """Model information. imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320]."""
    """
    Model information.

    imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320].
    """
    if not verbose:
        return
    n_p = get_num_params(model)  # number of parameters
@@ -517,13 +521,11 @@ def profile(input, ops, n=10, device=None):


class EarlyStopping:
    """
    Early stopping class that stops training when a specified number of epochs have passed without improvement.
    """
    """Early stopping class that stops training when a specified number of epochs have passed without improvement."""

    def __init__(self, patience=50):
        """
        Initialize early stopping object
        Initialize early stopping object.

        Args:
            patience (int, optional): Number of epochs to wait after fitness stops improving before stopping.
@@ -535,7 +537,7 @@ class EarlyStopping:

    def __call__(self, epoch, fitness):
        """
        Check whether to stop training
        Check whether to stop training.

        Args:
            epoch (int): Current epoch of training
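
A sketch of the stopper in a training loop. The fitness values are made up, the two-argument call follows the docstring above, and the import path and the convention that the call returns True when training should stop are assumptions:

from ultralytics.utils.torch_utils import EarlyStopping  # import path assumed

stopper = EarlyStopping(patience=3)
for epoch, fitness in enumerate([0.50, 0.60, 0.58, 0.57, 0.56, 0.55]):
    if stopper(epoch, fitness):  # assumed to return True after 3 epochs without improvement
        print(f'stopping at epoch {epoch}')
        break
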
@@ -7,7 +7,8 @@ import numpy as np


class TritonRemoteModel:
    """Client for interacting with a remote Triton Inference Server model.
    """
    Client for interacting with a remote Triton Inference Server model.

    Attributes:
        endpoint (str): The name of the model on the Triton server.