diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e2b477da..1dc23a87 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -62,6 +62,11 @@ repos:
args:
- --ignore-words-list=crate,nd,strack,dota,ane,segway,fo
+ - repo: https://github.com/PyCQA/docformatter
+ rev: v1.7.5
+ hooks:
+ - id: docformatter
+
# - repo: https://github.com/asottile/yesqa
# rev: v1.4.0
# hooks:
diff --git a/docs/build_reference.py b/docs/build_reference.py
index 3641b132..d8809234 100644
--- a/docs/build_reference.py
+++ b/docs/build_reference.py
@@ -18,7 +18,15 @@ CODE_DIR = ROOT
REFERENCE_DIR = ROOT.parent / 'docs/reference'
-def extract_classes_and_functions(filepath):
+def extract_classes_and_functions(filepath: Path):
+ """Extracts class and function names from a given Python file.
+
+ Args:
+ filepath (Path): The path to the Python file.
+
+ Returns:
+ (tuple): A tuple containing lists of class and function names.
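+
+ Example (a minimal usage sketch; the path below is illustrative):
+ >>> classes, functions = extract_classes_and_functions(Path('ultralytics/utils/ops.py'))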
+ """
with open(filepath, 'r') as file:
content = file.read()
@@ -31,7 +39,15 @@ def extract_classes_and_functions(filepath):
return classes, functions
-def create_markdown(py_filepath, module_path, classes, functions):
+def create_markdown(py_filepath: Path, module_path: str, classes: list, functions: list):
+ """Creates a Markdown file containing the API reference for the given Python module.
+
+ Args:
+ py_filepath (Path): The path to the Python file.
+ module_path (str): The import path for the Python module.
+ classes (list): A list of class names within the module.
+ functions (list): A list of function names within the module.
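+
+ Example (arguments are illustrative; real values come from extract_classes_and_functions):
+ >>> create_markdown(Path('reference/data/augment.py'), 'ultralytics.data.augment', ['Mosaic'], ['v8_transforms'])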
+ """
md_filepath = py_filepath.with_suffix('.md')
# Read existing content and keep header content between first two ---
@@ -64,17 +80,35 @@ def create_markdown(py_filepath, module_path, classes, functions):
def nested_dict():
+ """Creates and returns a nested defaultdict.
+
+ Returns:
+ (defaultdict): A nested defaultdict object.
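+
+ Example (nested keys can be assigned without explicit creation):
+ >>> d = nested_dict()
+ >>> d['a']['b']['c'] = 1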
+ """
return defaultdict(nested_dict)
-def sort_nested_dict(d):
+def sort_nested_dict(d: dict):
+ """Sorts a nested dictionary recursively.
+
+ Args:
+ d (dict): The dictionary to sort.
+
+ Returns:
+ (dict): The sorted dictionary.
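+
+ Example:
+ >>> sort_nested_dict({'b': 2, 'a': {'d': 4, 'c': 3}})
+ {'a': {'c': 3, 'd': 4}, 'b': 2}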
+ """
return {
key: sort_nested_dict(value) if isinstance(value, dict) else value
for key, value in sorted(d.items())
}
-def create_nav_menu_yaml(nav_items):
+def create_nav_menu_yaml(nav_items: list):
+ """Creates a YAML file for the navigation menu based on the provided list of items.
+
+ Args:
+ nav_items (list): A list of relative file paths to Markdown files for the navigation menu.
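+
+ Example (paths are illustrative):
+ >>> create_nav_menu_yaml(['reference/data/augment.md', 'reference/data/base.md'])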
+ """
nav_tree = nested_dict()
for item_str in nav_items:
@@ -90,6 +124,7 @@ def create_nav_menu_yaml(nav_items):
nav_tree_sorted = sort_nested_dict(nav_tree)
def _dict_to_yaml(d, level=0):
+ """Converts a nested dictionary to a YAML-formatted string with indentation."""
yaml_str = ''
indent = ' ' * level
for k, v in d.items():
@@ -105,6 +140,7 @@ def create_nav_menu_yaml(nav_items):
def main():
+ """Main function to extract class and function names, create Markdown files, and generate a YAML navigation menu."""
nav_items = []
for root, _, files in os.walk(CODE_DIR):
for file in files:
diff --git a/docs/reference/models/utils/ops.md b/docs/reference/models/utils/ops.md
index 9538ee48..d1f93358 100644
--- a/docs/reference/models/utils/ops.md
+++ b/docs/reference/models/utils/ops.md
@@ -16,7 +16,3 @@ keywords: Ultralytics, YOLO, HungarianMatcher, inverse_sigmoid, detection models
---
## ::: ultralytics.models.utils.ops.get_cdn_group
-
----
-## ::: ultralytics.models.utils.ops.inverse_sigmoid
-
diff --git a/examples/YOLOv8-ONNXRuntime/main.py b/examples/YOLOv8-ONNXRuntime/main.py
index 8d03182b..ec768713 100644
--- a/examples/YOLOv8-ONNXRuntime/main.py
+++ b/examples/YOLOv8-ONNXRuntime/main.py
@@ -9,11 +9,12 @@ from ultralytics.utils import ASSETS, yaml_load
from ultralytics.utils.checks import check_requirements, check_yaml
-class Yolov8:
+class YOLOv8:
+ """YOLOv8 object detection model class for handling inference and visualization."""
def __init__(self, onnx_model, input_image, confidence_thres, iou_thres):
"""
- Initializes an instance of the Yolov8 class.
+ Initializes an instance of the YOLOv8 class.
Args:
onnx_model: Path to the ONNX model.
@@ -213,8 +214,8 @@ if __name__ == '__main__':
# Check the requirements and select the appropriate backend (CPU or GPU)
check_requirements('onnxruntime-gpu' if torch.cuda.is_available() else 'onnxruntime')
- # Create an instance of the Yolov8 class with the specified arguments
- detection = Yolov8(args.model, args.img, args.conf_thres, args.iou_thres)
+ # Create an instance of the YOLOv8 class with the specified arguments
+ detection = YOLOv8(args.model, args.img, args.conf_thres, args.iou_thres)
# Perform object detection and obtain the output image
output_image = detection.main()
diff --git a/examples/YOLOv8-OpenCV-ONNX-Python/main.py b/examples/YOLOv8-OpenCV-ONNX-Python/main.py
index 76802f0a..78b0b08e 100644
--- a/examples/YOLOv8-OpenCV-ONNX-Python/main.py
+++ b/examples/YOLOv8-OpenCV-ONNX-Python/main.py
@@ -7,11 +7,22 @@ from ultralytics.utils import ASSETS, yaml_load
from ultralytics.utils.checks import check_yaml
CLASSES = yaml_load(check_yaml('coco128.yaml'))['names']
-
colors = np.random.uniform(0, 255, size=(len(CLASSES), 3))
def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
+ """
+ Draws bounding boxes on the input image based on the provided arguments.
+
+ Args:
+ img (numpy.ndarray): The input image to draw the bounding box on.
+ class_id (int): Class ID of the detected object.
+ confidence (float): Confidence score of the detected object.
+ x (int): X-coordinate of the top-left corner of the bounding box.
+ y (int): Y-coordinate of the top-left corner of the bounding box.
+ x_plus_w (int): X-coordinate of the bottom-right corner of the bounding box.
+ y_plus_h (int): Y-coordinate of the bottom-right corner of the bounding box.
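+
+ Example (coordinate and confidence values are illustrative; 'img' is an image array loaded beforehand):
+ >>> draw_bounding_box(img, class_id=0, confidence=0.92, x=50, y=40, x_plus_w=200, y_plus_h=180)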
+ """
label = f'{CLASSES[class_id]} ({confidence:.2f})'
color = colors[class_id]
cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
@@ -19,18 +30,39 @@ def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
def main(onnx_model, input_image):
+ """
+ Main function to load ONNX model, perform inference, draw bounding boxes, and display the output image.
+
+ Args:
+ onnx_model (str): Path to the ONNX model.
+ input_image (str): Path to the input image.
+
+ Returns:
+ (list): List of dictionaries containing detection information such as class_id, class_name, confidence, etc.
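+
+ Example (paths are illustrative):
+ >>> detections = main('yolov8n.onnx', 'bus.jpg')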
+ """
+ # Load the ONNX model
model: cv2.dnn.Net = cv2.dnn.readNetFromONNX(onnx_model)
+
+ # Read the input image
original_image: np.ndarray = cv2.imread(input_image)
[height, width, _] = original_image.shape
+
+ # Prepare a square image for inference
length = max((height, width))
image = np.zeros((length, length, 3), np.uint8)
image[0:height, 0:width] = original_image
+
+ # Calculate scale factor
scale = length / 640
+ # Preprocess the image and prepare blob for model
blob = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(640, 640), swapRB=True)
model.setInput(blob)
+
+ # Perform inference
outputs = model.forward()
+ # Prepare output array
outputs = np.array([cv2.transpose(outputs[0])])
rows = outputs.shape[1]
@@ -38,6 +70,7 @@ def main(onnx_model, input_image):
scores = []
class_ids = []
+ # Iterate through output to collect bounding boxes, confidence scores, and class IDs
for i in range(rows):
classes_scores = outputs[0][i][4:]
(minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores)
@@ -49,9 +82,12 @@ def main(onnx_model, input_image):
scores.append(maxScore)
class_ids.append(maxClassIndex)
+ # Apply NMS (Non-maximum suppression)
result_boxes = cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45, 0.5)
detections = []
+
+ # Iterate through NMS results to draw bounding boxes and labels
for i in range(len(result_boxes)):
index = result_boxes[i]
box = boxes[index]
@@ -65,6 +101,7 @@ def main(onnx_model, input_image):
draw_bounding_box(original_image, class_ids[index], scores[index], round(box[0] * scale), round(box[1] * scale),
round((box[0] + box[2]) * scale), round((box[1] + box[3]) * scale))
+ # Display the image with bounding boxes
cv2.imshow('image', original_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
@@ -74,7 +111,7 @@ def main(onnx_model, input_image):
if __name__ == '__main__':
parser = argparse.ArgumentParser()
- parser.add_argument('--model', default='yolov8n.onnx', help='Input your onnx model.')
+ parser.add_argument('--model', default='yolov8n.onnx', help='Input your ONNX model.')
parser.add_argument('--img', default=str(ASSETS / 'bus.jpg'), help='Path to input image.')
args = parser.parse_args()
main(args.model, args.img)
diff --git a/examples/YOLOv8-Region-Counter/yolov8_region_counter.py b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py
index bdc40132..dd0e476f 100644
--- a/examples/YOLOv8-Region-Counter/yolov8_region_counter.py
+++ b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py
@@ -33,10 +33,6 @@ counting_regions = [
}, ]
-def is_inside_polygon(point, polygon):
- return polygon.contains(Point(point))
-
-
def mouse_callback(event, x, y, flags, param):
"""Mouse call back event."""
global current_region
@@ -44,7 +40,7 @@ def mouse_callback(event, x, y, flags, param):
# Mouse left button down event
if event == cv2.EVENT_LBUTTONDOWN:
for region in counting_regions:
- if is_inside_polygon((x, y), region['polygon']):
+ if region['polygon'].contains(Point((x, y))):
current_region = region
current_region['dragging'] = True
current_region['offset_x'] = x
@@ -150,7 +146,7 @@ def run(
# Check if detection inside region
for region in counting_regions:
- if is_inside_polygon((x, y), region['polygon']):
+ if region['polygon'].contains(Point((x, y))):
region['counts'] += 1
# Draw regions (Polygons/Rectangles)
diff --git a/setup.cfg b/setup.cfg
index a7d16ab9..ff364449 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -60,3 +60,12 @@ SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET = True
SPLIT_BEFORE_CLOSING_BRACKET = False
SPLIT_BEFORE_FIRST_ARGUMENT = False
# EACH_DICT_ENTRY_ON_SEPARATE_LINE = False
+
+[docformatter]
+wrap-summaries = 120
+wrap-descriptions = 120
+in-place = true
+make-summary-multi-line = false
+pre-summary-newline = true
+force-wrap = false
+close-quotes-on-newline = true
diff --git a/setup.py b/setup.py
index 1132ffa1..8fb107c8 100644
--- a/setup.py
+++ b/setup.py
@@ -12,6 +12,12 @@ README = (PARENT / 'README.md').read_text(encoding='utf-8')
def get_version():
+ """
+ Retrieve the version number from the 'ultralytics/__init__.py' file.
+
+ Returns:
+ (str): The version number extracted from the '__version__' attribute in the 'ultralytics/__init__.py' file.
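+
+ Example:
+ >>> get_version()  # e.g. '8.0.0'; the actual value is read from ultralytics/__init__.py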
+ """
file = PARENT / 'ultralytics/__init__.py'
return re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', file.read_text(encoding='utf-8'), re.M)[1]
@@ -24,7 +30,7 @@ def parse_requirements(file_path: Path):
file_path (str | Path): Path to the requirements.txt file.
Returns:
- List[str]: List of parsed requirements.
+ (List[str]): List of parsed requirements.
"""
requirements = []
diff --git a/tests/conftest.py b/tests/conftest.py
index ac909310..59955bd1 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -9,7 +9,8 @@ TMP = Path(__file__).resolve().parent / 'tmp' # temp directory for test files
def pytest_addoption(parser):
- """Add custom command-line options to pytest.
+ """
+ Add custom command-line options to pytest.
Args:
parser (pytest.config.Parser): The pytest parser object.
@@ -18,7 +19,8 @@ def pytest_addoption(parser):
def pytest_configure(config):
- """Register custom markers to avoid pytest warnings.
+ """
+ Register custom markers to avoid pytest warnings.
Args:
config (pytest.config.Config): The pytest config object.
@@ -27,7 +29,8 @@ def pytest_configure(config):
def pytest_runtest_setup(item):
- """Setup hook to skip tests marked as slow if the --slow option is not provided.
+ """
+ Setup hook to skip tests marked as slow if the --slow option is not provided.
Args:
item (pytest.Item): The test item object.
diff --git a/tests/test_cli.py b/tests/test_cli.py
index a935aa0e..788651b4 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -22,11 +22,12 @@ EXPORT_ARGS = [
def run(cmd):
- # Run a subprocess command with check=True
+ """Execute a shell command using subprocess."""
subprocess.run(cmd.split(), check=True)
def test_special_modes():
+ """Test various special command modes of YOLO."""
run('yolo help')
run('yolo checks')
run('yolo version')
@@ -36,31 +37,37 @@ def test_special_modes():
@pytest.mark.parametrize('task,model,data', TASK_ARGS)
def test_train(task, model, data):
+ """Test YOLO training for a given task, model, and data."""
run(f'yolo train {task} model={model}.yaml data={data} imgsz=32 epochs=1 cache=disk')
@pytest.mark.parametrize('task,model,data', TASK_ARGS)
def test_val(task, model, data):
+ """Test YOLO validation for a given task, model, and data."""
run(f'yolo val {task} model={WEIGHTS_DIR / model}.pt data={data} imgsz=32 save_txt save_json')
@pytest.mark.parametrize('task,model,data', TASK_ARGS)
def test_predict(task, model, data):
+ """Test YOLO prediction on sample assets for a given task and model."""
run(f'yolo predict model={WEIGHTS_DIR / model}.pt source={ASSETS} imgsz=32 save save_crop save_txt')
@pytest.mark.parametrize('model,format', EXPORT_ARGS)
def test_export(model, format):
+ """Test exporting a YOLO model to different formats."""
run(f'yolo export model={WEIGHTS_DIR / model}.pt format={format} imgsz=32')
def test_rtdetr(task='detect', model='yolov8n-rtdetr.yaml', data='coco8.yaml'):
+ """Test the RTDETR functionality with the Ultralytics framework."""
# Warning: MUST use imgsz=640
- run(f'yolo train {task} model={model} data={data} --imgsz= 640 epochs =1, cache = disk') # add coma, spaces to args
+ run(f'yolo train {task} model={model} data={data} --imgsz= 640 epochs =1, cache = disk') # add comma, spaces to args
run(f"yolo predict {task} model={model} source={ASSETS / 'bus.jpg'} imgsz=640 save save_crop save_txt")
def test_fastsam(task='segment', model=WEIGHTS_DIR / 'FastSAM-s.pt', data='coco8-seg.yaml'):
+ """Test FastSAM segmentation functionality within Ultralytics."""
source = ASSETS / 'bus.jpg'
run(f'yolo segment val {task} model={model} data={data} imgsz=32')
@@ -97,6 +104,7 @@ def test_fastsam(task='segment', model=WEIGHTS_DIR / 'FastSAM-s.pt', data='coco8
def test_mobilesam():
+ """Test MobileSAM segmentation functionality using Ultralytics."""
from ultralytics import SAM
# Load the model
@@ -121,5 +129,6 @@ def test_mobilesam():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
@pytest.mark.skipif(CUDA_DEVICE_COUNT < 2, reason='DDP is not available')
def test_train_gpu(task, model, data):
+ """Test YOLO training on GPU(s) for various tasks and models."""
run(f'yolo train {task} model={model}.yaml data={data} imgsz=32 epochs=1 device=0') # single GPU
run(f'yolo train {task} model={model}.pt data={data} imgsz=32 epochs=1 device=0,1') # multi GPU
diff --git a/tests/test_cuda.py b/tests/test_cuda.py
index 92ecbe3d..15c2259b 100644
--- a/tests/test_cuda.py
+++ b/tests/test_cuda.py
@@ -1,4 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
+
import contextlib
import pytest
@@ -17,18 +18,21 @@ BUS = ASSETS / 'bus.jpg'
def test_checks():
+ """Validate CUDA settings against torch CUDA functions."""
assert torch.cuda.is_available() == CUDA_IS_AVAILABLE
assert torch.cuda.device_count() == CUDA_DEVICE_COUNT
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_train():
+ """Test model training on a minimal dataset."""
device = 0 if CUDA_DEVICE_COUNT == 1 else [0, 1]
YOLO(MODEL).train(data=DATA, imgsz=64, epochs=1, device=device) # requires imgsz>=64
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_predict_multiple_devices():
+ """Validate model prediction on multiple devices."""
model = YOLO('yolov8n.pt')
model = model.cpu()
assert str(model.device) == 'cpu'
@@ -53,6 +57,7 @@ def test_predict_multiple_devices():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_autobatch():
+ """Check batch size for YOLO model using autobatch."""
from ultralytics.utils.autobatch import check_train_batch_size
check_train_batch_size(YOLO(MODEL).model.cuda(), imgsz=128, amp=True)
@@ -60,6 +65,7 @@ def test_autobatch():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_utils_benchmarks():
+ """Profile YOLO models for performance benchmarks."""
from ultralytics.utils.benchmarks import ProfileModels
# Pre-export a dynamic engine model to use dynamic inference
@@ -69,6 +75,7 @@ def test_utils_benchmarks():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_predict_sam():
+ """Test SAM model prediction with various prompts."""
from ultralytics import SAM
from ultralytics.models.sam import Predictor as SAMPredictor
@@ -102,6 +109,7 @@ def test_predict_sam():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_model_ray_tune():
+ """Tune YOLO model with Ray optimization library."""
with contextlib.suppress(RuntimeError): # RuntimeError may be caused by out-of-memory
YOLO('yolov8n-cls.yaml').tune(use_ray=True,
data='imagenet10',
@@ -115,12 +123,14 @@ def test_model_ray_tune():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_model_tune():
+ """Tune YOLO model for performance."""
YOLO('yolov8n-pose.pt').tune(data='coco8-pose.yaml', plots=False, imgsz=32, epochs=1, iterations=2, device='cpu')
YOLO('yolov8n-cls.pt').tune(data='imagenet10', plots=False, imgsz=32, epochs=1, iterations=2, device='cpu')
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_pycocotools():
+ """Validate model predictions using pycocotools."""
from ultralytics.models.yolo.detect import DetectionValidator
from ultralytics.models.yolo.pose import PoseValidator
from ultralytics.models.yolo.segment import SegmentationValidator
diff --git a/tests/test_engine.py b/tests/test_engine.py
index 6ea4d9f0..2d789583 100644
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -14,10 +14,12 @@ MODEL = WEIGHTS_DIR / 'yolov8n'
def test_func(*args): # noqa
+ """Test function callback."""
print('callback test passed')
def test_export():
+ """Test model exporting functionality."""
exporter = Exporter()
exporter.add_callback('on_export_start', test_func)
assert test_func in exporter.callbacks['on_export_start'], 'callback test failed'
@@ -26,6 +28,7 @@ def test_export():
def test_detect():
+ """Test object detection functionality."""
overrides = {'data': 'coco8.yaml', 'model': CFG_DET, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'coco8.yaml'
CFG.imgsz = 32
@@ -61,6 +64,7 @@ def test_detect():
def test_segment():
+ """Test image segmentation functionality."""
overrides = {'data': 'coco8-seg.yaml', 'model': CFG_SEG, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'coco8-seg.yaml'
CFG.imgsz = 32
@@ -98,6 +102,7 @@ def test_segment():
def test_classify():
+ """Test image classification functionality."""
overrides = {'data': 'imagenet10', 'model': CFG_CLS, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'imagenet10'
CFG.imgsz = 32
diff --git a/tests/test_python.py b/tests/test_python.py
index 3e49f570..bea8afe1 100644
--- a/tests/test_python.py
+++ b/tests/test_python.py
@@ -27,11 +27,13 @@ IS_TMP_WRITEABLE = is_dir_writeable(TMP)
def test_model_forward():
+ """Test the forward pass of the YOLO model."""
model = YOLO(CFG)
model(source=None, imgsz=32, augment=True) # also test no source and augment
def test_model_methods():
+ """Test various methods and properties of the YOLO model."""
model = YOLO(MODEL)
# Model methods
@@ -51,7 +53,7 @@ def test_model_methods():
def test_model_profile():
- # Test profile=True model argument
+ """Test profiling of the YOLO model with 'profile=True' argument."""
from ultralytics.nn.tasks import DetectionModel
model = DetectionModel() # build model
@@ -61,7 +63,7 @@ def test_model_profile():
@pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable')
def test_predict_txt():
- # Write a list of sources (file, dir, glob, recursive glob) to a txt file
+ """Test YOLO predictions with sources (file, dir, glob, recursive glob) specified in a text file."""
txt_file = TMP / 'sources.txt'
with open(txt_file, 'w') as f:
for x in [ASSETS / 'bus.jpg', ASSETS, ASSETS / '*', ASSETS / '**/*.jpg']:
@@ -70,6 +72,7 @@ def test_predict_txt():
def test_predict_img():
+ """Test YOLO prediction on various types of image sources."""
model = YOLO(MODEL)
seg_model = YOLO(WEIGHTS_DIR / 'yolov8n-seg.pt')
cls_model = YOLO(WEIGHTS_DIR / 'yolov8n-cls.pt')
@@ -105,7 +108,7 @@ def test_predict_img():
def test_predict_grey_and_4ch():
- # Convert SOURCE to greyscale and 4-ch
+ """Test YOLO prediction on SOURCE converted to greyscale and 4-channel images."""
im = Image.open(SOURCE)
directory = TMP / 'im4'
directory.mkdir(parents=True, exist_ok=True)
@@ -132,8 +135,11 @@ def test_predict_grey_and_4ch():
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
@pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable')
def test_track_stream():
- # Test YouTube streaming inference (short 10 frame video) with non-default ByteTrack tracker
- # imgsz=160 required for tracking for higher confidence and better matches
+ """
+ Test YouTube streaming tracking (short 10-frame video) with non-default ByteTrack tracker.
+
+ Note: imgsz=160 is required for tracking, yielding higher confidence and better matches.
+ """
import yaml
model = YOLO(MODEL)
@@ -153,37 +159,44 @@ def test_track_stream():
def test_val():
+ """Test the validation mode of the YOLO model."""
YOLO(MODEL).val(data='coco8.yaml', imgsz=32, save_hybrid=True)
def test_train_scratch():
+ """Test training the YOLO model from scratch."""
model = YOLO(CFG)
model.train(data='coco8.yaml', epochs=2, imgsz=32, cache='disk', batch=-1, close_mosaic=1, name='model')
model(SOURCE)
def test_train_pretrained():
+ """Test training the YOLO model from a pre-trained state."""
model = YOLO(WEIGHTS_DIR / 'yolov8n-seg.pt')
model.train(data='coco8-seg.yaml', epochs=1, imgsz=32, cache='ram', copy_paste=0.5, mixup=0.5, name=0)
model(SOURCE)
def test_export_torchscript():
+ """Test exporting the YOLO model to TorchScript format."""
f = YOLO(MODEL).export(format='torchscript', optimize=False)
YOLO(f)(SOURCE) # exported model inference
def test_export_onnx():
+ """Test exporting the YOLO model to ONNX format."""
f = YOLO(MODEL).export(format='onnx', dynamic=True)
YOLO(f)(SOURCE) # exported model inference
def test_export_openvino():
+ """Test exporting the YOLO model to OpenVINO format."""
f = YOLO(MODEL).export(format='openvino')
YOLO(f)(SOURCE) # exported model inference
def test_export_coreml():
+ """Test exporting the YOLO model to CoreML format."""
if not WINDOWS: # RuntimeError: BlobWriter not loaded with coremltools 7.0 on windows
if MACOS:
f = YOLO(MODEL).export(format='coreml')
@@ -193,7 +206,11 @@ def test_export_coreml():
def test_export_tflite(enabled=False):
- # TF suffers from install conflicts on Windows and macOS
+ """
+ Test exporting the YOLO model to TFLite format.
+
+ Note: TF suffers from install conflicts on Windows and macOS.
+ """
if enabled and LINUX:
model = YOLO(MODEL)
f = model.export(format='tflite')
@@ -201,7 +218,11 @@ def test_export_tflite(enabled=False):
def test_export_pb(enabled=False):
- # TF suffers from install conflicts on Windows and macOS
+ """
+ Test exporting the YOLO model to *.pb format.
+
+ Note: TF suffers from install conflicts on Windows and macOS.
+ """
if enabled and LINUX:
model = YOLO(MODEL)
f = model.export(format='pb')
@@ -209,18 +230,24 @@ def test_export_pb(enabled=False):
def test_export_paddle(enabled=False):
- # Paddle protobuf requirements conflicting with onnx protobuf requirements
+ """
+ Test exporting the YOLO model to Paddle format.
+
+ Note: Paddle protobuf requirements conflict with ONNX protobuf requirements.
+ """
if enabled:
YOLO(MODEL).export(format='paddle')
@pytest.mark.slow
def test_export_ncnn():
+ """Test exporting the YOLO model to NCNN format."""
f = YOLO(MODEL).export(format='ncnn')
YOLO(f)(SOURCE) # exported model inference
def test_all_model_yamls():
+ """Test YOLO model creation for all available YAML configurations."""
for m in (ROOT / 'cfg' / 'models').rglob('*.yaml'):
if 'rtdetr' in m.name:
if TORCH_1_9: # torch<=1.8 issue - TypeError: __init__() got an unexpected keyword argument 'batch_first'
@@ -230,6 +257,7 @@ def test_all_model_yamls():
def test_workflow():
+ """Test the complete workflow including training, validation, prediction, and exporting."""
model = YOLO(MODEL)
model.train(data='coco8.yaml', epochs=1, imgsz=32, optimizer='SGD')
model.val(imgsz=32)
@@ -238,12 +266,14 @@ def test_workflow():
def test_predict_callback_and_setup():
- # Test callback addition for prediction
- def on_predict_batch_end(predictor): # results -> List[batch_size]
+ """Test callback functionality during YOLO prediction."""
+
+ def on_predict_batch_end(predictor):
+ """Callback function that handles operations at the end of a prediction batch."""
path, im0s, _, _ = predictor.batch
im0s = im0s if isinstance(im0s, list) else [im0s]
bs = [predictor.dataset.bs for _ in range(len(path))]
- predictor.results = zip(predictor.results, im0s, bs)
+ predictor.results = zip(predictor.results, im0s, bs) # results is List[batch_size]
model = YOLO(MODEL)
model.add_callback('on_predict_batch_end', on_predict_batch_end)
@@ -259,6 +289,7 @@ def test_predict_callback_and_setup():
def test_results():
+ """Test various result formats for the YOLO model."""
for m in 'yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt', 'yolov8n-cls.pt':
results = YOLO(WEIGHTS_DIR / m)([SOURCE, SOURCE], imgsz=160)
for r in results:
@@ -274,7 +305,7 @@ def test_results():
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
def test_data_utils():
- # Test functions in ultralytics/data/utils.py
+ """Test utility functions in ultralytics/data/utils.py."""
from ultralytics.data.utils import HUBDatasetStats, autosplit
from ultralytics.utils.downloads import zip_directory
@@ -294,7 +325,7 @@ def test_data_utils():
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
def test_data_converter():
- # Test dataset converters
+ """Test dataset converters."""
from ultralytics.data.converter import coco80_to_coco91_class, convert_coco
file = 'instances_val2017.json'
@@ -304,6 +335,7 @@ def test_data_converter():
def test_data_annotator():
+ """Test automatic data annotation."""
from ultralytics.data.annotator import auto_annotate
auto_annotate(ASSETS,
@@ -313,7 +345,7 @@ def test_data_annotator():
def test_events():
- # Test event sending
+ """Test event sending functionality."""
from ultralytics.hub.utils import Events
events = Events()
@@ -324,6 +356,7 @@ def test_events():
def test_cfg_init():
+ """Test configuration initialization utilities."""
from ultralytics.cfg import check_dict_alignment, copy_default_cfg, smart_value
with contextlib.suppress(SyntaxError):
@@ -334,6 +367,7 @@ def test_cfg_init():
def test_utils_init():
+ """Test initialization utilities."""
from ultralytics.utils import get_git_branch, get_git_origin_url, get_ubuntu_version, is_github_actions_ci
get_ubuntu_version()
@@ -343,6 +377,7 @@ def test_utils_init():
def test_utils_checks():
+ """Test various utility checks."""
checks.check_yolov5u_filename('yolov5n.pt')
checks.git_describe(ROOT)
checks.check_requirements() # check requirements.txt
@@ -354,12 +389,14 @@ def test_utils_checks():
def test_utils_benchmarks():
+ """Test model benchmarking."""
from ultralytics.utils.benchmarks import ProfileModels
ProfileModels(['yolov8n.yaml'], imgsz=32, min_time=1, num_timed_runs=3, num_warmup_runs=1).profile()
def test_utils_torchutils():
+ """Test Torch utility functions."""
from ultralytics.nn.modules.conv import Conv
from ultralytics.utils.torch_utils import get_flops_with_torch_profiler, profile, time_sync
@@ -373,12 +410,14 @@ def test_utils_torchutils():
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
def test_utils_downloads():
+ """Test file download utilities."""
from ultralytics.utils.downloads import get_google_drive_file_info
get_google_drive_file_info('https://drive.google.com/file/d/1cqT-cJgANNrhIHCrEufUYhQ4RqiWG_lJ/view?usp=drive_link')
def test_utils_ops():
+ """Test various operations utilities."""
from ultralytics.utils.ops import (ltwh2xywh, ltwh2xyxy, make_divisible, xywh2ltwh, xywh2xyxy, xywhn2xyxy,
xywhr2xyxyxyxy, xyxy2ltwh, xyxy2xywh, xyxy2xywhn, xyxyxyxy2xywhr)
@@ -396,6 +435,7 @@ def test_utils_ops():
def test_utils_files():
+ """Test file handling utilities."""
from ultralytics.utils.files import file_age, file_date, get_latest_run, spaces_in_path
file_age(SOURCE)
@@ -409,6 +449,7 @@ def test_utils_files():
def test_nn_modules_conv():
+ """Test Convolutional Neural Network modules."""
from ultralytics.nn.modules.conv import CBAM, Conv2, ConvTranspose, DWConvTranspose2d, Focus
c1, c2 = 8, 16 # input and output channels
@@ -427,6 +468,7 @@ def test_nn_modules_conv():
def test_nn_modules_block():
+ """Test Neural Network block modules."""
from ultralytics.nn.modules.block import C1, C3TR, BottleneckCSP, C3Ghost, C3x
c1, c2 = 8, 16 # input and output channels
@@ -442,6 +484,7 @@ def test_nn_modules_block():
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
def test_hub():
+ """Test Ultralytics HUB functionalities."""
from ultralytics.hub import export_fmts_hub, logout
from ultralytics.hub.utils import smart_request
@@ -453,6 +496,7 @@ def test_hub():
@pytest.mark.slow
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
def test_triton():
+ """Test NVIDIA Triton Server functionalities."""
checks.check_requirements('tritonclient[all]')
import subprocess
import time
diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py
index 98edf804..65cd7542 100644
--- a/ultralytics/cfg/__init__.py
+++ b/ultralytics/cfg/__init__.py
@@ -180,8 +180,8 @@ def _handle_deprecation(custom):
def check_dict_alignment(base: Dict, custom: Dict, e=None):
"""
- This function checks for any mismatched keys between a custom configuration list and a base configuration list.
- If any mismatched keys are found, the function prints out similar keys from the base list and exits the program.
+ This function checks for any mismatched keys between a custom configuration list and a base configuration list. If
+ any mismatched keys are found, the function prints out similar keys from the base list and exits the program.
Args:
custom (dict): a dictionary of custom configuration options
@@ -205,9 +205,8 @@ def check_dict_alignment(base: Dict, custom: Dict, e=None):
def merge_equals_args(args: List[str]) -> List[str]:
"""
- Merges arguments around isolated '=' args in a list of strings.
- The function considers cases where the first argument ends with '=' or the second starts with '=',
- as well as when the middle one is an equals sign.
+ Merges arguments around isolated '=' args in a list of strings. The function considers cases where the first
+ argument ends with '=' or the second starts with '=', as well as when the middle one is an equals sign.
Args:
args (List[str]): A list of strings where each element is an argument.
diff --git a/ultralytics/data/augment.py b/ultralytics/data/augment.py
index 432023c7..7fb32477 100644
--- a/ultralytics/data/augment.py
+++ b/ultralytics/data/augment.py
@@ -20,16 +20,30 @@ from .utils import polygons2masks, polygons2masks_overlap
# TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
class BaseTransform:
+ """
+ Base class for image transformations.
+
+ This is a generic transformation class that can be extended for specific image processing needs.
+ The class is designed to be compatible with both classification and semantic segmentation tasks.
+
+ Methods:
+ __init__: Initializes the BaseTransform object.
+ apply_image: Applies image transformation to labels.
+ apply_instances: Applies transformations to object instances in labels.
+ apply_semantic: Applies semantic segmentation to an image.
+ __call__: Applies all label transformations to an image, instances, and semantic masks.
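+
+ Example (a minimal subclassing sketch; the vertical flip is illustrative):
+ >>> class VerticalFlip(BaseTransform):
+ ...     def apply_image(self, labels):
+ ...         labels['img'] = labels['img'][::-1]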
+ """
def __init__(self) -> None:
+ """Initializes the BaseTransform object."""
pass
def apply_image(self, labels):
- """Applies image transformation to labels."""
+ """Applies image transformations to labels."""
pass
def apply_instances(self, labels):
- """Applies transformations to input 'labels' and returns object instances."""
+ """Applies transformations to object instances in labels."""
pass
def apply_semantic(self, labels):
@@ -37,13 +51,14 @@ class BaseTransform:
pass
def __call__(self, labels):
- """Applies label transformations to an image, instances and semantic masks."""
+ """Applies all label transformations to an image, instances, and semantic masks."""
self.apply_image(labels)
self.apply_instances(labels)
self.apply_semantic(labels)
class Compose:
+ """Class for composing multiple image transformations."""
def __init__(self, transforms):
"""Initializes the Compose object with a list of transforms."""
@@ -60,18 +75,23 @@ class Compose:
self.transforms.append(transform)
def tolist(self):
- """Converts list of transforms to a standard Python list."""
+ """Converts the list of transforms to a standard Python list."""
return self.transforms
def __repr__(self):
- """Return string representation of object."""
+ """Returns a string representation of the object."""
return f"{self.__class__.__name__}({', '.join([f'{t}' for t in self.transforms])})"
class BaseMixTransform:
- """This implementation is from mmyolo."""
+ """
+ Class for base mix (MixUp/Mosaic) transformations.
+
+ This implementation is from mmyolo.
+ """
def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
+ """Initializes the BaseMixTransform object with dataset, pre_transform, and probability."""
self.dataset = dataset
self.pre_transform = pre_transform
self.p = p
@@ -262,8 +282,10 @@ class Mosaic(BaseMixTransform):
class MixUp(BaseMixTransform):
+ """Class for applying MixUp augmentation to the dataset."""
def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
+ """Initializes MixUp object with dataset, pre_transform, and probability of applying MixUp."""
super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
def get_indexes(self):
@@ -271,7 +293,7 @@ class MixUp(BaseMixTransform):
return random.randint(0, len(self.dataset) - 1)
def _mix_transform(self, labels):
- """Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf."""
+ """Applies MixUp augmentation as per https://arxiv.org/pdf/1710.09412.pdf."""
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
labels2 = labels['mix_labels'][0]
labels['img'] = (labels['img'] * r + labels2['img'] * (1 - r)).astype(np.uint8)
@@ -281,6 +303,28 @@ class MixUp(BaseMixTransform):
class RandomPerspective:
+ """
+ Implements random perspective and affine transformations on images and corresponding bounding boxes, segments, and
+ keypoints. These transformations include rotation, translation, scaling, and shearing. The class also offers the
+ option to apply these transformations conditionally with a specified probability.
+
+ Attributes:
+ degrees (float): Degree range for random rotations.
+ translate (float): Fraction of total width and height for random translation.
+ scale (float): Scaling factor interval, e.g., a scale factor of 0.1 allows a resize between 90%-110%.
+ shear (float): Shear intensity (angle in degrees).
+ perspective (float): Perspective distortion factor.
+ border (tuple): Tuple specifying mosaic border.
+ pre_transform (callable): A function/transform to apply to the image before starting the random transformation.
+
+ Methods:
+ affine_transform(img, border): Applies a series of affine transformations to the image.
+ apply_bboxes(bboxes, M): Transforms bounding boxes using the calculated affine matrix.
+ apply_segments(segments, M): Transforms segments and generates new bounding boxes.
+ apply_keypoints(keypoints, M): Transforms keypoints.
+ __call__(labels): Main method to apply transformations to both images and their corresponding annotations.
+ box_candidates(box1, box2): Filters out bounding boxes that don't meet certain criteria post-transformation.
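+
+ Example (parameter values are illustrative; 'labels' must carry 'img' and 'instances' entries):
+ >>> transform = RandomPerspective(degrees=10.0, translate=0.1, scale=0.1, shear=10.0)
+ >>> labels = transform(labels)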
+ """
def __init__(self,
degrees=0.0,
@@ -290,17 +334,31 @@ class RandomPerspective:
perspective=0.0,
border=(0, 0),
pre_transform=None):
+ """Initializes RandomPerspective object with transformation parameters."""
+
self.degrees = degrees
self.translate = translate
self.scale = scale
self.shear = shear
self.perspective = perspective
- # Mosaic border
- self.border = border
+ self.border = border # mosaic border
self.pre_transform = pre_transform
def affine_transform(self, img, border):
- """Center."""
+ """
+ Applies a sequence of affine transformations centered around the image center.
+
+ Args:
+ img (ndarray): Input image.
+ border (tuple): Border dimensions.
+
+ Returns:
+ img (ndarray): Transformed image.
+ M (ndarray): Transformation matrix.
+ s (float): Scale factor.
+ """
+
+ # Center
C = np.eye(3, dtype=np.float32)
C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
@@ -462,8 +520,22 @@ class RandomPerspective:
labels['resized_shape'] = img.shape[:2]
return labels
- def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
- # Compute box candidates: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
+ def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
+ """
+ Compute box candidates based on a set of thresholds. This method compares the characteristics of the boxes
+ before and after augmentation to decide whether a box is a candidate for further processing.
+
+ Args:
+ box1 (numpy.ndarray): The (4, n) bounding boxes before augmentation, represented as [x1, y1, x2, y2].
+ box2 (numpy.ndarray): The (4, n) bounding boxes after augmentation, represented as [x1, y1, x2, y2].
+ wh_thr (float, optional): The width and height threshold in pixels. Default is 2.
+ ar_thr (float, optional): The aspect ratio threshold. Default is 100.
+ area_thr (float, optional): The area ratio threshold. Default is 0.1.
+ eps (float, optional): A small epsilon value to prevent division by zero. Default is 1e-16.
+
+ Returns:
+ (numpy.ndarray): A boolean array indicating which boxes are candidates based on the given thresholds.
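+
+ Example (a single (4, 1) box before and after augmentation; values are illustrative):
+ >>> keep = self.box_candidates(np.array([[0], [0], [100], [100]]), np.array([[10], [10], [90], [90]]))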
+ """
w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
@@ -471,14 +543,32 @@ class RandomPerspective:
class RandomHSV:
+ """
+ This class is responsible for performing random adjustments to the Hue, Saturation, and Value (HSV) channels of an
+ image.
+
+ The adjustments are random but within limits set by hgain, sgain, and vgain.
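+
+ Example ('labels' is assumed to be a dict holding an 'img' numpy array):
+ >>> augmenter = RandomHSV(hgain=0.5, sgain=0.5, vgain=0.5)
+ >>> labels = augmenter(labels)  # adjusts the 'img' entry in place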
+ """
def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:
+ """
+ Initialize RandomHSV class with gains for each HSV channel.
+
+ Args:
+ hgain (float, optional): Maximum variation for hue. Default is 0.5.
+ sgain (float, optional): Maximum variation for saturation. Default is 0.5.
+ vgain (float, optional): Maximum variation for value. Default is 0.5.
+ """
self.hgain = hgain
self.sgain = sgain
self.vgain = vgain
def __call__(self, labels):
- """Applies image HSV augmentation"""
+ """
+ Applies random HSV augmentation to an image within the predefined limits.
+
+ The modified image replaces the original image in the input 'labels' dict.
+ """
img = labels['img']
if self.hgain or self.sgain or self.vgain:
r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1 # random gains
@@ -496,9 +586,22 @@ class RandomHSV:
class RandomFlip:
- """Applies random horizontal or vertical flip to an image with a given probability."""
+ """
+ Applies a random horizontal or vertical flip to an image with a given probability.
+
+ Also updates any instances (bounding boxes, keypoints, etc.) accordingly.
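+
+ Example ('labels' must provide 'img' and 'instances' keys):
+ >>> flipper = RandomFlip(p=0.5, direction='horizontal')
+ >>> labels = flipper(labels)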
+ """
def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
+ """
+ Initializes the RandomFlip class with probability and direction.
+
+ Args:
+ p (float, optional): The probability of applying the flip. Must be between 0 and 1. Default is 0.5.
+ direction (str, optional): The direction to apply the flip. Must be 'horizontal' or 'vertical'.
+ Default is 'horizontal'.
+ flip_idx (array-like, optional): Index mapping for flipping keypoints, if any.
+ """
assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
assert 0 <= p <= 1.0
@@ -507,7 +610,16 @@ class RandomFlip:
self.flip_idx = flip_idx
def __call__(self, labels):
- """Resize image and padding for detection, instance segmentation, pose."""
+ """
+ Applies random flip to an image and updates any instances like bounding boxes or keypoints accordingly.
+
+ Args:
+ labels (dict): A dictionary containing the keys 'img' and 'instances'. 'img' is the image to be flipped.
+ 'instances' is an object containing bounding boxes and optionally keypoints.
+
+ Returns:
+ (dict): The same dict with the flipped image and updated instances under the 'img' and 'instances' keys.
+ """
img = labels['img']
instances = labels.pop('instances')
instances.convert_bbox(format='xywh')
@@ -599,12 +711,38 @@ class LetterBox:
class CopyPaste:
+ """
+ Implements the Copy-Paste augmentation as described in the paper https://arxiv.org/abs/2012.07177. This class is
+ responsible for applying the Copy-Paste augmentation on images and their corresponding instances.
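+
+ Example ('labels' must provide 'img', 'cls', and segment-aware 'instances'):
+ >>> augmenter = CopyPaste(p=0.5)
+ >>> labels = augmenter(labels)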
+ """
def __init__(self, p=0.5) -> None:
+ """
+ Initializes the CopyPaste class with a given probability.
+
+ Args:
+ p (float, optional): The probability of applying the Copy-Paste augmentation. Must be between 0 and 1.
+ Default is 0.5.
+ """
self.p = p
def __call__(self, labels):
- """Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)."""
+ """
+ Applies the Copy-Paste augmentation to the given image and instances.
+
+ Args:
+ labels (dict): A dictionary containing:
+ - 'img': The image to augment.
+ - 'cls': Class labels associated with the instances.
+ - 'instances': Object containing bounding boxes, and optionally, keypoints and segments.
+
+ Returns:
+ (dict): Dict with augmented image and updated instances under the 'img', 'cls', and 'instances' keys.
+
+ Notes:
+ 1. Instances are expected to have 'segments' as one of their attributes for this augmentation to work.
+ 2. This method modifies the input dictionary 'labels' in place.
+ """
im = labels['img']
cls = labels['cls']
h, w = im.shape[:2]
@@ -639,9 +777,13 @@ class CopyPaste:
class Albumentations:
- """Albumentations transformations. Optional, uninstall package to disable.
- Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive Histogram Equalization,
- random change of brightness and contrast, RandomGamma and lowering of image quality by compression."""
+ """
+ Albumentations transformations.
+
+ Optional, uninstall the package to disable. Applies Blur, Median Blur, conversion to grayscale, Contrast Limited
+ Adaptive Histogram Equalization, random changes of brightness and contrast, RandomGamma, and lowering of image
+ quality by compression.
+ """
def __init__(self, p=1.0):
"""Initialize the transform object for YOLO bbox formatted params."""
@@ -690,6 +832,19 @@ class Albumentations:
# TODO: technically this is not an augmentation, maybe we should put this to another files
class Format:
+ """
+ Formats image annotations for object detection, instance segmentation, and pose estimation tasks. The class
+ standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader.
+
+ Attributes:
+ bbox_format (str): Format for bounding boxes. Default is 'xywh'.
+ normalize (bool): Whether to normalize bounding boxes. Default is True.
+ return_mask (bool): Return instance masks for segmentation. Default is False.
+ return_keypoint (bool): Return keypoints for pose estimation. Default is False.
+ mask_ratio (int): Downsample ratio for masks. Default is 4.
+ mask_overlap (bool): Whether to overlap masks. Default is True.
+ batch_idx (bool): Keep batch indexes. Default is True.
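+
+ Example (keyword values shown are the defaults):
+ >>> formatter = Format(bbox_format='xywh', normalize=True, return_mask=False)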
+ """
def __init__(self,
bbox_format='xywh',
@@ -699,6 +854,7 @@ class Format:
mask_ratio=4,
mask_overlap=True,
batch_idx=True):
+ """Initializes the Format class with given parameters."""
self.bbox_format = bbox_format
self.normalize = normalize
self.return_mask = return_mask # set False when training detection only
@@ -746,7 +902,7 @@ class Format:
return img
def _format_segments(self, instances, cls, w, h):
- """convert polygon points to bitmap."""
+ """Convert polygon points to bitmap."""
segments = instances.segments
if self.mask_overlap:
masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=self.mask_ratio)
@@ -851,35 +1007,75 @@ def classify_albumentations(
class ClassifyLetterBox:
- """YOLOv8 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])"""
+ """
+ YOLOv8 LetterBox class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
+ T.Compose([LetterBox(size), ToTensor()]).
+
+ Attributes:
+ h (int): Target height of the image.
+ w (int): Target width of the image.
+ auto (bool): If True, automatically solves for short side using stride.
+ stride (int): The stride value, used when 'auto' is True.
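+
+ Example ('im' is assumed to be an HWC numpy array):
+ >>> transform = ClassifyLetterBox(size=(640, 640), auto=False, stride=32)
+ >>> im_out = transform(im)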
+ """
def __init__(self, size=(640, 640), auto=False, stride=32):
- """Resizes image and crops it to center with max dimensions 'h' and 'w'."""
+ """
+ Initializes the ClassifyLetterBox class with a target size, auto-flag, and stride.
+
+ Args:
+ size (Union[int, Tuple[int, int]]): The target dimensions (height, width) for the letterbox.
+ auto (bool): If True, automatically calculates the short side based on stride.
+ stride (int): The stride value, used when 'auto' is True.
+ """
super().__init__()
self.h, self.w = (size, size) if isinstance(size, int) else size
self.auto = auto # pass max size integer, automatically solve for short side using stride
self.stride = stride # used with auto
- def __call__(self, im): # im = np.array HWC
+ def __call__(self, im):
+ """
+ Resizes the image and pads it with a letterbox method.
+
+ Args:
+ im (numpy.ndarray): The input image as a numpy array of shape HWC.
+
+ Returns:
+ (numpy.ndarray): The letterboxed and resized image as a numpy array.
+ """
imh, imw = im.shape[:2]
- r = min(self.h / imh, self.w / imw) # ratio of new/old
- h, w = round(imh * r), round(imw * r) # resized image
+ r = min(self.h / imh, self.w / imw) # ratio of new/old dimensions
+ h, w = round(imh * r), round(imw * r) # resized image dimensions
+
+ # Calculate padding dimensions
hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w)
top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
+
+ # Create padded image
im_out = np.full((hs, ws, 3), 114, dtype=im.dtype)
im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
return im_out
class CenterCrop:
- """YOLOv8 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])"""
+ """YOLOv8 CenterCrop class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
+ T.Compose([CenterCrop(size), ToTensor()]).
+ """
def __init__(self, size=640):
"""Converts an image from numpy array to PyTorch tensor."""
super().__init__()
self.h, self.w = (size, size) if isinstance(size, int) else size
- def __call__(self, im): # im = np.array HWC
+ def __call__(self, im):
+ """
+ Resizes and crops the center of the image using a letterbox method.
+
+ Args:
+ im (numpy.ndarray): The input image as a numpy array of shape HWC.
+
+ Returns:
+ (numpy.ndarray): The center-cropped and resized image as a numpy array.
+ """
imh, imw = im.shape[:2]
m = min(imh, imw) # min dimension
top, left = (imh - m) // 2, (imw - m) // 2
@@ -887,14 +1083,23 @@ class CenterCrop:
class ToTensor:
- """YOLOv8 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])."""
+ """YOLOv8 ToTensor class for image preprocessing, i.e., T.Compose([LetterBox(size), ToTensor()])."""
def __init__(self, half=False):
"""Initialize YOLOv8 ToTensor object with optional half-precision support."""
super().__init__()
self.half = half
- def __call__(self, im): # im = np.array HWC in BGR order
+ def __call__(self, im):
+ """
+ Transforms an image from a numpy array to a PyTorch tensor, applying optional half-precision and normalization.
+
+ Args:
+ im (numpy.ndarray): Input image as a numpy array with shape (H, W, C) in BGR order.
+
+ Returns:
+ (torch.Tensor): The transformed image as a PyTorch tensor in float32 or float16, normalized to [0, 1].
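+
+ Example ('im' is assumed to be an HWC BGR numpy array):
+ >>> tensor = ToTensor(half=False)(im)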
+ """
im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous
im = torch.from_numpy(im) # to torch
im = im.half() if self.half else im.float() # uint8 to fp16/32
diff --git a/ultralytics/data/base.py b/ultralytics/data/base.py
index 429533dc..462280a6 100644
--- a/ultralytics/data/base.py
+++ b/ultralytics/data/base.py
@@ -62,6 +62,7 @@ class BaseDataset(Dataset):
classes=None,
fraction=1.0):
+ """Initialize BaseDataset with given configuration and options."""
super().__init__()
self.img_path = img_path
self.imgsz = imgsz
self.augment = augment
@@ -256,7 +257,7 @@ class BaseDataset(Dataset):
return len(self.labels)
def update_labels_info(self, label):
- """custom your label format here."""
+ """Custom your label format here."""
return label
def build_transforms(self, hyp=None):
diff --git a/ultralytics/data/build.py b/ultralytics/data/build.py
index 33b5edad..07de91c8 100644
--- a/ultralytics/data/build.py
+++ b/ultralytics/data/build.py
@@ -20,7 +20,11 @@ from .utils import PIN_MEMORY
class InfiniteDataLoader(dataloader.DataLoader):
- """Dataloader that reuses workers. Uses same syntax as vanilla DataLoader."""
+ """
+ Dataloader that reuses workers.
+
+ Uses same syntax as vanilla DataLoader.
+ """
def __init__(self, *args, **kwargs):
"""Dataloader that infinitely recycles workers, inherits from DataLoader."""
@@ -38,7 +42,9 @@ class InfiniteDataLoader(dataloader.DataLoader):
yield next(self.iterator)
def reset(self):
- """Reset iterator.
+ """
+ Reset iterator.
+
This is useful when we want to modify settings of dataset while training.
"""
self.iterator = self._get_iterator()
@@ -70,7 +76,7 @@ def seed_worker(worker_id): # noqa
def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=False, stride=32):
- """Build YOLO Dataset"""
+ """Build YOLO Dataset."""
return YOLODataset(
img_path=img_path,
imgsz=cfg.imgsz,
diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py
index fecc30ce..4451df17 100644
--- a/ultralytics/data/converter.py
+++ b/ultralytics/data/converter.py
@@ -12,7 +12,8 @@ from ultralytics.utils import TQDM
def coco91_to_coco80_class():
- """Converts 91-index COCO class IDs to 80-index COCO class IDs.
+ """
+ Converts 91-index COCO class IDs to 80-index COCO class IDs.
Returns:
(list): A list of 91 class IDs where the index represents the 80-index class ID and the value is the
@@ -51,7 +52,8 @@ def convert_coco(labels_dir='../coco/annotations/',
use_segments=False,
use_keypoints=False,
cls91to80=True):
- """Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
+ """
+ Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
Args:
labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
@@ -203,6 +205,7 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
'helipad': 17}
def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
+ """Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory."""
orig_label_path = orig_label_dir / f'{image_name}.txt'
save_path = save_dir / f'{image_name}.txt'
diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py
index 217d3bab..f0f431ea 100644
--- a/ultralytics/data/dataset.py
+++ b/ultralytics/data/dataset.py
@@ -33,6 +33,7 @@ class YOLODataset(BaseDataset):
"""
def __init__(self, *args, data=None, use_segments=False, use_keypoints=False, **kwargs):
+ """Initializes the YOLODataset with optional configurations for segments and keypoints."""
self.use_segments = use_segments
self.use_keypoints = use_keypoints
self.data = data
@@ -40,7 +41,9 @@ class YOLODataset(BaseDataset):
super().__init__(*args, **kwargs)
def cache_labels(self, path=Path('./labels.cache')):
- """Cache dataset labels, check images and read shapes.
+ """
+ Cache dataset labels, check images and read shapes.
+
Args:
path (Path): path where to save the cache file (default: Path('./labels.cache')).
Returns:
@@ -157,7 +160,7 @@ class YOLODataset(BaseDataset):
self.transforms = self.build_transforms(hyp)
def update_labels_info(self, label):
- """custom your label format here."""
+ """Custom your label format here."""
# NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
# we can make it also support classification and semantic segmentation by add or remove some dict keys there.
bboxes = label.pop('bboxes')
@@ -254,6 +257,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
return {'img': sample, 'cls': j}
def __len__(self) -> int:
+ """Return the total number of samples in the dataset."""
return len(self.samples)
def verify_images(self):
@@ -320,6 +324,16 @@ def save_dataset_cache_file(prefix, path, x):
# TODO: support semantic segmentation
class SemanticDataset(BaseDataset):
+ """
+ Semantic Segmentation Dataset.
+
+ This class is responsible for handling datasets used for semantic segmentation tasks. It inherits functionalities
+ from the BaseDataset class.
+
+ Note:
+ This class is currently a placeholder and needs to be populated with methods and attributes for supporting
+ semantic segmentation tasks.
+ """
def __init__(self):
"""Initialize a SemanticDataset object."""
diff --git a/ultralytics/data/loaders.py b/ultralytics/data/loaders.py
index 88491c79..0541f712 100644
--- a/ultralytics/data/loaders.py
+++ b/ultralytics/data/loaders.py
@@ -22,6 +22,7 @@ from ultralytics.utils.checks import check_requirements
@dataclass
class SourceTypes:
+ """Class to represent various types of input sources for predictions."""
webcam: bool = False
screenshot: bool = False
from_img: bool = False
@@ -29,7 +30,34 @@ class SourceTypes:
class LoadStreams:
- """Stream Loader, i.e. `yolo predict source='rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP, TCP streams`."""
+ """
+ Stream Loader for various types of video streams.
+
+ Suitable for use with `yolo predict source='rtsp://example.com/media.mp4'`; supports RTSP, RTMP, HTTP, and TCP streams.
+
+ Attributes:
+ sources (str): The source input paths or URLs for the video streams.
+ imgsz (int): The image size for processing, defaults to 640.
+ vid_stride (int): Video frame-rate stride, defaults to 1.
+ buffer (bool): Whether to buffer input streams, defaults to False.
+ running (bool): Flag to indicate if the streaming thread is running.
+ mode (str): Set to 'stream' indicating real-time capture.
+ imgs (list): List of image frames for each stream.
+ fps (list): List of FPS for each stream.
+ frames (list): List of total frames for each stream.
+ threads (list): List of threads for each stream.
+ shape (list): List of shapes for each stream.
+ caps (list): List of cv2.VideoCapture objects for each stream.
+ bs (int): Batch size for processing.
+
+ Methods:
+ __init__: Initialize the stream loader.
+ update: Read stream frames in daemon thread.
+ close: Close stream loader and release resources.
+ __iter__: Returns an iterator object for the class.
+ __next__: Returns source paths, transformed, and original images for processing.
+ __len__: Return the length of the sources object.
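+
+ Example (the URL is illustrative):
+ >>> loader = LoadStreams('rtsp://example.com/media.mp4', imgsz=640)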
+ """
def __init__(self, sources='file.streams', imgsz=640, vid_stride=1, buffer=False):
"""Initialize instance variables and check for consistent input stream shapes."""
@@ -149,10 +177,33 @@ class LoadStreams:
class LoadScreenshots:
- """YOLOv8 screenshot dataloader, i.e. `yolo predict source=screen`."""
+ """
+ YOLOv8 screenshot dataloader.
+
+ This class manages the loading of screenshot images for processing with YOLOv8.
+ Suitable for use with `yolo predict source=screen`.
+
+ Attributes:
+ source (str): The source input indicating which screen to capture.
+ imgsz (int): The image size for processing, defaults to 640.
+ screen (int): The screen number to capture.
+ left (int): The left coordinate for screen capture area.
+ top (int): The top coordinate for screen capture area.
+ width (int): The width of the screen capture area.
+ height (int): The height of the screen capture area.
+ mode (str): Set to 'stream' indicating real-time capture.
+ frame (int): Counter for captured frames.
+ sct (mss.mss): Screen capture object from `mss` library.
+ bs (int): Batch size, set to 1.
+ monitor (dict): Monitor configuration details.
+
+ Methods:
+ __iter__: Returns an iterator object.
+ __next__: Captures the next screenshot and returns it.
+ """
def __init__(self, source, imgsz=640):
- """source = [screen_number left top width height] (pixels)."""
+ """Source = [screen_number left top width height] (pixels)."""
check_requirements('mss')
import mss # noqa
@@ -192,7 +243,28 @@ class LoadScreenshots:
class LoadImages:
- """YOLOv8 image/video dataloader, i.e. `yolo predict source=image.jpg/vid.mp4`."""
+ """
+ YOLOv8 image/video dataloader.
+
+ This class manages the loading and pre-processing of image and video data for YOLOv8. It supports loading from
+ various formats, including single image files, video files, and lists of image and video paths.
+
+ Attributes:
+ imgsz (int): Image size, defaults to 640.
+ files (list): List of image and video file paths.
+ nf (int): Total number of files (images and videos).
+ video_flag (list): Flags indicating whether a file is a video (True) or an image (False).
+ mode (str): Current mode, 'image' or 'video'.
+ vid_stride (int): Stride for video frame-rate, defaults to 1.
+ bs (int): Batch size, set to 1 for this class.
+ cap (cv2.VideoCapture): Video capture object for OpenCV.
+ frame (int): Frame counter for video.
+ frames (int): Total number of frames in the video.
+ count (int): Counter for iteration, initialized at 0 during `__iter__()`.
+
+ Methods:
+ _new_video(path): Create a new cv2.VideoCapture object for a given video path.
+ """
def __init__(self, path, imgsz=640, vid_stride=1):
"""Initialize the Dataloader and raise FileNotFoundError if file not found."""
@@ -285,6 +357,24 @@ class LoadImages:
class LoadPilAndNumpy:
+ """
+ Load images from PIL and Numpy arrays for batch processing.
+
+ This class is designed to manage loading and pre-processing of image data from both PIL and Numpy formats.
+ It performs basic validation and format conversion to ensure that the images are in the required format for
+ downstream processing.
+
+ Attributes:
+ paths (list): List of image paths or autogenerated filenames.
+ im0 (list): List of images stored as Numpy arrays.
+ imgsz (int): Image size, defaults to 640.
+ mode (str): Type of data being processed, defaults to 'image'.
+ bs (int): Batch size, equivalent to the length of `im0`.
+ count (int): Counter for iteration, initialized at 0 during `__iter__()`.
+
+ Methods:
+ _single_check(im): Validate and format a single image to a Numpy array.
+ """
def __init__(self, im0, imgsz=640):
"""Initialize PIL and Numpy Dataloader."""
@@ -326,8 +416,24 @@ class LoadPilAndNumpy:
class LoadTensor:
+ """
+ Load images from torch.Tensor data.
+
+ This class manages the loading and pre-processing of image data from PyTorch tensors for further processing.
+
+ Attributes:
+ im0 (torch.Tensor): The input tensor containing the image(s).
+ bs (int): Batch size, inferred from the shape of `im0`.
+ mode (str): Current mode, set to 'image'.
+ paths (list): List of image paths or filenames.
+ count (int): Counter for iteration, initialized at 0 during `__iter__()`.
+
+ Methods:
+ _single_check(im, stride): Validate and possibly modify the input tensor.
+ """
def __init__(self, im0) -> None:
+ """Initialize Tensor Dataloader."""
self.im0 = self._single_check(im0)
self.bs = self.im0.shape[0]
self.mode = 'image'
@@ -370,9 +476,7 @@ class LoadTensor:
def autocast_list(source):
- """
- Merges a list of source of different types into a list of numpy arrays or PIL images
- """
+ """Merges a list of source of different types into a list of numpy arrays or PIL images."""
files = []
for im in source:
if isinstance(im, (str, Path)): # filename or uri
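As a quick illustration of the loader utilities above, a minimal sketch of `autocast_list` on a mixed source list (the filename and synthetic inputs are placeholders, not from this diff):

```python
import numpy as np
from PIL import Image

from ultralytics.data.loaders import autocast_list

mixed = [
    'bus.jpg',                              # placeholder path or URI
    Image.new('RGB', (64, 64)),             # synthetic PIL image
    np.zeros((64, 64, 3), dtype=np.uint8),  # synthetic numpy array
]
images = autocast_list(mixed)  # -> list of numpy arrays / PIL images
```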
diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py
index 00ddc6e5..c5c2d994 100644
--- a/ultralytics/data/utils.py
+++ b/ultralytics/data/utils.py
@@ -547,9 +547,9 @@ class HUBDatasetStats:
def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
"""
- Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the
- Python Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will
- not be resized.
+ Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the Python
+ Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will not be
+ resized.
Args:
f (str): The path to the input image file.
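Based on the signature above, a minimal usage sketch (the filename is a placeholder; images already smaller than `max_dim` are left unresized per the docstring):

```python
from ultralytics.data.utils import compress_one_image

# Writes the compressed result to f_new if given, otherwise back to f.
compress_one_image('bus.jpg', max_dim=1280, quality=60)
```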
diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py
index b209b9d7..66932a65 100644
--- a/ultralytics/engine/exporter.py
+++ b/ultralytics/engine/exporter.py
@@ -986,9 +986,7 @@ class Exporter:
return model
def add_callback(self, event: str, callback):
- """
- Appends the given callback.
- """
+ """Appends the given callback."""
self.callbacks[event].append(callback)
def run_callbacks(self, event: str):
diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py
index 69de12b2..806b0346 100644
--- a/ultralytics/engine/model.py
+++ b/ultralytics/engine/model.py
@@ -159,9 +159,7 @@ class Model(nn.Module):
self.overrides['task'] = self.task
def _check_is_pytorch_model(self):
- """
- Raises TypeError is model is not a PyTorch model
- """
+ """Raises TypeError is model is not a PyTorch model."""
pt_str = isinstance(self.model, (str, Path)) and Path(self.model).suffix == '.pt'
pt_module = isinstance(self.model, nn.Module)
if not (pt_module or pt_str):
@@ -173,9 +171,7 @@ class Model(nn.Module):
f"argument directly in your inference command, i.e. 'model.predict(source=..., device=0)'")
def reset_weights(self):
- """
- Resets the model modules parameters to randomly initialized values, losing all training information.
- """
+ """Resets the model modules parameters to randomly initialized values, losing all training information."""
self._check_is_pytorch_model()
for m in self.model.modules():
if hasattr(m, 'reset_parameters'):
@@ -185,9 +181,7 @@ class Model(nn.Module):
return self
def load(self, weights='yolov8n.pt'):
- """
- Transfers parameters with matching names and shapes from 'weights' to model.
- """
+ """Transfers parameters with matching names and shapes from 'weights' to model."""
self._check_is_pytorch_model()
if isinstance(weights, (str, Path)):
weights, self.ckpt = attempt_load_one_weight(weights)
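An illustrative sketch of the two weight utilities documented above (model name and weights file are the standard defaults, used here as placeholders):

```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
model.reset_weights()     # randomly re-initialize every module that supports it
model.load('yolov8n.pt')  # transfer back parameters with matching names and shapes
```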
diff --git a/ultralytics/engine/predictor.py b/ultralytics/engine/predictor.py
index 3eb851b3..367efc69 100644
--- a/ultralytics/engine/predictor.py
+++ b/ultralytics/engine/predictor.py
@@ -58,7 +58,7 @@ Example:
class BasePredictor:
"""
- BasePredictor
+ BasePredictor.
A base class for creating predictors.
@@ -109,7 +109,8 @@ class BasePredictor:
callbacks.add_integration_callbacks(self)
def preprocess(self, im):
- """Prepares input image before inference.
+ """
+ Prepares input image before inference.
Args:
im (torch.Tensor | List(np.ndarray)): BCHW for tensor, [(HWC) x B] for list.
@@ -128,6 +129,7 @@ class BasePredictor:
return im
def inference(self, im, *args, **kwargs):
+ """Runs inference on a given image using the specified model and arguments."""
visualize = increment_path(self.save_dir / Path(self.batch[0][0]).stem,
mkdir=True) if self.args.visualize and (not self.source_type.tensor) else False
return self.model(im, augment=self.args.augment, visualize=visualize)
@@ -194,7 +196,11 @@ class BasePredictor:
return list(self.stream_inference(source, model, *args, **kwargs)) # merge list of Result into one
def predict_cli(self, source=None, model=None):
- """Method used for CLI prediction. It uses always generator as outputs as not required by CLI mode."""
+ """
+ Method used for CLI prediction.
+
+ It uses always generator as outputs as not required by CLI mode.
+ """
gen = self.stream_inference(source, model)
for _ in gen: # running CLI inference without accumulating any outputs (do not modify)
pass
@@ -352,7 +358,5 @@ class BasePredictor:
callback(self)
def add_callback(self, event: str, func):
- """
- Add callback
- """
+ """Add callback."""
self.callbacks[event].append(func)
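A hedged sketch of callback registration; `on_predict_batch_end` is a standard Ultralytics event, and the handler below is illustrative:

```python
from ultralytics import YOLO

def log_batch(predictor):
    """Illustrative handler printing the number of results in the current batch."""
    print(f'processed {len(predictor.results)} result(s)')

model = YOLO('yolov8n.pt')
model.add_callback('on_predict_batch_end', log_batch)
model.predict('bus.jpg')  # placeholder image path
```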
diff --git a/ultralytics/engine/results.py b/ultralytics/engine/results.py
index 0fc6a0dd..fcbd762c 100644
--- a/ultralytics/engine/results.py
+++ b/ultralytics/engine/results.py
@@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
-Ultralytics Results, Boxes and Masks classes for handling inference results
+Ultralytics Results, Boxes and Masks classes for handling inference results.
Usage: See https://docs.ultralytics.com/modes/predict/
"""
@@ -19,12 +19,11 @@ from ultralytics.utils.torch_utils import smart_inference_mode
class BaseTensor(SimpleClass):
- """
- Base tensor class with additional methods for easy manipulation and device handling.
- """
+ """Base tensor class with additional methods for easy manipulation and device handling."""
def __init__(self, data, orig_shape) -> None:
- """Initialize BaseTensor with data and original shape.
+ """
+ Initialize BaseTensor with data and original shape.
Args:
data (torch.Tensor | np.ndarray): Predictions, such as bboxes, masks and keypoints.
@@ -126,6 +125,18 @@ class Results(SimpleClass):
self.probs = probs
def _apply(self, fn, *args, **kwargs):
+ """
+ Applies a function to all non-empty attributes and returns a new Results object with modified attributes. This
+ function is internally called by methods like .to(), .cuda(), .cpu(), etc.
+
+ Args:
+ fn (str): The name of the function to apply.
+ *args: Variable length argument list to pass to the function.
+ **kwargs: Arbitrary keyword arguments to pass to the function.
+
+ Returns:
+ Results: A new Results object with attributes modified by the applied function.
+ """
r = self.new()
for k in self._keys:
v = getattr(self, k)
@@ -250,9 +261,7 @@ class Results(SimpleClass):
return annotator.result()
def verbose(self):
- """
- Return log string for each task.
- """
+ """Return log string for each task."""
log_string = ''
probs = self.probs
boxes = self.boxes
@@ -537,6 +546,7 @@ class Probs(BaseTensor):
"""
def __init__(self, probs, orig_shape=None) -> None:
+ """Initialize the Probs class with classification probabilities and optional original shape of the image."""
super().__init__(probs, orig_shape)
@property
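A brief sketch of the device handling that `_apply` enables on `Results` (the model and image path are placeholders):

```python
from ultralytics import YOLO

results = YOLO('yolov8n.pt')('bus.jpg')
r = results[0].cpu()        # internally calls _apply('cpu') on each tensor attribute
print(r.boxes.data.device)  # -> cpu
```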
diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py
index 38df2673..6905847a 100644
--- a/ultralytics/engine/trainer.py
+++ b/ultralytics/engine/trainer.py
@@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
-Train a model on a dataset
+Train a model on a dataset.
Usage:
$ yolo mode=train model=yolov8n.pt data=coco128.yaml imgsz=640 epochs=100 batch=16
@@ -37,7 +37,7 @@ from ultralytics.utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel,
class BaseTrainer:
"""
- BaseTrainer
+ BaseTrainer.
A base class for creating trainers.
@@ -143,15 +143,11 @@ class BaseTrainer:
callbacks.add_integration_callbacks(self)
def add_callback(self, event: str, callback):
- """
- Appends the given callback.
- """
+ """Appends the given callback."""
self.callbacks[event].append(callback)
def set_callback(self, event: str, callback):
- """
- Overrides the existing callbacks with the given callback.
- """
+ """Overrides the existing callbacks with the given callback."""
self.callbacks[event] = [callback]
def run_callbacks(self, event: str):
@@ -207,9 +203,7 @@ class BaseTrainer:
world_size=world_size)
def _setup_train(self, world_size):
- """
- Builds dataloaders and optimizer on correct rank process.
- """
+ """Builds dataloaders and optimizer on correct rank process."""
# Model
self.run_callbacks('on_pretrain_routine_start')
@@ -450,14 +444,14 @@ class BaseTrainer:
@staticmethod
def get_dataset(data):
"""
- Get train, val path from data dict if it exists. Returns None if data format is not recognized.
+ Get train, val path from data dict if it exists.
+
+ Returns None if data format is not recognized.
"""
return data['train'], data.get('val') or data.get('test')
def setup_model(self):
- """
- load/create/download model for any task.
- """
+ """Load/create/download model for any task."""
if isinstance(self.model, torch.nn.Module): # if model is loaded beforehand. No setup needed
return
@@ -482,14 +476,14 @@ class BaseTrainer:
self.ema.update(self.model)
def preprocess_batch(self, batch):
- """
- Allows custom preprocessing model inputs and ground truths depending on task type.
- """
+ """Allows custom preprocessing model inputs and ground truths depending on task type."""
return batch
def validate(self):
"""
- Runs validation on test set using self.validator. The returned dict is expected to contain "fitness" key.
+ Runs validation on test set using self.validator.
+
+ The returned dict is expected to contain "fitness" key.
"""
metrics = self.validator(self)
fitness = metrics.pop('fitness', -self.loss.detach().cpu().numpy()) # use loss as fitness measure if not found
@@ -506,26 +500,20 @@ class BaseTrainer:
raise NotImplementedError('get_validator function not implemented in trainer')
def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
- """
- Returns dataloader derived from torch.data.Dataloader.
- """
+ """Returns dataloader derived from torch.data.Dataloader."""
raise NotImplementedError('get_dataloader function not implemented in trainer')
def build_dataset(self, img_path, mode='train', batch=None):
- """Build dataset"""
+ """Build dataset."""
raise NotImplementedError('build_dataset function not implemented in trainer')
def label_loss_items(self, loss_items=None, prefix='train'):
- """
- Returns a loss dict with labelled training loss items tensor
- """
+ """Returns a loss dict with labelled training loss items tensor."""
# Not needed for classification but necessary for segmentation & detection
return {'loss': loss_items} if loss_items is not None else ['loss']
def set_model_attributes(self):
- """
- To set or update model parameters before training.
- """
+ """To set or update model parameters before training."""
self.model.names = self.data['names']
def build_targets(self, preds, targets):
@@ -632,8 +620,8 @@ class BaseTrainer:
def build_optimizer(self, model, name='auto', lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5):
"""
- Constructs an optimizer for the given model, based on the specified optimizer name, learning rate,
- momentum, weight decay, and number of iterations.
+ Constructs an optimizer for the given model, based on the specified optimizer name, learning rate, momentum,
+ weight decay, and number of iterations.
Args:
model (torch.nn.Module): The model for which to build an optimizer.
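The `get_dataset` contract above is small enough to show inline; a self-contained sketch of the fallback logic:

```python
# Worked example of the contract: 'val' falls back to 'test', and an
# unrecognized data format would yield None for the val path.
data = {'train': 'images/train', 'test': 'images/test'}
train_path, val_path = data['train'], data.get('val') or data.get('test')
print(train_path, val_path)  # images/train images/test
```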
diff --git a/ultralytics/engine/tuner.py b/ultralytics/engine/tuner.py
index d60a56c2..6d6e0a86 100644
--- a/ultralytics/engine/tuner.py
+++ b/ultralytics/engine/tuner.py
@@ -31,32 +31,32 @@ from ultralytics.utils.plotting import plot_tune_results
class Tuner:
"""
- Class responsible for hyperparameter tuning of YOLO models.
+ Class responsible for hyperparameter tuning of YOLO models.
- The class evolves YOLO model hyperparameters over a given number of iterations
- by mutating them according to the search space and retraining the model to evaluate their performance.
+ The class evolves YOLO model hyperparameters over a given number of iterations
+ by mutating them according to the search space and retraining the model to evaluate their performance.
- Attributes:
- space (dict): Hyperparameter search space containing bounds and scaling factors for mutation.
- tune_dir (Path): Directory where evolution logs and results will be saved.
- tune_csv (Path): Path to the CSV file where evolution logs are saved.
+ Attributes:
+ space (dict): Hyperparameter search space containing bounds and scaling factors for mutation.
+ tune_dir (Path): Directory where evolution logs and results will be saved.
+ tune_csv (Path): Path to the CSV file where evolution logs are saved.
- Methods:
- _mutate(hyp: dict) -> dict:
- Mutates the given hyperparameters within the bounds specified in `self.space`.
+ Methods:
+ _mutate(hyp: dict) -> dict:
+ Mutates the given hyperparameters within the bounds specified in `self.space`.
- __call__():
- Executes the hyperparameter evolution across multiple iterations.
+ __call__():
+ Executes the hyperparameter evolution across multiple iterations.
- Example:
- Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
- ```python
- from ultralytics import YOLO
+ Example:
+ Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
+ ```python
+ from ultralytics import YOLO
- model = YOLO('yolov8n.pt')
- model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False)
- ```
- """
+ model = YOLO('yolov8n.pt')
+ model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False)
+ ```
+ """
def __init__(self, args=DEFAULT_CFG, _callbacks=None):
"""
diff --git a/ultralytics/engine/validator.py b/ultralytics/engine/validator.py
index 8d8349bd..5ad69546 100644
--- a/ultralytics/engine/validator.py
+++ b/ultralytics/engine/validator.py
@@ -36,7 +36,7 @@ from ultralytics.utils.torch_utils import de_parallel, select_device, smart_infe
class BaseValidator:
"""
- BaseValidator
+ BaseValidator.
A base class for creating validators.
@@ -102,8 +102,7 @@ class BaseValidator:
@smart_inference_mode()
def __call__(self, trainer=None, model=None):
- """
- Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer
+ """Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer
gets priority).
"""
self.training = trainer is not None
@@ -260,7 +259,7 @@ class BaseValidator:
raise NotImplementedError('get_dataloader function not implemented for this validator')
def build_dataset(self, img_path):
- """Build dataset"""
+ """Build dataset."""
raise NotImplementedError('build_dataset function not implemented in validator')
def preprocess(self, batch):
diff --git a/ultralytics/hub/__init__.py b/ultralytics/hub/__init__.py
index daed439c..8e101d6b 100644
--- a/ultralytics/hub/__init__.py
+++ b/ultralytics/hub/__init__.py
@@ -80,8 +80,8 @@ def get_export(model_id='', format='torchscript'):
def check_dataset(path='', task='detect'):
"""
- Function for error-checking HUB dataset Zip file before upload. It checks a dataset for errors before it is
- uploaded to the HUB. Usage examples are given below.
+ Function for error-checking HUB dataset Zip file before upload. It checks a dataset for errors before it is uploaded
+ to the HUB. Usage examples are given below.
Args:
path (str, optional): Path to data.zip (with data.yaml inside data.zip). Defaults to ''.
diff --git a/ultralytics/hub/auth.py b/ultralytics/hub/auth.py
index 9963d79c..deea9a32 100644
--- a/ultralytics/hub/auth.py
+++ b/ultralytics/hub/auth.py
@@ -9,6 +9,19 @@ API_KEY_URL = f'{HUB_WEB_ROOT}/settings?tab=api+keys'
class Auth:
+ """
+ Manages authentication processes including API key handling, cookie-based authentication, and header generation.
+
+ The class supports different methods of authentication:
+ 1. Directly using an API key.
+ 2. Authenticating using browser cookies (specifically in Google Colab).
+ 3. Prompting the user to enter an API key.
+
+ Attributes:
+ id_token (str or bool): Token used for identity verification, initialized as False.
+ api_key (str or bool): API key for authentication, initialized as False.
+ model_key (bool): Placeholder for model key, initialized as False.
+ """
id_token = api_key = model_key = False
def __init__(self, api_key='', verbose=False):
@@ -54,7 +67,9 @@ class Auth:
def request_api_key(self, max_attempts=3):
"""
- Prompt the user to input their API key. Returns the model ID.
+ Prompt the user to input their API key.
+
+ Returns the model ID.
"""
import getpass
for attempts in range(max_attempts):
@@ -86,8 +101,8 @@ class Auth:
def auth_with_cookies(self) -> bool:
"""
- Attempt to fetch authentication via cookies and set id_token.
- User must be logged in to HUB and running in a supported browser.
+ Attempt to fetch authentication via cookies and set id_token. User must be logged in to HUB and running in a
+ supported browser.
Returns:
bool: True if authentication is successful, False otherwise.
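A hedged usage sketch of `Auth`; the key below is a placeholder, and note that construction attempts verification against HUB:

```python
from ultralytics.hub.auth import Auth

auth = Auth(api_key='placeholder_key', verbose=True)  # not a real credential
```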
diff --git a/ultralytics/hub/session.py b/ultralytics/hub/session.py
index 57c55f80..9870ca16 100644
--- a/ultralytics/hub/session.py
+++ b/ultralytics/hub/session.py
@@ -84,6 +84,7 @@ class HUBTrainingSession:
def _handle_signal(self, signum, frame):
"""
Handle kill signals and prevent heartbeats from being sent on Colab after termination.
+
This method does not use frame; it is included because it is passed by signal.
"""
if self.alive is True:
diff --git a/ultralytics/hub/utils.py b/ultralytics/hub/utils.py
index 3ca954e6..f2621d7a 100644
--- a/ultralytics/hub/utils.py
+++ b/ultralytics/hub/utils.py
@@ -161,9 +161,7 @@ class Events:
url = 'https://www.google-analytics.com/mp/collect?measurement_id=G-X8NCJYTQXM&api_secret=QLQrATrNSwGRFRLE-cbHJw'
def __init__(self):
- """
- Initializes the Events object with default values for events, rate_limit, and metadata.
- """
+ """Initializes the Events object with default values for events, rate_limit, and metadata."""
self.events = [] # events list
self.rate_limit = 60.0 # rate limit (seconds)
self.t = 0.0 # rate limit timer (seconds)
diff --git a/ultralytics/models/fastsam/model.py b/ultralytics/models/fastsam/model.py
index c1895fc6..e6475faa 100644
--- a/ultralytics/models/fastsam/model.py
+++ b/ultralytics/models/fastsam/model.py
@@ -22,7 +22,7 @@ class FastSAM(Model):
"""
def __init__(self, model='FastSAM-x.pt'):
- """Call the __init__ method of the parent class (YOLO) with the updated default model"""
+ """Call the __init__ method of the parent class (YOLO) with the updated default model."""
if str(model) == 'FastSAM.pt':
model = 'FastSAM-x.pt'
assert Path(model).suffix not in ('.yaml', '.yml'), 'FastSAM models only support pre-trained models.'
@@ -30,4 +30,5 @@ class FastSAM(Model):
@property
def task_map(self):
+ """Returns a dictionary mapping segment task to corresponding predictor and validator classes."""
return {'segment': {'predictor': FastSAMPredictor, 'validator': FastSAMValidator}}
diff --git a/ultralytics/models/fastsam/predict.py b/ultralytics/models/fastsam/predict.py
index 4eac69f9..b64d2d6e 100644
--- a/ultralytics/models/fastsam/predict.py
+++ b/ultralytics/models/fastsam/predict.py
@@ -11,10 +11,12 @@ from ultralytics.utils import DEFAULT_CFG, ops
class FastSAMPredictor(DetectionPredictor):
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+ """Initializes FastSAMPredictor class by inheriting from DetectionPredictor and setting task to 'segment'."""
super().__init__(cfg, overrides, _callbacks)
self.args.task = 'segment'
def postprocess(self, preds, img, orig_imgs):
+ """Postprocesses the predictions, applies non-max suppression, scales the boxes, and returns the results."""
p = ops.non_max_suppression(
preds[0],
self.args.conf,
diff --git a/ultralytics/models/fastsam/prompt.py b/ultralytics/models/fastsam/prompt.py
index 97ab46c3..5eb581e9 100644
--- a/ultralytics/models/fastsam/prompt.py
+++ b/ultralytics/models/fastsam/prompt.py
@@ -15,6 +15,7 @@ from ultralytics.utils import TQDM
class FastSAMPrompt:
def __init__(self, source, results, device='cuda') -> None:
+ """Initializes FastSAMPrompt with given source, results and device, and assigns clip for linear assignment."""
self.device = device
self.results = results
self.source = source
@@ -30,6 +31,7 @@ class FastSAMPrompt:
@staticmethod
def _segment_image(image, bbox):
+ """Segments the given image according to the provided bounding box coordinates."""
image_array = np.array(image)
segmented_image_array = np.zeros_like(image_array)
x1, y1, x2, y2 = bbox
@@ -45,6 +47,9 @@ class FastSAMPrompt:
@staticmethod
def _format_results(result, filter=0):
+ """Formats detection results into list of annotations each containing ID, segmentation, bounding box, score and
+ area.
+ """
annotations = []
n = len(result.masks.data) if result.masks is not None else 0
for i in range(n):
@@ -61,6 +66,9 @@ class FastSAMPrompt:
@staticmethod
def _get_bbox_from_mask(mask):
+ """Applies morphological transformations to the mask, displays it, and if with_contours is True, draws
+ contours.
+ """
mask = mask.astype(np.uint8)
contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
x1, y1, w, h = cv2.boundingRect(contours[0])
@@ -195,6 +203,7 @@ class FastSAMPrompt:
@torch.no_grad()
def retrieve(self, model, preprocess, elements, search_text: str, device) -> int:
+ """Processes images and text with a model, calculates similarity, and returns softmax score."""
preprocessed_images = [preprocess(image).to(device) for image in elements]
tokenized_text = self.clip.tokenize([search_text]).to(device)
stacked_images = torch.stack(preprocessed_images)
@@ -206,6 +215,7 @@ class FastSAMPrompt:
return probs[:, 0].softmax(dim=0)
def _crop_image(self, format_results):
+ """Crops an image based on provided annotation format and returns cropped images and related data."""
if os.path.isdir(self.source):
raise ValueError(f"'{self.source}' is a directory, not a valid source for this function.")
image = Image.fromarray(cv2.cvtColor(self.results[0].orig_img, cv2.COLOR_BGR2RGB))
@@ -229,6 +239,7 @@ class FastSAMPrompt:
return cropped_boxes, cropped_images, not_crop, filter_id, annotations
def box_prompt(self, bbox):
+ """Modifies the bounding box properties and calculates IoU between masks and bounding box."""
if self.results[0].masks is not None:
assert (bbox[2] != 0 and bbox[3] != 0)
if os.path.isdir(self.source):
@@ -261,7 +272,8 @@ class FastSAMPrompt:
self.results[0].masks.data = torch.tensor(np.array([masks[max_iou_index].cpu().numpy()]))
return self.results
- def point_prompt(self, points, pointlabel): # numpy 处理
+ def point_prompt(self, points, pointlabel): # numpy
+ """Adjusts points on detected masks based on user input and returns the modified results."""
if self.results[0].masks is not None:
if os.path.isdir(self.source):
raise ValueError(f"'{self.source}' is a directory, not a valid source for this function.")
@@ -284,6 +296,7 @@ class FastSAMPrompt:
return self.results
def text_prompt(self, text):
+ """Processes a text prompt, applies it to existing results and returns the updated results."""
if self.results[0].masks is not None:
format_results = self._format_results(self.results[0], 0)
cropped_boxes, cropped_images, not_crop, filter_id, annotations = self._crop_image(format_results)
@@ -296,4 +309,5 @@ class FastSAMPrompt:
return self.results
def everything_prompt(self):
+ """Returns the processed results from the previous methods in the class."""
return self.results
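A hedged end-to-end sketch combining the prompt methods documented above; paths and prompt values are illustrative, and `text_prompt` additionally requires the CLIP dependency assigned in `__init__`:

```python
from ultralytics.models.fastsam import FastSAM, FastSAMPrompt

model = FastSAM('FastSAM-x.pt')
everything = model('bus.jpg', device='cpu', retina_masks=True, imgsz=1024)
prompt = FastSAMPrompt('bus.jpg', everything, device='cpu')

ann = prompt.everything_prompt()        # all masks from the initial results
ann = prompt.text_prompt(text='a bus')  # CLIP-guided mask selection
```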
diff --git a/ultralytics/models/nas/model.py b/ultralytics/models/nas/model.py
index f848cc4b..9a770c4c 100644
--- a/ultralytics/models/nas/model.py
+++ b/ultralytics/models/nas/model.py
@@ -25,12 +25,13 @@ from .val import NASValidator
class NAS(Model):
def __init__(self, model='yolo_nas_s.pt') -> None:
+ """Initializes the NAS model with the provided or default 'yolo_nas_s.pt' model."""
assert Path(model).suffix not in ('.yaml', '.yml'), 'YOLO-NAS models only support pre-trained models.'
super().__init__(model, task='detect')
@smart_inference_mode()
def _load(self, weights: str, task: str):
- # Load or create new NAS model
+ """Loads an existing NAS model weights or creates a new NAS model with pretrained weights if not provided."""
import super_gradients
suffix = Path(weights).suffix
if suffix == '.pt':
@@ -58,4 +59,5 @@ class NAS(Model):
@property
def task_map(self):
+ """Returns a dictionary mapping tasks to respective predictor and validator classes."""
return {'detect': {'predictor': NASPredictor, 'validator': NASValidator}}
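A minimal usage sketch for the NAS interface above (loading weights requires the `super-gradients` package imported in `_load`; the image path is a placeholder):

```python
from ultralytics import NAS

model = NAS('yolo_nas_s.pt')
results = model.predict('bus.jpg')
```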
diff --git a/ultralytics/models/rtdetr/model.py b/ultralytics/models/rtdetr/model.py
index c20d72f6..fa7d484e 100644
--- a/ultralytics/models/rtdetr/model.py
+++ b/ultralytics/models/rtdetr/model.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-RT-DETR model interface
-"""
+"""RT-DETR model interface."""
from ultralytics.engine.model import Model
from ultralytics.nn.tasks import RTDETRDetectionModel
@@ -11,17 +9,17 @@ from .val import RTDETRValidator
class RTDETR(Model):
- """
- RTDETR model interface.
- """
+ """RTDETR model interface."""
def __init__(self, model='rtdetr-l.pt') -> None:
+ """Initializes the RTDETR model with the given model file, defaulting to 'rtdetr-l.pt'."""
if model and model.split('.')[-1] not in ('pt', 'yaml', 'yml'):
raise NotImplementedError('RT-DETR only supports creating from *.pt file or *.yaml file.')
super().__init__(model=model, task='detect')
@property
def task_map(self):
+ """Returns a dictionary mapping task names to corresponding Ultralytics task classes for RTDETR model."""
return {
'detect': {
'predictor': RTDETRPredictor,
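A minimal usage sketch for the RT-DETR interface above (the image path is a placeholder):

```python
from ultralytics import RTDETR

model = RTDETR('rtdetr-l.pt')
results = model('bus.jpg')
```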
diff --git a/ultralytics/models/rtdetr/predict.py b/ultralytics/models/rtdetr/predict.py
index 33d5d7a2..1a2b0cbc 100644
--- a/ultralytics/models/rtdetr/predict.py
+++ b/ultralytics/models/rtdetr/predict.py
@@ -48,7 +48,8 @@ class RTDETRPredictor(BasePredictor):
return results
def pre_transform(self, im):
- """Pre-transform input image before inference.
+ """
+ Pre-transform input image before inference.
Args:
im (List(np.ndarray)): (N, 3, h, w) for tensor, [(h, w, 3) x N] for list.
diff --git a/ultralytics/models/rtdetr/train.py b/ultralytics/models/rtdetr/train.py
index 1e586683..91d4729e 100644
--- a/ultralytics/models/rtdetr/train.py
+++ b/ultralytics/models/rtdetr/train.py
@@ -37,7 +37,8 @@ class RTDETRTrainer(DetectionTrainer):
return model
def build_dataset(self, img_path, mode='val', batch=None):
- """Build RTDETR Dataset
+ """
+ Build RTDETR Dataset.
Args:
img_path (str): Path to the folder containing images.
diff --git a/ultralytics/models/rtdetr/val.py b/ultralytics/models/rtdetr/val.py
index 9b984bed..d8e5fb69 100644
--- a/ultralytics/models/rtdetr/val.py
+++ b/ultralytics/models/rtdetr/val.py
@@ -16,6 +16,7 @@ __all__ = 'RTDETRValidator', # tuple or list
class RTDETRDataset(YOLODataset):
def __init__(self, *args, data=None, **kwargs):
+ """Initialize the RTDETRDataset class by inheriting from the YOLODataset class."""
super().__init__(*args, data=data, use_segments=False, use_keypoints=False, **kwargs)
# NOTE: add stretch version load_image for rtdetr mosaic
diff --git a/ultralytics/models/sam/amg.py b/ultralytics/models/sam/amg.py
index f251fe4e..d7751d6f 100644
--- a/ultralytics/models/sam/amg.py
+++ b/ultralytics/models/sam/amg.py
@@ -32,9 +32,10 @@ def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
def calculate_stability_score(masks: torch.Tensor, mask_threshold: float, threshold_offset: float) -> torch.Tensor:
"""
- Computes the stability score for a batch of masks. The stability
- score is the IoU between the binary masks obtained by thresholding
- the predicted mask logits at high and low values.
+ Computes the stability score for a batch of masks.
+
+ The stability score is the IoU between the binary masks obtained by thresholding the predicted mask logits at high
+ and low values.
"""
# One mask is always contained inside the other.
# Save memory by preventing unnecessary cast to torch.int64
@@ -60,7 +61,11 @@ def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer:
def generate_crop_boxes(im_size: Tuple[int, ...], n_layers: int,
overlap_ratio: float) -> Tuple[List[List[int]], List[int]]:
- """Generates a list of crop boxes of different sizes. Each layer has (2**i)**2 boxes for the ith layer."""
+ """
+ Generates a list of crop boxes of different sizes.
+
+ Each layer has (2**i)**2 boxes for the ith layer.
+ """
crop_boxes, layer_idxs = [], []
im_h, im_w = im_size
short_side = min(im_h, im_w)
@@ -145,8 +150,9 @@ def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> Tup
def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor:
"""
- Calculates boxes in XYXY format around masks. Return [0,0,0,0] for
- an empty mask. For input shape C1xC2x...xHxW, the output shape is C1xC2x...x4.
+ Calculates boxes in XYXY format around masks.
+
+ Return [0,0,0,0] for an empty mask. For input shape C1xC2x...xHxW, the output shape is C1xC2x...x4.
"""
# torch.max below raises an error on empty inputs, just skip in this case
if torch.numel(masks) == 0:
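A short sketch of `calculate_stability_score` as documented above, on synthetic logits:

```python
import torch

from ultralytics.models.sam.amg import calculate_stability_score

# Stability score = IoU between the mask thresholded at
# (mask_threshold + offset) and at (mask_threshold - offset).
logits = torch.randn(4, 64, 64)  # synthetic mask logits for 4 masks
scores = calculate_stability_score(logits, mask_threshold=0.0, threshold_offset=1.0)
print(scores.shape)  # torch.Size([4]) -> one stability score per mask
```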
diff --git a/ultralytics/models/sam/model.py b/ultralytics/models/sam/model.py
index 2ca35011..8a140b3f 100644
--- a/ultralytics/models/sam/model.py
+++ b/ultralytics/models/sam/model.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-SAM model interface
-"""
+"""SAM model interface."""
from pathlib import Path
@@ -13,16 +11,16 @@ from .predict import Predictor
class SAM(Model):
- """
- SAM model interface.
- """
+ """SAM model interface."""
def __init__(self, model='sam_b.pt') -> None:
+ """Initializes the SAM model instance with the specified pre-trained model file."""
if model and Path(model).suffix not in ('.pt', '.pth'):
raise NotImplementedError('SAM prediction requires pre-trained *.pt or *.pth model.')
super().__init__(model=model, task='segment')
def _load(self, weights: str, task=None):
+ """Loads the provided weights into the SAM model."""
self.model = build_sam(weights)
def predict(self, source, stream=False, bboxes=None, points=None, labels=None, **kwargs):
@@ -48,4 +46,5 @@ class SAM(Model):
@property
def task_map(self):
+ """Returns a dictionary mapping the 'segment' task to its corresponding 'Predictor'."""
return {'segment': {'predictor': Predictor}}
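A hedged usage sketch of the SAM interface, using the `predict` signature shown above (image path and prompt values are illustrative):

```python
from ultralytics import SAM

model = SAM('sam_b.pt')
results = model.predict('bus.jpg', bboxes=[100, 100, 400, 400])    # box prompt
results = model.predict('bus.jpg', points=[300, 200], labels=[1])  # point prompt
```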
diff --git a/ultralytics/models/sam/modules/decoders.py b/ultralytics/models/sam/modules/decoders.py
index 0c64a7e4..a9a3a319 100644
--- a/ultralytics/models/sam/modules/decoders.py
+++ b/ultralytics/models/sam/modules/decoders.py
@@ -98,7 +98,11 @@ class MaskDecoder(nn.Module):
sparse_prompt_embeddings: torch.Tensor,
dense_prompt_embeddings: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor]:
- """Predicts masks. See 'forward' for more details."""
+ """
+ Predicts masks.
+
+ See 'forward' for more details.
+ """
# Concatenate output tokens
output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0)
output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.size(0), -1, -1)
diff --git a/ultralytics/models/sam/modules/encoders.py b/ultralytics/models/sam/modules/encoders.py
index eb9352f9..b4c07744 100644
--- a/ultralytics/models/sam/modules/encoders.py
+++ b/ultralytics/models/sam/modules/encoders.py
@@ -100,6 +100,9 @@ class ImageEncoderViT(nn.Module):
)
def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Processes input through patch embedding, applies positional embedding if present, and passes through blocks
+ and neck.
+ """
x = self.patch_embed(x)
if self.pos_embed is not None:
x = x + self.pos_embed
@@ -157,8 +160,8 @@ class PromptEncoder(nn.Module):
def get_dense_pe(self) -> torch.Tensor:
"""
- Returns the positional encoding used to encode point prompts,
- applied to a dense set of points the shape of the image encoding.
+ Returns the positional encoding used to encode point prompts, applied to a dense set of points the shape of the
+ image encoding.
Returns:
torch.Tensor: Positional encoding with shape 1x(embed_dim)x(embedding_h)x(embedding_w)
@@ -204,9 +207,7 @@ class PromptEncoder(nn.Module):
boxes: Optional[torch.Tensor],
masks: Optional[torch.Tensor],
) -> int:
- """
- Gets the batch size of the output given the batch size of the input prompts.
- """
+ """Gets the batch size of the output given the batch size of the input prompts."""
if points is not None:
return points[0].shape[0]
elif boxes is not None:
@@ -217,6 +218,7 @@ class PromptEncoder(nn.Module):
return 1
def _get_device(self) -> torch.device:
+ """Returns the device of the first point embedding's weight tensor."""
return self.point_embeddings[0].weight.device
def forward(
@@ -259,11 +261,10 @@ class PromptEncoder(nn.Module):
class PositionEmbeddingRandom(nn.Module):
- """
- Positional encoding using random spatial frequencies.
- """
+ """Positional encoding using random spatial frequencies."""
def __init__(self, num_pos_feats: int = 64, scale: Optional[float] = None) -> None:
+ """Initializes a position embedding using random spatial frequencies."""
super().__init__()
if scale is None or scale <= 0.0:
scale = 1.0
@@ -304,7 +305,7 @@ class PositionEmbeddingRandom(nn.Module):
class Block(nn.Module):
- """Transformer blocks with support of window attention and residual propagation blocks"""
+ """Transformer blocks with support of window attention and residual propagation blocks."""
def __init__(
self,
@@ -351,6 +352,7 @@ class Block(nn.Module):
self.window_size = window_size
def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Executes a forward pass through the transformer block with window attention and non-overlapping windows."""
shortcut = x
x = self.norm1(x)
# Window partition
@@ -404,6 +406,7 @@ class Attention(nn.Module):
self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))
def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Applies the forward operation including attention, normalization, MLP, and indexing within window limits."""
B, H, W, _ = x.shape
# qkv with shape (3, B, nHead, H * W, C)
qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
@@ -448,6 +451,7 @@ def window_unpartition(windows: torch.Tensor, window_size: int, pad_hw: Tuple[in
hw: Tuple[int, int]) -> torch.Tensor:
"""
Window unpartition into original sequences and removing padding.
+
Args:
windows (tensor): input tokens with [B * num_windows, window_size, window_size, C].
window_size (int): window size.
@@ -540,9 +544,7 @@ def add_decomposed_rel_pos(
class PatchEmbed(nn.Module):
- """
- Image to Patch Embedding.
- """
+ """Image to Patch Embedding."""
def __init__(
self,
@@ -565,4 +567,5 @@ class PatchEmbed(nn.Module):
self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=kernel_size, stride=stride, padding=padding)
def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Computes patch embedding by applying convolution and transposing resulting tensor."""
return self.proj(x).permute(0, 2, 3, 1) # B C H W -> B H W C
diff --git a/ultralytics/models/sam/modules/tiny_encoder.py b/ultralytics/models/sam/modules/tiny_encoder.py
index ca8de50b..d96b3032 100644
--- a/ultralytics/models/sam/modules/tiny_encoder.py
+++ b/ultralytics/models/sam/modules/tiny_encoder.py
@@ -23,6 +23,9 @@ from ultralytics.utils.instance import to_2tuple
class Conv2d_BN(torch.nn.Sequential):
def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, groups=1, bn_weight_init=1):
+ """Initializes the MBConv model with given input channels, output channels, expansion ratio, activation, and
+ drop path.
+ """
super().__init__()
self.add_module('c', torch.nn.Conv2d(a, b, ks, stride, pad, dilation, groups, bias=False))
bn = torch.nn.BatchNorm2d(b)
@@ -34,6 +37,9 @@ class Conv2d_BN(torch.nn.Sequential):
class PatchEmbed(nn.Module):
def __init__(self, in_chans, embed_dim, resolution, activation):
+ """Initialize the PatchMerging class with specified input, output dimensions, resolution and activation
+ function.
+ """
super().__init__()
img_size: Tuple[int, int] = to_2tuple(resolution)
self.patches_resolution = (img_size[0] // 4, img_size[1] // 4)
@@ -48,12 +54,16 @@ class PatchEmbed(nn.Module):
)
def forward(self, x):
+ """Runs input tensor 'x' through the PatchMerging model's sequence of operations."""
return self.seq(x)
class MBConv(nn.Module):
def __init__(self, in_chans, out_chans, expand_ratio, activation, drop_path):
+ """Initializes a convolutional layer with specified dimensions, input resolution, depth, and activation
+ function.
+ """
super().__init__()
self.in_chans = in_chans
self.hidden_chans = int(in_chans * expand_ratio)
@@ -73,6 +83,7 @@ class MBConv(nn.Module):
self.drop_path = nn.Identity()
def forward(self, x):
+ """Implements the forward pass for the model architecture."""
shortcut = x
x = self.conv1(x)
x = self.act1(x)
@@ -87,6 +98,9 @@ class MBConv(nn.Module):
class PatchMerging(nn.Module):
def __init__(self, input_resolution, dim, out_dim, activation):
+ """Initializes the ConvLayer with specific dimension, input resolution, depth, activation, drop path, and other
+ optional parameters.
+ """
super().__init__()
self.input_resolution = input_resolution
@@ -99,6 +113,7 @@ class PatchMerging(nn.Module):
self.conv3 = Conv2d_BN(out_dim, out_dim, 1, 1, 0)
def forward(self, x):
+ """Applies forward pass on the input utilizing convolution and activation layers, and returns the result."""
if x.ndim == 3:
H, W = self.input_resolution
B = len(x)
@@ -149,6 +164,7 @@ class ConvLayer(nn.Module):
input_resolution, dim=dim, out_dim=out_dim, activation=activation)
def forward(self, x):
+ """Processes the input through a series of convolutional layers and returns the activated output."""
for blk in self.blocks:
x = checkpoint.checkpoint(blk, x) if self.use_checkpoint else blk(x)
return x if self.downsample is None else self.downsample(x)
@@ -157,6 +173,7 @@ class ConvLayer(nn.Module):
class Mlp(nn.Module):
def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+ """Initializes Attention module with the given parameters including dimension, key_dim, number of heads, etc."""
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
@@ -167,6 +184,7 @@ class Mlp(nn.Module):
self.drop = nn.Dropout(drop)
def forward(self, x):
+ """Applies operations on input x and returns modified x, runs downsample if not None."""
x = self.norm(x)
x = self.fc1(x)
x = self.act(x)
@@ -216,6 +234,7 @@ class Attention(torch.nn.Module):
@torch.no_grad()
def train(self, mode=True):
+ """Sets the module in training mode and handles attribute 'ab' based on the mode."""
super().train(mode)
if mode and hasattr(self, 'ab'):
del self.ab
@@ -298,6 +317,9 @@ class TinyViTBlock(nn.Module):
self.local_conv = Conv2d_BN(dim, dim, ks=local_conv_size, stride=1, pad=pad, groups=dim)
def forward(self, x):
+ """Applies attention-based transformation or padding to input 'x' before passing it through a local
+ convolution.
+ """
H, W = self.input_resolution
B, L, C = x.shape
assert L == H * W, 'input feature has wrong size'
@@ -337,6 +359,9 @@ class TinyViTBlock(nn.Module):
return x + self.drop_path(self.mlp(x))
def extra_repr(self) -> str:
+ """Returns a formatted string representing the TinyViTBlock's parameters: dimension, input resolution, number of
+ attention heads, window size, and MLP ratio.
+ """
return f'dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, ' \
f'window_size={self.window_size}, mlp_ratio={self.mlp_ratio}'
@@ -402,23 +427,28 @@ class BasicLayer(nn.Module):
input_resolution, dim=dim, out_dim=out_dim, activation=activation)
def forward(self, x):
+ """Performs forward propagation on the input tensor and returns a normalized tensor."""
for blk in self.blocks:
x = checkpoint.checkpoint(blk, x) if self.use_checkpoint else blk(x)
return x if self.downsample is None else self.downsample(x)
def extra_repr(self) -> str:
+ """Returns a string representation of the extra_repr function with the layer's parameters."""
return f'dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}'
class LayerNorm2d(nn.Module):
+ """A PyTorch implementation of Layer Normalization in 2D."""
def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
+ """Initialize LayerNorm2d with the number of channels and an optional epsilon."""
super().__init__()
self.weight = nn.Parameter(torch.ones(num_channels))
self.bias = nn.Parameter(torch.zeros(num_channels))
self.eps = eps
def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Perform a forward pass, normalizing the input tensor."""
u = x.mean(1, keepdim=True)
s = (x - u).pow(2).mean(1, keepdim=True)
x = (x - u) / torch.sqrt(s + self.eps)
@@ -518,6 +548,7 @@ class TinyViT(nn.Module):
)
def set_layer_lr_decay(self, layer_lr_decay):
+ """Sets the learning rate decay for each layer in the TinyViT model."""
decay_rate = layer_lr_decay
# layers -> blocks (depth)
@@ -525,6 +556,7 @@ class TinyViT(nn.Module):
lr_scales = [decay_rate ** (depth - i - 1) for i in range(depth)]
def _set_lr_scale(m, scale):
+ """Sets the learning rate scale for each layer in the model based on the layer's depth."""
for p in m.parameters():
p.lr_scale = scale
@@ -544,12 +576,14 @@ class TinyViT(nn.Module):
p.param_name = k
def _check_lr_scale(m):
+ """Checks if the learning rate scale attribute is present in module's parameters."""
for p in m.parameters():
assert hasattr(p, 'lr_scale'), p.param_name
self.apply(_check_lr_scale)
def _init_weights(self, m):
+ """Initializes weights for linear layers and layer normalization in the given module."""
if isinstance(m, nn.Linear):
# NOTE: This initialization is needed only for training.
# trunc_normal_(m.weight, std=.02)
@@ -561,11 +595,12 @@ class TinyViT(nn.Module):
@torch.jit.ignore
def no_weight_decay_keywords(self):
+ """Returns a dictionary of parameter names where weight decay should not be applied."""
return {'attention_biases'}
def forward_features(self, x):
- # x: (N, C, H, W)
- x = self.patch_embed(x)
+ """Runs the input through the model layers and returns the transformed output."""
+ x = self.patch_embed(x) # x input is (N, C, H, W)
x = self.layers[0](x)
start_i = 1
@@ -579,4 +614,5 @@ class TinyViT(nn.Module):
return self.neck(x)
def forward(self, x):
+ """Executes a forward pass on the input tensor through the constructed model layers."""
return self.forward_features(x)
diff --git a/ultralytics/models/sam/modules/transformer.py b/ultralytics/models/sam/modules/transformer.py
index f925538b..95a04666 100644
--- a/ultralytics/models/sam/modules/transformer.py
+++ b/ultralytics/models/sam/modules/transformer.py
@@ -21,8 +21,7 @@ class TwoWayTransformer(nn.Module):
attention_downsample_rate: int = 2,
) -> None:
"""
- A transformer decoder that attends to an input image using
- queries whose positional embedding is supplied.
+ A transformer decoder that attends to an input image using queries whose positional embedding is supplied.
Args:
depth (int): number of layers in the transformer
@@ -171,8 +170,7 @@ class TwoWayAttentionBlock(nn.Module):
class Attention(nn.Module):
- """
- An attention layer that allows for downscaling the size of the embedding after projection to queries, keys, and
+ """An attention layer that allows for downscaling the size of the embedding after projection to queries, keys, and
values.
"""
diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py
index e8a8197b..31e0da93 100644
--- a/ultralytics/models/sam/predict.py
+++ b/ultralytics/models/sam/predict.py
@@ -19,6 +19,7 @@ from .build import build_sam
class Predictor(BasePredictor):
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+ """Initializes the Predictor class with default or provided configuration, overrides, and callbacks."""
if overrides is None:
overrides = {}
overrides.update(dict(task='segment', mode='predict', imgsz=1024))
@@ -34,7 +35,8 @@ class Predictor(BasePredictor):
self.segment_all = False
def preprocess(self, im):
- """Prepares input image before inference.
+ """
+ Prepares input image before inference.
Args:
im (torch.Tensor | List(np.ndarray)): BCHW for tensor, [(HWC) x B] for list.
@@ -189,7 +191,8 @@ class Predictor(BasePredictor):
stability_score_thresh=0.95,
stability_score_offset=0.95,
crop_nms_thresh=0.7):
- """Segment the whole image.
+ """
+ Segment the whole image.
Args:
im (torch.Tensor): The preprocessed image, (N, C, H, W).
@@ -360,14 +363,15 @@ class Predictor(BasePredictor):
self.prompts = prompts
def reset_image(self):
+ """Resets the image and its features to None."""
self.im = None
self.features = None
@staticmethod
def remove_small_regions(masks, min_area=0, nms_thresh=0.7):
"""
- Removes small disconnected regions and holes in masks, then reruns
- box NMS to remove any new duplicates. Requires open-cv as a dependency.
+ Removes small disconnected regions and holes in masks, then reruns box NMS to remove any new duplicates.
+ Requires open-cv as a dependency.
Args:
masks (torch.Tensor): Masks, (N, H, W).
diff --git a/ultralytics/models/utils/loss.py b/ultralytics/models/utils/loss.py
index 95406e1f..77eadce7 100644
--- a/ultralytics/models/utils/loss.py
+++ b/ultralytics/models/utils/loss.py
@@ -47,6 +47,7 @@ class DETRLoss(nn.Module):
self.device = None
def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=''):
+ """Computes the classification loss based on predictions, target values, and ground truth scores."""
# logits: [b, query, num_classes], gt_class: list[[n, 1]]
name_class = f'loss_class{postfix}'
bs, nq = pred_scores.shape[:2]
@@ -68,6 +69,9 @@ class DETRLoss(nn.Module):
return {name_class: loss_cls.squeeze() * self.loss_gain['class']}
def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=''):
+ """Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding
+ boxes.
+ """
# boxes: [b, query, 4], gt_bbox: list[[n, 4]]
name_bbox = f'loss_bbox{postfix}'
name_giou = f'loss_giou{postfix}'
@@ -125,7 +129,7 @@ class DETRLoss(nn.Module):
postfix='',
masks=None,
gt_mask=None):
- """Get auxiliary losses"""
+ """Get auxiliary losses."""
# NOTE: loss class, bbox, giou, mask, dice
loss = torch.zeros(5 if masks is not None else 3, device=pred_bboxes.device)
if match_indices is None and self.use_uni_match:
@@ -166,12 +170,14 @@ class DETRLoss(nn.Module):
@staticmethod
def _get_index(match_indices):
+ """Returns batch indices, source indices, and destination indices from provided match indices."""
batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(match_indices)])
src_idx = torch.cat([src for (src, _) in match_indices])
dst_idx = torch.cat([dst for (_, dst) in match_indices])
return (batch_idx, src_idx), dst_idx
def _get_assigned_bboxes(self, pred_bboxes, gt_bboxes, match_indices):
+ """Assigns predicted bounding boxes to ground truth bounding boxes based on the match indices."""
pred_assigned = torch.cat([
t[I] if len(I) > 0 else torch.zeros(0, t.shape[-1], device=self.device)
for t, (I, _) in zip(pred_bboxes, match_indices)])
@@ -190,7 +196,7 @@ class DETRLoss(nn.Module):
gt_mask=None,
postfix='',
match_indices=None):
- """Get losses"""
+ """Get losses."""
if match_indices is None:
match_indices = self.matcher(pred_bboxes,
pred_scores,
@@ -250,22 +256,43 @@ class DETRLoss(nn.Module):
class RTDETRDetectionLoss(DETRLoss):
+ """
+ Real-Time Detection Transformer (RT-DETR) Detection Loss class that extends the DETRLoss.
+
+ This class computes the detection loss for the RT-DETR model, which includes the standard detection loss as well as
+ an additional denoising training loss when provided with denoising metadata.
+ """
def forward(self, preds, batch, dn_bboxes=None, dn_scores=None, dn_meta=None):
+ """
+ Forward pass to compute the detection loss.
+
+ Args:
+ preds (tuple): Predicted bounding boxes and scores.
+ batch (dict): Batch data containing ground truth information.
+ dn_bboxes (torch.Tensor, optional): Denoising bounding boxes. Default is None.
+ dn_scores (torch.Tensor, optional): Denoising scores. Default is None.
+ dn_meta (dict, optional): Metadata for denoising. Default is None.
+
+ Returns:
+ (dict): Dictionary containing the total loss and, if applicable, the denoising loss.
+ """
pred_bboxes, pred_scores = preds
total_loss = super().forward(pred_bboxes, pred_scores, batch)
+ # Check for denoising metadata to compute denoising training loss
if dn_meta is not None:
dn_pos_idx, dn_num_group = dn_meta['dn_pos_idx'], dn_meta['dn_num_group']
assert len(batch['gt_groups']) == len(dn_pos_idx)
- # Denoising match indices
+ # Get the match indices for denoising
match_indices = self.get_dn_match_indices(dn_pos_idx, dn_num_group, batch['gt_groups'])
- # Compute denoising training loss
+ # Compute the denoising training loss
dn_loss = super().forward(dn_bboxes, dn_scores, batch, postfix='_dn', match_indices=match_indices)
total_loss.update(dn_loss)
else:
+ # If no denoising metadata is provided, set denoising loss to zero
total_loss.update({f'{k}_dn': torch.tensor(0., device=self.device) for k in total_loss.keys()})
return total_loss
@@ -276,12 +303,12 @@ class RTDETRDetectionLoss(DETRLoss):
Get the match indices for denoising.
Args:
- dn_pos_idx (List[torch.Tensor]): A list includes positive indices of denoising.
- dn_num_group (int): The number of groups of denoising.
- gt_groups (List(int)): a list of batch size length includes the number of gts of each image.
+ dn_pos_idx (List[torch.Tensor]): List of tensors containing positive indices for denoising.
+ dn_num_group (int): Number of denoising groups.
+ gt_groups (List[int]): List of integers representing the number of ground truths for each image.
Returns:
- dn_match_indices (List(tuple)): Matched indices.
+ (List[tuple]): List of tuples containing matched indices for denoising.
"""
dn_match_indices = []
idx_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)
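A worked example of the offset bookkeeping on the line above:

```python
import torch

# With gt_groups=[2, 3], image 0's ground truths start at index 0 and
# image 1's at index 2 in the flattened gt tensor.
gt_groups = [2, 3]
idx_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)
print(idx_groups)  # tensor([0, 2])
```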
diff --git a/ultralytics/models/utils/ops.py b/ultralytics/models/utils/ops.py
index abce97a6..99357d19 100644
--- a/ultralytics/models/utils/ops.py
+++ b/ultralytics/models/utils/ops.py
@@ -11,8 +11,8 @@ from ultralytics.utils.ops import xywh2xyxy, xyxy2xywh
class HungarianMatcher(nn.Module):
"""
- A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in
- an end-to-end fashion.
+ A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in an
+ end-to-end fashion.
HungarianMatcher performs optimal assignment over the predicted and ground truth bounding boxes using a cost
function that considers classification scores, bounding box coordinates, and optionally, mask predictions.
@@ -32,6 +32,9 @@ class HungarianMatcher(nn.Module):
"""
def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_sample_points=12544, alpha=0.25, gamma=2.0):
+ """Initializes HungarianMatcher with cost coefficients, Focal Loss, mask prediction, sample points, and alpha
+ and gamma factors.
+ """
super().__init__()
if cost_gain is None:
cost_gain = {'class': 1, 'bbox': 5, 'giou': 2, 'mask': 1, 'dice': 1}
@@ -45,8 +48,8 @@ class HungarianMatcher(nn.Module):
def forward(self, pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=None, gt_mask=None):
"""
Forward pass for HungarianMatcher. This function computes costs based on prediction and ground truth
- (classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching
- between predictions and ground truth based on these costs.
+ (classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching between
+ predictions and ground truth based on these costs.
Args:
pred_bboxes (Tensor): Predicted bounding boxes with shape [batch_size, num_queries, 4].
@@ -153,9 +156,9 @@ def get_cdn_group(batch,
box_noise_scale=1.0,
training=False):
"""
- Get contrastive denoising training group. This function creates a contrastive denoising training group with
- positive and negative samples from the ground truths (gt). It applies noise to the class labels and bounding
- box coordinates, and returns the modified labels, bounding boxes, attention mask and meta information.
+ Get contrastive denoising training group. This function creates a contrastive denoising training group with positive
+ and negative samples from the ground truths (gt). It applies noise to the class labels and bounding box coordinates,
+ and returns the modified labels, bounding boxes, attention mask and meta information.
Args:
batch (dict): A dict that includes 'gt_cls' (torch.Tensor with shape [num_gts, ]), 'gt_bboxes'
@@ -191,12 +194,12 @@ def get_cdn_group(batch,
gt_bbox = batch['bboxes'] # bs*num, 4
b_idx = batch['batch_idx']
- # each group has positive and negative queries.
+ # Each group has positive and negative queries.
dn_cls = gt_cls.repeat(2 * num_group) # (2*num_group*bs*num, )
dn_bbox = gt_bbox.repeat(2 * num_group, 1) # 2*num_group*bs*num, 4
dn_b_idx = b_idx.repeat(2 * num_group).view(-1) # (2*num_group*bs*num, )
- # positive and negative mask
+ # Positive and negative mask
# (bs*num*num_group, ), the second total_num*num_group part as negative samples
neg_idx = torch.arange(total_num * num_group, dtype=torch.long, device=gt_bbox.device) + num_group * total_num
@@ -220,10 +223,9 @@ def get_cdn_group(batch,
known_bbox += rand_part * diff
known_bbox.clip_(min=0.0, max=1.0)
dn_bbox = xyxy2xywh(known_bbox)
- dn_bbox = inverse_sigmoid(dn_bbox)
+ dn_bbox = torch.logit(dn_bbox, eps=1e-6) # inverse sigmoid
- # total denoising queries
- num_dn = int(max_nums * 2 * num_group)
+ num_dn = int(max_nums * 2 * num_group) # total denoising queries
# class_embed = torch.cat([class_embed, torch.zeros([1, class_embed.shape[-1]], device=class_embed.device)])
dn_cls_embed = class_embed[dn_cls] # bs*num * 2 * num_group, 256
padding_cls = torch.zeros(bs, num_dn, dn_cls_embed.shape[-1], device=gt_cls.device)
@@ -256,9 +258,3 @@ def get_cdn_group(batch,
return padding_cls.to(class_embed.device), padding_bbox.to(class_embed.device), attn_mask.to(
class_embed.device), dn_meta
-
-
-def inverse_sigmoid(x, eps=1e-6):
- """Inverse sigmoid function."""
- x = x.clip(min=0., max=1.)
- return torch.log(x / (1 - x + eps) + eps)
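Replacing the local `inverse_sigmoid` helper with `torch.logit` assumes the two are numerically interchangeable for in-range inputs; a quick sanity check of that assumption:

```python
import torch

x = torch.rand(5) * 0.98 + 0.01  # keep values safely inside (0, 1)
a = torch.logit(x, eps=1e-6)                                    # new call
b = torch.log(x.clip(0, 1) / (1 - x.clip(0, 1) + 1e-6) + 1e-6)  # removed helper
print(torch.allclose(a, b, atol=1e-3))  # True
```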
diff --git a/ultralytics/models/yolo/classify/predict.py b/ultralytics/models/yolo/classify/predict.py
index a22616e5..ca463b67 100644
--- a/ultralytics/models/yolo/classify/predict.py
+++ b/ultralytics/models/yolo/classify/predict.py
@@ -26,6 +26,7 @@ class ClassificationPredictor(BasePredictor):
"""
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+ """Initializes ClassificationPredictor setting the task to 'classify'."""
super().__init__(cfg, overrides, _callbacks)
self.args.task = 'classify'
diff --git a/ultralytics/models/yolo/classify/train.py b/ultralytics/models/yolo/classify/train.py
index 0829f05b..c59f2853 100644
--- a/ultralytics/models/yolo/classify/train.py
+++ b/ultralytics/models/yolo/classify/train.py
@@ -79,6 +79,7 @@ class ClassificationTrainer(BaseTrainer):
return ckpt
def build_dataset(self, img_path, mode='train', batch=None):
+ """Creates a ClassificationDataset instance given an image path, and mode (train/test etc.)."""
return ClassificationDataset(root=img_path, args=self.args, augment=mode == 'train', prefix=mode)
def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
@@ -113,8 +114,9 @@ class ClassificationTrainer(BaseTrainer):
def label_loss_items(self, loss_items=None, prefix='train'):
"""
- Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for
- segmentation & detection
+ Returns a loss dict with labelled training loss items tensor.
+
+ Not needed for classification but necessary for segmentation & detection.
"""
keys = [f'{prefix}/{x}' for x in self.loss_names]
if loss_items is None:
diff --git a/ultralytics/models/yolo/classify/val.py b/ultralytics/models/yolo/classify/val.py
index 0748e27f..3ebf3808 100644
--- a/ultralytics/models/yolo/classify/val.py
+++ b/ultralytics/models/yolo/classify/val.py
@@ -78,6 +78,7 @@ class ClassificationValidator(BaseValidator):
return self.metrics.results_dict
def build_dataset(self, img_path):
+ """Creates and returns a ClassificationDataset instance using given image path and preprocessing parameters."""
return ClassificationDataset(root=img_path, args=self.args, augment=False, prefix=self.args.split)
def get_dataloader(self, dataset_path, batch_size):
diff --git a/ultralytics/models/yolo/detect/train.py b/ultralytics/models/yolo/detect/train.py
index 56d9243c..d0028c6e 100644
--- a/ultralytics/models/yolo/detect/train.py
+++ b/ultralytics/models/yolo/detect/train.py
@@ -57,7 +57,7 @@ class DetectionTrainer(BaseTrainer):
return batch
def set_model_attributes(self):
- """nl = de_parallel(self.model).model[-1].nl # number of detection layers (to scale hyps)."""
+ """Nl = de_parallel(self.model).model[-1].nl # number of detection layers (to scale hyps)."""
# self.args.box *= 3 / nl # scale to layers
# self.args.cls *= self.data["nc"] / 80 * 3 / nl # scale to classes and layers
# self.args.cls *= (self.args.imgsz / 640) ** 2 * 3 / nl # scale to image size and layers
@@ -80,8 +80,9 @@ class DetectionTrainer(BaseTrainer):
def label_loss_items(self, loss_items=None, prefix='train'):
"""
- Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for
- segmentation & detection
+ Returns a loss dict with labelled training loss items tensor.
+
+ Not needed for classification but necessary for segmentation & detection.
"""
keys = [f'{prefix}/{x}' for x in self.loss_names]
if loss_items is not None:
diff --git a/ultralytics/models/yolo/model.py b/ultralytics/models/yolo/model.py
index b85d46bd..ef1b41ab 100644
--- a/ultralytics/models/yolo/model.py
+++ b/ultralytics/models/yolo/model.py
@@ -6,13 +6,11 @@ from ultralytics.nn.tasks import ClassificationModel, DetectionModel, PoseModel,
class YOLO(Model):
- """
- YOLO (You Only Look Once) object detection model.
- """
+ """YOLO (You Only Look Once) object detection model."""
@property
def task_map(self):
- """Map head to model, trainer, validator, and predictor classes"""
+ """Map head to model, trainer, validator, and predictor classes."""
return {
'classify': {
'model': ClassificationModel,
diff --git a/ultralytics/models/yolo/pose/predict.py b/ultralytics/models/yolo/pose/predict.py
index 14ae40b1..d00cea02 100644
--- a/ultralytics/models/yolo/pose/predict.py
+++ b/ultralytics/models/yolo/pose/predict.py
@@ -21,6 +21,7 @@ class PosePredictor(DetectionPredictor):
"""
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+ """Initializes PosePredictor, sets task to 'pose' and logs a warning for using 'mps' as device."""
super().__init__(cfg, overrides, _callbacks)
self.args.task = 'pose'
if isinstance(self.args.device, str) and self.args.device.lower() == 'mps':
diff --git a/ultralytics/models/yolo/segment/predict.py b/ultralytics/models/yolo/segment/predict.py
index 7d51f7d4..ba44a482 100644
--- a/ultralytics/models/yolo/segment/predict.py
+++ b/ultralytics/models/yolo/segment/predict.py
@@ -21,10 +21,12 @@ class SegmentationPredictor(DetectionPredictor):
"""
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+ """Initializes the SegmentationPredictor with the provided configuration, overrides, and callbacks."""
super().__init__(cfg, overrides, _callbacks)
self.args.task = 'segment'
def postprocess(self, preds, img, orig_imgs):
+ """Applies non-max suppression and processes detections for each image in an input batch."""
p = ops.non_max_suppression(preds[0],
self.args.conf,
self.args.iou,
diff --git a/ultralytics/models/yolo/segment/val.py b/ultralytics/models/yolo/segment/val.py
index 0a2acb41..599b0d53 100644
--- a/ultralytics/models/yolo/segment/val.py
+++ b/ultralytics/models/yolo/segment/val.py
@@ -144,7 +144,7 @@ class SegmentationValidator(DetectionValidator):
def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False):
"""
- Return correct prediction matrix
+ Return correct prediction matrix.
Args:
detections (array[N, 6]), x1, y1, x2, y2, conf, class
diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py
index 61ca6db6..ab5cb2fb 100644
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@@ -20,7 +20,11 @@ from ultralytics.utils.downloads import attempt_download_asset, is_url
def check_class_names(names):
- """Check class names. Map imagenet class codes to human-readable names if required. Convert lists to dicts."""
+ """
+ Check class names.
+
+ Map ImageNet class codes to human-readable names if required. Convert lists to dicts.
+ """
if isinstance(names, list): # names is a list
names = dict(enumerate(names)) # convert to dict
if isinstance(names, dict):
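For reference, the list-to-dict conversion mentioned in the docstring amounts to (values illustrative):

```python
names = ['person', 'bicycle', 'car']
names = dict(enumerate(names))  # {0: 'person', 1: 'bicycle', 2: 'car'}
```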
@@ -37,6 +41,32 @@ def check_class_names(names):
class AutoBackend(nn.Module):
+ """
+ Handles dynamic backend selection for running inference using Ultralytics YOLO models.
+
+ The AutoBackend class is designed to provide an abstraction layer for various inference engines. It supports a wide
+ range of formats, each with specific naming conventions as outlined below:
+
+ Supported Formats and Naming Conventions:
+ | Format | File Suffix |
+ |-----------------------|------------------|
+ | PyTorch | *.pt |
+ | TorchScript | *.torchscript |
+ | ONNX Runtime | *.onnx |
+ | ONNX OpenCV DNN | *.onnx (dnn=True)|
+ | OpenVINO | *openvino_model/ |
+ | CoreML | *.mlpackage |
+ | TensorRT | *.engine |
+ | TensorFlow SavedModel | *_saved_model |
+ | TensorFlow GraphDef | *.pb |
+ | TensorFlow Lite | *.tflite |
+ | TensorFlow Edge TPU | *_edgetpu.tflite |
+ | PaddlePaddle | *_paddle_model |
+ | ncnn | *_ncnn_model |
+
+ This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy
+ models across various platforms.
+ """
@torch.no_grad()
def __init__(self,
@@ -48,33 +78,16 @@ class AutoBackend(nn.Module):
fuse=True,
verbose=True):
"""
- MultiBackend class for python inference on various platforms using Ultralytics YOLO.
+ Initialize the AutoBackend for inference.
Args:
- weights (str): The path to the weights file. Default: 'yolov8n.pt'
- device (torch.device): The device to run the model on.
- dnn (bool): Use OpenCV DNN module for inference if True, defaults to False.
- data (str | Path | optional): Additional data.yaml file for class names.
- fp16 (bool): If True, use half precision. Default: False
- fuse (bool): Whether to fuse the model or not. Default: True
- verbose (bool): Whether to run in verbose mode or not. Default: True
-
- Supported formats and their naming conventions:
- | Format | Suffix |
- |-----------------------|------------------|
- | PyTorch | *.pt |
- | TorchScript | *.torchscript |
- | ONNX Runtime | *.onnx |
- | ONNX OpenCV DNN | *.onnx dnn=True |
- | OpenVINO | *.xml |
- | CoreML | *.mlpackage |
- | TensorRT | *.engine |
- | TensorFlow SavedModel | *_saved_model |
- | TensorFlow GraphDef | *.pb |
- | TensorFlow Lite | *.tflite |
- | TensorFlow Edge TPU | *_edgetpu.tflite |
- | PaddlePaddle | *_paddle_model |
- | ncnn | *_ncnn_model |
+ weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'.
+ device (torch.device): Device to run the model on. Defaults to CPU.
+ dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False.
+ data (str | Path, optional): Path to the additional data.yaml file containing class names.
+ fp16 (bool): Enable half-precision inference. Supported only on specific backends. Defaults to False.
+ fuse (bool): Fuse Conv2D + BatchNorm layers for optimization. Defaults to True.
+ verbose (bool): Enable verbose logging. Defaults to True.
"""
super().__init__()
w = str(weights[0] if isinstance(weights, list) else weights)
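A minimal usage sketch of the reworded constructor, assuming a local 'yolov8n.pt' checkpoint; the backend is inferred from the file suffix per the table above:

```python
import torch
from ultralytics.nn.autobackend import AutoBackend

model = AutoBackend('yolov8n.pt', device=torch.device('cpu'), fp16=False, fuse=True)
im = torch.zeros(1, 3, 640, 640)  # dummy BCHW input
y = model(im)  # runs inference through the auto-selected backend
```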
@@ -440,14 +453,14 @@ class AutoBackend(nn.Module):
def from_numpy(self, x):
"""
- Convert a numpy array to a tensor.
+ Convert a numpy array to a tensor.
- Args:
- x (np.ndarray): The array to be converted.
+ Args:
+ x (np.ndarray): The array to be converted.
- Returns:
- (torch.Tensor): The converted tensor
- """
+ Returns:
+ (torch.Tensor): The converted tensor
+ """
return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x
def warmup(self, imgsz=(1, 3, 640, 640)):
@@ -476,7 +489,7 @@ class AutoBackend(nn.Module):
@staticmethod
def _model_type(p='path/to/model.pt'):
"""
- This function takes a path to a model file and returns the model type
+ This function takes a path to a model file and returns the model type.
Args:
p: path to the model file. Defaults to path/to/model.pt
diff --git a/ultralytics/nn/modules/__init__.py b/ultralytics/nn/modules/__init__.py
index b6dc6c44..584a394f 100644
--- a/ultralytics/nn/modules/__init__.py
+++ b/ultralytics/nn/modules/__init__.py
@@ -1,16 +1,20 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
-Ultralytics modules. Visualize with:
+Ultralytics modules.
-from ultralytics.nn.modules import *
-import torch
-import os
+Example:
+ Visualize a module with Netron.
+ ```python
+ from ultralytics.nn.modules import *
+ import torch
+ import os
-x = torch.ones(1, 128, 40, 40)
-m = Conv(128, 128)
-f = f'{m._get_name()}.onnx'
-torch.onnx.export(m, x, f)
-os.system(f'onnxsim {f} {f} && open {f}')
+ x = torch.ones(1, 128, 40, 40)
+ m = Conv(128, 128)
+ f = f'{m._get_name()}.onnx'
+ torch.onnx.export(m, x, f)
+ os.system(f'onnxsim {f} {f} && open {f}')
+ ```
"""
from .block import (C1, C2, C3, C3TR, DFL, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, GhostBottleneck,
diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py
index d8183d84..593ae24c 100644
--- a/ultralytics/nn/modules/block.py
+++ b/ultralytics/nn/modules/block.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Block modules
-"""
+"""Block modules."""
import torch
import torch.nn as nn
@@ -17,6 +15,7 @@ __all__ = ('DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3', 'C2f', '
class DFL(nn.Module):
"""
Integral module of Distribution Focal Loss (DFL).
+
Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
"""
@@ -51,11 +50,14 @@ class Proto(nn.Module):
class HGStem(nn.Module):
- """StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
+ """
+ StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
+
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
"""
def __init__(self, c1, cm, c2):
+ """Initialize the SPP layer with input/output channels and specified kernel sizes for max pooling."""
super().__init__()
self.stem1 = Conv(c1, cm, 3, 2, act=nn.ReLU())
self.stem2a = Conv(cm, cm // 2, 2, 1, 0, act=nn.ReLU())
@@ -79,11 +81,14 @@ class HGStem(nn.Module):
class HGBlock(nn.Module):
- """HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
+ """
+ HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
+
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
"""
def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU()):
+ """Initializes a CSP Bottleneck with 1 convolution using specified input and output channels."""
super().__init__()
block = LightConv if lightconv else Conv
self.m = nn.ModuleList(block(c1 if i == 0 else cm, cm, k=k, act=act) for i in range(n))
@@ -218,6 +223,7 @@ class RepC3(nn.Module):
"""Rep C3."""
def __init__(self, c1, c2, n=3, e=1.0):
+ """Initialize CSP Bottleneck with a single convolution using input channels, output channels, and number."""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c2, 1, 1)
diff --git a/ultralytics/nn/modules/conv.py b/ultralytics/nn/modules/conv.py
index 77e99c00..21a27009 100644
--- a/ultralytics/nn/modules/conv.py
+++ b/ultralytics/nn/modules/conv.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Convolution modules
-"""
+"""Convolution modules."""
import math
@@ -69,7 +67,9 @@ class Conv2(Conv):
class LightConv(nn.Module):
- """Light convolution with args(ch_in, ch_out, kernel).
+ """
+ Light convolution with args(ch_in, ch_out, kernel).
+
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
"""
@@ -148,12 +148,15 @@ class GhostConv(nn.Module):
class RepConv(nn.Module):
"""
- RepConv is a basic rep-style block, including training and deploy status. This module is used in RT-DETR.
+ RepConv is a basic rep-style block, including training and deploy status.
+
+ This module is used in RT-DETR.
Based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
"""
default_act = nn.SiLU() # default activation
def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
+ """Initializes Light Convolution layer with inputs, outputs & optional activation function."""
super().__init__()
assert k == 3 and p == 1
self.g = g
@@ -166,27 +169,30 @@ class RepConv(nn.Module):
self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)
def forward_fuse(self, x):
- """Forward process"""
+ """Forward process."""
return self.act(self.conv(x))
def forward(self, x):
- """Forward process"""
+ """Forward process."""
id_out = 0 if self.bn is None else self.bn(x)
return self.act(self.conv1(x) + self.conv2(x) + id_out)
def get_equivalent_kernel_bias(self):
+ """Returns equivalent kernel and bias by adding 3x3 kernel, 1x1 kernel and identity kernel with their biases."""
kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
kernelid, biasid = self._fuse_bn_tensor(self.bn)
return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
def _pad_1x1_to_3x3_tensor(self, kernel1x1):
+ """Pads a 1x1 tensor to a 3x3 tensor."""
if kernel1x1 is None:
return 0
else:
return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])
def _fuse_bn_tensor(self, branch):
+ """Generates appropriate kernels and biases for convolution by fusing branches of the neural network."""
if branch is None:
return 0, 0
if isinstance(branch, Conv):
@@ -214,6 +220,7 @@ class RepConv(nn.Module):
return kernel * t, beta - running_mean * gamma / std
def fuse_convs(self):
+ """Combines two convolution layers into a single layer and removes unused attributes from the class."""
if hasattr(self, 'conv'):
return
kernel, bias = self.get_equivalent_kernel_bias()
@@ -243,12 +250,14 @@ class ChannelAttention(nn.Module):
"""Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet."""
def __init__(self, channels: int) -> None:
+ """Initializes the class and sets the basic configurations and instance variables required."""
super().__init__()
self.pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
self.act = nn.Sigmoid()
def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Applies forward pass using activation on convolutions of the input, optionally using batch normalization."""
return x * self.act(self.fc(self.pool(x)))
diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py
index 0b02eb3c..9e993d79 100644
--- a/ultralytics/nn/modules/head.py
+++ b/ultralytics/nn/modules/head.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Model head modules
-"""
+"""Model head modules."""
import math
@@ -229,6 +227,7 @@ class RTDETRDecoder(nn.Module):
self._reset_parameters()
def forward(self, x, batch=None):
+ """Runs the forward pass of the module, returning bounding box and classification scores for the input."""
from ultralytics.models.utils.ops import get_cdn_group
# input projection and embedding
@@ -265,6 +264,7 @@ class RTDETRDecoder(nn.Module):
return y if self.export else (y, x)
def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2):
+ """Generates anchor bounding boxes for given shapes with specific grid size and validates them."""
anchors = []
for i, (h, w) in enumerate(shapes):
sy = torch.arange(end=h, dtype=dtype, device=device)
@@ -284,6 +284,7 @@ class RTDETRDecoder(nn.Module):
return anchors, valid_mask
def _get_encoder_input(self, x):
+ """Processes and returns encoder inputs by getting projection features from input and concatenating them."""
# get projection features
x = [self.input_proj[i](feat) for i, feat in enumerate(x)]
# get encoder inputs
@@ -301,6 +302,7 @@ class RTDETRDecoder(nn.Module):
return feats, shapes
def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None):
+ """Generates and prepares the input required for the decoder from the provided features and shapes."""
bs = len(feats)
# prepare input for decoder
anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
@@ -339,6 +341,7 @@ class RTDETRDecoder(nn.Module):
# TODO
def _reset_parameters(self):
+ """Initializes or resets the parameters of the model's various components with predefined weights and biases."""
# class and bbox head init
bias_cls = bias_init_with_prob(0.01) / 80 * self.nc
# NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets.
diff --git a/ultralytics/nn/modules/transformer.py b/ultralytics/nn/modules/transformer.py
index 9a51d2cb..4b7c0868 100644
--- a/ultralytics/nn/modules/transformer.py
+++ b/ultralytics/nn/modules/transformer.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Transformer modules
-"""
+"""Transformer modules."""
import math
@@ -18,9 +16,10 @@ __all__ = ('TransformerEncoderLayer', 'TransformerLayer', 'TransformerBlock', 'M
class TransformerEncoderLayer(nn.Module):
- """Transformer Encoder."""
+ """Defines a single layer of the transformer encoder."""
def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False):
+ """Initialize the TransformerEncoderLayer with specified parameters."""
super().__init__()
from ...utils.torch_utils import TORCH_1_9
if not TORCH_1_9:
@@ -41,10 +40,11 @@ class TransformerEncoderLayer(nn.Module):
self.normalize_before = normalize_before
def with_pos_embed(self, tensor, pos=None):
- """Add position embeddings if given."""
+ """Add position embeddings to the tensor if provided."""
return tensor if pos is None else tensor + pos
def forward_post(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
+ """Performs forward pass with post-normalization."""
q = k = self.with_pos_embed(src, pos)
src2 = self.ma(q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
src = src + self.dropout1(src2)
@@ -54,6 +54,7 @@ class TransformerEncoderLayer(nn.Module):
return self.norm2(src)
def forward_pre(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
+ """Performs forward pass with pre-normalization."""
src2 = self.norm1(src)
q = k = self.with_pos_embed(src2, pos)
src2 = self.ma(q, k, value=src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
@@ -70,11 +71,14 @@ class TransformerEncoderLayer(nn.Module):
class AIFI(TransformerEncoderLayer):
+ """Defines the AIFI transformer layer."""
def __init__(self, c1, cm=2048, num_heads=8, dropout=0, act=nn.GELU(), normalize_before=False):
+ """Initialize the AIFI instance with specified parameters."""
super().__init__(c1, cm, num_heads, dropout, act, normalize_before)
def forward(self, x):
+ """Forward pass for the AIFI transformer layer."""
c, h, w = x.shape[1:]
pos_embed = self.build_2d_sincos_position_embedding(w, h, c)
# flatten [B, C, H, W] to [B, HxW, C]
@@ -82,7 +86,8 @@ class AIFI(TransformerEncoderLayer):
return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous()
@staticmethod
- def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.):
+ def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.0):
+ """Builds 2D sine-cosine position embedding."""
grid_w = torch.arange(int(w), dtype=torch.float32)
grid_h = torch.arange(int(h), dtype=torch.float32)
grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing='ij')
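For context, a minimal sketch of the 1D sine-cosine embedding that this method extends to two dimensions (sizes are illustrative):

```python
import torch

pos = torch.arange(4, dtype=torch.float32)  # positions 0..3
omega = 1.0 / (10000.0 ** (torch.arange(0, 8, 2, dtype=torch.float32) / 8))
angles = pos[:, None] * omega[None, :]      # (4, 4) position-frequency grid
emb = torch.cat([torch.sin(angles), torch.cos(angles)], dim=1)  # (4, 8)
```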
@@ -140,27 +145,32 @@ class TransformerBlock(nn.Module):
class MLPBlock(nn.Module):
+ """Implements a single block of a multi-layer perceptron."""
def __init__(self, embedding_dim, mlp_dim, act=nn.GELU):
+ """Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function."""
super().__init__()
self.lin1 = nn.Linear(embedding_dim, mlp_dim)
self.lin2 = nn.Linear(mlp_dim, embedding_dim)
self.act = act()
def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Forward pass for the MLPBlock."""
return self.lin2(self.act(self.lin1(x)))
class MLP(nn.Module):
- """ Very simple multi-layer perceptron (also called FFN)"""
+ """Implements a simple multi-layer perceptron (also called FFN)."""
def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
+ """Initialize the MLP with specified input, hidden, output dimensions and number of layers."""
super().__init__()
self.num_layers = num_layers
h = [hidden_dim] * (num_layers - 1)
self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
def forward(self, x):
+ """Forward pass for the entire MLP."""
for i, layer in enumerate(self.layers):
x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
return x
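A quick usage sketch of MLP, assuming it is importable from this module (dimensions illustrative):

```python
import torch
from ultralytics.nn.modules.transformer import MLP  # assumed import path

mlp = MLP(input_dim=256, hidden_dim=512, output_dim=4, num_layers=3)
x = torch.randn(2, 10, 256)
out = mlp(x)  # shape (2, 10, 4); ReLU is applied between all but the last layer
```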
@@ -168,17 +178,22 @@ class MLP(nn.Module):
class LayerNorm2d(nn.Module):
"""
- LayerNorm2d module from https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
+ 2D Layer Normalization module inspired by Detectron2 and ConvNeXt implementations.
+
+ Original implementation at
+ https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119
"""
def __init__(self, num_channels, eps=1e-6):
+ """Initialize LayerNorm2d with the given parameters."""
super().__init__()
self.weight = nn.Parameter(torch.ones(num_channels))
self.bias = nn.Parameter(torch.zeros(num_channels))
self.eps = eps
def forward(self, x):
+ """Perform forward pass for 2D layer normalization."""
u = x.mean(1, keepdim=True)
s = (x - u).pow(2).mean(1, keepdim=True)
x = (x - u) / torch.sqrt(s + self.eps)
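The normalization above is equivalent to nn.LayerNorm applied over the channel dimension in a channels-last view; a quick check of that equivalence:

```python
import torch
import torch.nn.functional as F

x = torch.randn(2, 8, 4, 4)  # NCHW
u = x.mean(1, keepdim=True)
s = (x - u).pow(2).mean(1, keepdim=True)
manual = (x - u) / torch.sqrt(s + 1e-6)  # normalization step shown above
ref = F.layer_norm(x.permute(0, 2, 3, 1), (8,), eps=1e-6).permute(0, 3, 1, 2)
print(torch.allclose(manual, ref, atol=1e-5))  # True
```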
@@ -187,11 +202,13 @@ class LayerNorm2d(nn.Module):
class MSDeformAttn(nn.Module):
"""
- Original Multi-Scale Deformable Attention Module.
+ Multi-Scale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations.
+
https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/ops/modules/ms_deform_attn.py
"""
def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4):
+ """Initialize MSDeformAttn with the given parameters."""
super().__init__()
if d_model % n_heads != 0:
raise ValueError(f'd_model must be divisible by n_heads, but got {d_model} and {n_heads}')
@@ -214,6 +231,7 @@ class MSDeformAttn(nn.Module):
self._reset_parameters()
def _reset_parameters(self):
+ """Reset module parameters."""
constant_(self.sampling_offsets.weight.data, 0.)
thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads)
grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
@@ -232,7 +250,10 @@ class MSDeformAttn(nn.Module):
def forward(self, query, refer_bbox, value, value_shapes, value_mask=None):
"""
+ Perform forward pass for multi-scale deformable attention.
+
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
+
Args:
query (torch.Tensor): [bs, query_length, C]
refer_bbox (torch.Tensor): [bs, query_length, n_levels, 2], range in [0, 1], top-left (0,0),
@@ -272,24 +293,27 @@ class MSDeformAttn(nn.Module):
class DeformableTransformerDecoderLayer(nn.Module):
"""
+ Deformable Transformer Decoder Layer inspired by PaddleDetection and Deformable-DETR implementations.
+
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/deformable_transformer.py
"""
def __init__(self, d_model=256, n_heads=8, d_ffn=1024, dropout=0., act=nn.ReLU(), n_levels=4, n_points=4):
+ """Initialize the DeformableTransformerDecoderLayer with the given parameters."""
super().__init__()
- # self attention
+ # Self attention
self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
self.dropout1 = nn.Dropout(dropout)
self.norm1 = nn.LayerNorm(d_model)
- # cross attention
+ # Cross attention
self.cross_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points)
self.dropout2 = nn.Dropout(dropout)
self.norm2 = nn.LayerNorm(d_model)
- # ffn
+ # FFN
self.linear1 = nn.Linear(d_model, d_ffn)
self.act = act
self.dropout3 = nn.Dropout(dropout)
@@ -299,37 +323,44 @@ class DeformableTransformerDecoderLayer(nn.Module):
@staticmethod
def with_pos_embed(tensor, pos):
+ """Add positional embeddings to the input tensor, if provided."""
return tensor if pos is None else tensor + pos
def forward_ffn(self, tgt):
+ """Perform forward pass through the Feed-Forward Network part of the layer."""
tgt2 = self.linear2(self.dropout3(self.act(self.linear1(tgt))))
tgt = tgt + self.dropout4(tgt2)
return self.norm3(tgt)
def forward(self, embed, refer_bbox, feats, shapes, padding_mask=None, attn_mask=None, query_pos=None):
- # self attention
+ """Perform the forward pass through the entire decoder layer."""
+
+ # Self attention
q = k = self.with_pos_embed(embed, query_pos)
tgt = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), embed.transpose(0, 1),
attn_mask=attn_mask)[0].transpose(0, 1)
embed = embed + self.dropout1(tgt)
embed = self.norm1(embed)
- # cross attention
+ # Cross attention
tgt = self.cross_attn(self.with_pos_embed(embed, query_pos), refer_bbox.unsqueeze(2), feats, shapes,
padding_mask)
embed = embed + self.dropout2(tgt)
embed = self.norm2(embed)
- # ffn
+ # FFN
return self.forward_ffn(embed)
class DeformableTransformerDecoder(nn.Module):
"""
+ Implementation of Deformable Transformer Decoder based on PaddleDetection.
+
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
"""
def __init__(self, hidden_dim, decoder_layer, num_layers, eval_idx=-1):
+ """Initialize the DeformableTransformerDecoder with the given parameters."""
super().__init__()
self.layers = _get_clones(decoder_layer, num_layers)
self.num_layers = num_layers
@@ -347,6 +378,7 @@ class DeformableTransformerDecoder(nn.Module):
pos_mlp,
attn_mask=None,
padding_mask=None):
+ """Perform the forward pass through the entire decoder."""
output = embed
dec_bboxes = []
dec_cls = []
diff --git a/ultralytics/nn/modules/utils.py b/ultralytics/nn/modules/utils.py
index f8636dc4..c7bec7af 100644
--- a/ultralytics/nn/modules/utils.py
+++ b/ultralytics/nn/modules/utils.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Module utils
-"""
+"""Module utils."""
import copy
import math
@@ -16,15 +14,17 @@ __all__ = 'multi_scale_deformable_attn_pytorch', 'inverse_sigmoid'
def _get_clones(module, n):
+ """Create a list of cloned modules from the given module."""
return nn.ModuleList([copy.deepcopy(module) for _ in range(n)])
def bias_init_with_prob(prior_prob=0.01):
- """initialize conv/fc bias value according to a given probability value."""
+ """Initialize conv/fc bias value according to a given probability value."""
return float(-np.log((1 - prior_prob) / prior_prob)) # return bias_init
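The formula inverts the sigmoid so that the initialized bias maps back to the desired prior probability; a quick check:

```python
import math

prior_prob = 0.01
bias = -math.log((1 - prior_prob) / prior_prob)  # same formula as above
assert abs(1 / (1 + math.exp(-bias)) - prior_prob) < 1e-12  # sigmoid(bias) == prior
```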
def linear_init_(module):
+ """Initialize the weights and biases of a linear module."""
bound = 1 / math.sqrt(module.weight.shape[0])
uniform_(module.weight, -bound, bound)
if hasattr(module, 'bias') and module.bias is not None:
@@ -32,6 +32,7 @@ def linear_init_(module):
def inverse_sigmoid(x, eps=1e-5):
+ """Calculate the inverse sigmoid function for a tensor."""
x = x.clamp(min=0, max=1)
x1 = x.clamp(min=eps)
x2 = (1 - x).clamp(min=eps)
@@ -43,6 +44,7 @@ def multi_scale_deformable_attn_pytorch(value: torch.Tensor, value_spatial_shape
attention_weights: torch.Tensor) -> torch.Tensor:
"""
Multi-scale deformable attention.
+
https://github.com/IDEA-Research/detrex/blob/main/detrex/layers/multi_scale_deform_attn.py
"""
diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py
index 24153d24..6145146b 100644
--- a/ultralytics/nn/tasks.py
+++ b/ultralytics/nn/tasks.py
@@ -25,14 +25,11 @@ except ImportError:
class BaseModel(nn.Module):
- """
- The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family.
- """
+ """The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family."""
def forward(self, x, *args, **kwargs):
"""
- Forward pass of the model on a single scale.
- Wrapper for `_forward_once` method.
+ Forward pass of the model on a single scale. Wrapper for `_forward_once` method.
Args:
x (torch.Tensor | dict): The input image tensor or a dict including image tensor and gt labels.
@@ -93,8 +90,8 @@ class BaseModel(nn.Module):
def _profile_one_layer(self, m, x, dt):
"""
- Profile the computation time and FLOPs of a single layer of the model on a given input.
- Appends the results to the provided list.
+ Profile the computation time and FLOPs of a single layer of the model on a given input. Appends the results to
+ the provided list.
Args:
m (nn.Module): The layer to be profiled.
@@ -158,7 +155,7 @@ class BaseModel(nn.Module):
def info(self, detailed=False, verbose=True, imgsz=640):
"""
- Prints model information
+ Prints model information.
Args:
detailed (bool): if True, prints out detailed information about the model. Defaults to False
@@ -175,7 +172,7 @@ class BaseModel(nn.Module):
fn (function): the function to apply to the model
Returns:
- A model that is a Detect() object.
+ (BaseModel): An updated BaseModel object.
"""
self = super()._apply(fn)
m = self.model[-1] # Detect()
@@ -202,7 +199,7 @@ class BaseModel(nn.Module):
def loss(self, batch, preds=None):
"""
- Compute loss
+ Compute loss.
Args:
batch (dict): Batch to compute loss on
@@ -215,6 +212,7 @@ class BaseModel(nn.Module):
return self.criterion(preds, batch)
def init_criterion(self):
+ """Initialize the loss criterion for the BaseModel."""
raise NotImplementedError('compute_loss() needs to be implemented by task heads')
@@ -222,6 +220,7 @@ class DetectionModel(BaseModel):
"""YOLOv8 detection model."""
def __init__(self, cfg='yolov8n.yaml', ch=3, nc=None, verbose=True): # model, input channels, number of classes
+ """Initialize the YOLOv8 detection model with the given config and parameters."""
super().__init__()
self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg) # cfg dict
@@ -289,6 +288,7 @@ class DetectionModel(BaseModel):
return y
def init_criterion(self):
+ """Initialize the loss criterion for the DetectionModel."""
return v8DetectionLoss(self)
@@ -300,6 +300,7 @@ class SegmentationModel(DetectionModel):
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
def init_criterion(self):
+ """Initialize the loss criterion for the SegmentationModel."""
return v8SegmentationLoss(self)
@@ -316,6 +317,7 @@ class PoseModel(DetectionModel):
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
def init_criterion(self):
+ """Initialize the loss criterion for the PoseModel."""
return v8PoseLoss(self)
@@ -365,22 +367,59 @@ class ClassificationModel(BaseModel):
m[i] = nn.Conv2d(m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None)
def init_criterion(self):
- """Compute the classification loss between predictions and true labels."""
+ """Initialize the loss criterion for the ClassificationModel."""
return v8ClassificationLoss()
class RTDETRDetectionModel(DetectionModel):
+ """
+ RTDETR (Real-time DEtection and Tracking using Transformers) Detection Model class.
+
+ This class is responsible for constructing the RTDETR architecture, defining loss functions, and
+ facilitating both the training and inference processes. RTDETR is an object detection and tracking model
+ that extends from the DetectionModel base class.
+
+ Attributes:
+ cfg (str): The configuration file path or preset string. Default is 'rtdetr-l.yaml'.
+ ch (int): Number of input channels. Default is 3 (RGB).
+ nc (int, optional): Number of classes for object detection. Default is None.
+ verbose (bool): Specifies if summary statistics are shown during initialization. Default is True.
+
+ Methods:
+ init_criterion: Initializes the criterion used for loss calculation.
+ loss: Computes and returns the loss during training.
+ predict: Performs a forward pass through the network and returns the output.
+ """
def __init__(self, cfg='rtdetr-l.yaml', ch=3, nc=None, verbose=True):
+ """
+ Initialize the RTDETRDetectionModel.
+
+ Args:
+ cfg (str): Configuration file name or path.
+ ch (int): Number of input channels.
+ nc (int, optional): Number of classes. Defaults to None.
+ verbose (bool, optional): Print additional information during initialization. Defaults to True.
+ """
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
def init_criterion(self):
- """Compute the classification loss between predictions and true labels."""
+ """Initialize the loss criterion for the RTDETRDetectionModel."""
from ultralytics.models.utils.loss import RTDETRDetectionLoss
return RTDETRDetectionLoss(nc=self.nc, use_vfl=True)
def loss(self, batch, preds=None):
+ """
+ Compute the loss for the given batch of data.
+
+ Args:
+ batch (dict): Dictionary containing image and label data.
+ preds (torch.Tensor, optional): Precomputed model predictions. Defaults to None.
+
+ Returns:
+ (tuple): A tuple containing the total loss and the three main losses in a tensor.
+ """
if not hasattr(self, 'criterion'):
self.criterion = self.init_criterion()
@@ -417,16 +456,17 @@ class RTDETRDetectionModel(DetectionModel):
def predict(self, x, profile=False, visualize=False, batch=None, augment=False):
"""
- Perform a forward pass through the network.
+ Perform a forward pass through the model.
Args:
- x (torch.Tensor): The input tensor to the model
- profile (bool): Print the computation time of each layer if True, defaults to False.
- visualize (bool): Save the feature maps of the model if True, defaults to False
- batch (dict): A dict including gt boxes and labels from dataloader.
+ x (torch.Tensor): The input tensor.
+ profile (bool, optional): If True, profile the computation time for each layer. Defaults to False.
+ visualize (bool, optional): If True, save feature maps for visualization. Defaults to False.
+ batch (dict, optional): Ground truth data for evaluation. Defaults to None.
+ augment (bool, optional): If True, perform data augmentation during inference. Defaults to False.
Returns:
- (torch.Tensor): The last output of the model.
+ (torch.Tensor): Model's output tensor.
"""
y, dt = [], [] # outputs
for m in self.model[:-1]: # except the head part
@@ -708,9 +748,9 @@ def yaml_model_load(path):
def guess_model_scale(model_path):
"""
- Takes a path to a YOLO model's YAML file as input and extracts the size character of the model's scale.
- The function uses regular expression matching to find the pattern of the model scale in the YAML file name,
- which is denoted by n, s, m, l, or x. The function returns the size character of the model scale as a string.
+ Takes a path to a YOLO model's YAML file as input and extracts the size character of the model's scale. The function
+ uses regular expression matching to find the pattern of the model scale in the YAML file name, which is denoted by
+ n, s, m, l, or x. The function returns the size character of the model scale as a string.
Args:
model_path (str | Path): The path to the YOLO model's YAML file.
diff --git a/ultralytics/trackers/bot_sort.py b/ultralytics/trackers/bot_sort.py
index d42d46eb..543d7526 100644
--- a/ultralytics/trackers/bot_sort.py
+++ b/ultralytics/trackers/bot_sort.py
@@ -12,6 +12,33 @@ from .utils.kalman_filter import KalmanFilterXYWH
class BOTrack(STrack):
+ """
+ An extended version of the STrack class for YOLOv8, adding object tracking features.
+
+ Attributes:
+ shared_kalman (KalmanFilterXYWH): A shared Kalman filter for all instances of BOTrack.
+ smooth_feat (np.ndarray): Smoothed feature vector.
+ curr_feat (np.ndarray): Current feature vector.
+ features (deque): A deque to store feature vectors with a maximum length defined by `feat_history`.
+ alpha (float): Smoothing factor for the exponential moving average of features.
+ mean (np.ndarray): The mean state of the Kalman filter.
+ covariance (np.ndarray): The covariance matrix of the Kalman filter.
+
+ Methods:
+ update_features(feat): Update features vector and smooth it using exponential moving average.
+ predict(): Predicts the mean and covariance using Kalman filter.
+ re_activate(new_track, frame_id, new_id): Reactivates a track with updated features and optionally new ID.
+ update(new_track, frame_id): Update the YOLOv8 instance with new track and frame ID.
+ tlwh: Property that gets the current position in tlwh format `(top left x, top left y, width, height)`.
+ multi_predict(stracks): Predicts the mean and covariance of multiple object tracks using shared Kalman filter.
+ convert_coords(tlwh): Converts tlwh bounding box coordinates to xywh format.
+ tlwh_to_xywh(tlwh): Convert bounding box to xywh format `(center x, center y, width, height)`.
+
+ Usage:
+ bo_track = BOTrack(tlwh, score, cls, feat)
+ bo_track.predict()
+ bo_track.update(new_track, frame_id)
+ """
shared_kalman = KalmanFilterXYWH()
def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
@@ -59,9 +86,7 @@ class BOTrack(STrack):
@property
def tlwh(self):
- """Get current position in bounding box format `(top left x, top left y,
- width, height)`.
- """
+ """Get current position in bounding box format `(top left x, top left y, width, height)`."""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
@@ -90,15 +115,37 @@ class BOTrack(STrack):
@staticmethod
def tlwh_to_xywh(tlwh):
- """Convert bounding box to format `(center x, center y, width,
- height)`.
- """
+ """Convert bounding box to format `(center x, center y, width, height)`."""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
return ret
class BOTSORT(BYTETracker):
+ """
+ An extended version of the BYTETracker class for YOLOv8, designed for object tracking with ReID and GMC algorithm.
+
+ Attributes:
+ proximity_thresh (float): Threshold for spatial proximity (IoU) between tracks and detections.
+ appearance_thresh (float): Threshold for appearance similarity (ReID embeddings) between tracks and detections.
+ encoder (object): Object to handle ReID embeddings, set to None if ReID is not enabled.
+ gmc (GMC): An instance of the GMC algorithm for data association.
+ args (object): Parsed command-line arguments containing tracking parameters.
+
+ Methods:
+ get_kalmanfilter(): Returns an instance of KalmanFilterXYWH for object tracking.
+ init_track(dets, scores, cls, img): Initialize track with detections, scores, and classes.
+ get_dists(tracks, detections): Get distances between tracks and detections using IoU and (optionally) ReID.
+ multi_predict(tracks): Predict and track multiple objects with YOLOv8 model.
+
+ Usage:
+ bot_sort = BOTSORT(args, frame_rate)
+ bot_sort.init_track(dets, scores, cls, img)
+ bot_sort.multi_predict(tracks)
+
+ Note:
+ The class is designed to work with the YOLOv8 object detection model and supports ReID only if enabled via args.
+ """
def __init__(self, args, frame_rate=30):
"""Initialize YOLOv8 object with ReID module and GMC algorithm."""
diff --git a/ultralytics/trackers/byte_tracker.py b/ultralytics/trackers/byte_tracker.py
index 04958cda..40954c98 100644
--- a/ultralytics/trackers/byte_tracker.py
+++ b/ultralytics/trackers/byte_tracker.py
@@ -8,10 +8,43 @@ from .utils.kalman_filter import KalmanFilterXYAH
class STrack(BaseTrack):
+ """
+ Single object tracking representation that uses Kalman filtering for state estimation.
+
+ This class is responsible for storing all the information regarding individual tracklets and performs state updates
+ and predictions based on Kalman filter.
+
+ Attributes:
+ shared_kalman (KalmanFilterXYAH): Shared Kalman filter that is used across all STrack instances for prediction.
+ _tlwh (np.ndarray): Private attribute to store top-left corner coordinates and width and height of bounding box.
+ kalman_filter (KalmanFilterXYAH): Instance of Kalman filter used for this particular object track.
+ mean (np.ndarray): Mean state estimate vector.
+ covariance (np.ndarray): Covariance of state estimate.
+ is_activated (bool): Boolean flag indicating if the track has been activated.
+ score (float): Confidence score of the track.
+ tracklet_len (int): Length of the tracklet.
+ cls (any): Class label for the object.
+ idx (int): Index or identifier for the object.
+ frame_id (int): Current frame ID.
+ start_frame (int): Frame where the object was first detected.
+
+ Methods:
+ predict(): Predict the next state of the object using Kalman filter.
+ multi_predict(stracks): Predict the next states for multiple tracks.
+ multi_gmc(stracks, H): Update multiple track states using a homography matrix.
+ activate(kalman_filter, frame_id): Activate a new tracklet.
+ re_activate(new_track, frame_id, new_id): Reactivate a previously lost tracklet.
+ update(new_track, frame_id): Update the state of a matched track.
+ convert_coords(tlwh): Convert bounding box to x-y-angle-height format.
+ tlwh_to_xyah(tlwh): Convert tlwh bounding box to xyah format.
+ tlbr_to_tlwh(tlbr): Convert tlbr bounding box to tlwh format.
+ tlwh_to_tlbr(tlwh): Convert tlwh bounding box to tlbr format.
+ """
+
shared_kalman = KalmanFilterXYAH()
def __init__(self, tlwh, score, cls):
- """wait activate."""
+ """Initialize new STrack instance."""
self._tlwh = np.asarray(self.tlbr_to_tlwh(tlwh[:-1]), dtype=np.float32)
self.kalman_filter = None
self.mean, self.covariance = None, None
@@ -92,10 +125,11 @@ class STrack(BaseTrack):
def update(self, new_track, frame_id):
"""
- Update a matched track
- :type new_track: STrack
- :type frame_id: int
- :return:
+ Update the state of a matched track.
+
+ Args:
+ new_track (STrack): The new track containing updated information.
+ frame_id (int): The ID of the current frame.
"""
self.frame_id = frame_id
self.tracklet_len += 1
@@ -116,9 +150,7 @@ class STrack(BaseTrack):
@property
def tlwh(self):
- """Get current position in bounding box format `(top left x, top left y,
- width, height)`.
- """
+ """Get current position in bounding box format (top left x, top left y, width, height)."""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
@@ -128,17 +160,15 @@ class STrack(BaseTrack):
@property
def tlbr(self):
- """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
- `(top left, bottom right)`.
- """
+ """Convert bounding box to format (min x, min y, max x, max y), i.e., (top left, bottom right)."""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@staticmethod
def tlwh_to_xyah(tlwh):
- """Convert bounding box to format `(center x, center y, aspect ratio,
- height)`, where the aspect ratio is `width / height`.
+ """Convert bounding box to format (center x, center y, aspect ratio, height), where the aspect ratio is width /
+ height.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
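A worked example of the conversion; the final aspect-ratio division is implied by the docstring's width / height definition:

```python
import numpy as np

tlwh = np.array([10.0, 20.0, 30.0, 60.0])  # top-left x/y, width, height
ret = tlwh.copy()
ret[:2] += ret[2:] / 2  # center: (25, 50)
ret[2] /= ret[3]        # aspect ratio: 30 / 60 = 0.5
# ret -> [25., 50., 0.5, 60.]
```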
@@ -165,6 +195,33 @@ class STrack(BaseTrack):
class BYTETracker:
+ """
+ BYTETracker: A tracking algorithm built on top of YOLOv8 for object detection and tracking.
+
+ The class is responsible for initializing, updating, and managing the tracks for detected objects in a video
+ sequence. It maintains the state of tracked, lost, and removed tracks over frames, utilizes Kalman filtering for
+ predicting the new object locations, and performs data association.
+
+ Attributes:
+ tracked_stracks (list[STrack]): List of successfully activated tracks.
+ lost_stracks (list[STrack]): List of lost tracks.
+ removed_stracks (list[STrack]): List of removed tracks.
+ frame_id (int): The current frame ID.
+ args (namespace): Command-line arguments.
+ max_time_lost (int): The maximum frames for a track to be considered as 'lost'.
+ kalman_filter (object): Kalman Filter object.
+
+ Methods:
+ update(results, img=None): Updates object tracker with new detections.
+ get_kalmanfilter(): Returns a Kalman filter object for tracking bounding boxes.
+ init_track(dets, scores, cls, img=None): Initialize object tracking with detections.
+ get_dists(tracks, detections): Calculates the distance between tracks and detections.
+ multi_predict(tracks): Predicts the location of tracks.
+ reset_id(): Resets the ID counter of STrack.
+ joint_stracks(tlista, tlistb): Combines two lists of stracks.
+ sub_stracks(tlista, tlistb): Filters out the stracks present in the second list from the first list.
+ remove_duplicate_stracks(stracksa, stracksb): Removes duplicate stracks based on IOU.
+ """
def __init__(self, args, frame_rate=30):
"""Initialize a YOLOv8 object to track objects with given arguments and frame rate."""
@@ -234,8 +291,7 @@ class BYTETracker:
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
- # Step 3: Second association, with low score detection boxes
- # association the untrack to the low score detections
+ # Step 3: Second association, with low score detection boxes (associate untracked stracks with low score detections)
detections_second = self.init_track(dets_second, scores_second, cls_second, img)
r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
# TODO
diff --git a/ultralytics/trackers/track.py b/ultralytics/trackers/track.py
index cfb4b08a..cf06c033 100644
--- a/ultralytics/trackers/track.py
+++ b/ultralytics/trackers/track.py
@@ -60,7 +60,6 @@ def register_tracker(model, persist):
Args:
model (object): The model object to register tracking callbacks for.
persist (bool): Whether to persist the trackers if they already exist.
-
"""
model.add_callback('on_predict_start', partial(on_predict_start, persist=persist))
model.add_callback('on_predict_postprocess_end', on_predict_postprocess_end)
diff --git a/ultralytics/trackers/utils/gmc.py b/ultralytics/trackers/utils/gmc.py
index 4d91df45..24e4907d 100644
--- a/ultralytics/trackers/utils/gmc.py
+++ b/ultralytics/trackers/utils/gmc.py
@@ -9,6 +9,29 @@ from ultralytics.utils import LOGGER
class GMC:
+ """
+ Generalized Motion Compensation (GMC) class for tracking and object detection in video frames.
+
+ This class provides methods for tracking and detecting objects based on several tracking algorithms including ORB,
+ SIFT, ECC, and Sparse Optical Flow. It also supports downscaling of frames for computational efficiency.
+
+ Attributes:
+ method (str): The method used for tracking. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.
+ downscale (int): Factor by which to downscale the frames for processing.
+ prevFrame (np.array): Stores the previous frame for tracking.
+ prevKeyPoints (list): Stores the keypoints from the previous frame.
+ prevDescriptors (np.array): Stores the descriptors from the previous frame.
+ initializedFirstFrame (bool): Flag to indicate if the first frame has been processed.
+
+ Methods:
+ __init__(self, method='sparseOptFlow', downscale=2): Initializes a GMC object with the specified method
+ and downscale factor.
+ apply(self, raw_frame, detections=None): Applies the chosen method to a raw frame and optionally uses
+ provided detections.
+ applyEcc(self, raw_frame, detections=None): Applies the ECC algorithm to a raw frame.
+ applyFeatures(self, raw_frame, detections=None): Applies feature-based methods like ORB or SIFT to a raw frame.
+ applySparseOptFlow(self, raw_frame, detections=None): Applies the Sparse Optical Flow method to a raw frame.
+ """
def __init__(self, method='sparseOptFlow', downscale=2):
"""Initialize a video tracker with specified parameters."""
diff --git a/ultralytics/trackers/utils/kalman_filter.py b/ultralytics/trackers/utils/kalman_filter.py
index 9527ede7..d7408274 100644
--- a/ultralytics/trackers/utils/kalman_filter.py
+++ b/ultralytics/trackers/utils/kalman_filter.py
@@ -8,8 +8,8 @@ class KalmanFilterXYAH:
"""
For bytetrack. A simple Kalman filter for tracking bounding boxes in image space.
- The 8-dimensional state space (x, y, a, h, vx, vy, va, vh) contains the bounding box center position (x, y),
- aspect ratio a, height h, and their respective velocities.
+ The 8-dimensional state space (x, y, a, h, vx, vy, va, vh) contains the bounding box center position (x, y), aspect
+ ratio a, height h, and their respective velocities.
Object motion follows a constant velocity model. The bounding box location (x, y, a, h) is taken as direct
observation of the state space (linear observation model).
@@ -182,8 +182,8 @@ class KalmanFilterXYAH:
def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'):
"""
Compute gating distance between state distribution and measurements. A suitable distance threshold can be
- obtained from `chi2inv95`. If `only_position` is False, the chi-square distribution has 4 degrees of
- freedom, otherwise 2.
+ obtained from `chi2inv95`. If `only_position` is False, the chi-square distribution has 4 degrees of freedom,
+ otherwise 2.
Parameters
----------
@@ -223,8 +223,8 @@ class KalmanFilterXYWH(KalmanFilterXYAH):
"""
For BoT-SORT. A simple Kalman filter for tracking bounding boxes in image space.
- The 8-dimensional state space (x, y, w, h, vx, vy, vw, vh) contains the bounding box center position (x, y),
- width w, height h, and their respective velocities.
+ The 8-dimensional state space (x, y, w, h, vx, vy, vw, vh) contains the bounding box center position (x, y), width
+ w, height h, and their respective velocities.
Object motion follows a constant velocity model. The bounding box location (x, y, w, h) is taken as direct
observation of the state space (linear observation model).
diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py
index c4490136..d8e57431 100644
--- a/ultralytics/utils/__init__.py
+++ b/ultralytics/utils/__init__.py
@@ -117,6 +117,7 @@ class TQDM(tqdm_original):
"""
def __init__(self, *args, **kwargs):
+ """Initialize custom Ultralytics tqdm class with different default arguments."""
# Set new default values (these can still be overridden when calling TQDM)
kwargs['disable'] = not VERBOSE or kwargs.get('disable', False) # logical 'and' with default value if passed
kwargs.setdefault('bar_format', TQDM_BAR_FORMAT) # override default value if passed
@@ -124,8 +125,7 @@ class TQDM(tqdm_original):
class SimpleClass:
- """
- Ultralytics SimpleClass is a base class providing helpful string representation, error reporting, and attribute
+ """Ultralytics SimpleClass is a base class providing helpful string representation, error reporting, and attribute
access methods for easier debugging and usage.
"""
@@ -154,8 +154,7 @@ class SimpleClass:
class IterableSimpleNamespace(SimpleNamespace):
- """
- Ultralytics IterableSimpleNamespace is an extension class of SimpleNamespace that adds iterable functionality and
+ """Ultralytics IterableSimpleNamespace is an extension class of SimpleNamespace that adds iterable functionality and
enables usage with dict() and for loops.
"""
@@ -256,8 +255,8 @@ class EmojiFilter(logging.Filter):
"""
A custom logging filter class for removing emojis in log messages.
- This filter is particularly useful for ensuring compatibility with Windows terminals
- that may not support the display of emojis in log messages.
+ This filter is particularly useful for ensuring compatibility with Windows terminals that may not support the
+ display of emojis in log messages.
"""
def filter(self, record):
@@ -275,9 +274,9 @@ if WINDOWS: # emoji-safe logging
class ThreadingLocked:
"""
- A decorator class for ensuring thread-safe execution of a function or method.
- This class can be used as a decorator to make sure that if the decorated function
- is called from multiple threads, only one thread at a time will be able to execute the function.
+ A decorator class for ensuring thread-safe execution of a function or method. This class can be used as a decorator
+ to make sure that if the decorated function is called from multiple threads, only one thread at a time will be able
+ to execute the function.
Attributes:
lock (threading.Lock): A lock object used to manage access to the decorated function.
@@ -294,13 +293,16 @@ class ThreadingLocked:
"""
def __init__(self):
+ """Initializes the decorator class for thread-safe execution of a function or method."""
self.lock = threading.Lock()
def __call__(self, f):
+ """Run thread-safe execution of function or method."""
from functools import wraps
@wraps(f)
def decorated(*args, **kwargs):
+ """Applies thread-safety to the decorated function or method."""
with self.lock:
return f(*args, **kwargs)
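A usage sketch of the decorator, assuming it is importable from ultralytics.utils:

```python
from ultralytics.utils import ThreadingLocked  # assumed import location

@ThreadingLocked()
def update_shared_state():
    ...  # only one thread at a time can execute this body
```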
@@ -424,8 +426,7 @@ def is_kaggle():
def is_jupyter():
"""
- Check if the current script is running inside a Jupyter Notebook.
- Verified on Colab, Jupyterlab, Kaggle, Paperspace.
+ Check if the current script is running inside a Jupyter Notebook. Verified on Colab, Jupyterlab, Kaggle, Paperspace.
Returns:
(bool): True if running inside a Jupyter Notebook, False otherwise.
@@ -529,8 +530,8 @@ def is_github_actions_ci() -> bool:
def is_git_dir():
"""
- Determines whether the current file is part of a git repository.
- If the current file is not part of a git repository, returns None.
+ Determines whether the current file is part of a git repository. If the current file is not part of a git
+ repository, returns False.
Returns:
(bool): True if current file is part of a git repository.
@@ -540,8 +541,8 @@ def is_git_dir():
def get_git_dir():
"""
- Determines whether the current file is part of a git repository and if so, returns the repository root directory.
- If the current file is not part of a git repository, returns None.
+ Determines whether the current file is part of a git repository and if so, returns the repository root directory. If
+ the current file is not part of a git repository, returns None.
Returns:
(Path | None): Git root directory if found or None if not found.
@@ -578,7 +579,8 @@ def get_git_branch():
def get_default_args(func):
- """Returns a dictionary of default arguments for a function.
+ """
+ Returns a dictionary of default arguments for a function.
Args:
func (callable): The function to inspect.
@@ -710,7 +712,11 @@ def remove_colorstr(input_string):
class TryExcept(contextlib.ContextDecorator):
- """YOLOv8 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager."""
+ """
+ YOLOv8 TryExcept class.
+
+ Use as @TryExcept() decorator or 'with TryExcept():' context manager.
+ """
def __init__(self, msg='', verbose=True):
"""Initialize TryExcept class with optional message and verbosity settings."""
@@ -729,7 +735,11 @@ class TryExcept(contextlib.ContextDecorator):
def threaded(func):
- """Multi-threads a target function and returns thread. Usage: @threaded decorator."""
+ """
+ Multi-threads a target function and returns thread.
+
+ Use as @threaded decorator.
+ """
def wrapper(*args, **kwargs):
"""Multi-threads a given function and returns the thread."""
@@ -824,6 +834,9 @@ class SettingsManager(dict):
"""
def __init__(self, file=SETTINGS_YAML, version='0.0.4'):
+ """Initialize the SettingsManager with default settings, load and validate current settings from the YAML
+ file.
+ """
import copy
import hashlib
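
Note: the three decorators touched in this file are easiest to understand side by side. A minimal usage sketch (the decorated functions and their bodies are hypothetical placeholders):

    from ultralytics.utils import ThreadingLocked, TryExcept, threaded

    @TryExcept(msg='download failed')  # logs exceptions instead of raising them
    @ThreadingLocked()                 # serializes concurrent calls with a shared lock
    def download_weights(url):
        ...  # hypothetical body

    @threaded
    def background_job():
        ...  # hypothetical body; runs in its own thread

    thread = background_job()  # the decorator returns the started Thread
    thread.join()
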
diff --git a/ultralytics/utils/autobatch.py b/ultralytics/utils/autobatch.py
index 4e9ed07c..89f7e99f 100644
--- a/ultralytics/utils/autobatch.py
+++ b/ultralytics/utils/autobatch.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch.
-"""
+"""Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch."""
from copy import deepcopy
diff --git a/ultralytics/utils/benchmarks.py b/ultralytics/utils/benchmarks.py
index e4135bc8..bf86b535 100644
--- a/ultralytics/utils/benchmarks.py
+++ b/ultralytics/utils/benchmarks.py
@@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
-Benchmark a YOLO model formats for speed and accuracy
+Benchmark YOLO model formats for speed and accuracy.
Usage:
from ultralytics.utils.benchmarks import ProfileModels, benchmark
@@ -194,6 +194,7 @@ class ProfileModels:
self.device = device or torch.device(0 if torch.cuda.is_available() else 'cpu')
def profile(self):
+        """Profiles the selected model files for speed, prints a formatted comparison table and returns the results."""
files = self.get_files()
if not files:
@@ -235,6 +236,7 @@ class ProfileModels:
return output
def get_files(self):
+ """Returns a list of paths for all relevant model files given by the user."""
files = []
for path in self.paths:
path = Path(path)
@@ -250,10 +252,14 @@ class ProfileModels:
return [Path(file) for file in sorted(files)]
def get_onnx_model_info(self, onnx_file: str):
+ """Retrieves the information including number of layers, parameters, gradients and FLOPs for an ONNX model
+ file.
+ """
# return (num_layers, num_params, num_gradients, num_flops)
return 0.0, 0.0, 0.0, 0.0
def iterative_sigma_clipping(self, data, sigma=2, max_iters=3):
+        """Applies an iterative sigma clipping algorithm to the given data for a set number of iterations."""
data = np.array(data)
for _ in range(max_iters):
mean, std = np.mean(data), np.std(data)
@@ -264,6 +270,7 @@ class ProfileModels:
return data
def profile_tensorrt_model(self, engine_file: str, eps: float = 1e-3):
+ """Profiles the TensorRT model, measuring average run time and standard deviation among runs."""
if not self.trt or not Path(engine_file).is_file():
return 0.0, 0.0
@@ -292,6 +299,9 @@ class ProfileModels:
return np.mean(run_times), np.std(run_times)
def profile_onnx_model(self, onnx_file: str, eps: float = 1e-3):
+ """Profiles an ONNX model by executing it multiple times and returns the mean and standard deviation of run
+ times.
+ """
check_requirements('onnxruntime')
import onnxruntime as ort
@@ -344,10 +354,12 @@ class ProfileModels:
return np.mean(run_times), np.std(run_times)
def generate_table_row(self, model_name, t_onnx, t_engine, model_info):
+ """Generates a formatted string for a table row that includes model performance and metric details."""
layers, params, gradients, flops = model_info
return f'| {model_name:18s} | {self.imgsz} | - | {t_onnx[0]:.2f} ± {t_onnx[1]:.2f} ms | {t_engine[0]:.2f} ± {t_engine[1]:.2f} ms | {params / 1e6:.1f} | {flops:.1f} |'
def generate_results_dict(self, model_name, t_onnx, t_engine, model_info):
+ """Generates a dictionary of model details including name, parameters, GFLOPS and speed metrics."""
layers, params, gradients, flops = model_info
return {
'model/name': model_name,
@@ -357,6 +369,7 @@ class ProfileModels:
'model/speed_TensorRT(ms)': round(t_engine[0], 3)}
def print_table(self, table_rows):
+ """Formats and prints a comparison table for different models with given statistics and performance data."""
gpu = torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'GPU'
header = f'| Model | size<br><sup>(pixels) | mAP<sup>val<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>{gpu} TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |'
separator = '|-------------|---------------------|--------------------|------------------------------|-----------------------------------|------------------|-----------------|'
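
For context on the sigma-clipping step used by the profilers above: each pass drops timings more than `sigma` standard deviations from the mean and stops early once nothing is clipped. A self-contained sketch consistent with the lines shown:

    import numpy as np

    def iterative_sigma_clipping(data, sigma=2, max_iters=3):
        """Iteratively remove outliers beyond sigma standard deviations of the mean."""
        data = np.array(data)
        for _ in range(max_iters):
            mean, std = np.mean(data), np.std(data)
            clipped = data[(data > mean - sigma * std) & (data < mean + sigma * std)]
            if len(clipped) == len(data):
                break  # converged: no more outliers to remove
            data = clipped
        return data
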
diff --git a/ultralytics/utils/callbacks/base.py b/ultralytics/utils/callbacks/base.py
index ace8bfbf..211ae5bf 100644
--- a/ultralytics/utils/callbacks/base.py
+++ b/ultralytics/utils/callbacks/base.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Base callbacks
-"""
+"""Base callbacks."""
from collections import defaultdict
from copy import deepcopy
diff --git a/ultralytics/utils/callbacks/comet.py b/ultralytics/utils/callbacks/comet.py
index 2da71a95..e8016f4e 100644
--- a/ultralytics/utils/callbacks/comet.py
+++ b/ultralytics/utils/callbacks/comet.py
@@ -26,31 +26,38 @@ except (ImportError, AssertionError):
def _get_comet_mode():
+ """Returns the mode of comet set in the environment variables, defaults to 'online' if not set."""
return os.getenv('COMET_MODE', 'online')
def _get_comet_model_name():
+ """Returns the model name for Comet from the environment variable 'COMET_MODEL_NAME' or defaults to 'YOLOv8'."""
return os.getenv('COMET_MODEL_NAME', 'YOLOv8')
def _get_eval_batch_logging_interval():
+ """Get the evaluation batch logging interval from environment variable or use default value 1."""
return int(os.getenv('COMET_EVAL_BATCH_LOGGING_INTERVAL', 1))
def _get_max_image_predictions_to_log():
+ """Get the maximum number of image predictions to log from the environment variables."""
return int(os.getenv('COMET_MAX_IMAGE_PREDICTIONS', 100))
def _scale_confidence_score(score):
+ """Scales the given confidence score by a factor specified in an environment variable."""
scale = float(os.getenv('COMET_MAX_CONFIDENCE_SCORE', 100.0))
return score * scale
def _should_log_confusion_matrix():
+ """Determines if the confusion matrix should be logged based on the environment variable settings."""
return os.getenv('COMET_EVAL_LOG_CONFUSION_MATRIX', 'false').lower() == 'true'
def _should_log_image_predictions():
+ """Determines whether to log image predictions based on a specified environment variable."""
return os.getenv('COMET_EVAL_LOG_IMAGE_PREDICTIONS', 'true').lower() == 'true'
@@ -104,9 +111,10 @@ def _fetch_trainer_metadata(trainer):
def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad):
- """YOLOv8 resizes images during training and the label values
- are normalized based on this resized shape. This function rescales the
- bounding box labels to the original image shape.
+ """
+ YOLOv8 resizes images during training and the label values are normalized based on this resized shape.
+
+ This function rescales the bounding box labels to the original image shape.
"""
resized_image_height, resized_image_width = resized_image_shape
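
All of the Comet helpers above are driven by environment variables, so logging behavior can be tuned without code changes; for example (the values shown are illustrative, defaults are those noted in the docstrings above):

    import os

    os.environ['COMET_MODE'] = 'offline'                    # default: 'online'
    os.environ['COMET_MODEL_NAME'] = 'my-yolov8'            # default: 'YOLOv8'
    os.environ['COMET_EVAL_BATCH_LOGGING_INTERVAL'] = '4'   # default: 1
    os.environ['COMET_MAX_IMAGE_PREDICTIONS'] = '50'        # default: 100
    os.environ['COMET_EVAL_LOG_CONFUSION_MATRIX'] = 'true'  # default: 'false'
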
diff --git a/ultralytics/utils/callbacks/dvc.py b/ultralytics/utils/callbacks/dvc.py
index 98e232b8..7fa05c6b 100644
--- a/ultralytics/utils/callbacks/dvc.py
+++ b/ultralytics/utils/callbacks/dvc.py
@@ -25,6 +25,7 @@ except (ImportError, AssertionError, TypeError):
def _log_images(path, prefix=''):
+ """Logs images at specified path with an optional prefix using DVCLive."""
if live:
name = path.name
@@ -38,6 +39,7 @@ def _log_images(path, prefix=''):
def _log_plots(plots, prefix=''):
+ """Logs plot images for training progress if they have not been previously processed."""
for name, params in plots.items():
timestamp = params['timestamp']
if _processed_plots.get(name) != timestamp:
@@ -46,6 +48,7 @@ def _log_plots(plots, prefix=''):
def _log_confusion_matrix(validator):
+ """Logs the confusion matrix for the given validator using DVCLive."""
targets = []
preds = []
matrix = validator.confusion_matrix.matrix
@@ -62,6 +65,7 @@ def _log_confusion_matrix(validator):
def on_pretrain_routine_start(trainer):
+ """Initializes DVCLive logger for training metadata during pre-training routine."""
try:
global live
live = dvclive.Live(save_dvc_exp=True, cache_images=True)
@@ -71,20 +75,24 @@ def on_pretrain_routine_start(trainer):
def on_pretrain_routine_end(trainer):
+ """Logs plots related to the training process at the end of the pretraining routine."""
_log_plots(trainer.plots, 'train')
def on_train_start(trainer):
+ """Logs the training parameters if DVCLive logging is active."""
if live:
live.log_params(trainer.args)
def on_train_epoch_start(trainer):
+    """Sets the global variable _training_epoch to True at the start of each training epoch."""
global _training_epoch
_training_epoch = True
def on_fit_epoch_end(trainer):
+    """Logs training metrics and model info, and advances to the next step at the end of each fit epoch."""
global _training_epoch
if live and _training_epoch:
all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr}
@@ -104,6 +112,7 @@ def on_fit_epoch_end(trainer):
def on_train_end(trainer):
+ """Logs the best metrics, plots, and confusion matrix at the end of training if DVCLive is active."""
if live:
# At the end log the best metrics. It runs validator on the best model internally.
all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr}
diff --git a/ultralytics/utils/callbacks/neptune.py b/ultralytics/utils/callbacks/neptune.py
index 40916a3c..088e3f8e 100644
--- a/ultralytics/utils/callbacks/neptune.py
+++ b/ultralytics/utils/callbacks/neptune.py
@@ -31,14 +31,13 @@ def _log_images(imgs_dict, group=''):
def _log_plot(title, plot_path):
- """Log plots to the NeptuneAI experiment logger."""
"""
- Log image as plot in the plot section of NeptuneAI
+ Log plots to the NeptuneAI experiment logger.
- arguments:
- title (str) Title of the plot
- plot_path (PosixPath or str) Path to the saved image file
- """
+ Args:
+ title (str): Title of the plot.
+ plot_path (PosixPath | str): Path to the saved image file.
+ """
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
diff --git a/ultralytics/utils/callbacks/wb.py b/ultralytics/utils/callbacks/wb.py
index dd2f5d32..b901e3cc 100644
--- a/ultralytics/utils/callbacks/wb.py
+++ b/ultralytics/utils/callbacks/wb.py
@@ -17,6 +17,7 @@ except (ImportError, AssertionError):
def _log_plots(plots, step):
+ """Logs plots from the input dictionary if they haven't been logged already at the specified step."""
for name, params in plots.items():
timestamp = params['timestamp']
if _processed_plots.get(name) != timestamp:
diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py
index 956f1b32..184ce06e 100644
--- a/ultralytics/utils/checks.py
+++ b/ultralytics/utils/checks.py
@@ -64,8 +64,8 @@ def parse_requirements(file_path=ROOT.parent / 'requirements.txt', package=''):
def parse_version(version='0.0.0') -> tuple:
"""
- Convert a version string to a tuple of integers, ignoring any extra non-numeric string attached to the version.
- This function replaces deprecated 'pkg_resources.parse_version(v)'
+ Convert a version string to a tuple of integers, ignoring any extra non-numeric string attached to the version. This
+ function replaces deprecated 'pkg_resources.parse_version(v)'.
Args:
version (str): Version string, i.e. '2.0.1+cpu'
@@ -372,8 +372,10 @@ def check_torchvision():
Checks the installed versions of PyTorch and Torchvision to ensure they're compatible.
This function checks the installed versions of PyTorch and Torchvision, and warns if they're incompatible according
- to the provided compatibility table based on https://github.com/pytorch/vision#installation. The
- compatibility table is a dictionary where the keys are PyTorch versions and the values are lists of compatible
+ to the provided compatibility table based on:
+ https://github.com/pytorch/vision#installation.
+
+ The compatibility table is a dictionary where the keys are PyTorch versions and the values are lists of compatible
Torchvision versions.
"""
@@ -527,9 +529,9 @@ def collect_system_info():
def check_amp(model):
"""
- This function checks the PyTorch Automatic Mixed Precision (AMP) functionality of a YOLOv8 model.
- If the checks fail, it means there are anomalies with AMP on the system that may cause NaN losses or zero-mAP
- results, so AMP will be disabled during training.
+ This function checks the PyTorch Automatic Mixed Precision (AMP) functionality of a YOLOv8 model. If the checks
+ fail, it means there are anomalies with AMP on the system that may cause NaN losses or zero-mAP results, so AMP will
+ be disabled during training.
Args:
model (nn.Module): A YOLOv8 model instance.
@@ -606,7 +608,8 @@ def print_args(args: Optional[dict] = None, show_file=True, show_func=False):
def cuda_device_count() -> int:
- """Get the number of NVIDIA GPUs available in the environment.
+ """
+ Get the number of NVIDIA GPUs available in the environment.
Returns:
(int): The number of NVIDIA GPUs available.
@@ -626,7 +629,8 @@ def cuda_device_count() -> int:
def cuda_is_available() -> bool:
- """Check if CUDA is available in the environment.
+ """
+ Check if CUDA is available in the environment.
Returns:
(bool): True if one or more NVIDIA GPUs are available, False otherwise.
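
As the parse_version docstring above implies, '2.0.1+cpu' should map to (2, 0, 1). A minimal regex-based sketch of that behavior (not necessarily the exact implementation):

    import re

    def parse_version(version='0.0.0') -> tuple:
        """Keep the first three integer groups, dropping suffixes like '+cpu'."""
        return tuple(map(int, re.findall(r'\d+', version)[:3]))

    assert parse_version('2.0.1+cpu') == (2, 0, 1)
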
diff --git a/ultralytics/utils/dist.py b/ultralytics/utils/dist.py
index 11900985..aaef2b94 100644
--- a/ultralytics/utils/dist.py
+++ b/ultralytics/utils/dist.py
@@ -13,7 +13,8 @@ from .torch_utils import TORCH_1_9
def find_free_network_port() -> int:
- """Finds a free port on localhost.
+ """
+ Finds a free port on localhost.
It is useful in single-node training when we don't want to connect to a real main node but have to set the
`MASTER_PORT` environment variable.
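
The standard trick behind a helper like find_free_network_port is to bind to port 0 and let the OS choose; a sketch under that assumption:

    import socket

    def find_free_network_port() -> int:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(('127.0.0.1', 0))  # port 0 asks the OS for any free port
            return s.getsockname()[1]
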
diff --git a/ultralytics/utils/downloads.py b/ultralytics/utils/downloads.py
index 3db1d3ec..10983a6e 100644
--- a/ultralytics/utils/downloads.py
+++ b/ultralytics/utils/downloads.py
@@ -69,8 +69,8 @@ def delete_dsstore(path, files_to_delete=('.DS_Store', '__MACOSX')):
def zip_directory(directory, compress=True, exclude=('.DS_Store', '__MACOSX'), progress=True):
"""
- Zips the contents of a directory, excluding files containing strings in the exclude list.
- The resulting zip file is named after the directory and placed alongside it.
+ Zips the contents of a directory, excluding files containing strings in the exclude list. The resulting zip file is
+ named after the directory and placed alongside it.
Args:
directory (str | Path): The path to the directory to be zipped.
@@ -341,7 +341,11 @@ def get_github_assets(repo='ultralytics/assets', version='latest', retry=False):
def attempt_download_asset(file, repo='ultralytics/assets', release='v0.0.0'):
- """Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v6.2', etc."""
+ """
+ Attempt file download from GitHub release assets if not found locally.
+
+ release = 'latest', 'v6.2', etc.
+ """
from ultralytics.utils import SETTINGS # scoped for circular import
# YOLOv3/5u updates
diff --git a/ultralytics/utils/files.py b/ultralytics/utils/files.py
index 0102c4b6..82e8ebfe 100644
--- a/ultralytics/utils/files.py
+++ b/ultralytics/utils/files.py
@@ -30,9 +30,9 @@ class WorkingDirectory(contextlib.ContextDecorator):
@contextmanager
def spaces_in_path(path):
"""
- Context manager to handle paths with spaces in their names.
- If a path contains spaces, it replaces them with underscores, copies the file/directory to the new path,
- executes the context code block, then copies the file/directory back to its original location.
+ Context manager to handle paths with spaces in their names. If a path contains spaces, it replaces them with
+ underscores, copies the file/directory to the new path, executes the context code block, then copies the
+ file/directory back to its original location.
Args:
path (str | Path): The original path.
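
In use, the context manager yields the underscore-substituted path and restores the original afterwards; a hypothetical example (`train` is a placeholder callable):

    from ultralytics.utils.files import spaces_in_path

    with spaces_in_path('datasets/my data/coco.yaml') as safe_path:
        train(data=safe_path)  # hypothetical; sees 'datasets/my_data/coco.yaml'
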
diff --git a/ultralytics/utils/instance.py b/ultralytics/utils/instance.py
index 4e2e4380..28f1f654 100644
--- a/ultralytics/utils/instance.py
+++ b/ultralytics/utils/instance.py
@@ -32,9 +32,14 @@ __all__ = 'Bboxes', # tuple or list
class Bboxes:
- """Bounding Boxes class. Only numpy variables are supported."""
+ """
+ Bounding Boxes class.
+
+ Only numpy variables are supported.
+ """
def __init__(self, bboxes, format='xyxy') -> None:
+ """Initializes the Bboxes class with bounding box data in a specified format."""
assert format in _formats, f'Invalid bounding box format: {format}, format must be one of {_formats}'
bboxes = bboxes[None, :] if bboxes.ndim == 1 else bboxes
assert bboxes.ndim == 2
@@ -194,7 +199,7 @@ class Instances:
return self._bboxes.areas()
def scale(self, scale_w, scale_h, bbox_only=False):
- """this might be similar with denormalize func but without normalized sign."""
+        """This might be similar to the denormalize function, but without the normalized sign."""
self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h))
if bbox_only:
return
@@ -307,7 +312,11 @@ class Instances:
self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h)
def remove_zero_area_boxes(self):
- """Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height. This removes them."""
+ """
+ Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height.
+
+ This removes them.
+ """
good = self.bbox_areas > 0
if not all(good):
self._bboxes = self._bboxes[good]
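
To make the zero-area case concrete: after clipping, a box whose x1 equals x2 (or y1 equals y2) has zero area and gets filtered out, e.g.:

    import numpy as np

    boxes = np.array([[10, 10, 50, 50],    # area 1600, kept
                      [20, 20, 20, 60]])   # zero width after clipping, removed
    good = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) > 0
    boxes = boxes[good]  # only the first box survives
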
diff --git a/ultralytics/utils/loss.py b/ultralytics/utils/loss.py
index dacf326f..62186678 100644
--- a/ultralytics/utils/loss.py
+++ b/ultralytics/utils/loss.py
@@ -13,7 +13,11 @@ from .tal import bbox2dist
class VarifocalLoss(nn.Module):
- """Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367."""
+ """
+ Varifocal loss by Zhang et al.
+
+    https://arxiv.org/abs/2008.13367
+ """
def __init__(self):
"""Initialize the VarifocalLoss class."""
@@ -33,6 +37,7 @@ class FocalLoss(nn.Module):
"""Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""
def __init__(self, ):
+ """Initializer for FocalLoss class with no parameters."""
super().__init__()
@staticmethod
@@ -93,6 +98,7 @@ class KeypointLoss(nn.Module):
"""Criterion class for computing training losses."""
def __init__(self, sigmas) -> None:
+ """Initialize the KeypointLoss class."""
super().__init__()
self.sigmas = sigmas
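
For context on VarifocalLoss above: the paper weights binary cross-entropy asymmetrically, down-weighting negatives by the predicted score raised to gamma while weighting positives by the IoU-aware target score. A rough sketch of that weighting (the alpha and gamma defaults here are assumptions):

    import torch.nn.functional as F

    def varifocal_loss(pred_score, gt_score, label, alpha=0.75, gamma=2.0):
        # negatives: alpha * p^gamma; positives: target quality score q
        weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label) + gt_score * label
        bce = F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), reduction='none')
        return (bce * weight).sum()
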
diff --git a/ultralytics/utils/metrics.py b/ultralytics/utils/metrics.py
index 574d4039..36957e9f 100644
--- a/ultralytics/utils/metrics.py
+++ b/ultralytics/utils/metrics.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Model validation metrics
-"""
+"""Model validation metrics."""
import math
import warnings
@@ -195,7 +193,7 @@ class ConfusionMatrix:
def process_cls_preds(self, preds, targets):
"""
- Update confusion matrix for classification task
+ Update confusion matrix for classification task.
Args:
preds (Array[N, min(nc,5)]): Predicted class labels.
@@ -308,9 +306,7 @@ class ConfusionMatrix:
on_plot(plot_fname)
def print(self):
- """
- Print the confusion matrix to the console.
- """
+ """Print the confusion matrix to the console."""
for i in range(self.nc + 1):
LOGGER.info(' '.join(map(str, self.matrix[i])))
@@ -440,7 +436,6 @@ def ap_per_class(tp,
f1 (np.ndarray): F1-score values at each confidence threshold.
ap (np.ndarray): Average precision for each class at different IoU thresholds.
unique_classes (np.ndarray): An array of unique classes that have data.
-
"""
# Sort by objectness
@@ -498,32 +493,33 @@ def ap_per_class(tp,
class Metric(SimpleClass):
"""
- Class for computing evaluation metrics for YOLOv8 model.
+ Class for computing evaluation metrics for YOLOv8 model.
- Attributes:
- p (list): Precision for each class. Shape: (nc,).
- r (list): Recall for each class. Shape: (nc,).
- f1 (list): F1 score for each class. Shape: (nc,).
- all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10).
- ap_class_index (list): Index of class for each AP score. Shape: (nc,).
- nc (int): Number of classes.
+ Attributes:
+ p (list): Precision for each class. Shape: (nc,).
+ r (list): Recall for each class. Shape: (nc,).
+ f1 (list): F1 score for each class. Shape: (nc,).
+ all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10).
+ ap_class_index (list): Index of class for each AP score. Shape: (nc,).
+ nc (int): Number of classes.
- Methods:
- ap50(): AP at IoU threshold of 0.5 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
- ap(): AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
- mp(): Mean precision of all classes. Returns: Float.
- mr(): Mean recall of all classes. Returns: Float.
- map50(): Mean AP at IoU threshold of 0.5 for all classes. Returns: Float.
- map75(): Mean AP at IoU threshold of 0.75 for all classes. Returns: Float.
- map(): Mean AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: Float.
- mean_results(): Mean of results, returns mp, mr, map50, map.
- class_result(i): Class-aware result, returns p[i], r[i], ap50[i], ap[i].
- maps(): mAP of each class. Returns: Array of mAP scores, shape: (nc,).
- fitness(): Model fitness as a weighted combination of metrics. Returns: Float.
- update(results): Update metric attributes with new evaluation results.
- """
+ Methods:
+ ap50(): AP at IoU threshold of 0.5 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
+ ap(): AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
+ mp(): Mean precision of all classes. Returns: Float.
+ mr(): Mean recall of all classes. Returns: Float.
+ map50(): Mean AP at IoU threshold of 0.5 for all classes. Returns: Float.
+ map75(): Mean AP at IoU threshold of 0.75 for all classes. Returns: Float.
+ map(): Mean AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: Float.
+ mean_results(): Mean of results, returns mp, mr, map50, map.
+ class_result(i): Class-aware result, returns p[i], r[i], ap50[i], ap[i].
+ maps(): mAP of each class. Returns: Array of mAP scores, shape: (nc,).
+ fitness(): Model fitness as a weighted combination of metrics. Returns: Float.
+ update(results): Update metric attributes with new evaluation results.
+ """
def __init__(self) -> None:
+ """Initializes a Metric instance for computing evaluation metrics for the YOLOv8 model."""
self.p = [] # (nc, )
self.r = [] # (nc, )
self.f1 = [] # (nc, )
@@ -606,12 +602,12 @@ class Metric(SimpleClass):
return [self.mp, self.mr, self.map50, self.map]
def class_result(self, i):
- """class-aware result, return p[i], r[i], ap50[i], ap[i]."""
+ """Class-aware result, return p[i], r[i], ap50[i], ap[i]."""
return self.p[i], self.r[i], self.ap50[i], self.ap[i]
@property
def maps(self):
- """mAP of each class."""
+        """Mean AP of each class."""
maps = np.zeros(self.nc) + self.map
for i, c in enumerate(self.ap_class_index):
maps[c] = self.ap[i]
@@ -672,6 +668,7 @@ class DetMetrics(SimpleClass):
"""
def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
+ """Initialize a DetMetrics instance with a save directory, plot flag, callback function, and class names."""
self.save_dir = save_dir
self.plot = plot
self.on_plot = on_plot
@@ -756,6 +753,7 @@ class SegmentMetrics(SimpleClass):
"""
def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
+ """Initialize a SegmentMetrics instance with a save directory, plot flag, callback function, and class names."""
self.save_dir = save_dir
self.plot = plot
self.on_plot = on_plot
@@ -865,6 +863,7 @@ class PoseMetrics(SegmentMetrics):
"""
def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
+ """Initialize the PoseMetrics class with directory path, class names, and plotting options."""
super().__init__(save_dir, plot, names)
self.save_dir = save_dir
self.plot = plot
@@ -954,6 +953,7 @@ class ClassifyMetrics(SimpleClass):
"""
def __init__(self) -> None:
+ """Initialize a ClassifyMetrics instance."""
self.top1 = 0
self.top5 = 0
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
diff --git a/ultralytics/utils/ops.py b/ultralytics/utils/ops.py
index 9089d0fa..5d37c591 100644
--- a/ultralytics/utils/ops.py
+++ b/ultralytics/utils/ops.py
@@ -50,6 +50,7 @@ class Profile(contextlib.ContextDecorator):
self.t += self.dt # accumulate dt
def __str__(self):
+ """Returns a human-readable string representing the accumulated elapsed time in the profiler."""
return f'Elapsed time is {self.t} s'
def time(self):
@@ -303,7 +304,7 @@ def clip_coords(coords, shape):
def scale_image(masks, im0_shape, ratio_pad=None):
"""
- Takes a mask, and resizes it to the original image size
+ Takes a mask, and resizes it to the original image size.
Args:
masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
@@ -403,8 +404,8 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
"""
- Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format.
- x, y, width and height are normalized to image dimensions
+ Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format. x, y,
+ width and height are normalized to image dimensions.
Args:
x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.
@@ -445,7 +446,7 @@ def xywh2ltwh(x):
def xyxy2ltwh(x):
"""
- Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right
+ Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right.
Args:
x (np.ndarray | torch.Tensor): The input tensor with the bounding boxes coordinates in the xyxy format
@@ -461,7 +462,7 @@ def xyxy2ltwh(x):
def ltwh2xywh(x):
"""
- Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center
+ Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center.
Args:
x (torch.Tensor): the input tensor
@@ -544,7 +545,7 @@ def xywhr2xyxyxyxy(center):
def ltwh2xyxy(x):
"""
- It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+ It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.
Args:
x (np.ndarray | torch.Tensor): the input image
@@ -616,8 +617,8 @@ def crop_mask(masks, boxes):
def process_mask_upsample(protos, masks_in, bboxes, shape):
"""
- Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher
- quality but is slower.
+ Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher quality
+ but is slower.
Args:
protos (torch.Tensor): [mask_dim, mask_h, mask_w]
@@ -713,7 +714,7 @@ def scale_masks(masks, shape, padding=True):
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False, padding=True):
"""
- Rescale segment coordinates (xy) from img1_shape to img0_shape
+ Rescale segment coordinates (xy) from img1_shape to img0_shape.
Args:
img1_shape (tuple): The shape of the image that the coords are from.
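
The box-format converters above are plain arithmetic; for instance, xyxy to normalized xywh on a 640x640 image works out as:

    # (x1, y1, x2, y2) = (100, 200, 300, 400) on a 640x640 image
    x_c = (100 + 300) / 2 / 640  # 0.3125   normalized center x
    y_c = (200 + 400) / 2 / 640  # 0.46875  normalized center y
    w   = (300 - 100) / 640      # 0.3125   normalized width
    h   = (400 - 200) / 640      # 0.3125   normalized height
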
diff --git a/ultralytics/utils/patches.py b/ultralytics/utils/patches.py
index a1457639..541cf45a 100644
--- a/ultralytics/utils/patches.py
+++ b/ultralytics/utils/patches.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Monkey patches to update/extend functionality of existing functions
-"""
+"""Monkey patches to update/extend functionality of existing functions."""
from pathlib import Path
@@ -14,7 +12,8 @@ _imshow = cv2.imshow # copy to avoid recursion errors
def imread(filename: str, flags: int = cv2.IMREAD_COLOR):
- """Read an image from a file.
+ """
+ Read an image from a file.
Args:
filename (str): Path to the file to read.
@@ -27,7 +26,8 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR):
def imwrite(filename: str, img: np.ndarray, params=None):
- """Write an image to a file.
+ """
+ Write an image to a file.
Args:
filename (str): Path to the file to write.
@@ -45,7 +45,8 @@ def imwrite(filename: str, img: np.ndarray, params=None):
def imshow(winname: str, mat: np.ndarray):
- """Displays an image in the specified window.
+ """
+ Displays an image in the specified window.
Args:
winname (str): Name of the window.
@@ -59,7 +60,8 @@ _torch_save = torch.save # copy to avoid recursion errors
def torch_save(*args, **kwargs):
- """Use dill (if exists) to serialize the lambda functions where pickle does not do this.
+ """
+ Use dill (if exists) to serialize the lambda functions where pickle does not do this.
Args:
*args (tuple): Positional arguments to pass to torch.save.
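
A plausible shape for this patch, assuming the fall-back behavior the docstring describes (dill when available, otherwise pickle):

    import torch

    _torch_save = torch.save  # keep a reference to avoid recursion

    def torch_save(*args, **kwargs):
        try:
            import dill as pickle  # dill can serialize lambdas, which pickle cannot
        except ImportError:
            import pickle
        kwargs.setdefault('pickle_module', pickle)
        return _torch_save(*args, **kwargs)
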
diff --git a/ultralytics/utils/plotting.py b/ultralytics/utils/plotting.py
index bfd2aaa1..88fb73c2 100644
--- a/ultralytics/utils/plotting.py
+++ b/ultralytics/utils/plotting.py
@@ -316,7 +316,8 @@ def plot_labels(boxes, cls, names=(), save_dir=Path(''), on_plot=None):
def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True):
- """Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop.
+ """
+ Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop.
This function takes a bounding box and an image, and then saves a cropped portion of the image according
to the bounding box. Optionally, the crop can be squared, and the function allows for gain and padding
diff --git a/ultralytics/utils/torch_utils.py b/ultralytics/utils/torch_utils.py
index 0ea74088..ea6d7a6b 100644
--- a/ultralytics/utils/torch_utils.py
+++ b/ultralytics/utils/torch_utils.py
@@ -205,7 +205,11 @@ def fuse_deconv_and_bn(deconv, bn):
def model_info(model, detailed=False, verbose=True, imgsz=640):
- """Model information. imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320]."""
+ """
+ Model information.
+
+ imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320].
+ """
if not verbose:
return
n_p = get_num_params(model) # number of parameters
@@ -517,13 +521,11 @@ def profile(input, ops, n=10, device=None):
class EarlyStopping:
- """
- Early stopping class that stops training when a specified number of epochs have passed without improvement.
- """
+ """Early stopping class that stops training when a specified number of epochs have passed without improvement."""
def __init__(self, patience=50):
"""
- Initialize early stopping object
+ Initialize early stopping object.
Args:
patience (int, optional): Number of epochs to wait after fitness stops improving before stopping.
@@ -535,7 +537,7 @@ class EarlyStopping:
def __call__(self, epoch, fitness):
"""
- Check whether to stop training
+ Check whether to stop training.
Args:
epoch (int): Current epoch of training
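
In a training loop the stopper is typically consulted once per epoch with the latest fitness; a hypothetical sketch (`validate` and `model` are placeholders):

    stopper = EarlyStopping(patience=50)
    for epoch in range(300):
        fitness = validate(model)  # hypothetical: returns a scalar fitness
        if stopper(epoch, fitness):
            break  # no improvement for `patience` consecutive epochs
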
diff --git a/ultralytics/utils/triton.py b/ultralytics/utils/triton.py
index c48e418a..45bb6e5b 100644
--- a/ultralytics/utils/triton.py
+++ b/ultralytics/utils/triton.py
@@ -7,7 +7,8 @@ import numpy as np
class TritonRemoteModel:
- """Client for interacting with a remote Triton Inference Server model.
+ """
+ Client for interacting with a remote Triton Inference Server model.
Attributes:
endpoint (str): The name of the model on the Triton server.