diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e2b477da..1dc23a87 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -62,6 +62,11 @@ repos:
args:
- --ignore-words-list=crate,nd,strack,dota,ane,segway,fo
+ - repo: https://github.com/PyCQA/docformatter
+ rev: v1.7.5
+ hooks:
+ - id: docformatter
+
# - repo: https://github.com/asottile/yesqa
# rev: v1.4.0
# hooks:
diff --git a/docs/build_reference.py b/docs/build_reference.py
index 3641b132..d8809234 100644
--- a/docs/build_reference.py
+++ b/docs/build_reference.py
@@ -18,7 +18,15 @@ CODE_DIR = ROOT
REFERENCE_DIR = ROOT.parent / 'docs/reference'
-def extract_classes_and_functions(filepath):
+def extract_classes_and_functions(filepath: Path):
+ """Extracts class and function names from a given Python file.
+
+ Args:
+ filepath (Path): The path to the Python file.
+
+ Returns:
+ (tuple): A tuple containing lists of class and function names.
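+
+ Example (a minimal usage sketch; the path below is illustrative):
+ >>> classes, functions = extract_classes_and_functions(Path('ultralytics/utils/ops.py'))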
+ """
with open(filepath, 'r') as file:
content = file.read()
@@ -31,7 +39,15 @@ def extract_classes_and_functions(filepath):
return classes, functions
-def create_markdown(py_filepath, module_path, classes, functions):
+def create_markdown(py_filepath: Path, module_path: str, classes: list, functions: list):
+ """Creates a Markdown file containing the API reference for the given Python module.
+
+ Args:
+ py_filepath (Path): The path to the Python file.
+ module_path (str): The import path for the Python module.
+ classes (list): A list of class names within the module.
+ functions (list): A list of function names within the module.
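+
+ Example (arguments are illustrative; real values come from extract_classes_and_functions):
+ >>> create_markdown(Path('reference/data/augment.py'), 'ultralytics.data.augment', ['Mosaic'], ['v8_transforms'])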
+ """
md_filepath = py_filepath.with_suffix('.md')
# Read existing content and keep header content between first two ---
@@ -64,17 +80,35 @@ def create_markdown(py_filepath, module_path, classes, functions):
def nested_dict():
+ """Creates and returns a nested defaultdict.
+
+ Returns:
+ (defaultdict): A nested defaultdict object.
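+
+ Example (nested keys can be assigned without explicit creation):
+ >>> d = nested_dict()
+ >>> d['a']['b']['c'] = 1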
+ """
return defaultdict(nested_dict)
-def sort_nested_dict(d):
+def sort_nested_dict(d: dict):
+ """Sorts a nested dictionary recursively.
+
+ Args:
+ d (dict): The dictionary to sort.
+
+ Returns:
+ (dict): The sorted dictionary.
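+
+ Example:
+ >>> sort_nested_dict({'b': 2, 'a': {'d': 4, 'c': 3}})
+ {'a': {'c': 3, 'd': 4}, 'b': 2}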
+ """
return {
key: sort_nested_dict(value) if isinstance(value, dict) else value
for key, value in sorted(d.items())
}
-def create_nav_menu_yaml(nav_items):
+def create_nav_menu_yaml(nav_items: list):
+ """Creates a YAML file for the navigation menu based on the provided list of items.
+
+ Args:
+ nav_items (list): A list of relative file paths to Markdown files for the navigation menu.
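+
+ Example (paths are illustrative):
+ >>> create_nav_menu_yaml(['reference/data/augment.md', 'reference/data/base.md'])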
+ """
nav_tree = nested_dict()
for item_str in nav_items:
@@ -90,6 +124,7 @@ def create_nav_menu_yaml(nav_items):
nav_tree_sorted = sort_nested_dict(nav_tree)
def _dict_to_yaml(d, level=0):
+ """Converts a nested dictionary to a YAML-formatted string with indentation."""
yaml_str = ''
indent = ' ' * level
for k, v in d.items():
@@ -105,6 +140,7 @@ def create_nav_menu_yaml(nav_items):
def main():
+ """Main function to extract class and function names, create Markdown files, and generate a YAML navigation menu."""
nav_items = []
for root, _, files in os.walk(CODE_DIR):
for file in files:
diff --git a/docs/reference/models/utils/ops.md b/docs/reference/models/utils/ops.md
index 9538ee48..d1f93358 100644
--- a/docs/reference/models/utils/ops.md
+++ b/docs/reference/models/utils/ops.md
@@ -16,7 +16,3 @@ keywords: Ultralytics, YOLO, HungarianMatcher, inverse_sigmoid, detection models
---
## ::: ultralytics.models.utils.ops.get_cdn_group
-
----
-## ::: ultralytics.models.utils.ops.inverse_sigmoid
-
diff --git a/examples/YOLOv8-ONNXRuntime/main.py b/examples/YOLOv8-ONNXRuntime/main.py
index 8d03182b..ec768713 100644
--- a/examples/YOLOv8-ONNXRuntime/main.py
+++ b/examples/YOLOv8-ONNXRuntime/main.py
@@ -9,11 +9,12 @@ from ultralytics.utils import ASSETS, yaml_load
from ultralytics.utils.checks import check_requirements, check_yaml
-class Yolov8:
+class YOLOv8:
+ """YOLOv8 object detection model class for handling inference and visualization."""
def __init__(self, onnx_model, input_image, confidence_thres, iou_thres):
"""
- Initializes an instance of the Yolov8 class.
+ Initializes an instance of the YOLOv8 class.
Args:
onnx_model: Path to the ONNX model.
@@ -213,8 +214,8 @@ if __name__ == '__main__':
# Check the requirements and select the appropriate backend (CPU or GPU)
check_requirements('onnxruntime-gpu' if torch.cuda.is_available() else 'onnxruntime')
- # Create an instance of the Yolov8 class with the specified arguments
- detection = Yolov8(args.model, args.img, args.conf_thres, args.iou_thres)
+ # Create an instance of the YOLOv8 class with the specified arguments
+ detection = YOLOv8(args.model, args.img, args.conf_thres, args.iou_thres)
# Perform object detection and obtain the output image
output_image = detection.main()
diff --git a/examples/YOLOv8-OpenCV-ONNX-Python/main.py b/examples/YOLOv8-OpenCV-ONNX-Python/main.py
index 76802f0a..78b0b08e 100644
--- a/examples/YOLOv8-OpenCV-ONNX-Python/main.py
+++ b/examples/YOLOv8-OpenCV-ONNX-Python/main.py
@@ -7,11 +7,22 @@ from ultralytics.utils import ASSETS, yaml_load
from ultralytics.utils.checks import check_yaml
CLASSES = yaml_load(check_yaml('coco128.yaml'))['names']
-
colors = np.random.uniform(0, 255, size=(len(CLASSES), 3))
def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
+ """
+ Draws bounding boxes on the input image based on the provided arguments.
+
+ Args:
+ img (numpy.ndarray): The input image to draw the bounding box on.
+ class_id (int): Class ID of the detected object.
+ confidence (float): Confidence score of the detected object.
+ x (int): X-coordinate of the top-left corner of the bounding box.
+ y (int): Y-coordinate of the top-left corner of the bounding box.
+ x_plus_w (int): X-coordinate of the bottom-right corner of the bounding box.
+ y_plus_h (int): Y-coordinate of the bottom-right corner of the bounding box.
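+
+ Example (coordinate and confidence values are illustrative; 'img' is an image array loaded beforehand):
+ >>> draw_bounding_box(img, class_id=0, confidence=0.92, x=50, y=40, x_plus_w=200, y_plus_h=180)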
+ """
label = f'{CLASSES[class_id]} ({confidence:.2f})'
color = colors[class_id]
cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
@@ -19,18 +30,39 @@ def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
def main(onnx_model, input_image):
+ """
+ Main function to load ONNX model, perform inference, draw bounding boxes, and display the output image.
+
+ Args:
+ onnx_model (str): Path to the ONNX model.
+ input_image (str): Path to the input image.
+
+ Returns:
+ (list): List of dictionaries containing detection information such as class_id, class_name, confidence, etc.
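+
+ Example (paths are illustrative):
+ >>> detections = main('yolov8n.onnx', 'bus.jpg')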
+ """
+ # Load the ONNX model
model: cv2.dnn.Net = cv2.dnn.readNetFromONNX(onnx_model)
+
+ # Read the input image
original_image: np.ndarray = cv2.imread(input_image)
[height, width, _] = original_image.shape
+
+ # Prepare a square image for inference
length = max((height, width))
image = np.zeros((length, length, 3), np.uint8)
image[0:height, 0:width] = original_image
+
+ # Calculate scale factor
scale = length / 640
+ # Preprocess the image and prepare blob for model
blob = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(640, 640), swapRB=True)
model.setInput(blob)
+
+ # Perform inference
outputs = model.forward()
+ # Prepare output array
outputs = np.array([cv2.transpose(outputs[0])])
rows = outputs.shape[1]
@@ -38,6 +70,7 @@ def main(onnx_model, input_image):
scores = []
class_ids = []
+ # Iterate through output to collect bounding boxes, confidence scores, and class IDs
for i in range(rows):
classes_scores = outputs[0][i][4:]
(minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores)
@@ -49,9 +82,12 @@ def main(onnx_model, input_image):
scores.append(maxScore)
class_ids.append(maxClassIndex)
+ # Apply NMS (Non-maximum suppression)
result_boxes = cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45, 0.5)
detections = []
+
+ # Iterate through NMS results to draw bounding boxes and labels
for i in range(len(result_boxes)):
index = result_boxes[i]
box = boxes[index]
@@ -65,6 +101,7 @@ def main(onnx_model, input_image):
draw_bounding_box(original_image, class_ids[index], scores[index], round(box[0] * scale), round(box[1] * scale),
round((box[0] + box[2]) * scale), round((box[1] + box[3]) * scale))
+ # Display the image with bounding boxes
cv2.imshow('image', original_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
@@ -74,7 +111,7 @@ def main(onnx_model, input_image):
if __name__ == '__main__':
parser = argparse.ArgumentParser()
- parser.add_argument('--model', default='yolov8n.onnx', help='Input your onnx model.')
+ parser.add_argument('--model', default='yolov8n.onnx', help='Input your ONNX model.')
parser.add_argument('--img', default=str(ASSETS / 'bus.jpg'), help='Path to input image.')
args = parser.parse_args()
main(args.model, args.img)
diff --git a/examples/YOLOv8-Region-Counter/yolov8_region_counter.py b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py
index bdc40132..dd0e476f 100644
--- a/examples/YOLOv8-Region-Counter/yolov8_region_counter.py
+++ b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py
@@ -33,10 +33,6 @@ counting_regions = [
}, ]
-def is_inside_polygon(point, polygon):
- return polygon.contains(Point(point))
-
-
def mouse_callback(event, x, y, flags, param):
"""Mouse call back event."""
global current_region
@@ -44,7 +40,7 @@ def mouse_callback(event, x, y, flags, param):
# Mouse left button down event
if event == cv2.EVENT_LBUTTONDOWN:
for region in counting_regions:
- if is_inside_polygon((x, y), region['polygon']):
+ if region['polygon'].contains(Point((x, y))):
current_region = region
current_region['dragging'] = True
current_region['offset_x'] = x
@@ -150,7 +146,7 @@ def run(
# Check if detection inside region
for region in counting_regions:
- if is_inside_polygon((x, y), region['polygon']):
+ if region['polygon'].contains(Point((x, y))):
region['counts'] += 1
# Draw regions (Polygons/Rectangles)
diff --git a/setup.cfg b/setup.cfg
index a7d16ab9..ff364449 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -60,3 +60,12 @@ SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET = True
SPLIT_BEFORE_CLOSING_BRACKET = False
SPLIT_BEFORE_FIRST_ARGUMENT = False
# EACH_DICT_ENTRY_ON_SEPARATE_LINE = False
+
+[docformatter]
+wrap-summaries = 120
+wrap-descriptions = 120
+in-place = true
+make-summary-multi-line = false
+pre-summary-newline = true
+force-wrap = false
+close-quotes-on-newline = true
diff --git a/setup.py b/setup.py
index 1132ffa1..8fb107c8 100644
--- a/setup.py
+++ b/setup.py
@@ -12,6 +12,12 @@ README = (PARENT / 'README.md').read_text(encoding='utf-8')
def get_version():
+ """
+ Retrieve the version number from the 'ultralytics/__init__.py' file.
+
+ Returns:
+ (str): The version number extracted from the '__version__' attribute in the 'ultralytics/__init__.py' file.
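+
+ Example:
+ >>> get_version()  # e.g. '8.0.0'; the actual value is read from ultralytics/__init__.py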
+ """
file = PARENT / 'ultralytics/__init__.py'
return re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', file.read_text(encoding='utf-8'), re.M)[1]
@@ -24,7 +30,7 @@ def parse_requirements(file_path: Path):
file_path (str | Path): Path to the requirements.txt file.
Returns:
- List[str]: List of parsed requirements.
+ (List[str]): List of parsed requirements.
"""
requirements = []
diff --git a/tests/conftest.py b/tests/conftest.py
index ac909310..59955bd1 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -9,7 +9,8 @@ TMP = Path(__file__).resolve().parent / 'tmp' # temp directory for test files
def pytest_addoption(parser):
- """Add custom command-line options to pytest.
+ """
+ Add custom command-line options to pytest.
Args:
parser (pytest.config.Parser): The pytest parser object.
@@ -18,7 +19,8 @@ def pytest_addoption(parser):
def pytest_configure(config):
- """Register custom markers to avoid pytest warnings.
+ """
+ Register custom markers to avoid pytest warnings.
Args:
config (pytest.config.Config): The pytest config object.
@@ -27,7 +29,8 @@ def pytest_configure(config):
def pytest_runtest_setup(item):
- """Setup hook to skip tests marked as slow if the --slow option is not provided.
+ """
+ Setup hook to skip tests marked as slow if the --slow option is not provided.
Args:
item (pytest.Item): The test item object.
diff --git a/tests/test_cli.py b/tests/test_cli.py
index a935aa0e..788651b4 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -22,11 +22,12 @@ EXPORT_ARGS = [
def run(cmd):
- # Run a subprocess command with check=True
+ """Execute a shell command using subprocess."""
subprocess.run(cmd.split(), check=True)
def test_special_modes():
+ """Test various special command modes of YOLO."""
run('yolo help')
run('yolo checks')
run('yolo version')
@@ -36,31 +37,37 @@ def test_special_modes():
@pytest.mark.parametrize('task,model,data', TASK_ARGS)
def test_train(task, model, data):
+ """Test YOLO training for a given task, model, and data."""
run(f'yolo train {task} model={model}.yaml data={data} imgsz=32 epochs=1 cache=disk')
@pytest.mark.parametrize('task,model,data', TASK_ARGS)
def test_val(task, model, data):
+ """Test YOLO validation for a given task, model, and data."""
run(f'yolo val {task} model={WEIGHTS_DIR / model}.pt data={data} imgsz=32 save_txt save_json')
@pytest.mark.parametrize('task,model,data', TASK_ARGS)
def test_predict(task, model, data):
+ """Test YOLO prediction on sample assets for a given task and model."""
run(f'yolo predict model={WEIGHTS_DIR / model}.pt source={ASSETS} imgsz=32 save save_crop save_txt')
@pytest.mark.parametrize('model,format', EXPORT_ARGS)
def test_export(model, format):
+ """Test exporting a YOLO model to different formats."""
run(f'yolo export model={WEIGHTS_DIR / model}.pt format={format} imgsz=32')
def test_rtdetr(task='detect', model='yolov8n-rtdetr.yaml', data='coco8.yaml'):
+ """Test the RTDETR functionality with the Ultralytics framework."""
# Warning: MUST use imgsz=640
- run(f'yolo train {task} model={model} data={data} --imgsz= 640 epochs =1, cache = disk') # add coma, spaces to args
+ run(f'yolo train {task} model={model} data={data} --imgsz= 640 epochs =1, cache = disk') # add comma, spaces to args
run(f"yolo predict {task} model={model} source={ASSETS / 'bus.jpg'} imgsz=640 save save_crop save_txt")
def test_fastsam(task='segment', model=WEIGHTS_DIR / 'FastSAM-s.pt', data='coco8-seg.yaml'):
+ """Test FastSAM segmentation functionality within Ultralytics."""
source = ASSETS / 'bus.jpg'
run(f'yolo segment val {task} model={model} data={data} imgsz=32')
@@ -97,6 +104,7 @@ def test_fastsam(task='segment', model=WEIGHTS_DIR / 'FastSAM-s.pt', data='coco8
def test_mobilesam():
+ """Test MobileSAM segmentation functionality using Ultralytics."""
from ultralytics import SAM
# Load the model
@@ -121,5 +129,6 @@ def test_mobilesam():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
@pytest.mark.skipif(CUDA_DEVICE_COUNT < 2, reason='DDP is not available')
def test_train_gpu(task, model, data):
+ """Test YOLO training on GPU(s) for various tasks and models."""
run(f'yolo train {task} model={model}.yaml data={data} imgsz=32 epochs=1 device=0') # single GPU
run(f'yolo train {task} model={model}.pt data={data} imgsz=32 epochs=1 device=0,1') # multi GPU
diff --git a/tests/test_cuda.py b/tests/test_cuda.py
index 92ecbe3d..15c2259b 100644
--- a/tests/test_cuda.py
+++ b/tests/test_cuda.py
@@ -1,4 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
+
import contextlib
import pytest
@@ -17,18 +18,21 @@ BUS = ASSETS / 'bus.jpg'
def test_checks():
+ """Validate CUDA settings against torch CUDA functions."""
assert torch.cuda.is_available() == CUDA_IS_AVAILABLE
assert torch.cuda.device_count() == CUDA_DEVICE_COUNT
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_train():
+ """Test model training on a minimal dataset."""
device = 0 if CUDA_DEVICE_COUNT == 1 else [0, 1]
YOLO(MODEL).train(data=DATA, imgsz=64, epochs=1, device=device) # requires imgsz>=64
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_predict_multiple_devices():
+ """Validate model prediction on multiple devices."""
model = YOLO('yolov8n.pt')
model = model.cpu()
assert str(model.device) == 'cpu'
@@ -53,6 +57,7 @@ def test_predict_multiple_devices():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_autobatch():
+ """Check batch size for YOLO model using autobatch."""
from ultralytics.utils.autobatch import check_train_batch_size
check_train_batch_size(YOLO(MODEL).model.cuda(), imgsz=128, amp=True)
@@ -60,6 +65,7 @@ def test_autobatch():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_utils_benchmarks():
+ """Profile YOLO models for performance benchmarks."""
from ultralytics.utils.benchmarks import ProfileModels
# Pre-export a dynamic engine model to use dynamic inference
@@ -69,6 +75,7 @@ def test_utils_benchmarks():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_predict_sam():
+ """Test SAM model prediction with various prompts."""
from ultralytics import SAM
from ultralytics.models.sam import Predictor as SAMPredictor
@@ -102,6 +109,7 @@ def test_predict_sam():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_model_ray_tune():
+ """Tune YOLO model with Ray optimization library."""
with contextlib.suppress(RuntimeError): # RuntimeError may be caused by out-of-memory
YOLO('yolov8n-cls.yaml').tune(use_ray=True,
data='imagenet10',
@@ -115,12 +123,14 @@ def test_model_ray_tune():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_model_tune():
+ """Tune YOLO model for performance."""
YOLO('yolov8n-pose.pt').tune(data='coco8-pose.yaml', plots=False, imgsz=32, epochs=1, iterations=2, device='cpu')
YOLO('yolov8n-cls.pt').tune(data='imagenet10', plots=False, imgsz=32, epochs=1, iterations=2, device='cpu')
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_pycocotools():
+ """Validate model predictions using pycocotools."""
from ultralytics.models.yolo.detect import DetectionValidator
from ultralytics.models.yolo.pose import PoseValidator
from ultralytics.models.yolo.segment import SegmentationValidator
diff --git a/tests/test_engine.py b/tests/test_engine.py
index 6ea4d9f0..2d789583 100644
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -14,10 +14,12 @@ MODEL = WEIGHTS_DIR / 'yolov8n'
def test_func(*args): # noqa
+ """Test function callback."""
print('callback test passed')
def test_export():
+ """Test model exporting functionality."""
exporter = Exporter()
exporter.add_callback('on_export_start', test_func)
assert test_func in exporter.callbacks['on_export_start'], 'callback test failed'
@@ -26,6 +28,7 @@ def test_export():
def test_detect():
+ """Test object detection functionality."""
overrides = {'data': 'coco8.yaml', 'model': CFG_DET, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'coco8.yaml'
CFG.imgsz = 32
@@ -61,6 +64,7 @@ def test_detect():
def test_segment():
+ """Test image segmentation functionality."""
overrides = {'data': 'coco8-seg.yaml', 'model': CFG_SEG, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'coco8-seg.yaml'
CFG.imgsz = 32
@@ -98,6 +102,7 @@ def test_segment():
def test_classify():
+ """Test image classification functionality."""
overrides = {'data': 'imagenet10', 'model': CFG_CLS, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'imagenet10'
CFG.imgsz = 32
diff --git a/tests/test_python.py b/tests/test_python.py
index 3e49f570..bea8afe1 100644
--- a/tests/test_python.py
+++ b/tests/test_python.py
@@ -27,11 +27,13 @@ IS_TMP_WRITEABLE = is_dir_writeable(TMP)
def test_model_forward():
+ """Test the forward pass of the YOLO model."""
model = YOLO(CFG)
model(source=None, imgsz=32, augment=True) # also test no source and augment
def test_model_methods():
+ """Test various methods and properties of the YOLO model."""
model = YOLO(MODEL)
# Model methods
@@ -51,7 +53,7 @@ def test_model_methods():
def test_model_profile():
- # Test profile=True model argument
+ """Test profiling of the YOLO model with 'profile=True' argument."""
from ultralytics.nn.tasks import DetectionModel
model = DetectionModel() # build model
@@ -61,7 +63,7 @@ def test_model_profile():
@pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable')
def test_predict_txt():
- # Write a list of sources (file, dir, glob, recursive glob) to a txt file
+ """Test YOLO predictions with sources (file, dir, glob, recursive glob) specified in a text file."""
txt_file = TMP / 'sources.txt'
with open(txt_file, 'w') as f:
for x in [ASSETS / 'bus.jpg', ASSETS, ASSETS / '*', ASSETS / '**/*.jpg']:
@@ -70,6 +72,7 @@ def test_predict_txt():
def test_predict_img():
+ """Test YOLO prediction on various types of image sources."""
model = YOLO(MODEL)
seg_model = YOLO(WEIGHTS_DIR / 'yolov8n-seg.pt')
cls_model = YOLO(WEIGHTS_DIR / 'yolov8n-cls.pt')
@@ -105,7 +108,7 @@ def test_predict_img():
def test_predict_grey_and_4ch():
- # Convert SOURCE to greyscale and 4-ch
+ """Test YOLO prediction on SOURCE converted to greyscale and 4-channel images."""
im = Image.open(SOURCE)
directory = TMP / 'im4'
directory.mkdir(parents=True, exist_ok=True)
@@ -132,8 +135,11 @@ def test_predict_grey_and_4ch():
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
@pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable')
def test_track_stream():
- # Test YouTube streaming inference (short 10 frame video) with non-default ByteTrack tracker
- # imgsz=160 required for tracking for higher confidence and better matches
+ """
+ Test YouTube streaming tracking (short 10-frame video) with non-default ByteTrack tracker.
+
+ Note: imgsz=160 is required for tracking, yielding higher confidence and better matches.
+ """
import yaml
model = YOLO(MODEL)
@@ -153,37 +159,44 @@ def test_track_stream():
def test_val():
+ """Test the validation mode of the YOLO model."""
YOLO(MODEL).val(data='coco8.yaml', imgsz=32, save_hybrid=True)
def test_train_scratch():
+ """Test training the YOLO model from scratch."""
model = YOLO(CFG)
model.train(data='coco8.yaml', epochs=2, imgsz=32, cache='disk', batch=-1, close_mosaic=1, name='model')
model(SOURCE)
def test_train_pretrained():
+ """Test training the YOLO model from a pre-trained state."""
model = YOLO(WEIGHTS_DIR / 'yolov8n-seg.pt')
model.train(data='coco8-seg.yaml', epochs=1, imgsz=32, cache='ram', copy_paste=0.5, mixup=0.5, name=0)
model(SOURCE)
def test_export_torchscript():
+ """Test exporting the YOLO model to TorchScript format."""
f = YOLO(MODEL).export(format='torchscript', optimize=False)
YOLO(f)(SOURCE) # exported model inference
def test_export_onnx():
+ """Test exporting the YOLO model to ONNX format."""
f = YOLO(MODEL).export(format='onnx', dynamic=True)
YOLO(f)(SOURCE) # exported model inference
def test_export_openvino():
+ """Test exporting the YOLO model to OpenVINO format."""
f = YOLO(MODEL).export(format='openvino')
YOLO(f)(SOURCE) # exported model inference
def test_export_coreml():
+ """Test exporting the YOLO model to CoreML format."""
if not WINDOWS: # RuntimeError: BlobWriter not loaded with coremltools 7.0 on windows
if MACOS:
f = YOLO(MODEL).export(format='coreml')
@@ -193,7 +206,11 @@ def test_export_coreml():
def test_export_tflite(enabled=False):
- # TF suffers from install conflicts on Windows and macOS
+ """
+ Test exporting the YOLO model to TFLite format.
+
+ Note: TF suffers from install conflicts on Windows and macOS.
+ """
if enabled and LINUX:
model = YOLO(MODEL)
f = model.export(format='tflite')
@@ -201,7 +218,11 @@ def test_export_tflite(enabled=False):
def test_export_pb(enabled=False):
- # TF suffers from install conflicts on Windows and macOS
+ """
+ Test exporting the YOLO model to *.pb format.
+
+ Note: TF suffers from install conflicts on Windows and macOS.
+ """
if enabled and LINUX:
model = YOLO(MODEL)
f = model.export(format='pb')
@@ -209,18 +230,24 @@ def test_export_pb(enabled=False):
def test_export_paddle(enabled=False):
- # Paddle protobuf requirements conflicting with onnx protobuf requirements
+ """
+ Test exporting the YOLO model to Paddle format.
+
+ Note: Paddle protobuf requirements conflict with ONNX protobuf requirements.
+ """
if enabled:
YOLO(MODEL).export(format='paddle')
@pytest.mark.slow
def test_export_ncnn():
+ """Test exporting the YOLO model to NCNN format."""
f = YOLO(MODEL).export(format='ncnn')
YOLO(f)(SOURCE) # exported model inference
def test_all_model_yamls():
+ """Test YOLO model creation for all available YAML configurations."""
for m in (ROOT / 'cfg' / 'models').rglob('*.yaml'):
if 'rtdetr' in m.name:
if TORCH_1_9: # torch<=1.8 issue - TypeError: __init__() got an unexpected keyword argument 'batch_first'
@@ -230,6 +257,7 @@ def test_all_model_yamls():
def test_workflow():
+ """Test the complete workflow including training, validation, prediction, and exporting."""
model = YOLO(MODEL)
model.train(data='coco8.yaml', epochs=1, imgsz=32, optimizer='SGD')
model.val(imgsz=32)
@@ -238,12 +266,14 @@ def test_workflow():
def test_predict_callback_and_setup():
- # Test callback addition for prediction
- def on_predict_batch_end(predictor): # results -> List[batch_size]
+ """Test callback functionality during YOLO prediction."""
+
+ def on_predict_batch_end(predictor):
+ """Callback function that handles operations at the end of a prediction batch."""
path, im0s, _, _ = predictor.batch
im0s = im0s if isinstance(im0s, list) else [im0s]
bs = [predictor.dataset.bs for _ in range(len(path))]
- predictor.results = zip(predictor.results, im0s, bs)
+ predictor.results = zip(predictor.results, im0s, bs) # results is List[batch_size]
model = YOLO(MODEL)
model.add_callback('on_predict_batch_end', on_predict_batch_end)
@@ -259,6 +289,7 @@ def test_predict_callback_and_setup():
def test_results():
+ """Test various result formats for the YOLO model."""
for m in 'yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt', 'yolov8n-cls.pt':
results = YOLO(WEIGHTS_DIR / m)([SOURCE, SOURCE], imgsz=160)
for r in results:
@@ -274,7 +305,7 @@ def test_results():
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
def test_data_utils():
- # Test functions in ultralytics/data/utils.py
+ """Test utility functions in ultralytics/data/utils.py."""
from ultralytics.data.utils import HUBDatasetStats, autosplit
from ultralytics.utils.downloads import zip_directory
@@ -294,7 +325,7 @@ def test_data_utils():
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
def test_data_converter():
- # Test dataset converters
+ """Test dataset converters."""
from ultralytics.data.converter import coco80_to_coco91_class, convert_coco
file = 'instances_val2017.json'
@@ -304,6 +335,7 @@ def test_data_converter():
def test_data_annotator():
+ """Test automatic data annotation."""
from ultralytics.data.annotator import auto_annotate
auto_annotate(ASSETS,
@@ -313,7 +345,7 @@ def test_data_annotator():
def test_events():
- # Test event sending
+ """Test event sending functionality."""
from ultralytics.hub.utils import Events
events = Events()
@@ -324,6 +356,7 @@ def test_events():
def test_cfg_init():
+ """Test configuration initialization utilities."""
from ultralytics.cfg import check_dict_alignment, copy_default_cfg, smart_value
with contextlib.suppress(SyntaxError):
@@ -334,6 +367,7 @@ def test_cfg_init():
def test_utils_init():
+ """Test initialization utilities."""
from ultralytics.utils import get_git_branch, get_git_origin_url, get_ubuntu_version, is_github_actions_ci
get_ubuntu_version()
@@ -343,6 +377,7 @@ def test_utils_init():
def test_utils_checks():
+ """Test various utility checks."""
checks.check_yolov5u_filename('yolov5n.pt')
checks.git_describe(ROOT)
checks.check_requirements() # check requirements.txt
@@ -354,12 +389,14 @@ def test_utils_checks():
def test_utils_benchmarks():
+ """Test model benchmarking."""
from ultralytics.utils.benchmarks import ProfileModels
ProfileModels(['yolov8n.yaml'], imgsz=32, min_time=1, num_timed_runs=3, num_warmup_runs=1).profile()
def test_utils_torchutils():
+ """Test Torch utility functions."""
from ultralytics.nn.modules.conv import Conv
from ultralytics.utils.torch_utils import get_flops_with_torch_profiler, profile, time_sync
@@ -373,12 +410,14 @@ def test_utils_torchutils():
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
def test_utils_downloads():
+ """Test file download utilities."""
from ultralytics.utils.downloads import get_google_drive_file_info
get_google_drive_file_info('https://drive.google.com/file/d/1cqT-cJgANNrhIHCrEufUYhQ4RqiWG_lJ/view?usp=drive_link')
def test_utils_ops():
+ """Test various operations utilities."""
from ultralytics.utils.ops import (ltwh2xywh, ltwh2xyxy, make_divisible, xywh2ltwh, xywh2xyxy, xywhn2xyxy,
xywhr2xyxyxyxy, xyxy2ltwh, xyxy2xywh, xyxy2xywhn, xyxyxyxy2xywhr)
@@ -396,6 +435,7 @@ def test_utils_ops():
def test_utils_files():
+ """Test file handling utilities."""
from ultralytics.utils.files import file_age, file_date, get_latest_run, spaces_in_path
file_age(SOURCE)
@@ -409,6 +449,7 @@ def test_utils_files():
def test_nn_modules_conv():
+ """Test Convolutional Neural Network modules."""
from ultralytics.nn.modules.conv import CBAM, Conv2, ConvTranspose, DWConvTranspose2d, Focus
c1, c2 = 8, 16 # input and output channels
@@ -427,6 +468,7 @@ def test_nn_modules_conv():
def test_nn_modules_block():
+ """Test Neural Network block modules."""
from ultralytics.nn.modules.block import C1, C3TR, BottleneckCSP, C3Ghost, C3x
c1, c2 = 8, 16 # input and output channels
@@ -442,6 +484,7 @@ def test_nn_modules_block():
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
def test_hub():
+ """Test Ultralytics HUB functionalities."""
from ultralytics.hub import export_fmts_hub, logout
from ultralytics.hub.utils import smart_request
@@ -453,6 +496,7 @@ def test_hub():
@pytest.mark.slow
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
def test_triton():
+ """Test NVIDIA Triton Server functionalities."""
checks.check_requirements('tritonclient[all]')
import subprocess
import time
diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py
index 98edf804..65cd7542 100644
--- a/ultralytics/cfg/__init__.py
+++ b/ultralytics/cfg/__init__.py
@@ -180,8 +180,8 @@ def _handle_deprecation(custom):
def check_dict_alignment(base: Dict, custom: Dict, e=None):
"""
- This function checks for any mismatched keys between a custom configuration list and a base configuration list.
- If any mismatched keys are found, the function prints out similar keys from the base list and exits the program.
+ This function checks for any mismatched keys between a custom configuration list and a base configuration list. If
+ any mismatched keys are found, the function prints out similar keys from the base list and exits the program.
Args:
custom (dict): a dictionary of custom configuration options
@@ -205,9 +205,8 @@ def check_dict_alignment(base: Dict, custom: Dict, e=None):
def merge_equals_args(args: List[str]) -> List[str]:
"""
- Merges arguments around isolated '=' args in a list of strings.
- The function considers cases where the first argument ends with '=' or the second starts with '=',
- as well as when the middle one is an equals sign.
+ Merges arguments around isolated '=' args in a list of strings. The function considers cases where the first
+ argument ends with '=' or the second starts with '=', as well as when the middle one is an equals sign.
Args:
args (List[str]): A list of strings where each element is an argument.
diff --git a/ultralytics/data/augment.py b/ultralytics/data/augment.py
index 432023c7..7fb32477 100644
--- a/ultralytics/data/augment.py
+++ b/ultralytics/data/augment.py
@@ -20,16 +20,30 @@ from .utils import polygons2masks, polygons2masks_overlap
# TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
class BaseTransform:
+ """
+ Base class for image transformations.
+
+ This is a generic transformation class that can be extended for specific image processing needs.
+ The class is designed to be compatible with both classification and semantic segmentation tasks.
+
+ Methods:
+ __init__: Initializes the BaseTransform object.
+ apply_image: Applies image transformation to labels.
+ apply_instances: Applies transformations to object instances in labels.
+ apply_semantic: Applies semantic segmentation to an image.
+ __call__: Applies all label transformations to an image, instances, and semantic masks.
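+
+ Example (a minimal subclassing sketch; the vertical flip is illustrative):
+ >>> class VerticalFlip(BaseTransform):
+ ...     def apply_image(self, labels):
+ ...         labels['img'] = labels['img'][::-1]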
+ """
def __init__(self) -> None:
+ """Initializes the BaseTransform object."""
pass
def apply_image(self, labels):
- """Applies image transformation to labels."""
+ """Applies image transformations to labels."""
pass
def apply_instances(self, labels):
- """Applies transformations to input 'labels' and returns object instances."""
+ """Applies transformations to object instances in labels."""
pass
def apply_semantic(self, labels):
@@ -37,13 +51,14 @@ class BaseTransform:
pass
def __call__(self, labels):
- """Applies label transformations to an image, instances and semantic masks."""
+ """Applies all label transformations to an image, instances, and semantic masks."""
self.apply_image(labels)
self.apply_instances(labels)
self.apply_semantic(labels)
class Compose:
+ """Class for composing multiple image transformations."""
def __init__(self, transforms):
"""Initializes the Compose object with a list of transforms."""
@@ -60,18 +75,23 @@ class Compose:
self.transforms.append(transform)
def tolist(self):
- """Converts list of transforms to a standard Python list."""
+ """Converts the list of transforms to a standard Python list."""
return self.transforms
def __repr__(self):
- """Return string representation of object."""
+ """Returns a string representation of the object."""
return f"{self.__class__.__name__}({', '.join([f'{t}' for t in self.transforms])})"
class BaseMixTransform:
- """This implementation is from mmyolo."""
+ """
+ Class for base mix (MixUp/Mosaic) transformations.
+
+ This implementation is from mmyolo.
+ """
def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
+ """Initializes the BaseMixTransform object with dataset, pre_transform, and probability."""
self.dataset = dataset
self.pre_transform = pre_transform
self.p = p
@@ -262,8 +282,10 @@ class Mosaic(BaseMixTransform):
class MixUp(BaseMixTransform):
+ """Class for applying MixUp augmentation to the dataset."""
def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
+ """Initializes MixUp object with dataset, pre_transform, and probability of applying MixUp."""
super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
def get_indexes(self):
@@ -271,7 +293,7 @@ class MixUp(BaseMixTransform):
return random.randint(0, len(self.dataset) - 1)
def _mix_transform(self, labels):
- """Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf."""
+ """Applies MixUp augmentation as per https://arxiv.org/pdf/1710.09412.pdf."""
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
labels2 = labels['mix_labels'][0]
labels['img'] = (labels['img'] * r + labels2['img'] * (1 - r)).astype(np.uint8)
@@ -281,6 +303,28 @@ class MixUp(BaseMixTransform):
class RandomPerspective:
+ """
+ Implements random perspective and affine transformations on images and corresponding bounding boxes, segments, and
+ keypoints. These transformations include rotation, translation, scaling, and shearing. The class also offers the
+ option to apply these transformations conditionally with a specified probability.
+
+ Attributes:
+ degrees (float): Degree range for random rotations.
+ translate (float): Fraction of total width and height for random translation.
+ scale (float): Scaling factor interval, e.g., a scale factor of 0.1 allows a resize between 90%-110%.
+ shear (float): Shear intensity (angle in degrees).
+ perspective (float): Perspective distortion factor.
+ border (tuple): Tuple specifying mosaic border.
+ pre_transform (callable): A function/transform to apply to the image before starting the random transformation.
+
+ Methods:
+ affine_transform(img, border): Applies a series of affine transformations to the image.
+ apply_bboxes(bboxes, M): Transforms bounding boxes using the calculated affine matrix.
+ apply_segments(segments, M): Transforms segments and generates new bounding boxes.
+ apply_keypoints(keypoints, M): Transforms keypoints.
+ __call__(labels): Main method to apply transformations to both images and their corresponding annotations.
+ box_candidates(box1, box2): Filters out bounding boxes that don't meet certain criteria post-transformation.
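+
+ Example (parameter values are illustrative; 'labels' must carry 'img' and 'instances' entries):
+ >>> transform = RandomPerspective(degrees=10.0, translate=0.1, scale=0.1, shear=10.0)
+ >>> labels = transform(labels)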
+ """
def __init__(self,
degrees=0.0,
@@ -290,17 +334,31 @@ class RandomPerspective:
perspective=0.0,
border=(0, 0),
pre_transform=None):
+ """Initializes RandomPerspective object with transformation parameters."""
+
self.degrees = degrees
self.translate = translate
self.scale = scale
self.shear = shear
self.perspective = perspective
- # Mosaic border
- self.border = border
+ self.border = border # mosaic border
self.pre_transform = pre_transform
def affine_transform(self, img, border):
- """Center."""
+ """
+ Applies a sequence of affine transformations centered around the image center.
+
+ Args:
+ img (ndarray): Input image.
+ border (tuple): Border dimensions.
+
+ Returns:
+ img (ndarray): Transformed image.
+ M (ndarray): Transformation matrix.
+ s (float): Scale factor.
+ """
+
+ # Center
C = np.eye(3, dtype=np.float32)
C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
@@ -462,8 +520,22 @@ class RandomPerspective:
labels['resized_shape'] = img.shape[:2]
return labels
- def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
- # Compute box candidates: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
+ def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
+ """
+ Compute box candidates based on a set of thresholds. This method compares the characteristics of the boxes
+ before and after augmentation to decide whether a box is a candidate for further processing.
+
+ Args:
+ box1 (numpy.ndarray): The (4, n) bounding boxes before augmentation, represented as [x1, y1, x2, y2].
+ box2 (numpy.ndarray): The (4, n) bounding boxes after augmentation, represented as [x1, y1, x2, y2].
+ wh_thr (float, optional): The width and height threshold in pixels. Default is 2.
+ ar_thr (float, optional): The aspect ratio threshold. Default is 100.
+ area_thr (float, optional): The area ratio threshold. Default is 0.1.
+ eps (float, optional): A small epsilon value to prevent division by zero. Default is 1e-16.
+
+ Returns:
+ (numpy.ndarray): A boolean array indicating which boxes are candidates based on the given thresholds.
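+
+ Example (a single (4, 1) box before and after augmentation; values are illustrative):
+ >>> keep = self.box_candidates(np.array([[0], [0], [100], [100]]), np.array([[10], [10], [90], [90]]))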
+ """
w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
@@ -471,14 +543,32 @@ class RandomPerspective:
class RandomHSV:
+ """
+ This class is responsible for performing random adjustments to the Hue, Saturation, and Value (HSV) channels of an
+ image.
+
+ The adjustments are random but within limits set by hgain, sgain, and vgain.
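+
+ Example ('labels' is assumed to be a dict holding an 'img' numpy array):
+ >>> augmenter = RandomHSV(hgain=0.5, sgain=0.5, vgain=0.5)
+ >>> labels = augmenter(labels)  # adjusts the 'img' entry in place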
+ """
def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:
+ """
+ Initialize RandomHSV class with gains for each HSV channel.
+
+ Args:
+ hgain (float, optional): Maximum variation for hue. Default is 0.5.
+ sgain (float, optional): Maximum variation for saturation. Default is 0.5.
+ vgain (float, optional): Maximum variation for value. Default is 0.5.
+ """
self.hgain = hgain
self.sgain = sgain
self.vgain = vgain
def __call__(self, labels):
- """Applies image HSV augmentation"""
+ """
+ Applies random HSV augmentation to an image within the predefined limits.
+
+ The modified image replaces the original image in the input 'labels' dict.
+ """
img = labels['img']
if self.hgain or self.sgain or self.vgain:
r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1 # random gains
@@ -496,9 +586,22 @@ class RandomHSV:
class RandomFlip:
- """Applies random horizontal or vertical flip to an image with a given probability."""
+ """
+ Applies a random horizontal or vertical flip to an image with a given probability.
+
+ Also updates any instances (bounding boxes, keypoints, etc.) accordingly.
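+
+ Example ('labels' must provide 'img' and 'instances' keys):
+ >>> flipper = RandomFlip(p=0.5, direction='horizontal')
+ >>> labels = flipper(labels)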
+ """
def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
+ """
+ Initializes the RandomFlip class with probability and direction.
+
+ Args:
+ p (float, optional): The probability of applying the flip. Must be between 0 and 1. Default is 0.5.
+ direction (str, optional): The direction to apply the flip. Must be 'horizontal' or 'vertical'.
+ Default is 'horizontal'.
+ flip_idx (array-like, optional): Index mapping for flipping keypoints, if any.
+ """
assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
assert 0 <= p <= 1.0
@@ -507,7 +610,16 @@ class RandomFlip:
self.flip_idx = flip_idx
def __call__(self, labels):
- """Resize image and padding for detection, instance segmentation, pose."""
+ """
+ Applies random flip to an image and updates any instances like bounding boxes or keypoints accordingly.
+
+ Args:
+ labels (dict): A dictionary containing the keys 'img' and 'instances'. 'img' is the image to be flipped.
+ 'instances' is an object containing bounding boxes and optionally keypoints.
+
+ Returns:
+ (dict): The same dict with the flipped image and updated instances under the 'img' and 'instances' keys.
+ """
img = labels['img']
instances = labels.pop('instances')
instances.convert_bbox(format='xywh')
@@ -599,12 +711,38 @@ class LetterBox:
class CopyPaste:
+ """
+ Implements the Copy-Paste augmentation as described in the paper https://arxiv.org/abs/2012.07177. This class is
+ responsible for applying the Copy-Paste augmentation on images and their corresponding instances.
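+
+ Example ('labels' must provide 'img', 'cls', and segment-aware 'instances'):
+ >>> augmenter = CopyPaste(p=0.5)
+ >>> labels = augmenter(labels)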
+ """
def __init__(self, p=0.5) -> None:
+ """
+ Initializes the CopyPaste class with a given probability.
+
+ Args:
+ p (float, optional): The probability of applying the Copy-Paste augmentation. Must be between 0 and 1.
+ Default is 0.5.
+ """
self.p = p
def __call__(self, labels):
- """Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)."""
+ """
+ Applies the Copy-Paste augmentation to the given image and instances.
+
+ Args:
+ labels (dict): A dictionary containing:
+ - 'img': The image to augment.
+ - 'cls': Class labels associated with the instances.
+ - 'instances': Object containing bounding boxes, and optionally, keypoints and segments.
+
+ Returns:
+ (dict): Dict with augmented image and updated instances under the 'img', 'cls', and 'instances' keys.
+
+ Notes:
+ 1. Instances are expected to have 'segments' as one of their attributes for this augmentation to work.
+ 2. This method modifies the input dictionary 'labels' in place.
+ """
im = labels['img']
cls = labels['cls']
h, w = im.shape[:2]
@@ -639,9 +777,13 @@ class CopyPaste:
class Albumentations:
- """Albumentations transformations. Optional, uninstall package to disable.
- Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive Histogram Equalization,
- random change of brightness and contrast, RandomGamma and lowering of image quality by compression."""
+ """
+ Albumentations transformations.
+
+ Optional, uninstall the package to disable. Applies Blur, Median Blur, conversion to grayscale, Contrast Limited
+ Adaptive Histogram Equalization, random changes of brightness and contrast, RandomGamma, and lowering of image
+ quality by compression.
+ """
def __init__(self, p=1.0):
"""Initialize the transform object for YOLO bbox formatted params."""
@@ -690,6 +832,19 @@ class Albumentations:
# TODO: technically this is not an augmentation, maybe we should put this to another files
class Format:
+ """
+ Formats image annotations for object detection, instance segmentation, and pose estimation tasks. The class
+ standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader.
+
+ Attributes:
+ bbox_format (str): Format for bounding boxes. Default is 'xywh'.
+ normalize (bool): Whether to normalize bounding boxes. Default is True.
+ return_mask (bool): Return instance masks for segmentation. Default is False.
+ return_keypoint (bool): Return keypoints for pose estimation. Default is False.
+ mask_ratio (int): Downsample ratio for masks. Default is 4.
+ mask_overlap (bool): Whether to overlap masks. Default is True.
+ batch_idx (bool): Keep batch indexes. Default is True.
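+
+ Example (keyword values shown are the defaults):
+ >>> formatter = Format(bbox_format='xywh', normalize=True, return_mask=False)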
+ """
def __init__(self,
bbox_format='xywh',
@@ -699,6 +854,7 @@ class Format:
mask_ratio=4,
mask_overlap=True,
batch_idx=True):
+ """Initializes the Format class with given parameters."""
self.bbox_format = bbox_format
self.normalize = normalize
self.return_mask = return_mask # set False when training detection only
@@ -746,7 +902,7 @@ class Format:
return img
def _format_segments(self, instances, cls, w, h):
- """convert polygon points to bitmap."""
+ """Convert polygon points to bitmap."""
segments = instances.segments
if self.mask_overlap:
masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=self.mask_ratio)
@@ -851,35 +1007,75 @@ def classify_albumentations(
class ClassifyLetterBox:
- """YOLOv8 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])"""
+ """
+ YOLOv8 LetterBox class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
+ T.Compose([LetterBox(size), ToTensor()]).
+
+ Attributes:
+ h (int): Target height of the image.
+ w (int): Target width of the image.
+ auto (bool): If True, automatically solves for short side using stride.
+ stride (int): The stride value, used when 'auto' is True.
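+
+ Example ('im' is assumed to be an HWC numpy array):
+ >>> transform = ClassifyLetterBox(size=(640, 640), auto=False, stride=32)
+ >>> im_out = transform(im)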
+ """
def __init__(self, size=(640, 640), auto=False, stride=32):
- """Resizes image and crops it to center with max dimensions 'h' and 'w'."""
+ """
+ Initializes the ClassifyLetterBox class with a target size, auto-flag, and stride.
+
+ Args:
+ size (Union[int, Tuple[int, int]]): The target dimensions (height, width) for the letterbox.
+ auto (bool): If True, automatically calculates the short side based on stride.
+ stride (int): The stride value, used when 'auto' is True.
+ """
super().__init__()
self.h, self.w = (size, size) if isinstance(size, int) else size
self.auto = auto # pass max size integer, automatically solve for short side using stride
self.stride = stride # used with auto
- def __call__(self, im): # im = np.array HWC
+ def __call__(self, im):
+ """
+ Resizes the image and pads it with a letterbox method.
+
+ Args:
+ im (numpy.ndarray): The input image as a numpy array of shape HWC.
+
+ Returns:
+ (numpy.ndarray): The letterboxed and resized image as a numpy array.
+ """
imh, imw = im.shape[:2]
- r = min(self.h / imh, self.w / imw) # ratio of new/old
- h, w = round(imh * r), round(imw * r) # resized image
+ r = min(self.h / imh, self.w / imw) # ratio of new/old dimensions
+ h, w = round(imh * r), round(imw * r) # resized image dimensions
+
+ # Calculate padding dimensions
hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w)
top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
+
+ # Create padded image
im_out = np.full((hs, ws, 3), 114, dtype=im.dtype)
im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
return im_out
class CenterCrop:
- """YOLOv8 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])"""
+ """YOLOv8 CenterCrop class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
+ T.Compose([CenterCrop(size), ToTensor()]).
+ """
def __init__(self, size=640):
"""Converts an image from numpy array to PyTorch tensor."""
super().__init__()
self.h, self.w = (size, size) if isinstance(size, int) else size
- def __call__(self, im): # im = np.array HWC
+ def __call__(self, im):
+ """
+ Resizes and crops the center of the image using a letterbox method.
+
+ Args:
+ im (numpy.ndarray): The input image as a numpy array of shape HWC.
+
+ Returns:
+ (numpy.ndarray): The center-cropped and resized image as a numpy array.
+ """
imh, imw = im.shape[:2]
m = min(imh, imw) # min dimension
top, left = (imh - m) // 2, (imw - m) // 2
@@ -887,14 +1083,23 @@ class CenterCrop:
class ToTensor:
- """YOLOv8 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])."""
+ """YOLOv8 ToTensor class for image preprocessing, i.e., T.Compose([LetterBox(size), ToTensor()])."""
def __init__(self, half=False):
"""Initialize YOLOv8 ToTensor object with optional half-precision support."""
super().__init__()
self.half = half
- def __call__(self, im): # im = np.array HWC in BGR order
+ def __call__(self, im):
+ """
+ Transforms an image from a numpy array to a PyTorch tensor, applying optional half-precision and normalization.
+
+ Args:
+ im (numpy.ndarray): Input image as a numpy array with shape (H, W, C) in BGR order.
+
+ Returns:
+ (torch.Tensor): The transformed image as a PyTorch tensor in float32 or float16, normalized to [0, 1].
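+
+ Example ('im' is assumed to be an HWC BGR numpy array):
+ >>> tensor = ToTensor(half=False)(im)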
+ """
im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous
im = torch.from_numpy(im) # to torch
im = im.half() if self.half else im.float() # uint8 to fp16/32
diff --git a/ultralytics/data/base.py b/ultralytics/data/base.py
index 429533dc..462280a6 100644
--- a/ultralytics/data/base.py
+++ b/ultralytics/data/base.py
@@ -62,6 +62,7 @@ class BaseDataset(Dataset):
classes=None,
fraction=1.0):
+ """Initialize BaseDataset with given configuration and options."""
super().__init__()
self.img_path = img_path
self.imgsz = imgsz
self.augment = augment
@@ -256,7 +257,7 @@ class BaseDataset(Dataset):
return len(self.labels)
def update_labels_info(self, label):
- """custom your label format here."""
+ """Custom your label format here."""
return label
def build_transforms(self, hyp=None):
diff --git a/ultralytics/data/build.py b/ultralytics/data/build.py
index 33b5edad..07de91c8 100644
--- a/ultralytics/data/build.py
+++ b/ultralytics/data/build.py
@@ -20,7 +20,11 @@ from .utils import PIN_MEMORY
class InfiniteDataLoader(dataloader.DataLoader):
- """Dataloader that reuses workers. Uses same syntax as vanilla DataLoader."""
+ """
+ Dataloader that reuses workers.
+
+ Uses same syntax as vanilla DataLoader.
+ """
def __init__(self, *args, **kwargs):
"""Dataloader that infinitely recycles workers, inherits from DataLoader."""
@@ -38,7 +42,9 @@ class InfiniteDataLoader(dataloader.DataLoader):
yield next(self.iterator)
def reset(self):
- """Reset iterator.
+ """
+ Reset iterator.
+
This is useful when we want to modify settings of dataset while training.
"""
self.iterator = self._get_iterator()
@@ -70,7 +76,7 @@ def seed_worker(worker_id): # noqa
def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=False, stride=32):
- """Build YOLO Dataset"""
+ """Build YOLO Dataset."""
return YOLODataset(
img_path=img_path,
imgsz=cfg.imgsz,
diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py
index fecc30ce..4451df17 100644
--- a/ultralytics/data/converter.py
+++ b/ultralytics/data/converter.py
@@ -12,7 +12,8 @@ from ultralytics.utils import TQDM
def coco91_to_coco80_class():
- """Converts 91-index COCO class IDs to 80-index COCO class IDs.
+ """
+ Converts 91-index COCO class IDs to 80-index COCO class IDs.
Returns:
(list): A list of 91 class IDs where the index represents the 80-index class ID and the value is the
@@ -51,7 +52,8 @@ def convert_coco(labels_dir='../coco/annotations/',
use_segments=False,
use_keypoints=False,
cls91to80=True):
- """Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
+ """
+ Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
Args:
labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
@@ -203,6 +205,7 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
'helipad': 17}
def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
+ """Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory."""
orig_label_path = orig_label_dir / f'{image_name}.txt'
save_path = save_dir / f'{image_name}.txt'
diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py
index 217d3bab..f0f431ea 100644
--- a/ultralytics/data/dataset.py
+++ b/ultralytics/data/dataset.py
@@ -33,6 +33,7 @@ class YOLODataset(BaseDataset):
"""
def __init__(self, *args, data=None, use_segments=False, use_keypoints=False, **kwargs):
+ """Initializes the YOLODataset with optional configurations for segments and keypoints."""
self.use_segments = use_segments
self.use_keypoints = use_keypoints
self.data = data
@@ -40,7 +41,9 @@ class YOLODataset(BaseDataset):
super().__init__(*args, **kwargs)
def cache_labels(self, path=Path('./labels.cache')):
- """Cache dataset labels, check images and read shapes.
+ """
+ Cache dataset labels, check images and read shapes.
+
Args:
path (Path): path where to save the cache file (default: Path('./labels.cache')).
Returns:
@@ -157,7 +160,7 @@ class YOLODataset(BaseDataset):
self.transforms = self.build_transforms(hyp)
def update_labels_info(self, label):
- """custom your label format here."""
+ """Custom your label format here."""
# NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
# we can make it also support classification and semantic segmentation by add or remove some dict keys there.
bboxes = label.pop('bboxes')
@@ -254,6 +257,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
return {'img': sample, 'cls': j}
def __len__(self) -> int:
+ """Return the total number of samples in the dataset."""
return len(self.samples)
def verify_images(self):
@@ -320,6 +324,16 @@ def save_dataset_cache_file(prefix, path, x):
# TODO: support semantic segmentation
class SemanticDataset(BaseDataset):
+ """
+ Semantic Segmentation Dataset.
+
+ This class is responsible for handling datasets used for semantic segmentation tasks. It inherits functionalities
+ from the BaseDataset class.
+
+ Note:
+ This class is currently a placeholder and needs to be populated with methods and attributes for supporting
+ semantic segmentation tasks.
+ """
def __init__(self):
"""Initialize a SemanticDataset object."""
diff --git a/ultralytics/data/loaders.py b/ultralytics/data/loaders.py
index 88491c79..0541f712 100644
--- a/ultralytics/data/loaders.py
+++ b/ultralytics/data/loaders.py
@@ -22,6 +22,7 @@ from ultralytics.utils.checks import check_requirements
@dataclass
class SourceTypes:
+ """Class to represent various types of input sources for predictions."""
webcam: bool = False
screenshot: bool = False
from_img: bool = False
@@ -29,7 +30,34 @@ class SourceTypes:
class LoadStreams:
- """Stream Loader, i.e. `yolo predict source='rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP, TCP streams`."""
+ """
+ Stream Loader for various types of video streams.
+
+ Suitable for use with `yolo predict source='rtsp://example.com/media.mp4'`; supports RTSP, RTMP, HTTP, and TCP streams.
+
+ Attributes:
+ sources (str): The source input paths or URLs for the video streams.
+ imgsz (int): The image size for processing, defaults to 640.
+ vid_stride (int): Video frame-rate stride, defaults to 1.
+ buffer (bool): Whether to buffer input streams, defaults to False.
+ running (bool): Flag to indicate if the streaming thread is running.
+ mode (str): Set to 'stream' indicating real-time capture.
+ imgs (list): List of image frames for each stream.
+ fps (list): List of FPS for each stream.
+ frames (list): List of total frames for each stream.
+ threads (list): List of threads for each stream.
+ shape (list): List of shapes for each stream.
+ caps (list): List of cv2.VideoCapture objects for each stream.
+ bs (int): Batch size for processing.
+
+ Methods:
+ __init__: Initialize the stream loader.
+ update: Read stream frames in daemon thread.
+ close: Close stream loader and release resources.
+ __iter__: Returns an iterator object for the class.
+ __next__: Returns source paths, transformed, and original images for processing.
+ __len__: Return the length of the sources object.
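+
+ Example (the URL is illustrative):
+ >>> loader = LoadStreams('rtsp://example.com/media.mp4', imgsz=640)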
+ """
def __init__(self, sources='file.streams', imgsz=640, vid_stride=1, buffer=False):
"""Initialize instance variables and check for consistent input stream shapes."""
@@ -149,10 +177,33 @@ class LoadStreams:
class LoadScreenshots:
- """YOLOv8 screenshot dataloader, i.e. `yolo predict source=screen`."""
+ """
+ YOLOv8 screenshot dataloader.
+
+ This class manages the loading of screenshot images for processing with YOLOv8.
+ Suitable for use with `yolo predict source=screen`.
+
+ Attributes:
+ source (str): The source input indicating which screen to capture.
+ imgsz (int): The image size for processing, defaults to 640.
+ screen (int): The screen number to capture.
+ left (int): The left coordinate for screen capture area.
+ top (int): The top coordinate for screen capture area.
+ width (int): The width of the screen capture area.
+ height (int): The height of the screen capture area.
+ mode (str): Set to 'stream' indicating real-time capture.
+ frame (int): Counter for captured frames.
+ sct (mss.mss): Screen capture object from `mss` library.
+ bs (int): Batch size, set to 1.
+ monitor (dict): Monitor configuration details.
+
+ Methods:
+ __iter__: Returns an iterator object.
+ __next__: Captures the next screenshot and returns it.
+ """
def __init__(self, source, imgsz=640):
- """source = [screen_number left top width height] (pixels)."""
+ """Source = [screen_number left top width height] (pixels)."""
check_requirements('mss')
import mss # noqa
@@ -192,7 +243,28 @@ class LoadScreenshots:
class LoadImages:
- """YOLOv8 image/video dataloader, i.e. `yolo predict source=image.jpg/vid.mp4`."""
+ """
+ YOLOv8 image/video dataloader.
+
+ This class manages the loading and pre-processing of image and video data for YOLOv8. It supports loading from
+ various formats, including single image files, video files, and lists of image and video paths.
+
+ Attributes:
+ imgsz (int): Image size, defaults to 640.
+ files (list): List of image and video file paths.
+ nf (int): Total number of files (images and videos).
+ video_flag (list): Flags indicating whether a file is a video (True) or an image (False).
+ mode (str): Current mode, 'image' or 'video'.
+ vid_stride (int): Stride for video frame-rate, defaults to 1.
+ bs (int): Batch size, set to 1 for this class.
+ cap (cv2.VideoCapture): Video capture object for OpenCV.
+ frame (int): Frame counter for video.
+ frames (int): Total number of frames in the video.
+ count (int): Counter for iteration, initialized at 0 during `__iter__()`.
+
+ Methods:
+ _new_video(path): Create a new cv2.VideoCapture object for a given video path.
+ """
def __init__(self, path, imgsz=640, vid_stride=1):
"""Initialize the Dataloader and raise FileNotFoundError if file not found."""
@@ -285,6 +357,24 @@ class LoadImages:
class LoadPilAndNumpy:
+ """
+ Load images from PIL and Numpy arrays for batch processing.
+
+ This class is designed to manage loading and pre-processing of image data from both PIL and Numpy formats.
+ It performs basic validation and format conversion to ensure that the images are in the required format for
+ downstream processing.
+
+ Attributes:
+ paths (list): List of image paths or autogenerated filenames.
+ im0 (list): List of images stored as Numpy arrays.
+ imgsz (int): Image size, defaults to 640.
+ mode (str): Type of data being processed, defaults to 'image'.
+ bs (int): Batch size, equivalent to the length of `im0`.
+ count (int): Counter for iteration, initialized at 0 during `__iter__()`.
+
+ Methods:
+ _single_check(im): Validate and format a single image to a Numpy array.
+ """
def __init__(self, im0, imgsz=640):
"""Initialize PIL and Numpy Dataloader."""
@@ -326,8 +416,24 @@ class LoadPilAndNumpy:
class LoadTensor:
+ """
+ Load images from torch.Tensor data.
+
+ This class manages the loading and pre-processing of image data from PyTorch tensors for further processing.
+
+ Attributes:
+ im0 (torch.Tensor): The input tensor containing the image(s).
+ bs (int): Batch size, inferred from the shape of `im0`.
+ mode (str): Current mode, set to 'image'.
+ paths (list): List of image paths or filenames.
+ count (int): Counter for iteration, initialized at 0 during `__iter__()`.
+
+ Methods:
+ _single_check(im, stride): Validate and possibly modify the input tensor.
+ """
def __init__(self, im0) -> None:
+ """Initialize Tensor Dataloader."""
self.im0 = self._single_check(im0)
self.bs = self.im0.shape[0]
self.mode = 'image'
@@ -370,9 +476,7 @@ class LoadTensor:
def autocast_list(source):
- """
- Merges a list of source of different types into a list of numpy arrays or PIL images
- """
+ """Merges a list of source of different types into a list of numpy arrays or PIL images."""
files = []
for im in source:
if isinstance(im, (str, Path)): # filename or uri
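As a quick illustration of the loader utilities above, a minimal sketch of `autocast_list` on a mixed source list (the filename and synthetic inputs are placeholders, not from this diff):

```python
import numpy as np
from PIL import Image

from ultralytics.data.loaders import autocast_list

mixed = [
    'bus.jpg',                              # placeholder path or URI
    Image.new('RGB', (64, 64)),             # synthetic PIL image
    np.zeros((64, 64, 3), dtype=np.uint8),  # synthetic numpy array
]
images = autocast_list(mixed)  # -> list of numpy arrays / PIL images
```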
diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py
index 00ddc6e5..c5c2d994 100644
--- a/ultralytics/data/utils.py
+++ b/ultralytics/data/utils.py
@@ -547,9 +547,9 @@ class HUBDatasetStats:
def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
"""
- Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the
- Python Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will
- not be resized.
+ Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the Python
+ Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will not be
+ resized.
Args:
f (str): The path to the input image file.
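Based on the signature above, a minimal usage sketch (the filename is a placeholder; images already smaller than `max_dim` are left unresized per the docstring):

```python
from ultralytics.data.utils import compress_one_image

# Writes the compressed result to f_new if given, otherwise back to f.
compress_one_image('bus.jpg', max_dim=1280, quality=60)
```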
diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py
index b209b9d7..66932a65 100644
--- a/ultralytics/engine/exporter.py
+++ b/ultralytics/engine/exporter.py
@@ -986,9 +986,7 @@ class Exporter:
return model
def add_callback(self, event: str, callback):
- """
- Appends the given callback.
- """
+ """Appends the given callback."""
self.callbacks[event].append(callback)
def run_callbacks(self, event: str):
diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py
index 69de12b2..806b0346 100644
--- a/ultralytics/engine/model.py
+++ b/ultralytics/engine/model.py
@@ -159,9 +159,7 @@ class Model(nn.Module):
self.overrides['task'] = self.task
def _check_is_pytorch_model(self):
- """
- Raises TypeError is model is not a PyTorch model
- """
+ """Raises TypeError is model is not a PyTorch model."""
pt_str = isinstance(self.model, (str, Path)) and Path(self.model).suffix == '.pt'
pt_module = isinstance(self.model, nn.Module)
if not (pt_module or pt_str):
@@ -173,9 +171,7 @@ class Model(nn.Module):
f"argument directly in your inference command, i.e. 'model.predict(source=..., device=0)'")
def reset_weights(self):
- """
- Resets the model modules parameters to randomly initialized values, losing all training information.
- """
+ """Resets the model modules parameters to randomly initialized values, losing all training information."""
self._check_is_pytorch_model()
for m in self.model.modules():
if hasattr(m, 'reset_parameters'):
@@ -185,9 +181,7 @@ class Model(nn.Module):
return self
def load(self, weights='yolov8n.pt'):
- """
- Transfers parameters with matching names and shapes from 'weights' to model.
- """
+ """Transfers parameters with matching names and shapes from 'weights' to model."""
self._check_is_pytorch_model()
if isinstance(weights, (str, Path)):
weights, self.ckpt = attempt_load_one_weight(weights)
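An illustrative sketch of the two weight utilities documented above (model name and weights file are the standard defaults, used here as placeholders):

```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
model.reset_weights()     # randomly re-initialize every module that supports it
model.load('yolov8n.pt')  # transfer back parameters with matching names and shapes
```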
diff --git a/ultralytics/engine/predictor.py b/ultralytics/engine/predictor.py
index 3eb851b3..367efc69 100644
--- a/ultralytics/engine/predictor.py
+++ b/ultralytics/engine/predictor.py
@@ -58,7 +58,7 @@ Example:
class BasePredictor:
"""
- BasePredictor
+ BasePredictor.
A base class for creating predictors.
@@ -109,7 +109,8 @@ class BasePredictor:
callbacks.add_integration_callbacks(self)
def preprocess(self, im):
- """Prepares input image before inference.
+ """
+ Prepares input image before inference.
Args:
im (torch.Tensor | List(np.ndarray)): BCHW for tensor, [(HWC) x B] for list.
@@ -128,6 +129,7 @@ class BasePredictor:
return im
def inference(self, im, *args, **kwargs):
+ """Runs inference on a given image using the specified model and arguments."""
visualize = increment_path(self.save_dir / Path(self.batch[0][0]).stem,
mkdir=True) if self.args.visualize and (not self.source_type.tensor) else False
return self.model(im, augment=self.args.augment, visualize=visualize)
@@ -194,7 +196,11 @@ class BasePredictor:
return list(self.stream_inference(source, model, *args, **kwargs)) # merge list of Result into one
def predict_cli(self, source=None, model=None):
- """Method used for CLI prediction. It uses always generator as outputs as not required by CLI mode."""
+ """
+ Method used for CLI prediction.
+
+ It uses always generator as outputs as not required by CLI mode.
+ """
gen = self.stream_inference(source, model)
for _ in gen: # running CLI inference without accumulating any outputs (do not modify)
pass
@@ -352,7 +358,5 @@ class BasePredictor:
callback(self)
def add_callback(self, event: str, func):
- """
- Add callback
- """
+ """Add callback."""
self.callbacks[event].append(func)
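A hedged sketch of callback registration; `on_predict_batch_end` is a standard Ultralytics event, and the handler below is illustrative:

```python
from ultralytics import YOLO

def log_batch(predictor):
    """Illustrative handler printing the number of results in the current batch."""
    print(f'processed {len(predictor.results)} result(s)')

model = YOLO('yolov8n.pt')
model.add_callback('on_predict_batch_end', log_batch)
model.predict('bus.jpg')  # placeholder image path
```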
diff --git a/ultralytics/engine/results.py b/ultralytics/engine/results.py
index 0fc6a0dd..fcbd762c 100644
--- a/ultralytics/engine/results.py
+++ b/ultralytics/engine/results.py
@@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
-Ultralytics Results, Boxes and Masks classes for handling inference results
+Ultralytics Results, Boxes and Masks classes for handling inference results.
Usage: See https://docs.ultralytics.com/modes/predict/
"""
@@ -19,12 +19,11 @@ from ultralytics.utils.torch_utils import smart_inference_mode
class BaseTensor(SimpleClass):
- """
- Base tensor class with additional methods for easy manipulation and device handling.
- """
+ """Base tensor class with additional methods for easy manipulation and device handling."""
def __init__(self, data, orig_shape) -> None:
- """Initialize BaseTensor with data and original shape.
+ """
+ Initialize BaseTensor with data and original shape.
Args:
data (torch.Tensor | np.ndarray): Predictions, such as bboxes, masks and keypoints.
@@ -126,6 +125,18 @@ class Results(SimpleClass):
self.probs = probs
def _apply(self, fn, *args, **kwargs):
+ """
+ Applies a function to all non-empty attributes and returns a new Results object with modified attributes. This
+ function is internally called by methods like .to(), .cuda(), .cpu(), etc.
+
+ Args:
+ fn (str): The name of the function to apply.
+ *args: Variable length argument list to pass to the function.
+ **kwargs: Arbitrary keyword arguments to pass to the function.
+
+ Returns:
+ Results: A new Results object with attributes modified by the applied function.
+ """
r = self.new()
for k in self._keys:
v = getattr(self, k)
@@ -250,9 +261,7 @@ class Results(SimpleClass):
return annotator.result()
def verbose(self):
- """
- Return log string for each task.
- """
+ """Return log string for each task."""
log_string = ''
probs = self.probs
boxes = self.boxes
@@ -537,6 +546,7 @@ class Probs(BaseTensor):
"""
def __init__(self, probs, orig_shape=None) -> None:
+ """Initialize the Probs class with classification probabilities and optional original shape of the image."""
super().__init__(probs, orig_shape)
@property
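A brief sketch of the device handling that `_apply` enables on `Results` (the model and image path are placeholders):

```python
from ultralytics import YOLO

results = YOLO('yolov8n.pt')('bus.jpg')
r = results[0].cpu()        # internally calls _apply('cpu') on each tensor attribute
print(r.boxes.data.device)  # -> cpu
```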
diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py
index 38df2673..6905847a 100644
--- a/ultralytics/engine/trainer.py
+++ b/ultralytics/engine/trainer.py
@@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
-Train a model on a dataset
+Train a model on a dataset.
Usage:
$ yolo mode=train model=yolov8n.pt data=coco128.yaml imgsz=640 epochs=100 batch=16
@@ -37,7 +37,7 @@ from ultralytics.utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel,
class BaseTrainer:
"""
- BaseTrainer
+ BaseTrainer.
A base class for creating trainers.
@@ -143,15 +143,11 @@ class BaseTrainer:
callbacks.add_integration_callbacks(self)
def add_callback(self, event: str, callback):
- """
- Appends the given callback.
- """
+ """Appends the given callback."""
self.callbacks[event].append(callback)
def set_callback(self, event: str, callback):
- """
- Overrides the existing callbacks with the given callback.
- """
+ """Overrides the existing callbacks with the given callback."""
self.callbacks[event] = [callback]
def run_callbacks(self, event: str):
@@ -207,9 +203,7 @@ class BaseTrainer:
world_size=world_size)
def _setup_train(self, world_size):
- """
- Builds dataloaders and optimizer on correct rank process.
- """
+ """Builds dataloaders and optimizer on correct rank process."""
# Model
self.run_callbacks('on_pretrain_routine_start')
@@ -450,14 +444,14 @@ class BaseTrainer:
@staticmethod
def get_dataset(data):
"""
- Get train, val path from data dict if it exists. Returns None if data format is not recognized.
+ Get train, val path from data dict if it exists.
+
+ Returns None if data format is not recognized.
"""
return data['train'], data.get('val') or data.get('test')
def setup_model(self):
- """
- load/create/download model for any task.
- """
+ """Load/create/download model for any task."""
if isinstance(self.model, torch.nn.Module): # if model is loaded beforehand. No setup needed
return
@@ -482,14 +476,14 @@ class BaseTrainer:
self.ema.update(self.model)
def preprocess_batch(self, batch):
- """
- Allows custom preprocessing model inputs and ground truths depending on task type.
- """
+ """Allows custom preprocessing model inputs and ground truths depending on task type."""
return batch
def validate(self):
"""
- Runs validation on test set using self.validator. The returned dict is expected to contain "fitness" key.
+ Runs validation on test set using self.validator.
+
+ The returned dict is expected to contain "fitness" key.
"""
metrics = self.validator(self)
fitness = metrics.pop('fitness', -self.loss.detach().cpu().numpy()) # use loss as fitness measure if not found
@@ -506,26 +500,20 @@ class BaseTrainer:
raise NotImplementedError('get_validator function not implemented in trainer')
def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
- """
- Returns dataloader derived from torch.data.Dataloader.
- """
+ """Returns dataloader derived from torch.data.Dataloader."""
raise NotImplementedError('get_dataloader function not implemented in trainer')
def build_dataset(self, img_path, mode='train', batch=None):
- """Build dataset"""
+ """Build dataset."""
raise NotImplementedError('build_dataset function not implemented in trainer')
def label_loss_items(self, loss_items=None, prefix='train'):
- """
- Returns a loss dict with labelled training loss items tensor
- """
+ """Returns a loss dict with labelled training loss items tensor."""
# Not needed for classification but necessary for segmentation & detection
return {'loss': loss_items} if loss_items is not None else ['loss']
def set_model_attributes(self):
- """
- To set or update model parameters before training.
- """
+ """To set or update model parameters before training."""
self.model.names = self.data['names']
def build_targets(self, preds, targets):
@@ -632,8 +620,8 @@ class BaseTrainer:
def build_optimizer(self, model, name='auto', lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5):
"""
- Constructs an optimizer for the given model, based on the specified optimizer name, learning rate,
- momentum, weight decay, and number of iterations.
+ Constructs an optimizer for the given model, based on the specified optimizer name, learning rate, momentum,
+ weight decay, and number of iterations.
Args:
model (torch.nn.Module): The model for which to build an optimizer.
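The `get_dataset` contract above is small enough to show inline; a self-contained sketch of the fallback logic:

```python
# Worked example of the contract: 'val' falls back to 'test', and an
# unrecognized data format would yield None for the val path.
data = {'train': 'images/train', 'test': 'images/test'}
train_path, val_path = data['train'], data.get('val') or data.get('test')
print(train_path, val_path)  # images/train images/test
```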
diff --git a/ultralytics/engine/tuner.py b/ultralytics/engine/tuner.py
index d60a56c2..6d6e0a86 100644
--- a/ultralytics/engine/tuner.py
+++ b/ultralytics/engine/tuner.py
@@ -31,32 +31,32 @@ from ultralytics.utils.plotting import plot_tune_results
class Tuner:
"""
- Class responsible for hyperparameter tuning of YOLO models.
+ Class responsible for hyperparameter tuning of YOLO models.
- The class evolves YOLO model hyperparameters over a given number of iterations
- by mutating them according to the search space and retraining the model to evaluate their performance.
+ The class evolves YOLO model hyperparameters over a given number of iterations
+ by mutating them according to the search space and retraining the model to evaluate their performance.
- Attributes:
- space (dict): Hyperparameter search space containing bounds and scaling factors for mutation.
- tune_dir (Path): Directory where evolution logs and results will be saved.
- tune_csv (Path): Path to the CSV file where evolution logs are saved.
+ Attributes:
+ space (dict): Hyperparameter search space containing bounds and scaling factors for mutation.
+ tune_dir (Path): Directory where evolution logs and results will be saved.
+ tune_csv (Path): Path to the CSV file where evolution logs are saved.
- Methods:
- _mutate(hyp: dict) -> dict:
- Mutates the given hyperparameters within the bounds specified in `self.space`.
+ Methods:
+ _mutate(hyp: dict) -> dict:
+ Mutates the given hyperparameters within the bounds specified in `self.space`.
- __call__():
- Executes the hyperparameter evolution across multiple iterations.
+ __call__():
+ Executes the hyperparameter evolution across multiple iterations.
- Example:
- Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
- ```python
- from ultralytics import YOLO
+ Example:
+ Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
+ ```python
+ from ultralytics import YOLO
- model = YOLO('yolov8n.pt')
- model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False)
- ```
- """
+ model = YOLO('yolov8n.pt')
+ model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False)
+ ```
+ """
def __init__(self, args=DEFAULT_CFG, _callbacks=None):
"""
diff --git a/ultralytics/engine/validator.py b/ultralytics/engine/validator.py
index 8d8349bd..5ad69546 100644
--- a/ultralytics/engine/validator.py
+++ b/ultralytics/engine/validator.py
@@ -36,7 +36,7 @@ from ultralytics.utils.torch_utils import de_parallel, select_device, smart_infe
class BaseValidator:
"""
- BaseValidator
+ BaseValidator.
A base class for creating validators.
@@ -102,8 +102,7 @@ class BaseValidator:
@smart_inference_mode()
def __call__(self, trainer=None, model=None):
- """
- Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer
+ """Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer
gets priority).
"""
self.training = trainer is not None
@@ -260,7 +259,7 @@ class BaseValidator:
raise NotImplementedError('get_dataloader function not implemented for this validator')
def build_dataset(self, img_path):
- """Build dataset"""
+ """Build dataset."""
raise NotImplementedError('build_dataset function not implemented in validator')
def preprocess(self, batch):
diff --git a/ultralytics/hub/__init__.py b/ultralytics/hub/__init__.py
index daed439c..8e101d6b 100644
--- a/ultralytics/hub/__init__.py
+++ b/ultralytics/hub/__init__.py
@@ -80,8 +80,8 @@ def get_export(model_id='', format='torchscript'):
def check_dataset(path='', task='detect'):
"""
- Function for error-checking HUB dataset Zip file before upload. It checks a dataset for errors before it is
- uploaded to the HUB. Usage examples are given below.
+ Function for error-checking HUB dataset Zip file before upload. It checks a dataset for errors before it is uploaded
+ to the HUB. Usage examples are given below.
Args:
path (str, optional): Path to data.zip (with data.yaml inside data.zip). Defaults to ''.
diff --git a/ultralytics/hub/auth.py b/ultralytics/hub/auth.py
index 9963d79c..deea9a32 100644
--- a/ultralytics/hub/auth.py
+++ b/ultralytics/hub/auth.py
@@ -9,6 +9,19 @@ API_KEY_URL = f'{HUB_WEB_ROOT}/settings?tab=api+keys'
class Auth:
+ """
+ Manages authentication processes including API key handling, cookie-based authentication, and header generation.
+
+ The class supports different methods of authentication:
+ 1. Directly using an API key.
+ 2. Authenticating using browser cookies (specifically in Google Colab).
+ 3. Prompting the user to enter an API key.
+
+ Attributes:
+ id_token (str or bool): Token used for identity verification, initialized as False.
+ api_key (str or bool): API key for authentication, initialized as False.
+ model_key (bool): Placeholder for model key, initialized as False.
+ """
id_token = api_key = model_key = False
def __init__(self, api_key='', verbose=False):
@@ -54,7 +67,9 @@ class Auth:
def request_api_key(self, max_attempts=3):
"""
- Prompt the user to input their API key. Returns the model ID.
+ Prompt the user to input their API key.
+
+ Returns the model ID.
"""
import getpass
for attempts in range(max_attempts):
@@ -86,8 +101,8 @@ class Auth:
def auth_with_cookies(self) -> bool:
"""
- Attempt to fetch authentication via cookies and set id_token.
- User must be logged in to HUB and running in a supported browser.
+ Attempt to fetch authentication via cookies and set id_token. User must be logged in to HUB and running in a
+ supported browser.
Returns:
bool: True if authentication is successful, False otherwise.
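A hedged usage sketch of `Auth`; the key below is a placeholder, and note that construction attempts verification against HUB:

```python
from ultralytics.hub.auth import Auth

auth = Auth(api_key='placeholder_key', verbose=True)  # not a real credential
```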
diff --git a/ultralytics/hub/session.py b/ultralytics/hub/session.py
index 57c55f80..9870ca16 100644
--- a/ultralytics/hub/session.py
+++ b/ultralytics/hub/session.py
@@ -84,6 +84,7 @@ class HUBTrainingSession:
def _handle_signal(self, signum, frame):
"""
Handle kill signals and prevent heartbeats from being sent on Colab after termination.
+
This method does not use frame; it is included because it is passed by signal.
"""
if self.alive is True:
diff --git a/ultralytics/hub/utils.py b/ultralytics/hub/utils.py
index 3ca954e6..f2621d7a 100644
--- a/ultralytics/hub/utils.py
+++ b/ultralytics/hub/utils.py
@@ -161,9 +161,7 @@ class Events:
url = 'https://www.google-analytics.com/mp/collect?measurement_id=G-X8NCJYTQXM&api_secret=QLQrATrNSwGRFRLE-cbHJw'
def __init__(self):
- """
- Initializes the Events object with default values for events, rate_limit, and metadata.
- """
+ """Initializes the Events object with default values for events, rate_limit, and metadata."""
self.events = [] # events list
self.rate_limit = 60.0 # rate limit (seconds)
self.t = 0.0 # rate limit timer (seconds)
diff --git a/ultralytics/models/fastsam/model.py b/ultralytics/models/fastsam/model.py
index c1895fc6..e6475faa 100644
--- a/ultralytics/models/fastsam/model.py
+++ b/ultralytics/models/fastsam/model.py
@@ -22,7 +22,7 @@ class FastSAM(Model):
"""
def __init__(self, model='FastSAM-x.pt'):
- """Call the __init__ method of the parent class (YOLO) with the updated default model"""
+ """Call the __init__ method of the parent class (YOLO) with the updated default model."""
if str(model) == 'FastSAM.pt':
model = 'FastSAM-x.pt'
assert Path(model).suffix not in ('.yaml', '.yml'), 'FastSAM models only support pre-trained models.'
@@ -30,4 +30,5 @@ class FastSAM(Model):
@property
def task_map(self):
+ """Returns a dictionary mapping segment task to corresponding predictor and validator classes."""
return {'segment': {'predictor': FastSAMPredictor, 'validator': FastSAMValidator}}
diff --git a/ultralytics/models/fastsam/predict.py b/ultralytics/models/fastsam/predict.py
index 4eac69f9..b64d2d6e 100644
--- a/ultralytics/models/fastsam/predict.py
+++ b/ultralytics/models/fastsam/predict.py
@@ -11,10 +11,12 @@ from ultralytics.utils import DEFAULT_CFG, ops
class FastSAMPredictor(DetectionPredictor):
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+ """Initializes FastSAMPredictor class by inheriting from DetectionPredictor and setting task to 'segment'."""
super().__init__(cfg, overrides, _callbacks)
self.args.task = 'segment'
def postprocess(self, preds, img, orig_imgs):
+ """Postprocesses the predictions, applies non-max suppression, scales the boxes, and returns the results."""
p = ops.non_max_suppression(
preds[0],
self.args.conf,
diff --git a/ultralytics/models/fastsam/prompt.py b/ultralytics/models/fastsam/prompt.py
index 97ab46c3..5eb581e9 100644
--- a/ultralytics/models/fastsam/prompt.py
+++ b/ultralytics/models/fastsam/prompt.py
@@ -15,6 +15,7 @@ from ultralytics.utils import TQDM
class FastSAMPrompt:
def __init__(self, source, results, device='cuda') -> None:
+ """Initializes FastSAMPrompt with given source, results and device, and assigns clip for linear assignment."""
self.device = device
self.results = results
self.source = source
@@ -30,6 +31,7 @@ class FastSAMPrompt:
@staticmethod
def _segment_image(image, bbox):
+ """Segments the given image according to the provided bounding box coordinates."""
image_array = np.array(image)
segmented_image_array = np.zeros_like(image_array)
x1, y1, x2, y2 = bbox
@@ -45,6 +47,9 @@ class FastSAMPrompt:
@staticmethod
def _format_results(result, filter=0):
+ """Formats detection results into list of annotations each containing ID, segmentation, bounding box, score and
+ area.
+ """
annotations = []
n = len(result.masks.data) if result.masks is not None else 0
for i in range(n):
@@ -61,6 +66,9 @@ class FastSAMPrompt:
@staticmethod
def _get_bbox_from_mask(mask):
+ """Applies morphological transformations to the mask, displays it, and if with_contours is True, draws
+ contours.
+ """
mask = mask.astype(np.uint8)
contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
x1, y1, w, h = cv2.boundingRect(contours[0])
@@ -195,6 +203,7 @@ class FastSAMPrompt:
@torch.no_grad()
def retrieve(self, model, preprocess, elements, search_text: str, device) -> int:
+ """Processes images and text with a model, calculates similarity, and returns softmax score."""
preprocessed_images = [preprocess(image).to(device) for image in elements]
tokenized_text = self.clip.tokenize([search_text]).to(device)
stacked_images = torch.stack(preprocessed_images)
@@ -206,6 +215,7 @@ class FastSAMPrompt:
return probs[:, 0].softmax(dim=0)
def _crop_image(self, format_results):
+ """Crops an image based on provided annotation format and returns cropped images and related data."""
if os.path.isdir(self.source):
raise ValueError(f"'{self.source}' is a directory, not a valid source for this function.")
image = Image.fromarray(cv2.cvtColor(self.results[0].orig_img, cv2.COLOR_BGR2RGB))
@@ -229,6 +239,7 @@ class FastSAMPrompt:
return cropped_boxes, cropped_images, not_crop, filter_id, annotations
def box_prompt(self, bbox):
+ """Modifies the bounding box properties and calculates IoU between masks and bounding box."""
if self.results[0].masks is not None:
assert (bbox[2] != 0 and bbox[3] != 0)
if os.path.isdir(self.source):
@@ -261,7 +272,8 @@ class FastSAMPrompt:
self.results[0].masks.data = torch.tensor(np.array([masks[max_iou_index].cpu().numpy()]))
return self.results
- def point_prompt(self, points, pointlabel): # numpy 处理
+ def point_prompt(self, points, pointlabel): # numpy
+ """Adjusts points on detected masks based on user input and returns the modified results."""
if self.results[0].masks is not None:
if os.path.isdir(self.source):
raise ValueError(f"'{self.source}' is a directory, not a valid source for this function.")
@@ -284,6 +296,7 @@ class FastSAMPrompt:
return self.results
def text_prompt(self, text):
+ """Processes a text prompt, applies it to existing results and returns the updated results."""
if self.results[0].masks is not None:
format_results = self._format_results(self.results[0], 0)
cropped_boxes, cropped_images, not_crop, filter_id, annotations = self._crop_image(format_results)
@@ -296,4 +309,5 @@ class FastSAMPrompt:
return self.results
def everything_prompt(self):
+ """Returns the processed results from the previous methods in the class."""
return self.results
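A hedged end-to-end sketch combining the prompt methods documented above; paths and prompt values are illustrative, and `text_prompt` additionally requires the CLIP dependency assigned in `__init__`:

```python
from ultralytics.models.fastsam import FastSAM, FastSAMPrompt

model = FastSAM('FastSAM-x.pt')
everything = model('bus.jpg', device='cpu', retina_masks=True, imgsz=1024)
prompt = FastSAMPrompt('bus.jpg', everything, device='cpu')

ann = prompt.everything_prompt()        # all masks from the initial results
ann = prompt.text_prompt(text='a bus')  # CLIP-guided mask selection
```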
diff --git a/ultralytics/models/nas/model.py b/ultralytics/models/nas/model.py
index f848cc4b..9a770c4c 100644
--- a/ultralytics/models/nas/model.py
+++ b/ultralytics/models/nas/model.py
@@ -25,12 +25,13 @@ from .val import NASValidator
class NAS(Model):
def __init__(self, model='yolo_nas_s.pt') -> None:
+ """Initializes the NAS model with the provided or default 'yolo_nas_s.pt' model."""
assert Path(model).suffix not in ('.yaml', '.yml'), 'YOLO-NAS models only support pre-trained models.'
super().__init__(model, task='detect')
@smart_inference_mode()
def _load(self, weights: str, task: str):
- # Load or create new NAS model
+ """Loads an existing NAS model weights or creates a new NAS model with pretrained weights if not provided."""
import super_gradients
suffix = Path(weights).suffix
if suffix == '.pt':
@@ -58,4 +59,5 @@ class NAS(Model):
@property
def task_map(self):
+ """Returns a dictionary mapping tasks to respective predictor and validator classes."""
return {'detect': {'predictor': NASPredictor, 'validator': NASValidator}}
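A minimal usage sketch for the NAS interface above (loading weights requires the `super-gradients` package imported in `_load`; the image path is a placeholder):

```python
from ultralytics import NAS

model = NAS('yolo_nas_s.pt')
results = model.predict('bus.jpg')
```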
diff --git a/ultralytics/models/rtdetr/model.py b/ultralytics/models/rtdetr/model.py
index c20d72f6..fa7d484e 100644
--- a/ultralytics/models/rtdetr/model.py
+++ b/ultralytics/models/rtdetr/model.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-RT-DETR model interface
-"""
+"""RT-DETR model interface."""
from ultralytics.engine.model import Model
from ultralytics.nn.tasks import RTDETRDetectionModel
@@ -11,17 +9,17 @@ from .val import RTDETRValidator
class RTDETR(Model):
- """
- RTDETR model interface.
- """
+ """RTDETR model interface."""
def __init__(self, model='rtdetr-l.pt') -> None:
+ """Initializes the RTDETR model with the given model file, defaulting to 'rtdetr-l.pt'."""
if model and model.split('.')[-1] not in ('pt', 'yaml', 'yml'):
raise NotImplementedError('RT-DETR only supports creating from *.pt file or *.yaml file.')
super().__init__(model=model, task='detect')
@property
def task_map(self):
+ """Returns a dictionary mapping task names to corresponding Ultralytics task classes for RTDETR model."""
return {
'detect': {
'predictor': RTDETRPredictor,
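A minimal usage sketch for the RT-DETR interface above (the image path is a placeholder):

```python
from ultralytics import RTDETR

model = RTDETR('rtdetr-l.pt')
results = model('bus.jpg')
```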
diff --git a/ultralytics/models/rtdetr/predict.py b/ultralytics/models/rtdetr/predict.py
index 33d5d7a2..1a2b0cbc 100644
--- a/ultralytics/models/rtdetr/predict.py
+++ b/ultralytics/models/rtdetr/predict.py
@@ -48,7 +48,8 @@ class RTDETRPredictor(BasePredictor):
return results
def pre_transform(self, im):
- """Pre-transform input image before inference.
+ """
+ Pre-transform input image before inference.
Args:
im (List(np.ndarray)): (N, 3, h, w) for tensor, [(h, w, 3) x N] for list.
diff --git a/ultralytics/models/rtdetr/train.py b/ultralytics/models/rtdetr/train.py
index 1e586683..91d4729e 100644
--- a/ultralytics/models/rtdetr/train.py
+++ b/ultralytics/models/rtdetr/train.py
@@ -37,7 +37,8 @@ class RTDETRTrainer(DetectionTrainer):
return model
def build_dataset(self, img_path, mode='val', batch=None):
- """Build RTDETR Dataset
+ """
+ Build RTDETR Dataset.
Args:
img_path (str): Path to the folder containing images.
diff --git a/ultralytics/models/rtdetr/val.py b/ultralytics/models/rtdetr/val.py
index 9b984bed..d8e5fb69 100644
--- a/ultralytics/models/rtdetr/val.py
+++ b/ultralytics/models/rtdetr/val.py
@@ -16,6 +16,7 @@ __all__ = 'RTDETRValidator', # tuple or list
class RTDETRDataset(YOLODataset):
def __init__(self, *args, data=None, **kwargs):
+ """Initialize the RTDETRDataset class by inheriting from the YOLODataset class."""
super().__init__(*args, data=data, use_segments=False, use_keypoints=False, **kwargs)
# NOTE: add stretch version load_image for rtdetr mosaic
diff --git a/ultralytics/models/sam/amg.py b/ultralytics/models/sam/amg.py
index f251fe4e..d7751d6f 100644
--- a/ultralytics/models/sam/amg.py
+++ b/ultralytics/models/sam/amg.py
@@ -32,9 +32,10 @@ def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
def calculate_stability_score(masks: torch.Tensor, mask_threshold: float, threshold_offset: float) -> torch.Tensor:
"""
- Computes the stability score for a batch of masks. The stability
- score is the IoU between the binary masks obtained by thresholding
- the predicted mask logits at high and low values.
+ Computes the stability score for a batch of masks.
+
+ The stability score is the IoU between the binary masks obtained by thresholding the predicted mask logits at high
+ and low values.
"""
# One mask is always contained inside the other.
# Save memory by preventing unnecessary cast to torch.int64
@@ -60,7 +61,11 @@ def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer:
def generate_crop_boxes(im_size: Tuple[int, ...], n_layers: int,
overlap_ratio: float) -> Tuple[List[List[int]], List[int]]:
- """Generates a list of crop boxes of different sizes. Each layer has (2**i)**2 boxes for the ith layer."""
+ """
+ Generates a list of crop boxes of different sizes.
+
+ Each layer has (2**i)**2 boxes for the ith layer.
+ """
crop_boxes, layer_idxs = [], []
im_h, im_w = im_size
short_side = min(im_h, im_w)
@@ -145,8 +150,9 @@ def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> Tup
def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor:
"""
- Calculates boxes in XYXY format around masks. Return [0,0,0,0] for
- an empty mask. For input shape C1xC2x...xHxW, the output shape is C1xC2x...x4.
+ Calculates boxes in XYXY format around masks.
+
+ Return [0,0,0,0] for an empty mask. For input shape C1xC2x...xHxW, the output shape is C1xC2x...x4.
"""
# torch.max below raises an error on empty inputs, just skip in this case
if torch.numel(masks) == 0:
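A short sketch of `calculate_stability_score` as documented above, on synthetic logits:

```python
import torch

from ultralytics.models.sam.amg import calculate_stability_score

# Stability score = IoU between the mask thresholded at
# (mask_threshold + offset) and at (mask_threshold - offset).
logits = torch.randn(4, 64, 64)  # synthetic mask logits for 4 masks
scores = calculate_stability_score(logits, mask_threshold=0.0, threshold_offset=1.0)
print(scores.shape)  # torch.Size([4]) -> one stability score per mask
```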
diff --git a/ultralytics/models/sam/model.py b/ultralytics/models/sam/model.py
index 2ca35011..8a140b3f 100644
--- a/ultralytics/models/sam/model.py
+++ b/ultralytics/models/sam/model.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-SAM model interface
-"""
+"""SAM model interface."""
from pathlib import Path
@@ -13,16 +11,16 @@ from .predict import Predictor
class SAM(Model):
- """
- SAM model interface.
- """
+ """SAM model interface."""
def __init__(self, model='sam_b.pt') -> None:
+ """Initializes the SAM model instance with the specified pre-trained model file."""
if model and Path(model).suffix not in ('.pt', '.pth'):
raise NotImplementedError('SAM prediction requires pre-trained *.pt or *.pth model.')
super().__init__(model=model, task='segment')
def _load(self, weights: str, task=None):
+ """Loads the provided weights into the SAM model."""
self.model = build_sam(weights)
def predict(self, source, stream=False, bboxes=None, points=None, labels=None, **kwargs):
@@ -48,4 +46,5 @@ class SAM(Model):
@property
def task_map(self):
+ """Returns a dictionary mapping the 'segment' task to its corresponding 'Predictor'."""
return {'segment': {'predictor': Predictor}}
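A hedged usage sketch of the SAM interface, using the `predict` signature shown above (image path and prompt values are illustrative):

```python
from ultralytics import SAM

model = SAM('sam_b.pt')
results = model.predict('bus.jpg', bboxes=[100, 100, 400, 400])    # box prompt
results = model.predict('bus.jpg', points=[300, 200], labels=[1])  # point prompt
```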
diff --git a/ultralytics/models/sam/modules/decoders.py b/ultralytics/models/sam/modules/decoders.py
index 0c64a7e4..a9a3a319 100644
--- a/ultralytics/models/sam/modules/decoders.py
+++ b/ultralytics/models/sam/modules/decoders.py
@@ -98,7 +98,11 @@ class MaskDecoder(nn.Module):
sparse_prompt_embeddings: torch.Tensor,
dense_prompt_embeddings: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor]:
- """Predicts masks. See 'forward' for more details."""
+ """
+ Predicts masks.
+
+ See 'forward' for more details.
+ """
# Concatenate output tokens
output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0)
output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.size(0), -1, -1)
diff --git a/ultralytics/models/sam/modules/encoders.py b/ultralytics/models/sam/modules/encoders.py
index eb9352f9..b4c07744 100644
--- a/ultralytics/models/sam/modules/encoders.py
+++ b/ultralytics/models/sam/modules/encoders.py
@@ -100,6 +100,9 @@ class ImageEncoderViT(nn.Module):
)
def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Processes input through patch embedding, applies positional embedding if present, and passes through blocks
+ and neck.
+ """
x = self.patch_embed(x)
if self.pos_embed is not None:
x = x + self.pos_embed
@@ -157,8 +160,8 @@ class PromptEncoder(nn.Module):
def get_dense_pe(self) -> torch.Tensor:
"""
- Returns the positional encoding used to encode point prompts,
- applied to a dense set of points the shape of the image encoding.
+ Returns the positional encoding used to encode point prompts, applied to a dense set of points the shape of the
+ image encoding.
Returns:
torch.Tensor: Positional encoding with shape 1x(embed_dim)x(embedding_h)x(embedding_w)
@@ -204,9 +207,7 @@ class PromptEncoder(nn.Module):
boxes: Optional[torch.Tensor],
masks: Optional[torch.Tensor],
) -> int:
- """
- Gets the batch size of the output given the batch size of the input prompts.
- """
+ """Gets the batch size of the output given the batch size of the input prompts."""
if points is not None:
return points[0].shape[0]
elif boxes is not None:
@@ -217,6 +218,7 @@ class PromptEncoder(nn.Module):
return 1
def _get_device(self) -> torch.device:
+ """Returns the device of the first point embedding's weight tensor."""
return self.point_embeddings[0].weight.device
def forward(
@@ -259,11 +261,10 @@ class PromptEncoder(nn.Module):
class PositionEmbeddingRandom(nn.Module):
- """
- Positional encoding using random spatial frequencies.
- """
+ """Positional encoding using random spatial frequencies."""
def __init__(self, num_pos_feats: int = 64, scale: Optional[float] = None) -> None:
+ """Initializes a position embedding using random spatial frequencies."""
super().__init__()
if scale is None or scale <= 0.0:
scale = 1.0
@@ -304,7 +305,7 @@ class PositionEmbeddingRandom(nn.Module):
class Block(nn.Module):
- """Transformer blocks with support of window attention and residual propagation blocks"""
+ """Transformer blocks with support of window attention and residual propagation blocks."""
def __init__(
self,
@@ -351,6 +352,7 @@ class Block(nn.Module):
self.window_size = window_size
def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Executes a forward pass through the transformer block with window attention and non-overlapping windows."""
shortcut = x
x = self.norm1(x)
# Window partition
@@ -404,6 +406,7 @@ class Attention(nn.Module):
self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))
def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Applies the forward operation including attention, normalization, MLP, and indexing within window limits."""
B, H, W, _ = x.shape
# qkv with shape (3, B, nHead, H * W, C)
qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
@@ -448,6 +451,7 @@ def window_unpartition(windows: torch.Tensor, window_size: int, pad_hw: Tuple[in
hw: Tuple[int, int]) -> torch.Tensor:
"""
Window unpartition into original sequences and removing padding.
+
Args:
windows (tensor): input tokens with [B * num_windows, window_size, window_size, C].
window_size (int): window size.
@@ -540,9 +544,7 @@ def add_decomposed_rel_pos(
class PatchEmbed(nn.Module):
- """
- Image to Patch Embedding.
- """
+ """Image to Patch Embedding."""
def __init__(
self,
@@ -565,4 +567,5 @@ class PatchEmbed(nn.Module):
self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=kernel_size, stride=stride, padding=padding)
def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Computes patch embedding by applying convolution and transposing resulting tensor."""
return self.proj(x).permute(0, 2, 3, 1) # B C H W -> B H W C
diff --git a/ultralytics/models/sam/modules/tiny_encoder.py b/ultralytics/models/sam/modules/tiny_encoder.py
index ca8de50b..d96b3032 100644
--- a/ultralytics/models/sam/modules/tiny_encoder.py
+++ b/ultralytics/models/sam/modules/tiny_encoder.py
@@ -23,6 +23,9 @@ from ultralytics.utils.instance import to_2tuple
class Conv2d_BN(torch.nn.Sequential):
def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, groups=1, bn_weight_init=1):
+ """Initializes the MBConv model with given input channels, output channels, expansion ratio, activation, and
+ drop path.
+ """
super().__init__()
self.add_module('c', torch.nn.Conv2d(a, b, ks, stride, pad, dilation, groups, bias=False))
bn = torch.nn.BatchNorm2d(b)
@@ -34,6 +37,9 @@ class Conv2d_BN(torch.nn.Sequential):
class PatchEmbed(nn.Module):
def __init__(self, in_chans, embed_dim, resolution, activation):
+ """Initialize the PatchMerging class with specified input, output dimensions, resolution and activation
+ function.
+ """
super().__init__()
img_size: Tuple[int, int] = to_2tuple(resolution)
self.patches_resolution = (img_size[0] // 4, img_size[1] // 4)
@@ -48,12 +54,16 @@ class PatchEmbed(nn.Module):
)
def forward(self, x):
+ """Runs input tensor 'x' through the PatchMerging model's sequence of operations."""
return self.seq(x)
class MBConv(nn.Module):
def __init__(self, in_chans, out_chans, expand_ratio, activation, drop_path):
+ """Initializes a convolutional layer with specified dimensions, input resolution, depth, and activation
+ function.
+ """
super().__init__()
self.in_chans = in_chans
self.hidden_chans = int(in_chans * expand_ratio)
@@ -73,6 +83,7 @@ class MBConv(nn.Module):
self.drop_path = nn.Identity()
def forward(self, x):
+ """Implements the forward pass for the model architecture."""
shortcut = x
x = self.conv1(x)
x = self.act1(x)
@@ -87,6 +98,9 @@ class MBConv(nn.Module):
class PatchMerging(nn.Module):
def __init__(self, input_resolution, dim, out_dim, activation):
+ """Initializes the ConvLayer with specific dimension, input resolution, depth, activation, drop path, and other
+ optional parameters.
+ """
super().__init__()
self.input_resolution = input_resolution
@@ -99,6 +113,7 @@ class PatchMerging(nn.Module):
self.conv3 = Conv2d_BN(out_dim, out_dim, 1, 1, 0)
def forward(self, x):
+ """Applies forward pass on the input utilizing convolution and activation layers, and returns the result."""
if x.ndim == 3:
H, W = self.input_resolution
B = len(x)
@@ -149,6 +164,7 @@ class ConvLayer(nn.Module):
input_resolution, dim=dim, out_dim=out_dim, activation=activation)
def forward(self, x):
+ """Processes the input through a series of convolutional layers and returns the activated output."""
for blk in self.blocks:
x = checkpoint.checkpoint(blk, x) if self.use_checkpoint else blk(x)
return x if self.downsample is None else self.downsample(x)
@@ -157,6 +173,7 @@ class ConvLayer(nn.Module):
class Mlp(nn.Module):
def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+ """Initializes Attention module with the given parameters including dimension, key_dim, number of heads, etc."""
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
@@ -167,6 +184,7 @@ class Mlp(nn.Module):
self.drop = nn.Dropout(drop)
def forward(self, x):
+ """Applies operations on input x and returns modified x, runs downsample if not None."""
x = self.norm(x)
x = self.fc1(x)
x = self.act(x)
@@ -216,6 +234,7 @@ class Attention(torch.nn.Module):
@torch.no_grad()
def train(self, mode=True):
+ """Sets the module in training mode and handles attribute 'ab' based on the mode."""
super().train(mode)
if mode and hasattr(self, 'ab'):
del self.ab
@@ -298,6 +317,9 @@ class TinyViTBlock(nn.Module):
self.local_conv = Conv2d_BN(dim, dim, ks=local_conv_size, stride=1, pad=pad, groups=dim)
def forward(self, x):
+ """Applies attention-based transformation or padding to input 'x' before passing it through a local
+ convolution.
+ """
H, W = self.input_resolution
B, L, C = x.shape
assert L == H * W, 'input feature has wrong size'
@@ -337,6 +359,9 @@ class TinyViTBlock(nn.Module):
return x + self.drop_path(self.mlp(x))
def extra_repr(self) -> str:
+ """Returns a formatted string representing the TinyViTBlock's parameters: dimension, input resolution, number of
+ attention heads, window size, and MLP ratio.
+ """
return f'dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, ' \
f'window_size={self.window_size}, mlp_ratio={self.mlp_ratio}'
@@ -402,23 +427,28 @@ class BasicLayer(nn.Module):
input_resolution, dim=dim, out_dim=out_dim, activation=activation)
def forward(self, x):
+ """Performs forward propagation on the input tensor and returns a normalized tensor."""
for blk in self.blocks:
x = checkpoint.checkpoint(blk, x) if self.use_checkpoint else blk(x)
return x if self.downsample is None else self.downsample(x)
def extra_repr(self) -> str:
+ """Returns a string representation of the extra_repr function with the layer's parameters."""
return f'dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}'
class LayerNorm2d(nn.Module):
+ """A PyTorch implementation of Layer Normalization in 2D."""
def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
+ """Initialize LayerNorm2d with the number of channels and an optional epsilon."""
super().__init__()
self.weight = nn.Parameter(torch.ones(num_channels))
self.bias = nn.Parameter(torch.zeros(num_channels))
self.eps = eps
def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Perform a forward pass, normalizing the input tensor."""
u = x.mean(1, keepdim=True)
s = (x - u).pow(2).mean(1, keepdim=True)
x = (x - u) / torch.sqrt(s + self.eps)
@@ -518,6 +548,7 @@ class TinyViT(nn.Module):
)
def set_layer_lr_decay(self, layer_lr_decay):
+ """Sets the learning rate decay for each layer in the TinyViT model."""
decay_rate = layer_lr_decay
# layers -> blocks (depth)
@@ -525,6 +556,7 @@ class TinyViT(nn.Module):
lr_scales = [decay_rate ** (depth - i - 1) for i in range(depth)]
def _set_lr_scale(m, scale):
+ """Sets the learning rate scale for each layer in the model based on the layer's depth."""
for p in m.parameters():
p.lr_scale = scale
@@ -544,12 +576,14 @@ class TinyViT(nn.Module):
p.param_name = k
def _check_lr_scale(m):
+ """Checks if the learning rate scale attribute is present in module's parameters."""
for p in m.parameters():
assert hasattr(p, 'lr_scale'), p.param_name
self.apply(_check_lr_scale)
def _init_weights(self, m):
+ """Initializes weights for linear layers and layer normalization in the given module."""
if isinstance(m, nn.Linear):
# NOTE: This initialization is needed only for training.
# trunc_normal_(m.weight, std=.02)
@@ -561,11 +595,12 @@ class TinyViT(nn.Module):
@torch.jit.ignore
def no_weight_decay_keywords(self):
+ """Returns a dictionary of parameter names where weight decay should not be applied."""
return {'attention_biases'}
def forward_features(self, x):
- # x: (N, C, H, W)
- x = self.patch_embed(x)
+ """Runs the input through the model layers and returns the transformed output."""
+ x = self.patch_embed(x) # x input is (N, C, H, W)
x = self.layers[0](x)
start_i = 1
@@ -579,4 +614,5 @@ class TinyViT(nn.Module):
return self.neck(x)
def forward(self, x):
+ """Executes a forward pass on the input tensor through the constructed model layers."""
return self.forward_features(x)
diff --git a/ultralytics/models/sam/modules/transformer.py b/ultralytics/models/sam/modules/transformer.py
index f925538b..95a04666 100644
--- a/ultralytics/models/sam/modules/transformer.py
+++ b/ultralytics/models/sam/modules/transformer.py
@@ -21,8 +21,7 @@ class TwoWayTransformer(nn.Module):
attention_downsample_rate: int = 2,
) -> None:
"""
- A transformer decoder that attends to an input image using
- queries whose positional embedding is supplied.
+ A transformer decoder that attends to an input image using queries whose positional embedding is supplied.
Args:
depth (int): number of layers in the transformer
@@ -171,8 +170,7 @@ class TwoWayAttentionBlock(nn.Module):
class Attention(nn.Module):
- """
- An attention layer that allows for downscaling the size of the embedding after projection to queries, keys, and
+ """An attention layer that allows for downscaling the size of the embedding after projection to queries, keys, and
values.
"""
diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py
index e8a8197b..31e0da93 100644
--- a/ultralytics/models/sam/predict.py
+++ b/ultralytics/models/sam/predict.py
@@ -19,6 +19,7 @@ from .build import build_sam
class Predictor(BasePredictor):
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+ """Initializes the Predictor class with default or provided configuration, overrides, and callbacks."""
if overrides is None:
overrides = {}
overrides.update(dict(task='segment', mode='predict', imgsz=1024))
@@ -34,7 +35,8 @@ class Predictor(BasePredictor):
self.segment_all = False
def preprocess(self, im):
- """Prepares input image before inference.
+ """
+ Prepares input image before inference.
Args:
im (torch.Tensor | List(np.ndarray)): BCHW for tensor, [(HWC) x B] for list.
@@ -189,7 +191,8 @@ class Predictor(BasePredictor):
stability_score_thresh=0.95,
stability_score_offset=0.95,
crop_nms_thresh=0.7):
- """Segment the whole image.
+ """
+ Segment the whole image.
Args:
im (torch.Tensor): The preprocessed image, (N, C, H, W).
@@ -360,14 +363,15 @@ class Predictor(BasePredictor):
self.prompts = prompts
def reset_image(self):
+ """Resets the image and its features to None."""
self.im = None
self.features = None
@staticmethod
def remove_small_regions(masks, min_area=0, nms_thresh=0.7):
"""
- Removes small disconnected regions and holes in masks, then reruns
- box NMS to remove any new duplicates. Requires open-cv as a dependency.
+ Removes small disconnected regions and holes in masks, then reruns box NMS to remove any new duplicates.
+ Requires open-cv as a dependency.
Args:
masks (torch.Tensor): Masks, (N, H, W).
diff --git a/ultralytics/models/utils/loss.py b/ultralytics/models/utils/loss.py
index 95406e1f..77eadce7 100644
--- a/ultralytics/models/utils/loss.py
+++ b/ultralytics/models/utils/loss.py
@@ -47,6 +47,7 @@ class DETRLoss(nn.Module):
self.device = None
def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=''):
+ """Computes the classification loss based on predictions, target values, and ground truth scores."""
# logits: [b, query, num_classes], gt_class: list[[n, 1]]
name_class = f'loss_class{postfix}'
bs, nq = pred_scores.shape[:2]
@@ -68,6 +69,9 @@ class DETRLoss(nn.Module):
return {name_class: loss_cls.squeeze() * self.loss_gain['class']}
def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=''):
+ """Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding
+ boxes.
+ """
# boxes: [b, query, 4], gt_bbox: list[[n, 4]]
name_bbox = f'loss_bbox{postfix}'
name_giou = f'loss_giou{postfix}'
@@ -125,7 +129,7 @@ class DETRLoss(nn.Module):
postfix='',
masks=None,
gt_mask=None):
- """Get auxiliary losses"""
+ """Get auxiliary losses."""
# NOTE: loss class, bbox, giou, mask, dice
loss = torch.zeros(5 if masks is not None else 3, device=pred_bboxes.device)
if match_indices is None and self.use_uni_match:
@@ -166,12 +170,14 @@ class DETRLoss(nn.Module):
@staticmethod
def _get_index(match_indices):
+ """Returns batch indices, source indices, and destination indices from provided match indices."""
batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(match_indices)])
src_idx = torch.cat([src for (src, _) in match_indices])
dst_idx = torch.cat([dst for (_, dst) in match_indices])
return (batch_idx, src_idx), dst_idx
def _get_assigned_bboxes(self, pred_bboxes, gt_bboxes, match_indices):
+ """Assigns predicted bounding boxes to ground truth bounding boxes based on the match indices."""
pred_assigned = torch.cat([
t[I] if len(I) > 0 else torch.zeros(0, t.shape[-1], device=self.device)
for t, (I, _) in zip(pred_bboxes, match_indices)])
@@ -190,7 +196,7 @@ class DETRLoss(nn.Module):
gt_mask=None,
postfix='',
match_indices=None):
- """Get losses"""
+ """Get losses."""
if match_indices is None:
match_indices = self.matcher(pred_bboxes,
pred_scores,
@@ -250,22 +256,43 @@ class DETRLoss(nn.Module):
class RTDETRDetectionLoss(DETRLoss):
+ """
+ Real-Time Detection Transformer (RT-DETR) Detection Loss class that extends the DETRLoss.
+
+ This class computes the detection loss for the RT-DETR model, which includes the standard detection loss as well as
+ an additional denoising training loss when provided with denoising metadata.
+ """
def forward(self, preds, batch, dn_bboxes=None, dn_scores=None, dn_meta=None):
+ """
+ Forward pass to compute the detection loss.
+
+ Args:
+ preds (tuple): Predicted bounding boxes and scores.
+ batch (dict): Batch data containing ground truth information.
+ dn_bboxes (torch.Tensor, optional): Denoising bounding boxes. Default is None.
+ dn_scores (torch.Tensor, optional): Denoising scores. Default is None.
+ dn_meta (dict, optional): Metadata for denoising. Default is None.
+
+ Returns:
+ (dict): Dictionary containing the total loss and, if applicable, the denoising loss.
+ """
pred_bboxes, pred_scores = preds
total_loss = super().forward(pred_bboxes, pred_scores, batch)
+ # Check for denoising metadata to compute denoising training loss
if dn_meta is not None:
dn_pos_idx, dn_num_group = dn_meta['dn_pos_idx'], dn_meta['dn_num_group']
assert len(batch['gt_groups']) == len(dn_pos_idx)
- # Denoising match indices
+ # Get the match indices for denoising
match_indices = self.get_dn_match_indices(dn_pos_idx, dn_num_group, batch['gt_groups'])
- # Compute denoising training loss
+ # Compute the denoising training loss
dn_loss = super().forward(dn_bboxes, dn_scores, batch, postfix='_dn', match_indices=match_indices)
total_loss.update(dn_loss)
else:
+ # If no denoising metadata is provided, set denoising loss to zero
total_loss.update({f'{k}_dn': torch.tensor(0., device=self.device) for k in total_loss.keys()})
return total_loss
@@ -276,12 +303,12 @@ class RTDETRDetectionLoss(DETRLoss):
Get the match indices for denoising.
Args:
- dn_pos_idx (List[torch.Tensor]): A list includes positive indices of denoising.
- dn_num_group (int): The number of groups of denoising.
- gt_groups (List(int)): a list of batch size length includes the number of gts of each image.
+ dn_pos_idx (List[torch.Tensor]): List of tensors containing positive indices for denoising.
+ dn_num_group (int): Number of denoising groups.
+ gt_groups (List[int]): List of integers representing the number of ground truths for each image.
Returns:
- dn_match_indices (List(tuple)): Matched indices.
+ (List[tuple]): List of tuples containing matched indices for denoising.
"""
dn_match_indices = []
idx_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)
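A worked example of the offset bookkeeping on the line above:

```python
import torch

# With gt_groups=[2, 3], image 0's ground truths start at index 0 and
# image 1's at index 2 in the flattened gt tensor.
gt_groups = [2, 3]
idx_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)
print(idx_groups)  # tensor([0, 2])
```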
diff --git a/ultralytics/models/utils/ops.py b/ultralytics/models/utils/ops.py
index abce97a6..99357d19 100644
--- a/ultralytics/models/utils/ops.py
+++ b/ultralytics/models/utils/ops.py
@@ -11,8 +11,8 @@ from ultralytics.utils.ops import xywh2xyxy, xyxy2xywh
class HungarianMatcher(nn.Module):
"""
- A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in
- an end-to-end fashion.
+ A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in an
+ end-to-end fashion.
HungarianMatcher performs optimal assignment over the predicted and ground truth bounding boxes using a cost
function that considers classification scores, bounding box coordinates, and optionally, mask predictions.
@@ -32,6 +32,9 @@ class HungarianMatcher(nn.Module):
"""
def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_sample_points=12544, alpha=0.25, gamma=2.0):
+ """Initializes HungarianMatcher with cost coefficients, Focal Loss, mask prediction, sample points, and alpha
+ and gamma factors.
+ """
super().__init__()
if cost_gain is None:
cost_gain = {'class': 1, 'bbox': 5, 'giou': 2, 'mask': 1, 'dice': 1}
@@ -45,8 +48,8 @@ class HungarianMatcher(nn.Module):
def forward(self, pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=None, gt_mask=None):
"""
Forward pass for HungarianMatcher. This function computes costs based on prediction and ground truth
- (classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching
- between predictions and ground truth based on these costs.
+ (classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching between
+ predictions and ground truth based on these costs.
Args:
pred_bboxes (Tensor): Predicted bounding boxes with shape [batch_size, num_queries, 4].
@@ -153,9 +156,9 @@ def get_cdn_group(batch,
box_noise_scale=1.0,
training=False):
"""
- Get contrastive denoising training group. This function creates a contrastive denoising training group with
- positive and negative samples from the ground truths (gt). It applies noise to the class labels and bounding
- box coordinates, and returns the modified labels, bounding boxes, attention mask and meta information.
+ Get contrastive denoising training group. This function creates a contrastive denoising training group with positive
+ and negative samples from the ground truths (gt). It applies noise to the class labels and bounding box coordinates,
+ and returns the modified labels, bounding boxes, attention mask and meta information.
Args:
batch (dict): A dict that includes 'gt_cls' (torch.Tensor with shape [num_gts, ]), 'gt_bboxes'
@@ -191,12 +194,12 @@ def get_cdn_group(batch,
gt_bbox = batch['bboxes'] # bs*num, 4
b_idx = batch['batch_idx']
- # each group has positive and negative queries.
+ # Each group has positive and negative queries.
dn_cls = gt_cls.repeat(2 * num_group) # (2*num_group*bs*num, )
dn_bbox = gt_bbox.repeat(2 * num_group, 1) # 2*num_group*bs*num, 4
dn_b_idx = b_idx.repeat(2 * num_group).view(-1) # (2*num_group*bs*num, )
- # positive and negative mask
+ # Positive and negative mask
# (bs*num*num_group, ), the second total_num*num_group part as negative samples
neg_idx = torch.arange(total_num * num_group, dtype=torch.long, device=gt_bbox.device) + num_group * total_num
@@ -220,10 +223,9 @@ def get_cdn_group(batch,
known_bbox += rand_part * diff
known_bbox.clip_(min=0.0, max=1.0)
dn_bbox = xyxy2xywh(known_bbox)
- dn_bbox = inverse_sigmoid(dn_bbox)
+ dn_bbox = torch.logit(dn_bbox, eps=1e-6) # inverse sigmoid
- # total denoising queries
- num_dn = int(max_nums * 2 * num_group)
+ num_dn = int(max_nums * 2 * num_group) # total denoising queries
# class_embed = torch.cat([class_embed, torch.zeros([1, class_embed.shape[-1]], device=class_embed.device)])
dn_cls_embed = class_embed[dn_cls] # bs*num * 2 * num_group, 256
padding_cls = torch.zeros(bs, num_dn, dn_cls_embed.shape[-1], device=gt_cls.device)
@@ -256,9 +258,3 @@ def get_cdn_group(batch,
return padding_cls.to(class_embed.device), padding_bbox.to(class_embed.device), attn_mask.to(
class_embed.device), dn_meta
-
-
-def inverse_sigmoid(x, eps=1e-6):
- """Inverse sigmoid function."""
- x = x.clip(min=0., max=1.)
- return torch.log(x / (1 - x + eps) + eps)
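Replacing the local `inverse_sigmoid` helper with `torch.logit` assumes the two are numerically interchangeable for in-range inputs; a quick sanity check of that assumption:

```python
import torch

x = torch.rand(5) * 0.98 + 0.01  # keep values safely inside (0, 1)
a = torch.logit(x, eps=1e-6)                                    # new call
b = torch.log(x.clip(0, 1) / (1 - x.clip(0, 1) + 1e-6) + 1e-6)  # removed helper
print(torch.allclose(a, b, atol=1e-3))  # True
```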
diff --git a/ultralytics/models/yolo/classify/predict.py b/ultralytics/models/yolo/classify/predict.py
index a22616e5..ca463b67 100644
--- a/ultralytics/models/yolo/classify/predict.py
+++ b/ultralytics/models/yolo/classify/predict.py
@@ -26,6 +26,7 @@ class ClassificationPredictor(BasePredictor):
"""
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+ """Initializes ClassificationPredictor setting the task to 'classify'."""
super().__init__(cfg, overrides, _callbacks)
self.args.task = 'classify'
diff --git a/ultralytics/models/yolo/classify/train.py b/ultralytics/models/yolo/classify/train.py
index 0829f05b..c59f2853 100644
--- a/ultralytics/models/yolo/classify/train.py
+++ b/ultralytics/models/yolo/classify/train.py
@@ -79,6 +79,7 @@ class ClassificationTrainer(BaseTrainer):
return ckpt
def build_dataset(self, img_path, mode='train', batch=None):
+ """Creates a ClassificationDataset instance given an image path, and mode (train/test etc.)."""
return ClassificationDataset(root=img_path, args=self.args, augment=mode == 'train', prefix=mode)
def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
@@ -113,8 +114,9 @@ class ClassificationTrainer(BaseTrainer):
def label_loss_items(self, loss_items=None, prefix='train'):
"""
- Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for
- segmentation & detection
+ Returns a loss dict with labelled training loss items tensor.
+
+ Not needed for classification but necessary for segmentation & detection.
"""
keys = [f'{prefix}/{x}' for x in self.loss_names]
if loss_items is None:
diff --git a/ultralytics/models/yolo/classify/val.py b/ultralytics/models/yolo/classify/val.py
index 0748e27f..3ebf3808 100644
--- a/ultralytics/models/yolo/classify/val.py
+++ b/ultralytics/models/yolo/classify/val.py
@@ -78,6 +78,7 @@ class ClassificationValidator(BaseValidator):
return self.metrics.results_dict
def build_dataset(self, img_path):
+ """Creates and returns a ClassificationDataset instance using given image path and preprocessing parameters."""
return ClassificationDataset(root=img_path, args=self.args, augment=False, prefix=self.args.split)
def get_dataloader(self, dataset_path, batch_size):
diff --git a/ultralytics/models/yolo/detect/train.py b/ultralytics/models/yolo/detect/train.py
index 56d9243c..d0028c6e 100644
--- a/ultralytics/models/yolo/detect/train.py
+++ b/ultralytics/models/yolo/detect/train.py
@@ -57,7 +57,7 @@ class DetectionTrainer(BaseTrainer):
return batch
def set_model_attributes(self):
- """nl = de_parallel(self.model).model[-1].nl # number of detection layers (to scale hyps)."""
+ """Nl = de_parallel(self.model).model[-1].nl # number of detection layers (to scale hyps)."""
# self.args.box *= 3 / nl # scale to layers
# self.args.cls *= self.data["nc"] / 80 * 3 / nl # scale to classes and layers
# self.args.cls *= (self.args.imgsz / 640) ** 2 * 3 / nl # scale to image size and layers
@@ -80,8 +80,9 @@ class DetectionTrainer(BaseTrainer):
def label_loss_items(self, loss_items=None, prefix='train'):
"""
- Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for
- segmentation & detection
+ Returns a loss dict with labelled training loss items tensor.
+
+ Not needed for classification but necessary for segmentation & detection.
"""
keys = [f'{prefix}/{x}' for x in self.loss_names]
if loss_items is not None:
diff --git a/ultralytics/models/yolo/model.py b/ultralytics/models/yolo/model.py
index b85d46bd..ef1b41ab 100644
--- a/ultralytics/models/yolo/model.py
+++ b/ultralytics/models/yolo/model.py
@@ -6,13 +6,11 @@ from ultralytics.nn.tasks import ClassificationModel, DetectionModel, PoseModel,
class YOLO(Model):
- """
- YOLO (You Only Look Once) object detection model.
- """
+ """YOLO (You Only Look Once) object detection model."""
@property
def task_map(self):
- """Map head to model, trainer, validator, and predictor classes"""
+ """Map head to model, trainer, validator, and predictor classes."""
return {
'classify': {
'model': ClassificationModel,
diff --git a/ultralytics/models/yolo/pose/predict.py b/ultralytics/models/yolo/pose/predict.py
index 14ae40b1..d00cea02 100644
--- a/ultralytics/models/yolo/pose/predict.py
+++ b/ultralytics/models/yolo/pose/predict.py
@@ -21,6 +21,7 @@ class PosePredictor(DetectionPredictor):
"""
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+ """Initializes PosePredictor, sets task to 'pose' and logs a warning for using 'mps' as device."""
super().__init__(cfg, overrides, _callbacks)
self.args.task = 'pose'
if isinstance(self.args.device, str) and self.args.device.lower() == 'mps':
diff --git a/ultralytics/models/yolo/segment/predict.py b/ultralytics/models/yolo/segment/predict.py
index 7d51f7d4..ba44a482 100644
--- a/ultralytics/models/yolo/segment/predict.py
+++ b/ultralytics/models/yolo/segment/predict.py
@@ -21,10 +21,12 @@ class SegmentationPredictor(DetectionPredictor):
"""
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+ """Initializes the SegmentationPredictor with the provided configuration, overrides, and callbacks."""
super().__init__(cfg, overrides, _callbacks)
self.args.task = 'segment'
def postprocess(self, preds, img, orig_imgs):
+ """Applies non-max suppression and processes detections for each image in an input batch."""
p = ops.non_max_suppression(preds[0],
self.args.conf,
self.args.iou,
diff --git a/ultralytics/models/yolo/segment/val.py b/ultralytics/models/yolo/segment/val.py
index 0a2acb41..599b0d53 100644
--- a/ultralytics/models/yolo/segment/val.py
+++ b/ultralytics/models/yolo/segment/val.py
@@ -144,7 +144,7 @@ class SegmentationValidator(DetectionValidator):
def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False):
"""
- Return correct prediction matrix
+ Return correct prediction matrix.
Args:
detections (array[N, 6]), x1, y1, x2, y2, conf, class
diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py
index 61ca6db6..ab5cb2fb 100644
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@@ -20,7 +20,11 @@ from ultralytics.utils.downloads import attempt_download_asset, is_url
def check_class_names(names):
- """Check class names. Map imagenet class codes to human-readable names if required. Convert lists to dicts."""
+ """
+ Check class names.
+
+ Map ImageNet class codes to human-readable names if required. Convert lists to dicts.
+ """
if isinstance(names, list): # names is a list
names = dict(enumerate(names)) # convert to dict
if isinstance(names, dict):
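For reference, the list-to-dict conversion mentioned in the docstring amounts to (values illustrative):

```python
names = ['person', 'bicycle', 'car']
names = dict(enumerate(names))  # {0: 'person', 1: 'bicycle', 2: 'car'}
```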
@@ -37,6 +41,32 @@ def check_class_names(names):
class AutoBackend(nn.Module):
+ """
+ Handles dynamic backend selection for running inference using Ultralytics YOLO models.
+
+ The AutoBackend class is designed to provide an abstraction layer for various inference engines. It supports a wide
+ range of formats, each with specific naming conventions as outlined below:
+
+ Supported Formats and Naming Conventions:
+ | Format | File Suffix |
+ |-----------------------|------------------|
+ | PyTorch | *.pt |
+ | TorchScript | *.torchscript |
+ | ONNX Runtime | *.onnx |
+ | ONNX OpenCV DNN | *.onnx (dnn=True)|
+ | OpenVINO | *openvino_model/ |
+ | CoreML | *.mlpackage |
+ | TensorRT | *.engine |
+ | TensorFlow SavedModel | *_saved_model |
+ | TensorFlow GraphDef | *.pb |
+ | TensorFlow Lite | *.tflite |
+ | TensorFlow Edge TPU | *_edgetpu.tflite |
+ | PaddlePaddle | *_paddle_model |
+ | ncnn | *_ncnn_model |
+
+ This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy
+ models across various platforms.
+ """
@torch.no_grad()
def __init__(self,
@@ -48,33 +78,16 @@ class AutoBackend(nn.Module):
fuse=True,
verbose=True):
"""
- MultiBackend class for python inference on various platforms using Ultralytics YOLO.
+ Initialize the AutoBackend for inference.
Args:
- weights (str): The path to the weights file. Default: 'yolov8n.pt'
- device (torch.device): The device to run the model on.
- dnn (bool): Use OpenCV DNN module for inference if True, defaults to False.
- data (str | Path | optional): Additional data.yaml file for class names.
- fp16 (bool): If True, use half precision. Default: False
- fuse (bool): Whether to fuse the model or not. Default: True
- verbose (bool): Whether to run in verbose mode or not. Default: True
-
- Supported formats and their naming conventions:
- | Format | Suffix |
- |-----------------------|------------------|
- | PyTorch | *.pt |
- | TorchScript | *.torchscript |
- | ONNX Runtime | *.onnx |
- | ONNX OpenCV DNN | *.onnx dnn=True |
- | OpenVINO | *.xml |
- | CoreML | *.mlpackage |
- | TensorRT | *.engine |
- | TensorFlow SavedModel | *_saved_model |
- | TensorFlow GraphDef | *.pb |
- | TensorFlow Lite | *.tflite |
- | TensorFlow Edge TPU | *_edgetpu.tflite |
- | PaddlePaddle | *_paddle_model |
- | ncnn | *_ncnn_model |
+ weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'.
+ device (torch.device): Device to run the model on. Defaults to CPU.
+ dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False.
+ data (str | Path, optional): Path to the additional data.yaml file containing class names.
+ fp16 (bool): Enable half-precision inference. Supported only on specific backends. Defaults to False.
+ fuse (bool): Fuse Conv2D + BatchNorm layers for optimization. Defaults to True.
+ verbose (bool): Enable verbose logging. Defaults to True.
"""
super().__init__()
w = str(weights[0] if isinstance(weights, list) else weights)
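A minimal usage sketch of the reworded constructor, assuming a local 'yolov8n.pt' checkpoint; the backend is inferred from the file suffix per the table above:

```python
import torch
from ultralytics.nn.autobackend import AutoBackend

model = AutoBackend('yolov8n.pt', device=torch.device('cpu'), fp16=False, fuse=True)
im = torch.zeros(1, 3, 640, 640)  # dummy BCHW input
y = model(im)  # runs inference through the auto-selected backend
```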
@@ -440,14 +453,14 @@ class AutoBackend(nn.Module):
def from_numpy(self, x):
"""
- Convert a numpy array to a tensor.
+ Convert a numpy array to a tensor.
- Args:
- x (np.ndarray): The array to be converted.
+ Args:
+ x (np.ndarray): The array to be converted.
- Returns:
- (torch.Tensor): The converted tensor
- """
+ Returns:
+ (torch.Tensor): The converted tensor
+ """
return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x
def warmup(self, imgsz=(1, 3, 640, 640)):
@@ -476,7 +489,7 @@ class AutoBackend(nn.Module):
@staticmethod
def _model_type(p='path/to/model.pt'):
"""
- This function takes a path to a model file and returns the model type
+ This function takes a path to a model file and returns the model type.
Args:
p: path to the model file. Defaults to path/to/model.pt
diff --git a/ultralytics/nn/modules/__init__.py b/ultralytics/nn/modules/__init__.py
index b6dc6c44..584a394f 100644
--- a/ultralytics/nn/modules/__init__.py
+++ b/ultralytics/nn/modules/__init__.py
@@ -1,16 +1,20 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
-Ultralytics modules. Visualize with:
+Ultralytics modules.
-from ultralytics.nn.modules import *
-import torch
-import os
+Example:
+ Visualize a module with Netron.
+ ```python
+ from ultralytics.nn.modules import *
+ import torch
+ import os
-x = torch.ones(1, 128, 40, 40)
-m = Conv(128, 128)
-f = f'{m._get_name()}.onnx'
-torch.onnx.export(m, x, f)
-os.system(f'onnxsim {f} {f} && open {f}')
+ x = torch.ones(1, 128, 40, 40)
+ m = Conv(128, 128)
+ f = f'{m._get_name()}.onnx'
+ torch.onnx.export(m, x, f)
+ os.system(f'onnxsim {f} {f} && open {f}')
+ ```
"""
from .block import (C1, C2, C3, C3TR, DFL, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, GhostBottleneck,
diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py
index d8183d84..593ae24c 100644
--- a/ultralytics/nn/modules/block.py
+++ b/ultralytics/nn/modules/block.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Block modules
-"""
+"""Block modules."""
import torch
import torch.nn as nn
@@ -17,6 +15,7 @@ __all__ = ('DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3', 'C2f', '
class DFL(nn.Module):
"""
Integral module of Distribution Focal Loss (DFL).
+
Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
"""
@@ -51,11 +50,14 @@ class Proto(nn.Module):
class HGStem(nn.Module):
- """StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
+ """
+ StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
+
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
"""
def __init__(self, c1, cm, c2):
+ """Initialize the SPP layer with input/output channels and specified kernel sizes for max pooling."""
super().__init__()
self.stem1 = Conv(c1, cm, 3, 2, act=nn.ReLU())
self.stem2a = Conv(cm, cm // 2, 2, 1, 0, act=nn.ReLU())
@@ -79,11 +81,14 @@ class HGStem(nn.Module):
class HGBlock(nn.Module):
- """HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
+ """
+ HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
+
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
"""
def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU()):
+ """Initializes a CSP Bottleneck with 1 convolution using specified input and output channels."""
super().__init__()
block = LightConv if lightconv else Conv
self.m = nn.ModuleList(block(c1 if i == 0 else cm, cm, k=k, act=act) for i in range(n))
@@ -218,6 +223,7 @@ class RepC3(nn.Module):
"""Rep C3."""
def __init__(self, c1, c2, n=3, e=1.0):
+ """Initialize CSP Bottleneck with a single convolution using input channels, output channels, and number."""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c2, 1, 1)
diff --git a/ultralytics/nn/modules/conv.py b/ultralytics/nn/modules/conv.py
index 77e99c00..21a27009 100644
--- a/ultralytics/nn/modules/conv.py
+++ b/ultralytics/nn/modules/conv.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Convolution modules
-"""
+"""Convolution modules."""
import math
@@ -69,7 +67,9 @@ class Conv2(Conv):
class LightConv(nn.Module):
- """Light convolution with args(ch_in, ch_out, kernel).
+ """
+ Light convolution with args(ch_in, ch_out, kernel).
+
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
"""
@@ -148,12 +148,15 @@ class GhostConv(nn.Module):
class RepConv(nn.Module):
"""
- RepConv is a basic rep-style block, including training and deploy status. This module is used in RT-DETR.
+ RepConv is a basic rep-style block, including training and deploy status.
+
+ This module is used in RT-DETR.
Based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
"""
default_act = nn.SiLU() # default activation
def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
+ """Initializes Light Convolution layer with inputs, outputs & optional activation function."""
super().__init__()
assert k == 3 and p == 1
self.g = g
@@ -166,27 +169,30 @@ class RepConv(nn.Module):
self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)
def forward_fuse(self, x):
- """Forward process"""
+ """Forward process."""
return self.act(self.conv(x))
def forward(self, x):
- """Forward process"""
+ """Forward process."""
id_out = 0 if self.bn is None else self.bn(x)
return self.act(self.conv1(x) + self.conv2(x) + id_out)
def get_equivalent_kernel_bias(self):
+ """Returns equivalent kernel and bias by adding 3x3 kernel, 1x1 kernel and identity kernel with their biases."""
kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
kernelid, biasid = self._fuse_bn_tensor(self.bn)
return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
def _pad_1x1_to_3x3_tensor(self, kernel1x1):
+ """Pads a 1x1 tensor to a 3x3 tensor."""
if kernel1x1 is None:
return 0
else:
return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])
def _fuse_bn_tensor(self, branch):
+ """Generates appropriate kernels and biases for convolution by fusing branches of the neural network."""
if branch is None:
return 0, 0
if isinstance(branch, Conv):
@@ -214,6 +220,7 @@ class RepConv(nn.Module):
return kernel * t, beta - running_mean * gamma / std
def fuse_convs(self):
+ """Combines two convolution layers into a single layer and removes unused attributes from the class."""
if hasattr(self, 'conv'):
return
kernel, bias = self.get_equivalent_kernel_bias()
@@ -243,12 +250,14 @@ class ChannelAttention(nn.Module):
"""Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet."""
def __init__(self, channels: int) -> None:
+ """Initializes the class and sets the basic configurations and instance variables required."""
super().__init__()
self.pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
self.act = nn.Sigmoid()
def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Applies forward pass using activation on convolutions of the input, optionally using batch normalization."""
return x * self.act(self.fc(self.pool(x)))
diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py
index 0b02eb3c..9e993d79 100644
--- a/ultralytics/nn/modules/head.py
+++ b/ultralytics/nn/modules/head.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Model head modules
-"""
+"""Model head modules."""
import math
@@ -229,6 +227,7 @@ class RTDETRDecoder(nn.Module):
self._reset_parameters()
def forward(self, x, batch=None):
+ """Runs the forward pass of the module, returning bounding box and classification scores for the input."""
from ultralytics.models.utils.ops import get_cdn_group
# input projection and embedding
@@ -265,6 +264,7 @@ class RTDETRDecoder(nn.Module):
return y if self.export else (y, x)
def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2):
+ """Generates anchor bounding boxes for given shapes with specific grid size and validates them."""
anchors = []
for i, (h, w) in enumerate(shapes):
sy = torch.arange(end=h, dtype=dtype, device=device)
@@ -284,6 +284,7 @@ class RTDETRDecoder(nn.Module):
return anchors, valid_mask
def _get_encoder_input(self, x):
+ """Processes and returns encoder inputs by getting projection features from input and concatenating them."""
# get projection features
x = [self.input_proj[i](feat) for i, feat in enumerate(x)]
# get encoder inputs
@@ -301,6 +302,7 @@ class RTDETRDecoder(nn.Module):
return feats, shapes
def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None):
+ """Generates and prepares the input required for the decoder from the provided features and shapes."""
bs = len(feats)
# prepare input for decoder
anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
@@ -339,6 +341,7 @@ class RTDETRDecoder(nn.Module):
# TODO
def _reset_parameters(self):
+ """Initializes or resets the parameters of the model's various components with predefined weights and biases."""
# class and bbox head init
bias_cls = bias_init_with_prob(0.01) / 80 * self.nc
# NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets.
diff --git a/ultralytics/nn/modules/transformer.py b/ultralytics/nn/modules/transformer.py
index 9a51d2cb..4b7c0868 100644
--- a/ultralytics/nn/modules/transformer.py
+++ b/ultralytics/nn/modules/transformer.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Transformer modules
-"""
+"""Transformer modules."""
import math
@@ -18,9 +16,10 @@ __all__ = ('TransformerEncoderLayer', 'TransformerLayer', 'TransformerBlock', 'M
class TransformerEncoderLayer(nn.Module):
- """Transformer Encoder."""
+ """Defines a single layer of the transformer encoder."""
def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False):
+ """Initialize the TransformerEncoderLayer with specified parameters."""
super().__init__()
from ...utils.torch_utils import TORCH_1_9
if not TORCH_1_9:
@@ -41,10 +40,11 @@ class TransformerEncoderLayer(nn.Module):
self.normalize_before = normalize_before
def with_pos_embed(self, tensor, pos=None):
- """Add position embeddings if given."""
+ """Add position embeddings to the tensor if provided."""
return tensor if pos is None else tensor + pos
def forward_post(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
+ """Performs forward pass with post-normalization."""
q = k = self.with_pos_embed(src, pos)
src2 = self.ma(q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
src = src + self.dropout1(src2)
@@ -54,6 +54,7 @@ class TransformerEncoderLayer(nn.Module):
return self.norm2(src)
def forward_pre(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
+ """Performs forward pass with pre-normalization."""
src2 = self.norm1(src)
q = k = self.with_pos_embed(src2, pos)
src2 = self.ma(q, k, value=src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
@@ -70,11 +71,14 @@ class TransformerEncoderLayer(nn.Module):
class AIFI(TransformerEncoderLayer):
+ """Defines the AIFI transformer layer."""
def __init__(self, c1, cm=2048, num_heads=8, dropout=0, act=nn.GELU(), normalize_before=False):
+ """Initialize the AIFI instance with specified parameters."""
super().__init__(c1, cm, num_heads, dropout, act, normalize_before)
def forward(self, x):
+ """Forward pass for the AIFI transformer layer."""
c, h, w = x.shape[1:]
pos_embed = self.build_2d_sincos_position_embedding(w, h, c)
# flatten [B, C, H, W] to [B, HxW, C]
@@ -82,7 +86,8 @@ class AIFI(TransformerEncoderLayer):
return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous()
@staticmethod
- def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.):
+ def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.0):
+ """Builds 2D sine-cosine position embedding."""
grid_w = torch.arange(int(w), dtype=torch.float32)
grid_h = torch.arange(int(h), dtype=torch.float32)
grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing='ij')
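For context, a minimal sketch of the 1D sine-cosine embedding that this method extends to two dimensions (sizes are illustrative):

```python
import torch

pos = torch.arange(4, dtype=torch.float32)  # positions 0..3
omega = 1.0 / (10000.0 ** (torch.arange(0, 8, 2, dtype=torch.float32) / 8))
angles = pos[:, None] * omega[None, :]      # (4, 4) position-frequency grid
emb = torch.cat([torch.sin(angles), torch.cos(angles)], dim=1)  # (4, 8)
```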
@@ -140,27 +145,32 @@ class TransformerBlock(nn.Module):
class MLPBlock(nn.Module):
+ """Implements a single block of a multi-layer perceptron."""
def __init__(self, embedding_dim, mlp_dim, act=nn.GELU):
+ """Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function."""
super().__init__()
self.lin1 = nn.Linear(embedding_dim, mlp_dim)
self.lin2 = nn.Linear(mlp_dim, embedding_dim)
self.act = act()
def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Forward pass for the MLPBlock."""
return self.lin2(self.act(self.lin1(x)))
class MLP(nn.Module):
- """ Very simple multi-layer perceptron (also called FFN)"""
+ """Implements a simple multi-layer perceptron (also called FFN)."""
def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
+ """Initialize the MLP with specified input, hidden, output dimensions and number of layers."""
super().__init__()
self.num_layers = num_layers
h = [hidden_dim] * (num_layers - 1)
self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
def forward(self, x):
+ """Forward pass for the entire MLP."""
for i, layer in enumerate(self.layers):
x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
return x
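A quick usage sketch of MLP, assuming it is importable from this module (dimensions illustrative):

```python
import torch
from ultralytics.nn.modules.transformer import MLP  # assumed import path

mlp = MLP(input_dim=256, hidden_dim=512, output_dim=4, num_layers=3)
x = torch.randn(2, 10, 256)
out = mlp(x)  # shape (2, 10, 4); ReLU is applied between all but the last layer
```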
@@ -168,17 +178,22 @@ class MLP(nn.Module):
class LayerNorm2d(nn.Module):
"""
- LayerNorm2d module from https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
+ 2D Layer Normalization module inspired by Detectron2 and ConvNeXt implementations.
+
+ Original implementation at
+ https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119
"""
def __init__(self, num_channels, eps=1e-6):
+ """Initialize LayerNorm2d with the given parameters."""
super().__init__()
self.weight = nn.Parameter(torch.ones(num_channels))
self.bias = nn.Parameter(torch.zeros(num_channels))
self.eps = eps
def forward(self, x):
+ """Perform forward pass for 2D layer normalization."""
u = x.mean(1, keepdim=True)
s = (x - u).pow(2).mean(1, keepdim=True)
x = (x - u) / torch.sqrt(s + self.eps)
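The normalization above is equivalent to nn.LayerNorm applied over the channel dimension in a channels-last view; a quick check of that equivalence:

```python
import torch
import torch.nn.functional as F

x = torch.randn(2, 8, 4, 4)  # NCHW
u = x.mean(1, keepdim=True)
s = (x - u).pow(2).mean(1, keepdim=True)
manual = (x - u) / torch.sqrt(s + 1e-6)  # normalization step shown above
ref = F.layer_norm(x.permute(0, 2, 3, 1), (8,), eps=1e-6).permute(0, 3, 1, 2)
print(torch.allclose(manual, ref, atol=1e-5))  # True
```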
@@ -187,11 +202,13 @@ class LayerNorm2d(nn.Module):
class MSDeformAttn(nn.Module):
"""
- Original Multi-Scale Deformable Attention Module.
+ Multi-Scale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations.
+
https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/ops/modules/ms_deform_attn.py
"""
def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4):
+ """Initialize MSDeformAttn with the given parameters."""
super().__init__()
if d_model % n_heads != 0:
raise ValueError(f'd_model must be divisible by n_heads, but got {d_model} and {n_heads}')
@@ -214,6 +231,7 @@ class MSDeformAttn(nn.Module):
self._reset_parameters()
def _reset_parameters(self):
+ """Reset module parameters."""
constant_(self.sampling_offsets.weight.data, 0.)
thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads)
grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
@@ -232,7 +250,10 @@ class MSDeformAttn(nn.Module):
def forward(self, query, refer_bbox, value, value_shapes, value_mask=None):
"""
+ Perform forward pass for multi-scale deformable attention.
+
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
+
Args:
query (torch.Tensor): [bs, query_length, C]
refer_bbox (torch.Tensor): [bs, query_length, n_levels, 2], range in [0, 1], top-left (0,0),
@@ -272,24 +293,27 @@ class MSDeformAttn(nn.Module):
class DeformableTransformerDecoderLayer(nn.Module):
"""
+ Deformable Transformer Decoder Layer inspired by PaddleDetection and Deformable-DETR implementations.
+
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/deformable_transformer.py
"""
def __init__(self, d_model=256, n_heads=8, d_ffn=1024, dropout=0., act=nn.ReLU(), n_levels=4, n_points=4):
+ """Initialize the DeformableTransformerDecoderLayer with the given parameters."""
super().__init__()
- # self attention
+ # Self attention
self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
self.dropout1 = nn.Dropout(dropout)
self.norm1 = nn.LayerNorm(d_model)
- # cross attention
+ # Cross attention
self.cross_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points)
self.dropout2 = nn.Dropout(dropout)
self.norm2 = nn.LayerNorm(d_model)
- # ffn
+ # FFN
self.linear1 = nn.Linear(d_model, d_ffn)
self.act = act
self.dropout3 = nn.Dropout(dropout)
@@ -299,37 +323,44 @@ class DeformableTransformerDecoderLayer(nn.Module):
@staticmethod
def with_pos_embed(tensor, pos):
+ """Add positional embeddings to the input tensor, if provided."""
return tensor if pos is None else tensor + pos
def forward_ffn(self, tgt):
+ """Perform forward pass through the Feed-Forward Network part of the layer."""
tgt2 = self.linear2(self.dropout3(self.act(self.linear1(tgt))))
tgt = tgt + self.dropout4(tgt2)
return self.norm3(tgt)
def forward(self, embed, refer_bbox, feats, shapes, padding_mask=None, attn_mask=None, query_pos=None):
- # self attention
+ """Perform the forward pass through the entire decoder layer."""
+
+ # Self attention
q = k = self.with_pos_embed(embed, query_pos)
tgt = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), embed.transpose(0, 1),
attn_mask=attn_mask)[0].transpose(0, 1)
embed = embed + self.dropout1(tgt)
embed = self.norm1(embed)
- # cross attention
+ # Cross attention
tgt = self.cross_attn(self.with_pos_embed(embed, query_pos), refer_bbox.unsqueeze(2), feats, shapes,
padding_mask)
embed = embed + self.dropout2(tgt)
embed = self.norm2(embed)
- # ffn
+ # FFN
return self.forward_ffn(embed)
class DeformableTransformerDecoder(nn.Module):
"""
+ Implementation of Deformable Transformer Decoder based on PaddleDetection.
+
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
"""
def __init__(self, hidden_dim, decoder_layer, num_layers, eval_idx=-1):
+ """Initialize the DeformableTransformerDecoder with the given parameters."""
super().__init__()
self.layers = _get_clones(decoder_layer, num_layers)
self.num_layers = num_layers
@@ -347,6 +378,7 @@ class DeformableTransformerDecoder(nn.Module):
pos_mlp,
attn_mask=None,
padding_mask=None):
+ """Perform the forward pass through the entire decoder."""
output = embed
dec_bboxes = []
dec_cls = []
diff --git a/ultralytics/nn/modules/utils.py b/ultralytics/nn/modules/utils.py
index f8636dc4..c7bec7af 100644
--- a/ultralytics/nn/modules/utils.py
+++ b/ultralytics/nn/modules/utils.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Module utils
-"""
+"""Module utils."""
import copy
import math
@@ -16,15 +14,17 @@ __all__ = 'multi_scale_deformable_attn_pytorch', 'inverse_sigmoid'
def _get_clones(module, n):
+ """Create a list of cloned modules from the given module."""
return nn.ModuleList([copy.deepcopy(module) for _ in range(n)])
def bias_init_with_prob(prior_prob=0.01):
- """initialize conv/fc bias value according to a given probability value."""
+ """Initialize conv/fc bias value according to a given probability value."""
return float(-np.log((1 - prior_prob) / prior_prob)) # return bias_init
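The formula inverts the sigmoid so that the initialized bias maps back to the desired prior probability; a quick check:

```python
import math

prior_prob = 0.01
bias = -math.log((1 - prior_prob) / prior_prob)  # same formula as above
assert abs(1 / (1 + math.exp(-bias)) - prior_prob) < 1e-12  # sigmoid(bias) == prior
```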
def linear_init_(module):
+ """Initialize the weights and biases of a linear module."""
bound = 1 / math.sqrt(module.weight.shape[0])
uniform_(module.weight, -bound, bound)
if hasattr(module, 'bias') and module.bias is not None:
@@ -32,6 +32,7 @@ def linear_init_(module):
def inverse_sigmoid(x, eps=1e-5):
+ """Calculate the inverse sigmoid function for a tensor."""
x = x.clamp(min=0, max=1)
x1 = x.clamp(min=eps)
x2 = (1 - x).clamp(min=eps)
@@ -43,6 +44,7 @@ def multi_scale_deformable_attn_pytorch(value: torch.Tensor, value_spatial_shape
attention_weights: torch.Tensor) -> torch.Tensor:
"""
Multi-scale deformable attention.
+
https://github.com/IDEA-Research/detrex/blob/main/detrex/layers/multi_scale_deform_attn.py
"""
diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py
index 24153d24..6145146b 100644
--- a/ultralytics/nn/tasks.py
+++ b/ultralytics/nn/tasks.py
@@ -25,14 +25,11 @@ except ImportError:
class BaseModel(nn.Module):
- """
- The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family.
- """
+ """The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family."""
def forward(self, x, *args, **kwargs):
"""
- Forward pass of the model on a single scale.
- Wrapper for `_forward_once` method.
+ Forward pass of the model on a single scale. Wrapper for `_forward_once` method.
Args:
x (torch.Tensor | dict): The input image tensor or a dict including image tensor and gt labels.
@@ -93,8 +90,8 @@ class BaseModel(nn.Module):
def _profile_one_layer(self, m, x, dt):
"""
- Profile the computation time and FLOPs of a single layer of the model on a given input.
- Appends the results to the provided list.
+ Profile the computation time and FLOPs of a single layer of the model on a given input. Appends the results to
+ the provided list.
Args:
m (nn.Module): The layer to be profiled.
@@ -158,7 +155,7 @@ class BaseModel(nn.Module):
def info(self, detailed=False, verbose=True, imgsz=640):
"""
- Prints model information
+ Prints model information.
Args:
detailed (bool): if True, prints out detailed information about the model. Defaults to False
@@ -175,7 +172,7 @@ class BaseModel(nn.Module):
fn (function): the function to apply to the model
Returns:
- A model that is a Detect() object.
+ (BaseModel): An updated BaseModel object.
"""
self = super()._apply(fn)
m = self.model[-1] # Detect()
@@ -202,7 +199,7 @@ class BaseModel(nn.Module):
def loss(self, batch, preds=None):
"""
- Compute loss
+ Compute loss.
Args:
batch (dict): Batch to compute loss on
@@ -215,6 +212,7 @@ class BaseModel(nn.Module):
return self.criterion(preds, batch)
def init_criterion(self):
+ """Initialize the loss criterion for the BaseModel."""
raise NotImplementedError('compute_loss() needs to be implemented by task heads')
@@ -222,6 +220,7 @@ class DetectionModel(BaseModel):
"""YOLOv8 detection model."""
def __init__(self, cfg='yolov8n.yaml', ch=3, nc=None, verbose=True): # model, input channels, number of classes
+ """Initialize the YOLOv8 detection model with the given config and parameters."""
super().__init__()
self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg) # cfg dict
@@ -289,6 +288,7 @@ class DetectionModel(BaseModel):
return y
def init_criterion(self):
+ """Initialize the loss criterion for the DetectionModel."""
return v8DetectionLoss(self)
@@ -300,6 +300,7 @@ class SegmentationModel(DetectionModel):
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
def init_criterion(self):
+ """Initialize the loss criterion for the SegmentationModel."""
return v8SegmentationLoss(self)
@@ -316,6 +317,7 @@ class PoseModel(DetectionModel):
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
def init_criterion(self):
+ """Initialize the loss criterion for the PoseModel."""
return v8PoseLoss(self)
@@ -365,22 +367,59 @@ class ClassificationModel(BaseModel):
m[i] = nn.Conv2d(m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None)
def init_criterion(self):
- """Compute the classification loss between predictions and true labels."""
+ """Initialize the loss criterion for the ClassificationModel."""
return v8ClassificationLoss()
class RTDETRDetectionModel(DetectionModel):
+ """
+ RTDETR (Real-time DEtection and Tracking using Transformers) Detection Model class.
+
+ This class is responsible for constructing the RTDETR architecture, defining loss functions, and
+ facilitating both the training and inference processes. RTDETR is an object detection and tracking model
+ that extends from the DetectionModel base class.
+
+ Attributes:
+ cfg (str): The configuration file path or preset string. Default is 'rtdetr-l.yaml'.
+ ch (int): Number of input channels. Default is 3 (RGB).
+ nc (int, optional): Number of classes for object detection. Default is None.
+ verbose (bool): Specifies if summary statistics are shown during initialization. Default is True.
+
+ Methods:
+ init_criterion: Initializes the criterion used for loss calculation.
+ loss: Computes and returns the loss during training.
+ predict: Performs a forward pass through the network and returns the output.
+ """
def __init__(self, cfg='rtdetr-l.yaml', ch=3, nc=None, verbose=True):
+ """
+ Initialize the RTDETRDetectionModel.
+
+ Args:
+ cfg (str): Configuration file name or path.
+ ch (int): Number of input channels.
+ nc (int, optional): Number of classes. Defaults to None.
+ verbose (bool, optional): Print additional information during initialization. Defaults to True.
+ """
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
def init_criterion(self):
- """Compute the classification loss between predictions and true labels."""
+ """Initialize the loss criterion for the RTDETRDetectionModel."""
from ultralytics.models.utils.loss import RTDETRDetectionLoss
return RTDETRDetectionLoss(nc=self.nc, use_vfl=True)
def loss(self, batch, preds=None):
+ """
+ Compute the loss for the given batch of data.
+
+ Args:
+ batch (dict): Dictionary containing image and label data.
+ preds (torch.Tensor, optional): Precomputed model predictions. Defaults to None.
+
+ Returns:
+ (tuple): A tuple containing the total loss and the three main losses in a tensor.
+ """
if not hasattr(self, 'criterion'):
self.criterion = self.init_criterion()
@@ -417,16 +456,17 @@ class RTDETRDetectionModel(DetectionModel):
def predict(self, x, profile=False, visualize=False, batch=None, augment=False):
"""
- Perform a forward pass through the network.
+ Perform a forward pass through the model.
Args:
- x (torch.Tensor): The input tensor to the model
- profile (bool): Print the computation time of each layer if True, defaults to False.
- visualize (bool): Save the feature maps of the model if True, defaults to False
- batch (dict): A dict including gt boxes and labels from dataloader.
+ x (torch.Tensor): The input tensor.
+ profile (bool, optional): If True, profile the computation time for each layer. Defaults to False.
+ visualize (bool, optional): If True, save feature maps for visualization. Defaults to False.
+ batch (dict, optional): Ground truth data for evaluation. Defaults to None.
+ augment (bool, optional): If True, perform data augmentation during inference. Defaults to False.
Returns:
- (torch.Tensor): The last output of the model.
+ (torch.Tensor): Model's output tensor.
"""
y, dt = [], [] # outputs
for m in self.model[:-1]: # except the head part
@@ -708,9 +748,9 @@ def yaml_model_load(path):
def guess_model_scale(model_path):
"""
- Takes a path to a YOLO model's YAML file as input and extracts the size character of the model's scale.
- The function uses regular expression matching to find the pattern of the model scale in the YAML file name,
- which is denoted by n, s, m, l, or x. The function returns the size character of the model scale as a string.
+ Takes a path to a YOLO model's YAML file as input and extracts the size character of the model's scale. The function
+ uses regular expression matching to find the pattern of the model scale in the YAML file name, which is denoted by
+ n, s, m, l, or x. The function returns the size character of the model scale as a string.
Args:
model_path (str | Path): The path to the YOLO model's YAML file.
diff --git a/ultralytics/trackers/bot_sort.py b/ultralytics/trackers/bot_sort.py
index d42d46eb..543d7526 100644
--- a/ultralytics/trackers/bot_sort.py
+++ b/ultralytics/trackers/bot_sort.py
@@ -12,6 +12,33 @@ from .utils.kalman_filter import KalmanFilterXYWH
class BOTrack(STrack):
+ """
+ An extended version of the STrack class for YOLOv8, adding object tracking features.
+
+ Attributes:
+ shared_kalman (KalmanFilterXYWH): A shared Kalman filter for all instances of BOTrack.
+ smooth_feat (np.ndarray): Smoothed feature vector.
+ curr_feat (np.ndarray): Current feature vector.
+ features (deque): A deque to store feature vectors with a maximum length defined by `feat_history`.
+ alpha (float): Smoothing factor for the exponential moving average of features.
+ mean (np.ndarray): The mean state of the Kalman filter.
+ covariance (np.ndarray): The covariance matrix of the Kalman filter.
+
+ Methods:
+ update_features(feat): Update features vector and smooth it using exponential moving average.
+ predict(): Predicts the mean and covariance using Kalman filter.
+ re_activate(new_track, frame_id, new_id): Reactivates a track with updated features and optionally new ID.
+ update(new_track, frame_id): Update the YOLOv8 instance with new track and frame ID.
+ tlwh: Property that gets the current position in tlwh format `(top left x, top left y, width, height)`.
+ multi_predict(stracks): Predicts the mean and covariance of multiple object tracks using shared Kalman filter.
+ convert_coords(tlwh): Converts tlwh bounding box coordinates to xywh format.
+ tlwh_to_xywh(tlwh): Convert bounding box to xywh format `(center x, center y, width, height)`.
+
+ Usage:
+ bo_track = BOTrack(tlwh, score, cls, feat)
+ bo_track.predict()
+ bo_track.update(new_track, frame_id)
+ """
shared_kalman = KalmanFilterXYWH()
def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
@@ -59,9 +86,7 @@ class BOTrack(STrack):
@property
def tlwh(self):
- """Get current position in bounding box format `(top left x, top left y,
- width, height)`.
- """
+ """Get current position in bounding box format `(top left x, top left y, width, height)`."""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
@@ -90,15 +115,37 @@ class BOTrack(STrack):
@staticmethod
def tlwh_to_xywh(tlwh):
- """Convert bounding box to format `(center x, center y, width,
- height)`.
- """
+ """Convert bounding box to format `(center x, center y, width, height)`."""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
return ret
class BOTSORT(BYTETracker):
+ """
+ An extended version of the BYTETracker class for YOLOv8, designed for object tracking with ReID and GMC algorithm.
+
+ Attributes:
+ proximity_thresh (float): Threshold for spatial proximity (IoU) between tracks and detections.
+ appearance_thresh (float): Threshold for appearance similarity (ReID embeddings) between tracks and detections.
+ encoder (object): Object to handle ReID embeddings, set to None if ReID is not enabled.
+ gmc (GMC): An instance of the GMC algorithm for data association.
+ args (object): Parsed command-line arguments containing tracking parameters.
+
+ Methods:
+ get_kalmanfilter(): Returns an instance of KalmanFilterXYWH for object tracking.
+ init_track(dets, scores, cls, img): Initialize track with detections, scores, and classes.
+ get_dists(tracks, detections): Get distances between tracks and detections using IoU and (optionally) ReID.
+ multi_predict(tracks): Predict and track multiple objects with YOLOv8 model.
+
+ Usage:
+ bot_sort = BOTSORT(args, frame_rate)
+ bot_sort.init_track(dets, scores, cls, img)
+ bot_sort.multi_predict(tracks)
+
+ Note:
+ The class is designed to work with the YOLOv8 object detection model and supports ReID only if enabled via args.
+ """
def __init__(self, args, frame_rate=30):
"""Initialize YOLOv8 object with ReID module and GMC algorithm."""
diff --git a/ultralytics/trackers/byte_tracker.py b/ultralytics/trackers/byte_tracker.py
index 04958cda..40954c98 100644
--- a/ultralytics/trackers/byte_tracker.py
+++ b/ultralytics/trackers/byte_tracker.py
@@ -8,10 +8,43 @@ from .utils.kalman_filter import KalmanFilterXYAH
class STrack(BaseTrack):
+ """
+ Single object tracking representation that uses Kalman filtering for state estimation.
+
+ This class is responsible for storing all the information regarding individual tracklets and performs state updates
+ and predictions based on Kalman filter.
+
+ Attributes:
+ shared_kalman (KalmanFilterXYAH): Shared Kalman filter that is used across all STrack instances for prediction.
+ _tlwh (np.ndarray): Private attribute to store top-left corner coordinates and width and height of bounding box.
+ kalman_filter (KalmanFilterXYAH): Instance of Kalman filter used for this particular object track.
+ mean (np.ndarray): Mean state estimate vector.
+ covariance (np.ndarray): Covariance of state estimate.
+ is_activated (bool): Boolean flag indicating if the track has been activated.
+ score (float): Confidence score of the track.
+ tracklet_len (int): Length of the tracklet.
+ cls (any): Class label for the object.
+ idx (int): Index or identifier for the object.
+ frame_id (int): Current frame ID.
+ start_frame (int): Frame where the object was first detected.
+
+ Methods:
+ predict(): Predict the next state of the object using Kalman filter.
+ multi_predict(stracks): Predict the next states for multiple tracks.
+ multi_gmc(stracks, H): Update multiple track states using a homography matrix.
+ activate(kalman_filter, frame_id): Activate a new tracklet.
+ re_activate(new_track, frame_id, new_id): Reactivate a previously lost tracklet.
+ update(new_track, frame_id): Update the state of a matched track.
+ convert_coords(tlwh): Convert bounding box to x-y-angle-height format.
+ tlwh_to_xyah(tlwh): Convert tlwh bounding box to xyah format.
+ tlbr_to_tlwh(tlbr): Convert tlbr bounding box to tlwh format.
+ tlwh_to_tlbr(tlwh): Convert tlwh bounding box to tlbr format.
+ """
+
shared_kalman = KalmanFilterXYAH()
def __init__(self, tlwh, score, cls):
- """wait activate."""
+ """Initialize new STrack instance."""
self._tlwh = np.asarray(self.tlbr_to_tlwh(tlwh[:-1]), dtype=np.float32)
self.kalman_filter = None
self.mean, self.covariance = None, None
@@ -92,10 +125,11 @@ class STrack(BaseTrack):
def update(self, new_track, frame_id):
"""
- Update a matched track
- :type new_track: STrack
- :type frame_id: int
- :return:
+ Update the state of a matched track.
+
+ Args:
+ new_track (STrack): The new track containing updated information.
+ frame_id (int): The ID of the current frame.
"""
self.frame_id = frame_id
self.tracklet_len += 1
@@ -116,9 +150,7 @@ class STrack(BaseTrack):
@property
def tlwh(self):
- """Get current position in bounding box format `(top left x, top left y,
- width, height)`.
- """
+ """Get current position in bounding box format (top left x, top left y, width, height)."""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
@@ -128,17 +160,15 @@ class STrack(BaseTrack):
@property
def tlbr(self):
- """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
- `(top left, bottom right)`.
- """
+ """Convert bounding box to format (min x, min y, max x, max y), i.e., (top left, bottom right)."""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@staticmethod
def tlwh_to_xyah(tlwh):
- """Convert bounding box to format `(center x, center y, aspect ratio,
- height)`, where the aspect ratio is `width / height`.
+ """Convert bounding box to format (center x, center y, aspect ratio, height), where the aspect ratio is width /
+ height.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
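A worked example of the conversion; the final aspect-ratio division is implied by the docstring's width / height definition:

```python
import numpy as np

tlwh = np.array([10.0, 20.0, 30.0, 60.0])  # top-left x/y, width, height
ret = tlwh.copy()
ret[:2] += ret[2:] / 2  # center: (25, 50)
ret[2] /= ret[3]        # aspect ratio: 30 / 60 = 0.5
# ret -> [25., 50., 0.5, 60.]
```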
@@ -165,6 +195,33 @@ class STrack(BaseTrack):
class BYTETracker:
+ """
+ BYTETracker: A tracking algorithm built on top of YOLOv8 for object detection and tracking.
+
+ The class is responsible for initializing, updating, and managing the tracks for detected objects in a video
+ sequence. It maintains the state of tracked, lost, and removed tracks over frames, utilizes Kalman filtering for
+ predicting the new object locations, and performs data association.
+
+ Attributes:
+ tracked_stracks (list[STrack]): List of successfully activated tracks.
+ lost_stracks (list[STrack]): List of lost tracks.
+ removed_stracks (list[STrack]): List of removed tracks.
+ frame_id (int): The current frame ID.
+ args (namespace): Command-line arguments.
+ max_time_lost (int): The maximum frames for a track to be considered as 'lost'.
+ kalman_filter (object): Kalman Filter object.
+
+ Methods:
+ update(results, img=None): Updates object tracker with new detections.
+ get_kalmanfilter(): Returns a Kalman filter object for tracking bounding boxes.
+ init_track(dets, scores, cls, img=None): Initialize object tracking with detections.
+ get_dists(tracks, detections): Calculates the distance between tracks and detections.
+ multi_predict(tracks): Predicts the location of tracks.
+ reset_id(): Resets the ID counter of STrack.
+ joint_stracks(tlista, tlistb): Combines two lists of stracks.
+ sub_stracks(tlista, tlistb): Filters out the stracks present in the second list from the first list.
+ remove_duplicate_stracks(stracksa, stracksb): Removes duplicate stracks based on IOU.
+ """
def __init__(self, args, frame_rate=30):
"""Initialize a YOLOv8 object to track objects with given arguments and frame rate."""
@@ -234,8 +291,7 @@ class BYTETracker:
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
- # Step 3: Second association, with low score detection boxes
- # association the untrack to the low score detections
+ # Step 3: Second association, with low score detection boxes (associate untracked stracks with low score detections)
detections_second = self.init_track(dets_second, scores_second, cls_second, img)
r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
# TODO
diff --git a/ultralytics/trackers/track.py b/ultralytics/trackers/track.py
index cfb4b08a..cf06c033 100644
--- a/ultralytics/trackers/track.py
+++ b/ultralytics/trackers/track.py
@@ -60,7 +60,6 @@ def register_tracker(model, persist):
Args:
model (object): The model object to register tracking callbacks for.
persist (bool): Whether to persist the trackers if they already exist.
-
"""
model.add_callback('on_predict_start', partial(on_predict_start, persist=persist))
model.add_callback('on_predict_postprocess_end', on_predict_postprocess_end)
diff --git a/ultralytics/trackers/utils/gmc.py b/ultralytics/trackers/utils/gmc.py
index 4d91df45..24e4907d 100644
--- a/ultralytics/trackers/utils/gmc.py
+++ b/ultralytics/trackers/utils/gmc.py
@@ -9,6 +9,29 @@ from ultralytics.utils import LOGGER
class GMC:
+ """
+ Generalized Motion Compensation (GMC) class for tracking and object detection in video frames.
+
+ This class provides methods for tracking and detecting objects based on several tracking algorithms including ORB,
+ SIFT, ECC, and Sparse Optical Flow. It also supports downscaling of frames for computational efficiency.
+
+ Attributes:
+ method (str): The method used for tracking. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.
+ downscale (int): Factor by which to downscale the frames for processing.
+ prevFrame (np.array): Stores the previous frame for tracking.
+ prevKeyPoints (list): Stores the keypoints from the previous frame.
+ prevDescriptors (np.array): Stores the descriptors from the previous frame.
+ initializedFirstFrame (bool): Flag to indicate if the first frame has been processed.
+
+ Methods:
+ __init__(self, method='sparseOptFlow', downscale=2): Initializes a GMC object with the specified method
+ and downscale factor.
+ apply(self, raw_frame, detections=None): Applies the chosen method to a raw frame and optionally uses
+ provided detections.
+ applyEcc(self, raw_frame, detections=None): Applies the ECC algorithm to a raw frame.
+ applyFeatures(self, raw_frame, detections=None): Applies feature-based methods like ORB or SIFT to a raw frame.
+ applySparseOptFlow(self, raw_frame, detections=None): Applies the Sparse Optical Flow method to a raw frame.
+ """
def __init__(self, method='sparseOptFlow', downscale=2):
"""Initialize a video tracker with specified parameters."""
diff --git a/ultralytics/trackers/utils/kalman_filter.py b/ultralytics/trackers/utils/kalman_filter.py
index 9527ede7..d7408274 100644
--- a/ultralytics/trackers/utils/kalman_filter.py
+++ b/ultralytics/trackers/utils/kalman_filter.py
@@ -8,8 +8,8 @@ class KalmanFilterXYAH:
"""
For bytetrack. A simple Kalman filter for tracking bounding boxes in image space.
- The 8-dimensional state space (x, y, a, h, vx, vy, va, vh) contains the bounding box center position (x, y),
- aspect ratio a, height h, and their respective velocities.
+ The 8-dimensional state space (x, y, a, h, vx, vy, va, vh) contains the bounding box center position (x, y), aspect
+ ratio a, height h, and their respective velocities.
Object motion follows a constant velocity model. The bounding box location (x, y, a, h) is taken as direct
observation of the state space (linear observation model).
@@ -182,8 +182,8 @@ class KalmanFilterXYAH:
def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'):
"""
Compute gating distance between state distribution and measurements. A suitable distance threshold can be
- obtained from `chi2inv95`. If `only_position` is False, the chi-square distribution has 4 degrees of
- freedom, otherwise 2.
+ obtained from `chi2inv95`. If `only_position` is False, the chi-square distribution has 4 degrees of freedom,
+ otherwise 2.
Parameters
----------
@@ -223,8 +223,8 @@ class KalmanFilterXYWH(KalmanFilterXYAH):
"""
For BoT-SORT. A simple Kalman filter for tracking bounding boxes in image space.
- The 8-dimensional state space (x, y, w, h, vx, vy, vw, vh) contains the bounding box center position (x, y),
- width w, height h, and their respective velocities.
+ The 8-dimensional state space (x, y, w, h, vx, vy, vw, vh) contains the bounding box center position (x, y), width
+ w, height h, and their respective velocities.
Object motion follows a constant velocity model. The bounding box location (x, y, w, h) is taken as direct
observation of the state space (linear observation model).
diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py
index c4490136..d8e57431 100644
--- a/ultralytics/utils/__init__.py
+++ b/ultralytics/utils/__init__.py
@@ -117,6 +117,7 @@ class TQDM(tqdm_original):
"""
def __init__(self, *args, **kwargs):
+ """Initialize custom Ultralytics tqdm class with different default arguments."""
# Set new default values (these can still be overridden when calling TQDM)
kwargs['disable'] = not VERBOSE or kwargs.get('disable', False) # logical 'and' with default value if passed
kwargs.setdefault('bar_format', TQDM_BAR_FORMAT) # override default value if passed
@@ -124,8 +125,7 @@ class TQDM(tqdm_original):
class SimpleClass:
- """
- Ultralytics SimpleClass is a base class providing helpful string representation, error reporting, and attribute
+ """Ultralytics SimpleClass is a base class providing helpful string representation, error reporting, and attribute
access methods for easier debugging and usage.
"""
@@ -154,8 +154,7 @@ class SimpleClass:
class IterableSimpleNamespace(SimpleNamespace):
- """
- Ultralytics IterableSimpleNamespace is an extension class of SimpleNamespace that adds iterable functionality and
+ """Ultralytics IterableSimpleNamespace is an extension class of SimpleNamespace that adds iterable functionality and
enables usage with dict() and for loops.
"""
@@ -256,8 +255,8 @@ class EmojiFilter(logging.Filter):
"""
A custom logging filter class for removing emojis in log messages.
- This filter is particularly useful for ensuring compatibility with Windows terminals
- that may not support the display of emojis in log messages.
+ This filter is particularly useful for ensuring compatibility with Windows terminals that may not support the
+ display of emojis in log messages.
"""
def filter(self, record):
@@ -275,9 +274,9 @@ if WINDOWS: # emoji-safe logging
class ThreadingLocked:
"""
- A decorator class for ensuring thread-safe execution of a function or method.
- This class can be used as a decorator to make sure that if the decorated function
- is called from multiple threads, only one thread at a time will be able to execute the function.
+ A decorator class for ensuring thread-safe execution of a function or method. This class can be used as a decorator
+ to make sure that if the decorated function is called from multiple threads, only one thread at a time will be able
+ to execute the function.
Attributes:
lock (threading.Lock): A lock object used to manage access to the decorated function.
@@ -294,13 +293,16 @@ class ThreadingLocked:
"""
def __init__(self):
+ """Initializes the decorator class for thread-safe execution of a function or method."""
self.lock = threading.Lock()
def __call__(self, f):
+ """Run thread-safe execution of function or method."""
from functools import wraps
@wraps(f)
def decorated(*args, **kwargs):
+ """Applies thread-safety to the decorated function or method."""
with self.lock:
return f(*args, **kwargs)
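A usage sketch of the decorator, assuming it is importable from ultralytics.utils:

```python
from ultralytics.utils import ThreadingLocked  # assumed import location

@ThreadingLocked()
def update_shared_state():
    ...  # only one thread at a time can execute this body
```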
@@ -424,8 +426,7 @@ def is_kaggle():
def is_jupyter():
"""
- Check if the current script is running inside a Jupyter Notebook.
- Verified on Colab, Jupyterlab, Kaggle, Paperspace.
+ Check if the current script is running inside a Jupyter Notebook. Verified on Colab, Jupyterlab, Kaggle, Paperspace.
Returns:
(bool): True if running inside a Jupyter Notebook, False otherwise.
@@ -529,8 +530,8 @@ def is_github_actions_ci() -> bool:
def is_git_dir():
"""
- Determines whether the current file is part of a git repository.
- If the current file is not part of a git repository, returns None.
+ Determines whether the current file is part of a git repository. If the current file is not part of a git
+ repository, returns False.
Returns:
(bool): True if current file is part of a git repository.
@@ -540,8 +541,8 @@ def is_git_dir():
def get_git_dir():
"""
- Determines whether the current file is part of a git repository and if so, returns the repository root directory.
- If the current file is not part of a git repository, returns None.
+ Determines whether the current file is part of a git repository and if so, returns the repository root directory. If
+ the current file is not part of a git repository, returns None.
Returns:
(Path | None): Git root directory if found or None if not found.
@@ -578,7 +579,8 @@ def get_git_branch():
def get_default_args(func):
- """Returns a dictionary of default arguments for a function.
+ """
+ Returns a dictionary of default arguments for a function.
Args:
func (callable): The function to inspect.
@@ -710,7 +712,11 @@ def remove_colorstr(input_string):
class TryExcept(contextlib.ContextDecorator):
- """YOLOv8 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager."""
+ """
+ YOLOv8 TryExcept class.
+
+ Use as @TryExcept() decorator or 'with TryExcept():' context manager.
+ """
def __init__(self, msg='', verbose=True):
"""Initialize TryExcept class with optional message and verbosity settings."""
@@ -729,7 +735,11 @@ class TryExcept(contextlib.ContextDecorator):
def threaded(func):
- """Multi-threads a target function and returns thread. Usage: @threaded decorator."""
+ """
+ Multi-threads a target function and returns thread.
+
+ Use as @threaded decorator.
+ """
def wrapper(*args, **kwargs):
"""Multi-threads a given function and returns the thread."""
@@ -824,6 +834,9 @@ class SettingsManager(dict):
"""
def __init__(self, file=SETTINGS_YAML, version='0.0.4'):
+ """Initialize the SettingsManager with default settings, load and validate current settings from the YAML
+ file.
+ """
import copy
import hashlib
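
Note: the three decorators touched in this file are easiest to understand side by side. A minimal usage sketch (the decorated functions and their bodies are hypothetical placeholders):

    from ultralytics.utils import ThreadingLocked, TryExcept, threaded

    @TryExcept(msg='download failed')  # logs exceptions instead of raising them
    @ThreadingLocked()                 # serializes concurrent calls with a shared lock
    def download_weights(url):
        ...  # hypothetical body

    @threaded
    def background_job():
        ...  # hypothetical body; runs in its own thread

    thread = background_job()  # the decorator returns the started Thread
    thread.join()
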
diff --git a/ultralytics/utils/autobatch.py b/ultralytics/utils/autobatch.py
index 4e9ed07c..89f7e99f 100644
--- a/ultralytics/utils/autobatch.py
+++ b/ultralytics/utils/autobatch.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch.
-"""
+"""Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch."""
from copy import deepcopy
diff --git a/ultralytics/utils/benchmarks.py b/ultralytics/utils/benchmarks.py
index e4135bc8..bf86b535 100644
--- a/ultralytics/utils/benchmarks.py
+++ b/ultralytics/utils/benchmarks.py
@@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
-Benchmark a YOLO model formats for speed and accuracy
+Benchmark YOLO model formats for speed and accuracy.
Usage:
from ultralytics.utils.benchmarks import ProfileModels, benchmark
@@ -194,6 +194,7 @@ class ProfileModels:
self.device = device or torch.device(0 if torch.cuda.is_available() else 'cpu')
def profile(self):
+        """Profiles the selected model files for speed, prints a formatted comparison table and returns the results."""
files = self.get_files()
if not files:
@@ -235,6 +236,7 @@ class ProfileModels:
return output
def get_files(self):
+ """Returns a list of paths for all relevant model files given by the user."""
files = []
for path in self.paths:
path = Path(path)
@@ -250,10 +252,14 @@ class ProfileModels:
return [Path(file) for file in sorted(files)]
def get_onnx_model_info(self, onnx_file: str):
+ """Retrieves the information including number of layers, parameters, gradients and FLOPs for an ONNX model
+ file.
+ """
# return (num_layers, num_params, num_gradients, num_flops)
return 0.0, 0.0, 0.0, 0.0
def iterative_sigma_clipping(self, data, sigma=2, max_iters=3):
+        """Applies an iterative sigma clipping algorithm to the given data for a set number of iterations."""
data = np.array(data)
for _ in range(max_iters):
mean, std = np.mean(data), np.std(data)
@@ -264,6 +270,7 @@ class ProfileModels:
return data
def profile_tensorrt_model(self, engine_file: str, eps: float = 1e-3):
+ """Profiles the TensorRT model, measuring average run time and standard deviation among runs."""
if not self.trt or not Path(engine_file).is_file():
return 0.0, 0.0
@@ -292,6 +299,9 @@ class ProfileModels:
return np.mean(run_times), np.std(run_times)
def profile_onnx_model(self, onnx_file: str, eps: float = 1e-3):
+ """Profiles an ONNX model by executing it multiple times and returns the mean and standard deviation of run
+ times.
+ """
check_requirements('onnxruntime')
import onnxruntime as ort
@@ -344,10 +354,12 @@ class ProfileModels:
return np.mean(run_times), np.std(run_times)
def generate_table_row(self, model_name, t_onnx, t_engine, model_info):
+ """Generates a formatted string for a table row that includes model performance and metric details."""
layers, params, gradients, flops = model_info
return f'| {model_name:18s} | {self.imgsz} | - | {t_onnx[0]:.2f} ± {t_onnx[1]:.2f} ms | {t_engine[0]:.2f} ± {t_engine[1]:.2f} ms | {params / 1e6:.1f} | {flops:.1f} |'
def generate_results_dict(self, model_name, t_onnx, t_engine, model_info):
+ """Generates a dictionary of model details including name, parameters, GFLOPS and speed metrics."""
layers, params, gradients, flops = model_info
return {
'model/name': model_name,
@@ -357,6 +369,7 @@ class ProfileModels:
'model/speed_TensorRT(ms)': round(t_engine[0], 3)}
def print_table(self, table_rows):
+ """Formats and prints a comparison table for different models with given statistics and performance data."""
gpu = torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'GPU'
header = f'| Model | size<br><sup>(pixels) | mAP<sup>val<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>{gpu} TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |'
separator = '|-------------|---------------------|--------------------|------------------------------|-----------------------------------|------------------|-----------------|'
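
For context on the sigma-clipping step used by the profilers above: each pass drops timings more than `sigma` standard deviations from the mean and stops early once nothing is clipped. A self-contained sketch consistent with the lines shown:

    import numpy as np

    def iterative_sigma_clipping(data, sigma=2, max_iters=3):
        """Iteratively remove outliers beyond sigma standard deviations of the mean."""
        data = np.array(data)
        for _ in range(max_iters):
            mean, std = np.mean(data), np.std(data)
            clipped = data[(data > mean - sigma * std) & (data < mean + sigma * std)]
            if len(clipped) == len(data):
                break  # converged: no more outliers to remove
            data = clipped
        return data
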
diff --git a/ultralytics/utils/callbacks/base.py b/ultralytics/utils/callbacks/base.py
index ace8bfbf..211ae5bf 100644
--- a/ultralytics/utils/callbacks/base.py
+++ b/ultralytics/utils/callbacks/base.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Base callbacks
-"""
+"""Base callbacks."""
from collections import defaultdict
from copy import deepcopy
diff --git a/ultralytics/utils/callbacks/comet.py b/ultralytics/utils/callbacks/comet.py
index 2da71a95..e8016f4e 100644
--- a/ultralytics/utils/callbacks/comet.py
+++ b/ultralytics/utils/callbacks/comet.py
@@ -26,31 +26,38 @@ except (ImportError, AssertionError):
def _get_comet_mode():
+ """Returns the mode of comet set in the environment variables, defaults to 'online' if not set."""
return os.getenv('COMET_MODE', 'online')
def _get_comet_model_name():
+ """Returns the model name for Comet from the environment variable 'COMET_MODEL_NAME' or defaults to 'YOLOv8'."""
return os.getenv('COMET_MODEL_NAME', 'YOLOv8')
def _get_eval_batch_logging_interval():
+ """Get the evaluation batch logging interval from environment variable or use default value 1."""
return int(os.getenv('COMET_EVAL_BATCH_LOGGING_INTERVAL', 1))
def _get_max_image_predictions_to_log():
+ """Get the maximum number of image predictions to log from the environment variables."""
return int(os.getenv('COMET_MAX_IMAGE_PREDICTIONS', 100))
def _scale_confidence_score(score):
+ """Scales the given confidence score by a factor specified in an environment variable."""
scale = float(os.getenv('COMET_MAX_CONFIDENCE_SCORE', 100.0))
return score * scale
def _should_log_confusion_matrix():
+ """Determines if the confusion matrix should be logged based on the environment variable settings."""
return os.getenv('COMET_EVAL_LOG_CONFUSION_MATRIX', 'false').lower() == 'true'
def _should_log_image_predictions():
+ """Determines whether to log image predictions based on a specified environment variable."""
return os.getenv('COMET_EVAL_LOG_IMAGE_PREDICTIONS', 'true').lower() == 'true'
@@ -104,9 +111,10 @@ def _fetch_trainer_metadata(trainer):
def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad):
- """YOLOv8 resizes images during training and the label values
- are normalized based on this resized shape. This function rescales the
- bounding box labels to the original image shape.
+ """
+ YOLOv8 resizes images during training and the label values are normalized based on this resized shape.
+
+ This function rescales the bounding box labels to the original image shape.
"""
resized_image_height, resized_image_width = resized_image_shape
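
All of the Comet helpers above are driven by environment variables, so logging behavior can be tuned without code changes; for example (the values shown are illustrative, defaults are those noted in the docstrings above):

    import os

    os.environ['COMET_MODE'] = 'offline'                    # default: 'online'
    os.environ['COMET_MODEL_NAME'] = 'my-yolov8'            # default: 'YOLOv8'
    os.environ['COMET_EVAL_BATCH_LOGGING_INTERVAL'] = '4'   # default: 1
    os.environ['COMET_MAX_IMAGE_PREDICTIONS'] = '50'        # default: 100
    os.environ['COMET_EVAL_LOG_CONFUSION_MATRIX'] = 'true'  # default: 'false'
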
diff --git a/ultralytics/utils/callbacks/dvc.py b/ultralytics/utils/callbacks/dvc.py
index 98e232b8..7fa05c6b 100644
--- a/ultralytics/utils/callbacks/dvc.py
+++ b/ultralytics/utils/callbacks/dvc.py
@@ -25,6 +25,7 @@ except (ImportError, AssertionError, TypeError):
def _log_images(path, prefix=''):
+ """Logs images at specified path with an optional prefix using DVCLive."""
if live:
name = path.name
@@ -38,6 +39,7 @@ def _log_images(path, prefix=''):
def _log_plots(plots, prefix=''):
+ """Logs plot images for training progress if they have not been previously processed."""
for name, params in plots.items():
timestamp = params['timestamp']
if _processed_plots.get(name) != timestamp:
@@ -46,6 +48,7 @@ def _log_plots(plots, prefix=''):
def _log_confusion_matrix(validator):
+ """Logs the confusion matrix for the given validator using DVCLive."""
targets = []
preds = []
matrix = validator.confusion_matrix.matrix
@@ -62,6 +65,7 @@ def _log_confusion_matrix(validator):
def on_pretrain_routine_start(trainer):
+ """Initializes DVCLive logger for training metadata during pre-training routine."""
try:
global live
live = dvclive.Live(save_dvc_exp=True, cache_images=True)
@@ -71,20 +75,24 @@ def on_pretrain_routine_start(trainer):
def on_pretrain_routine_end(trainer):
+ """Logs plots related to the training process at the end of the pretraining routine."""
_log_plots(trainer.plots, 'train')
def on_train_start(trainer):
+ """Logs the training parameters if DVCLive logging is active."""
if live:
live.log_params(trainer.args)
def on_train_epoch_start(trainer):
+    """Sets the global variable _training_epoch to True at the start of each training epoch."""
global _training_epoch
_training_epoch = True
def on_fit_epoch_end(trainer):
+    """Logs training metrics and model info, and advances to the next step at the end of each fit epoch."""
global _training_epoch
if live and _training_epoch:
all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr}
@@ -104,6 +112,7 @@ def on_fit_epoch_end(trainer):
def on_train_end(trainer):
+ """Logs the best metrics, plots, and confusion matrix at the end of training if DVCLive is active."""
if live:
# At the end log the best metrics. It runs validator on the best model internally.
all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr}
diff --git a/ultralytics/utils/callbacks/neptune.py b/ultralytics/utils/callbacks/neptune.py
index 40916a3c..088e3f8e 100644
--- a/ultralytics/utils/callbacks/neptune.py
+++ b/ultralytics/utils/callbacks/neptune.py
@@ -31,14 +31,13 @@ def _log_images(imgs_dict, group=''):
def _log_plot(title, plot_path):
- """Log plots to the NeptuneAI experiment logger."""
"""
- Log image as plot in the plot section of NeptuneAI
+ Log plots to the NeptuneAI experiment logger.
- arguments:
- title (str) Title of the plot
- plot_path (PosixPath or str) Path to the saved image file
- """
+ Args:
+ title (str): Title of the plot.
+ plot_path (PosixPath | str): Path to the saved image file.
+ """
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
diff --git a/ultralytics/utils/callbacks/wb.py b/ultralytics/utils/callbacks/wb.py
index dd2f5d32..b901e3cc 100644
--- a/ultralytics/utils/callbacks/wb.py
+++ b/ultralytics/utils/callbacks/wb.py
@@ -17,6 +17,7 @@ except (ImportError, AssertionError):
def _log_plots(plots, step):
+ """Logs plots from the input dictionary if they haven't been logged already at the specified step."""
for name, params in plots.items():
timestamp = params['timestamp']
if _processed_plots.get(name) != timestamp:
diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py
index 956f1b32..184ce06e 100644
--- a/ultralytics/utils/checks.py
+++ b/ultralytics/utils/checks.py
@@ -64,8 +64,8 @@ def parse_requirements(file_path=ROOT.parent / 'requirements.txt', package=''):
def parse_version(version='0.0.0') -> tuple:
"""
- Convert a version string to a tuple of integers, ignoring any extra non-numeric string attached to the version.
- This function replaces deprecated 'pkg_resources.parse_version(v)'
+ Convert a version string to a tuple of integers, ignoring any extra non-numeric string attached to the version. This
+ function replaces deprecated 'pkg_resources.parse_version(v)'.
Args:
version (str): Version string, i.e. '2.0.1+cpu'
@@ -372,8 +372,10 @@ def check_torchvision():
Checks the installed versions of PyTorch and Torchvision to ensure they're compatible.
This function checks the installed versions of PyTorch and Torchvision, and warns if they're incompatible according
- to the provided compatibility table based on https://github.com/pytorch/vision#installation. The
- compatibility table is a dictionary where the keys are PyTorch versions and the values are lists of compatible
+ to the provided compatibility table based on:
+ https://github.com/pytorch/vision#installation.
+
+ The compatibility table is a dictionary where the keys are PyTorch versions and the values are lists of compatible
Torchvision versions.
"""
@@ -527,9 +529,9 @@ def collect_system_info():
def check_amp(model):
"""
- This function checks the PyTorch Automatic Mixed Precision (AMP) functionality of a YOLOv8 model.
- If the checks fail, it means there are anomalies with AMP on the system that may cause NaN losses or zero-mAP
- results, so AMP will be disabled during training.
+ This function checks the PyTorch Automatic Mixed Precision (AMP) functionality of a YOLOv8 model. If the checks
+ fail, it means there are anomalies with AMP on the system that may cause NaN losses or zero-mAP results, so AMP will
+ be disabled during training.
Args:
model (nn.Module): A YOLOv8 model instance.
@@ -606,7 +608,8 @@ def print_args(args: Optional[dict] = None, show_file=True, show_func=False):
def cuda_device_count() -> int:
- """Get the number of NVIDIA GPUs available in the environment.
+ """
+ Get the number of NVIDIA GPUs available in the environment.
Returns:
(int): The number of NVIDIA GPUs available.
@@ -626,7 +629,8 @@ def cuda_device_count() -> int:
def cuda_is_available() -> bool:
- """Check if CUDA is available in the environment.
+ """
+ Check if CUDA is available in the environment.
Returns:
(bool): True if one or more NVIDIA GPUs are available, False otherwise.
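
As the parse_version docstring above implies, '2.0.1+cpu' should map to (2, 0, 1). A minimal regex-based sketch of that behavior (not necessarily the exact implementation):

    import re

    def parse_version(version='0.0.0') -> tuple:
        """Keep the first three integer groups, dropping suffixes like '+cpu'."""
        return tuple(map(int, re.findall(r'\d+', version)[:3]))

    assert parse_version('2.0.1+cpu') == (2, 0, 1)
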
diff --git a/ultralytics/utils/dist.py b/ultralytics/utils/dist.py
index 11900985..aaef2b94 100644
--- a/ultralytics/utils/dist.py
+++ b/ultralytics/utils/dist.py
@@ -13,7 +13,8 @@ from .torch_utils import TORCH_1_9
def find_free_network_port() -> int:
- """Finds a free port on localhost.
+ """
+ Finds a free port on localhost.
It is useful in single-node training when we don't want to connect to a real main node but have to set the
`MASTER_PORT` environment variable.
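
The standard trick behind a helper like find_free_network_port is to bind to port 0 and let the OS choose; a sketch under that assumption:

    import socket

    def find_free_network_port() -> int:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(('127.0.0.1', 0))  # port 0 asks the OS for any free port
            return s.getsockname()[1]
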
diff --git a/ultralytics/utils/downloads.py b/ultralytics/utils/downloads.py
index 3db1d3ec..10983a6e 100644
--- a/ultralytics/utils/downloads.py
+++ b/ultralytics/utils/downloads.py
@@ -69,8 +69,8 @@ def delete_dsstore(path, files_to_delete=('.DS_Store', '__MACOSX')):
def zip_directory(directory, compress=True, exclude=('.DS_Store', '__MACOSX'), progress=True):
"""
- Zips the contents of a directory, excluding files containing strings in the exclude list.
- The resulting zip file is named after the directory and placed alongside it.
+ Zips the contents of a directory, excluding files containing strings in the exclude list. The resulting zip file is
+ named after the directory and placed alongside it.
Args:
directory (str | Path): The path to the directory to be zipped.
@@ -341,7 +341,11 @@ def get_github_assets(repo='ultralytics/assets', version='latest', retry=False):
def attempt_download_asset(file, repo='ultralytics/assets', release='v0.0.0'):
- """Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v6.2', etc."""
+ """
+ Attempt file download from GitHub release assets if not found locally.
+
+ release = 'latest', 'v6.2', etc.
+ """
from ultralytics.utils import SETTINGS # scoped for circular import
# YOLOv3/5u updates
diff --git a/ultralytics/utils/files.py b/ultralytics/utils/files.py
index 0102c4b6..82e8ebfe 100644
--- a/ultralytics/utils/files.py
+++ b/ultralytics/utils/files.py
@@ -30,9 +30,9 @@ class WorkingDirectory(contextlib.ContextDecorator):
@contextmanager
def spaces_in_path(path):
"""
- Context manager to handle paths with spaces in their names.
- If a path contains spaces, it replaces them with underscores, copies the file/directory to the new path,
- executes the context code block, then copies the file/directory back to its original location.
+ Context manager to handle paths with spaces in their names. If a path contains spaces, it replaces them with
+ underscores, copies the file/directory to the new path, executes the context code block, then copies the
+ file/directory back to its original location.
Args:
path (str | Path): The original path.
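
In use, the context manager yields the underscore-substituted path and restores the original afterwards; a hypothetical example (`train` is a placeholder callable):

    from ultralytics.utils.files import spaces_in_path

    with spaces_in_path('datasets/my data/coco.yaml') as safe_path:
        train(data=safe_path)  # hypothetical; sees 'datasets/my_data/coco.yaml'
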
diff --git a/ultralytics/utils/instance.py b/ultralytics/utils/instance.py
index 4e2e4380..28f1f654 100644
--- a/ultralytics/utils/instance.py
+++ b/ultralytics/utils/instance.py
@@ -32,9 +32,14 @@ __all__ = 'Bboxes', # tuple or list
class Bboxes:
- """Bounding Boxes class. Only numpy variables are supported."""
+ """
+ Bounding Boxes class.
+
+ Only numpy variables are supported.
+ """
def __init__(self, bboxes, format='xyxy') -> None:
+ """Initializes the Bboxes class with bounding box data in a specified format."""
assert format in _formats, f'Invalid bounding box format: {format}, format must be one of {_formats}'
bboxes = bboxes[None, :] if bboxes.ndim == 1 else bboxes
assert bboxes.ndim == 2
@@ -194,7 +199,7 @@ class Instances:
return self._bboxes.areas()
def scale(self, scale_w, scale_h, bbox_only=False):
- """this might be similar with denormalize func but without normalized sign."""
+        """This might be similar to the denormalize function, but without the normalized sign."""
self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h))
if bbox_only:
return
@@ -307,7 +312,11 @@ class Instances:
self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h)
def remove_zero_area_boxes(self):
- """Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height. This removes them."""
+ """
+ Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height.
+
+ This removes them.
+ """
good = self.bbox_areas > 0
if not all(good):
self._bboxes = self._bboxes[good]
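
To make the zero-area case concrete: after clipping, a box whose x1 equals x2 (or y1 equals y2) has zero area and gets filtered out, e.g.:

    import numpy as np

    boxes = np.array([[10, 10, 50, 50],    # area 1600, kept
                      [20, 20, 20, 60]])   # zero width after clipping, removed
    good = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) > 0
    boxes = boxes[good]  # only the first box survives
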
diff --git a/ultralytics/utils/loss.py b/ultralytics/utils/loss.py
index dacf326f..62186678 100644
--- a/ultralytics/utils/loss.py
+++ b/ultralytics/utils/loss.py
@@ -13,7 +13,11 @@ from .tal import bbox2dist
class VarifocalLoss(nn.Module):
- """Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367."""
+ """
+ Varifocal loss by Zhang et al.
+
+    https://arxiv.org/abs/2008.13367
+ """
def __init__(self):
"""Initialize the VarifocalLoss class."""
@@ -33,6 +37,7 @@ class FocalLoss(nn.Module):
"""Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""
def __init__(self, ):
+ """Initializer for FocalLoss class with no parameters."""
super().__init__()
@staticmethod
@@ -93,6 +98,7 @@ class KeypointLoss(nn.Module):
"""Criterion class for computing training losses."""
def __init__(self, sigmas) -> None:
+ """Initialize the KeypointLoss class."""
super().__init__()
self.sigmas = sigmas
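
For context on VarifocalLoss above: the paper weights binary cross-entropy asymmetrically, down-weighting negatives by the predicted score raised to gamma while weighting positives by the IoU-aware target score. A rough sketch of that weighting (the alpha and gamma defaults here are assumptions):

    import torch.nn.functional as F

    def varifocal_loss(pred_score, gt_score, label, alpha=0.75, gamma=2.0):
        # negatives: alpha * p^gamma; positives: target quality score q
        weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label) + gt_score * label
        bce = F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), reduction='none')
        return (bce * weight).sum()
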
diff --git a/ultralytics/utils/metrics.py b/ultralytics/utils/metrics.py
index 574d4039..36957e9f 100644
--- a/ultralytics/utils/metrics.py
+++ b/ultralytics/utils/metrics.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Model validation metrics
-"""
+"""Model validation metrics."""
import math
import warnings
@@ -195,7 +193,7 @@ class ConfusionMatrix:
def process_cls_preds(self, preds, targets):
"""
- Update confusion matrix for classification task
+ Update confusion matrix for classification task.
Args:
preds (Array[N, min(nc,5)]): Predicted class labels.
@@ -308,9 +306,7 @@ class ConfusionMatrix:
on_plot(plot_fname)
def print(self):
- """
- Print the confusion matrix to the console.
- """
+ """Print the confusion matrix to the console."""
for i in range(self.nc + 1):
LOGGER.info(' '.join(map(str, self.matrix[i])))
@@ -440,7 +436,6 @@ def ap_per_class(tp,
f1 (np.ndarray): F1-score values at each confidence threshold.
ap (np.ndarray): Average precision for each class at different IoU thresholds.
unique_classes (np.ndarray): An array of unique classes that have data.
-
"""
# Sort by objectness
@@ -498,32 +493,33 @@ def ap_per_class(tp,
class Metric(SimpleClass):
"""
- Class for computing evaluation metrics for YOLOv8 model.
+ Class for computing evaluation metrics for YOLOv8 model.
- Attributes:
- p (list): Precision for each class. Shape: (nc,).
- r (list): Recall for each class. Shape: (nc,).
- f1 (list): F1 score for each class. Shape: (nc,).
- all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10).
- ap_class_index (list): Index of class for each AP score. Shape: (nc,).
- nc (int): Number of classes.
+ Attributes:
+ p (list): Precision for each class. Shape: (nc,).
+ r (list): Recall for each class. Shape: (nc,).
+ f1 (list): F1 score for each class. Shape: (nc,).
+ all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10).
+ ap_class_index (list): Index of class for each AP score. Shape: (nc,).
+ nc (int): Number of classes.
- Methods:
- ap50(): AP at IoU threshold of 0.5 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
- ap(): AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
- mp(): Mean precision of all classes. Returns: Float.
- mr(): Mean recall of all classes. Returns: Float.
- map50(): Mean AP at IoU threshold of 0.5 for all classes. Returns: Float.
- map75(): Mean AP at IoU threshold of 0.75 for all classes. Returns: Float.
- map(): Mean AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: Float.
- mean_results(): Mean of results, returns mp, mr, map50, map.
- class_result(i): Class-aware result, returns p[i], r[i], ap50[i], ap[i].
- maps(): mAP of each class. Returns: Array of mAP scores, shape: (nc,).
- fitness(): Model fitness as a weighted combination of metrics. Returns: Float.
- update(results): Update metric attributes with new evaluation results.
- """
+ Methods:
+ ap50(): AP at IoU threshold of 0.5 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
+ ap(): AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
+ mp(): Mean precision of all classes. Returns: Float.
+ mr(): Mean recall of all classes. Returns: Float.
+ map50(): Mean AP at IoU threshold of 0.5 for all classes. Returns: Float.
+ map75(): Mean AP at IoU threshold of 0.75 for all classes. Returns: Float.
+ map(): Mean AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: Float.
+ mean_results(): Mean of results, returns mp, mr, map50, map.
+ class_result(i): Class-aware result, returns p[i], r[i], ap50[i], ap[i].
+ maps(): mAP of each class. Returns: Array of mAP scores, shape: (nc,).
+ fitness(): Model fitness as a weighted combination of metrics. Returns: Float.
+ update(results): Update metric attributes with new evaluation results.
+ """
def __init__(self) -> None:
+ """Initializes a Metric instance for computing evaluation metrics for the YOLOv8 model."""
self.p = [] # (nc, )
self.r = [] # (nc, )
self.f1 = [] # (nc, )
@@ -606,12 +602,12 @@ class Metric(SimpleClass):
return [self.mp, self.mr, self.map50, self.map]
def class_result(self, i):
- """class-aware result, return p[i], r[i], ap50[i], ap[i]."""
+ """Class-aware result, return p[i], r[i], ap50[i], ap[i]."""
return self.p[i], self.r[i], self.ap50[i], self.ap[i]
@property
def maps(self):
- """mAP of each class."""
+        """Mean AP of each class."""
maps = np.zeros(self.nc) + self.map
for i, c in enumerate(self.ap_class_index):
maps[c] = self.ap[i]
@@ -672,6 +668,7 @@ class DetMetrics(SimpleClass):
"""
def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
+ """Initialize a DetMetrics instance with a save directory, plot flag, callback function, and class names."""
self.save_dir = save_dir
self.plot = plot
self.on_plot = on_plot
@@ -756,6 +753,7 @@ class SegmentMetrics(SimpleClass):
"""
def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
+ """Initialize a SegmentMetrics instance with a save directory, plot flag, callback function, and class names."""
self.save_dir = save_dir
self.plot = plot
self.on_plot = on_plot
@@ -865,6 +863,7 @@ class PoseMetrics(SegmentMetrics):
"""
def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
+ """Initialize the PoseMetrics class with directory path, class names, and plotting options."""
super().__init__(save_dir, plot, names)
self.save_dir = save_dir
self.plot = plot
@@ -954,6 +953,7 @@ class ClassifyMetrics(SimpleClass):
"""
def __init__(self) -> None:
+ """Initialize a ClassifyMetrics instance."""
self.top1 = 0
self.top5 = 0
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
diff --git a/ultralytics/utils/ops.py b/ultralytics/utils/ops.py
index 9089d0fa..5d37c591 100644
--- a/ultralytics/utils/ops.py
+++ b/ultralytics/utils/ops.py
@@ -50,6 +50,7 @@ class Profile(contextlib.ContextDecorator):
self.t += self.dt # accumulate dt
def __str__(self):
+ """Returns a human-readable string representing the accumulated elapsed time in the profiler."""
return f'Elapsed time is {self.t} s'
def time(self):
@@ -303,7 +304,7 @@ def clip_coords(coords, shape):
def scale_image(masks, im0_shape, ratio_pad=None):
"""
- Takes a mask, and resizes it to the original image size
+ Takes a mask, and resizes it to the original image size.
Args:
masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
@@ -403,8 +404,8 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
"""
- Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format.
- x, y, width and height are normalized to image dimensions
+ Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format. x, y,
+ width and height are normalized to image dimensions.
Args:
x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.
@@ -445,7 +446,7 @@ def xywh2ltwh(x):
def xyxy2ltwh(x):
"""
- Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right
+ Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right.
Args:
x (np.ndarray | torch.Tensor): The input tensor with the bounding boxes coordinates in the xyxy format
@@ -461,7 +462,7 @@ def xyxy2ltwh(x):
def ltwh2xywh(x):
"""
- Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center
+ Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center.
Args:
x (torch.Tensor): the input tensor
@@ -544,7 +545,7 @@ def xywhr2xyxyxyxy(center):
def ltwh2xyxy(x):
"""
- It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+ It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.
Args:
x (np.ndarray | torch.Tensor): the input image
@@ -616,8 +617,8 @@ def crop_mask(masks, boxes):
def process_mask_upsample(protos, masks_in, bboxes, shape):
"""
- Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher
- quality but is slower.
+ Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher quality
+ but is slower.
Args:
protos (torch.Tensor): [mask_dim, mask_h, mask_w]
@@ -713,7 +714,7 @@ def scale_masks(masks, shape, padding=True):
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False, padding=True):
"""
- Rescale segment coordinates (xy) from img1_shape to img0_shape
+ Rescale segment coordinates (xy) from img1_shape to img0_shape.
Args:
img1_shape (tuple): The shape of the image that the coords are from.
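
The box-format converters above are plain arithmetic; for instance, xyxy to normalized xywh on a 640x640 image works out as:

    # (x1, y1, x2, y2) = (100, 200, 300, 400) on a 640x640 image
    x_c = (100 + 300) / 2 / 640  # 0.3125   normalized center x
    y_c = (200 + 400) / 2 / 640  # 0.46875  normalized center y
    w   = (300 - 100) / 640      # 0.3125   normalized width
    h   = (400 - 200) / 640      # 0.3125   normalized height
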
diff --git a/ultralytics/utils/patches.py b/ultralytics/utils/patches.py
index a1457639..541cf45a 100644
--- a/ultralytics/utils/patches.py
+++ b/ultralytics/utils/patches.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Monkey patches to update/extend functionality of existing functions
-"""
+"""Monkey patches to update/extend functionality of existing functions."""
from pathlib import Path
@@ -14,7 +12,8 @@ _imshow = cv2.imshow # copy to avoid recursion errors
def imread(filename: str, flags: int = cv2.IMREAD_COLOR):
- """Read an image from a file.
+ """
+ Read an image from a file.
Args:
filename (str): Path to the file to read.
@@ -27,7 +26,8 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR):
def imwrite(filename: str, img: np.ndarray, params=None):
- """Write an image to a file.
+ """
+ Write an image to a file.
Args:
filename (str): Path to the file to write.
@@ -45,7 +45,8 @@ def imwrite(filename: str, img: np.ndarray, params=None):
def imshow(winname: str, mat: np.ndarray):
- """Displays an image in the specified window.
+ """
+ Displays an image in the specified window.
Args:
winname (str): Name of the window.
@@ -59,7 +60,8 @@ _torch_save = torch.save # copy to avoid recursion errors
def torch_save(*args, **kwargs):
- """Use dill (if exists) to serialize the lambda functions where pickle does not do this.
+ """
+ Use dill (if exists) to serialize the lambda functions where pickle does not do this.
Args:
*args (tuple): Positional arguments to pass to torch.save.
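
A plausible shape for this patch, assuming the fall-back behavior the docstring describes (dill when available, otherwise pickle):

    import torch

    _torch_save = torch.save  # keep a reference to avoid recursion

    def torch_save(*args, **kwargs):
        try:
            import dill as pickle  # dill can serialize lambdas, which pickle cannot
        except ImportError:
            import pickle
        kwargs.setdefault('pickle_module', pickle)
        return _torch_save(*args, **kwargs)
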
diff --git a/ultralytics/utils/plotting.py b/ultralytics/utils/plotting.py
index bfd2aaa1..88fb73c2 100644
--- a/ultralytics/utils/plotting.py
+++ b/ultralytics/utils/plotting.py
@@ -316,7 +316,8 @@ def plot_labels(boxes, cls, names=(), save_dir=Path(''), on_plot=None):
def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True):
- """Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop.
+ """
+ Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop.
This function takes a bounding box and an image, and then saves a cropped portion of the image according
to the bounding box. Optionally, the crop can be squared, and the function allows for gain and padding
diff --git a/ultralytics/utils/torch_utils.py b/ultralytics/utils/torch_utils.py
index 0ea74088..ea6d7a6b 100644
--- a/ultralytics/utils/torch_utils.py
+++ b/ultralytics/utils/torch_utils.py
@@ -205,7 +205,11 @@ def fuse_deconv_and_bn(deconv, bn):
def model_info(model, detailed=False, verbose=True, imgsz=640):
- """Model information. imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320]."""
+ """
+ Model information.
+
+ imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320].
+ """
if not verbose:
return
n_p = get_num_params(model) # number of parameters
@@ -517,13 +521,11 @@ def profile(input, ops, n=10, device=None):
class EarlyStopping:
- """
- Early stopping class that stops training when a specified number of epochs have passed without improvement.
- """
+ """Early stopping class that stops training when a specified number of epochs have passed without improvement."""
def __init__(self, patience=50):
"""
- Initialize early stopping object
+ Initialize early stopping object.
Args:
patience (int, optional): Number of epochs to wait after fitness stops improving before stopping.
@@ -535,7 +537,7 @@ class EarlyStopping:
def __call__(self, epoch, fitness):
"""
- Check whether to stop training
+ Check whether to stop training.
Args:
epoch (int): Current epoch of training
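
In a training loop the stopper is typically consulted once per epoch with the latest fitness; a hypothetical sketch (`validate` and `model` are placeholders):

    stopper = EarlyStopping(patience=50)
    for epoch in range(300):
        fitness = validate(model)  # hypothetical: returns a scalar fitness
        if stopper(epoch, fitness):
            break  # no improvement for `patience` consecutive epochs
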
diff --git a/ultralytics/utils/triton.py b/ultralytics/utils/triton.py
index c48e418a..45bb6e5b 100644
--- a/ultralytics/utils/triton.py
+++ b/ultralytics/utils/triton.py
@@ -7,7 +7,8 @@ import numpy as np
class TritonRemoteModel:
- """Client for interacting with a remote Triton Inference Server model.
+ """
+ Client for interacting with a remote Triton Inference Server model.
Attributes:
endpoint (str): The name of the model on the Triton server.