From d2cf7acce0dde0a5aece4b4e50132c40a09acd23 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 31 Aug 2023 01:38:42 +0200 Subject: [PATCH] `ultralytics 8.0.167` Tuner updates and HUB Pose and Classify fixes (#4656) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .github/workflows/ci.yaml | 3 ++ ultralytics/__init__.py | 4 +- ultralytics/data/utils.py | 46 +++++++++++------ ultralytics/engine/exporter.py | 11 +++-- ultralytics/engine/model.py | 7 +-- ultralytics/engine/trainer.py | 9 ++-- ultralytics/engine/tuner.py | 45 ++++++++++------- ultralytics/engine/validator.py | 11 ++--- ultralytics/models/utils/loss.py | 57 +++++++++++----------- ultralytics/models/utils/ops.py | 57 +++++++++++----------- ultralytics/models/yolo/classify/val.py | 2 +- ultralytics/utils/__init__.py | 1 + ultralytics/utils/callbacks/clearml.py | 4 +- ultralytics/utils/callbacks/comet.py | 4 +- ultralytics/utils/callbacks/dvc.py | 5 +- ultralytics/utils/callbacks/mlflow.py | 4 +- ultralytics/utils/callbacks/neptune.py | 4 +- ultralytics/utils/callbacks/raytune.py | 3 +- ultralytics/utils/callbacks/tensorboard.py | 4 +- ultralytics/utils/callbacks/wb.py | 4 +- ultralytics/utils/downloads.py | 33 +++++++------ 21 files changed, 174 insertions(+), 144 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f7f39757..1d7da14c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -112,6 +112,9 @@ jobs: run: | python -m pip install --upgrade pip wheel pip install -e ".[export]" coverage --extra-index-url https://download.pytorch.org/whl/cpu + # Fix SavedModel issue "partially initialized module 'jax' has no attribute 'version' (most likely due to a circular import)" in https://github.com/google/jax/discussions/14036 + # pip install -U 'jax!=0.4.15' 'jaxlib!=0.4.15' + # yolo settings tensorboard=False yolo export format=tflite imgsz=32 || true - name: Check environment run: | diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 4de51cc2..c1c5dd13 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = '8.0.166' +__version__ = '8.0.167' from ultralytics.models import RTDETR, SAM, YOLO from ultralytics.models.fastsam import FastSAM @@ -9,4 +9,4 @@ from ultralytics.utils import SETTINGS as settings from ultralytics.utils.checks import check_yolo as checks from ultralytics.utils.downloads import download -__all__ = '__version__', 'YOLO', 'NAS', 'SAM', 'FastSAM', 'RTDETR', 'checks', 'download', 'settings' # allow simpler import +__all__ = '__version__', 'YOLO', 'NAS', 'SAM', 'FastSAM', 'RTDETR', 'checks', 'download', 'settings' diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py index bf5535a1..2100cef5 100644 --- a/ultralytics/data/utils.py +++ b/ultralytics/data/utils.py @@ -202,6 +202,28 @@ def polygons2masks_overlap(imgsz, segments, downsample_ratio=1): return masks, index +def find_dataset_yaml(path: Path) -> Path: + """ + Find and return the YAML file associated with a Detect, Segment or Pose dataset. + + This function searches for a YAML file at the root level of the provided directory first, and if not found, it + performs a recursive search. It prefers YAML files that have the samestem as the provided path. An AssertionError + is raised if no YAML file is found or if multiple YAML files are found. + + Args: + path (Path): The directory path to search for the YAML file. 
+ + Returns: + (Path): The path of the found YAML file. + """ + files = list(path.glob('*.yaml')) or list(path.rglob('*.yaml')) # try root level first and then recursive + assert files, f"No YAML file found in '{path.resolve()}'" + if len(files) > 1: + files = [f for f in files if f.stem == path.stem] # prefer *.yaml files that match + assert len(files) == 1, f"Expected 1 YAML file in '{path.resolve()}', but found {len(files)}.\n{files}" + return files[0] + + def check_det_dataset(dataset, autodownload=True): """ Download, verify, and/or unzip a dataset if not found locally. @@ -223,8 +245,8 @@ def check_det_dataset(dataset, autodownload=True): # Download (optional) extract_dir = '' if isinstance(data, (str, Path)) and (zipfile.is_zipfile(data) or is_tarfile(data)): - new_dir = safe_download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False) - data = next((DATASETS_DIR / new_dir).rglob('*.yaml')) + new_dir = safe_download(data, dir=DATASETS_DIR, unzip=True, delete=False) + data = find_dataset_yaml(DATASETS_DIR / new_dir) extract_dir, autodownload = data.parent, False # Read YAML (optional) @@ -316,6 +338,10 @@ def check_cls_dataset(dataset, split=''): - 'names' (dict): A dictionary of class names in the dataset. """ + # Download (optional if dataset=https://file.zip is passed directly) + if str(dataset).startswith(('http:/', 'https:/')): + dataset = safe_download(dataset, dir=DATASETS_DIR, unzip=True, delete=False) + dataset = Path(dataset) data_dir = (dataset if dataset.is_dir() else (DATASETS_DIR / dataset)).resolve() if not data_dir.is_dir(): @@ -329,8 +355,8 @@ def check_cls_dataset(dataset, split=''): s = f"Dataset download success ✅ ({time.time() - t:.1f}s), saved to {colorstr('bold', data_dir)}\n" LOGGER.info(s) train_set = data_dir / 'train' - val_set = data_dir / 'val' if (data_dir / 'val').exists() else data_dir / 'validation' if ( - data_dir / 'validation').exists() else None # data/test or data/val + val_set = data_dir / 'val' if (data_dir / 'val').exists() else data_dir / 'validation' if \ + (data_dir / 'validation').exists() else None # data/test or data/val test_set = data_dir / 'test' if (data_dir / 'test').exists() else None # data/val or data/test if split == 'val' and not val_set: LOGGER.warning("WARNING ⚠️ Dataset 'split=val' not found, using 'split=test' instead.") @@ -414,16 +440,6 @@ class HUBDatasetStats: self.stats = {'nc': len(data['names']), 'names': list(data['names'].values())} # statistics dictionary self.data = data - @staticmethod - def _find_yaml(dir): - """Return data.yaml file.""" - files = list(dir.glob('*.yaml')) or list(dir.rglob('*.yaml')) # try root level first and then recursive - assert files, f"No *.yaml file found in '{dir.resolve()}'" - if len(files) > 1: - files = [f for f in files if f.stem == dir.stem] # prefer *.yaml files that match dir name - assert len(files) == 1, f"Expected 1 *.yaml file in '{dir.resolve()}', but found {len(files)}.\n{files}" - return files[0] - def _unzip(self, path): """Unzip data.zip.""" if not str(path).endswith('.zip'): # path is data.yaml @@ -431,7 +447,7 @@ class HUBDatasetStats: unzip_dir = unzip_file(path, path=path.parent) assert unzip_dir.is_dir(), f'Error unzipping {path}, {unzip_dir} not found. 
' \ f'path/to/abc.zip MUST unzip to path/to/abc/' - return True, str(unzip_dir), self._find_yaml(unzip_dir) # zipped, data_dir, yaml_path + return True, str(unzip_dir), find_dataset_yaml(unzip_dir) # zipped, data_dir, yaml_path def _hub_ops(self, f): """Saves a compressed image for HUB previews.""" diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py index abe648e6..c81099e8 100644 --- a/ultralytics/engine/exporter.py +++ b/ultralytics/engine/exporter.py @@ -438,18 +438,16 @@ class Exporter: Path(asset).unlink() # delete zip pnnx.chmod(0o777) # set read, write, and execute permissions for everyone - use_ncnn = True ncnn_args = [ f'ncnnparam={f / "model.ncnn.param"}', f'ncnnbin={f / "model.ncnn.bin"}', - f'ncnnpy={f / "model_ncnn.py"}', ] if use_ncnn else [] + f'ncnnpy={f / "model_ncnn.py"}', ] - use_pnnx = False pnnx_args = [ f'pnnxparam={f / "model.pnnx.param"}', f'pnnxbin={f / "model.pnnx.bin"}', f'pnnxpy={f / "model_pnnx.py"}', - f'pnnxonnx={f / "model.pnnx.onnx"}', ] if use_pnnx else [] + f'pnnxonnx={f / "model.pnnx.onnx"}', ] cmd = [ str(pnnx), @@ -462,7 +460,10 @@ class Exporter: f.mkdir(exist_ok=True) # make ncnn_model directory LOGGER.info(f"{prefix} running '{' '.join(cmd)}'") subprocess.run(cmd, check=True) - for f_debug in 'debug.bin', 'debug.param', 'debug2.bin', 'debug2.param': # remove debug files + + # Remove debug files + pnnx_files = [x.split('=')[-1] for x in pnnx_args] + for f_debug in ('debug.bin', 'debug.param', 'debug2.bin', 'debug2.param', *pnnx_files): Path(f_debug).unlink(missing_ok=True) yaml_save(f / 'metadata.yaml', self.metadata) # add metadata.yaml diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py index fe638e69..ac573236 100644 --- a/ultralytics/engine/model.py +++ b/ultralytics/engine/model.py @@ -341,7 +341,8 @@ class Model: self.trainer.train() # Update model and cfg after training if RANK in (-1, 0): - self.model, _ = attempt_load_one_weight(str(self.trainer.best)) + ckpt = self.trainer.best if self.trainer.best.exists() else self.trainer.last + self.model, _ = attempt_load_one_weight(ckpt) self.overrides = self.model.args self.metrics = getattr(self.trainer.validator, 'metrics', None) # TODO: no metrics returned by DDP return self.metrics @@ -360,9 +361,9 @@ class Model: else: from .tuner import Tuner - custom = {} # method defaults + custom = {'plots': False, 'save': False} # method defaults args = {**self.overrides, **custom, **kwargs, 'mode': 'train'} # highest priority args on the right - return Tuner(args=args, _callbacks=self.callbacks)(model=self.model, iterations=iterations) + return Tuner(args=args, _callbacks=self.callbacks)(model=self, iterations=iterations) def to(self, device): """ diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py index 6fb5f8bd..0f300ed2 100644 --- a/ultralytics/engine/trainer.py +++ b/ultralytics/engine/trainer.py @@ -115,7 +115,7 @@ class BaseTrainer: try: if self.args.task == 'classify': self.data = check_cls_dataset(self.args.data) - elif self.args.data.split('.')[-1] in ('yaml', 'yml') or self.args.task in ('detect', 'segment'): + elif self.args.data.split('.')[-1] in ('yaml', 'yml') or self.args.task in ('detect', 'segment', 'pose'): self.data = check_det_dataset(self.args.data) if 'yaml_file' in self.data: self.args.data = self.data['yaml_file'] # for validating 'yolo train data=url.zip' usage @@ -251,9 +251,8 @@ class BaseTrainer: self.args.imgsz = check_imgsz(self.args.imgsz, stride=gs, floor=gs, max_dim=1) # Batch size - if self.batch_size == 
-1: - if RANK == -1: # single-GPU only, estimate best batch size - self.args.batch = self.batch_size = check_train_batch_size(self.model, self.args.imgsz, self.amp) + if self.batch_size == -1 and RANK == -1: # single-GPU only, estimate best batch size + self.args.batch = self.batch_size = check_train_batch_size(self.model, self.args.imgsz, self.amp) # Dataloaders batch_size = self.batch_size // max(world_size, 1) @@ -262,7 +261,7 @@ class BaseTrainer: self.test_loader = self.get_dataloader(self.testset, batch_size=batch_size * 2, rank=-1, mode='val') self.validator = self.get_validator() metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix='val') - self.metrics = dict(zip(metric_keys, [0] * len(metric_keys))) # TODO: init metrics for plot_results()? + self.metrics = dict(zip(metric_keys, [0] * len(metric_keys))) self.ema = ModelEMA(self.model) if self.args.plots: self.plot_training_labels() diff --git a/ultralytics/engine/tuner.py b/ultralytics/engine/tuner.py index dc44fc9b..443d01d1 100644 --- a/ultralytics/engine/tuner.py +++ b/ultralytics/engine/tuner.py @@ -18,6 +18,7 @@ Example: """ import random import time +from copy import deepcopy import numpy as np @@ -51,7 +52,7 @@ class Tuner: from ultralytics import YOLO model = YOLO('yolov8n.pt') - model.tune(data='coco8.yaml', imgsz=640, epochs=100, iterations=10) + model.tune(data='coco8.yaml', imgsz=640, epochs=100, iterations=10, val=False, cache=True) ``` """ @@ -63,11 +64,11 @@ class Tuner: args (dict, optional): Configuration for hyperparameter evolution. """ self.args = get_cfg(overrides=args) - self.space = { + self.space = { # key: (min, max, gain(optionaL)) # 'optimizer': tune.choice(['SGD', 'Adam', 'AdamW', 'NAdam', 'RAdam', 'RMSProp']), 'lr0': (1e-5, 1e-1), 'lrf': (0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) - 'momentum': (0.6, 0.98), # SGD momentum/Adam beta1 + 'momentum': (0.6, 0.98, 0.3), # SGD momentum/Adam beta1 'weight_decay': (0.0, 0.001), # optimizer weight decay 5e-4 'warmup_epochs': (0.0, 5.0), # warmup epochs (fractions ok) 'warmup_momentum': (0.0, 0.95), # warmup initial momentum @@ -86,13 +87,13 @@ class Tuner: 'mosaic': (0.0, 1.0), # image mixup (probability) 'mixup': (0.0, 1.0), # image mixup (probability) 'copy_paste': (0.0, 1.0)} # segment copy-paste (probability) - self.tune_dir = get_save_dir(self.args, name='tune') + self.tune_dir = get_save_dir(self.args, name='_tune') self.evolve_csv = self.tune_dir / 'evolve.csv' self.callbacks = _callbacks or callbacks.get_default_callbacks() callbacks.add_integration_callbacks(self) LOGGER.info(f"Initialized Tuner instance with 'tune_dir={self.tune_dir}'.") - def _mutate(self, parent='single', n=5, mutation=0.8, sigma=0.2, return_best=False): + def _mutate(self, parent='single', n=5, mutation=0.8, sigma=0.2): """ Mutates the hyperparameters based on bounds and scaling factors specified in `self.space`. 
@@ -111,10 +112,7 @@ class Tuner: fitness = x[:, 0] # first column n = min(n, len(x)) # number of previous results to consider x = x[np.argsort(-fitness)][:n] # top n mutations - if return_best: - return {k: float(x[0, i + 1]) for i, k in enumerate(self.space.keys())} - fitness = x[:, 0] # first column - w = fitness - fitness.min() + 1E-6 # weights (sum > 0) + w = x[:, 0] - x[:, 0].min() + 1E-6 # weights (sum > 0) if parent == 'single' or len(x) == 1: # x = x[random.randint(0, n - 1)] # random selection x = x[random.choices(range(n), weights=w)[0]] # weighted selection @@ -124,7 +122,7 @@ class Tuner: # Mutate r = np.random # method r.seed(int(time.time())) - g = np.array([self.space[k][0] for k in self.space.keys()]) # gains 0-1 + g = np.array([v[2] if len(v) == 3 else 1.0 for k, v in self.space.items()]) # gains 0-1 ng = len(self.space) v = np.ones(ng) while all(v == 1): # mutate until a change occurs (prevent duplicates) @@ -152,7 +150,7 @@ class Tuner: 4. Log the fitness score and mutated hyperparameters to a CSV file. Args: - model (YOLO): A pre-initialized YOLO model to be used for training. + model (Model): A pre-initialized YOLO model to be used for training. iterations (int): The number of generations to run the evolution for. Note: @@ -160,6 +158,7 @@ class Tuner: Ensure this path is set correctly in the Tuner instance. """ + t0 = time.time() self.tune_dir.mkdir(parents=True, exist_ok=True) for i in range(iterations): # Mutate hyperparameters @@ -167,17 +166,27 @@ class Tuner: LOGGER.info(f'{prefix} Starting iteration {i + 1}/{iterations} with hyperparameters: {mutated_hyp}') # Initialize and train YOLOv8 model - model = YOLO('yolov8n.pt') - train_args = {**vars(self.args), **mutated_hyp} - results = model.train(**train_args) + try: + train_args = {**vars(self.args), **mutated_hyp} + fitness = (deepcopy(model) or YOLO(self.args.model)).train(**train_args).fitness # results.fitness + except Exception as e: + LOGGER.warning(f'WARNING ❌️ training failure for hyperparameter tuning iteration {i}\n{e}') + fitness = 0.0 # Save results and mutated_hyp to evolve_csv + log_row = [round(fitness, 5)] + [mutated_hyp[k] for k in self.space.keys()] headers = '' if self.evolve_csv.exists() else (','.join(['fitness_score'] + list(self.space.keys())) + '\n') - log_row = [results.fitness] + [mutated_hyp[k] for k in self.space.keys()] with open(self.evolve_csv, 'a') as f: f.write(headers + ','.join(map(str, log_row)) + '\n') - LOGGER.info(f'{prefix} All iterations complete. 
Results saved to {colorstr("bold", self.tune_dir)}') - best_hyp = self._mutate(return_best=True) # best hyps - yaml_save(self.tune_dir / 'best.yaml', best_hyp) + # Print tuning results + x = np.loadtxt(self.evolve_csv, ndmin=2, delimiter=',', skiprows=1) + fitness = x[:, 0] # first column + i = np.argsort(-fitness)[0] # best fitness index + LOGGER.info(f'\n{prefix} All iterations complete ✅ ({time.time() - t0:.2f}s)\n' + f'{prefix} Results saved to {colorstr("bold", self.tune_dir)}\n' + f'{prefix} Best fitness={fitness[i]} observed at iteration {i}') + + # Save turning results + yaml_save(self.tune_dir / 'best.yaml', data={k: float(x[0, i + 1]) for i, k in enumerate(self.space.keys())}) yaml_print(self.tune_dir / 'best.yaml') diff --git a/ultralytics/engine/validator.py b/ultralytics/engine/validator.py index e445368c..97a3a9ac 100644 --- a/ultralytics/engine/validator.py +++ b/ultralytics/engine/validator.py @@ -111,12 +111,12 @@ class BaseValidator: if self.training: self.device = trainer.device self.data = trainer.data - model = trainer.ema.ema or trainer.model self.args.half = self.device.type != 'cpu' # force FP16 val during training + model = trainer.ema.ema or trainer.model model = model.half() if self.args.half else model.float() - self.model = model + # self.model = model self.loss = torch.zeros_like(trainer.loss_items, device=trainer.device) - self.args.plots = trainer.stopper.possible_stop or (trainer.epoch == trainer.epochs - 1) + self.args.plots &= trainer.stopper.possible_stop or (trainer.epoch == trainer.epochs - 1) model.eval() else: callbacks.add_integration_callbacks(self) @@ -126,7 +126,7 @@ class BaseValidator: dnn=self.args.dnn, data=self.args.data, fp16=self.args.half) - self.model = model + # self.model = model self.device = model.device # update device self.args.half = model.fp16 # update half stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine @@ -297,8 +297,7 @@ class BaseValidator: def on_plot(self, name, data=None): """Registers plots (e.g. 
to be consumed in callbacks)""" - path = Path(name) - self.plots[path] = {'data': data, 'timestamp': time.time()} + self.plots[Path(name)] = {'data': data, 'timestamp': time.time()} # TODO: may need to put these following functions into callback def plot_val_samples(self, batch, ni): diff --git a/ultralytics/models/utils/loss.py b/ultralytics/models/utils/loss.py index 9f95e5f0..95406e1f 100644 --- a/ultralytics/models/utils/loss.py +++ b/ultralytics/models/utils/loss.py @@ -84,35 +84,36 @@ class DETRLoss(nn.Module): loss[name_giou] = self.loss_gain['giou'] * loss[name_giou] return {k: v.squeeze() for k, v in loss.items()} - def _get_loss_mask(self, masks, gt_mask, match_indices, postfix=''): - # masks: [b, query, h, w], gt_mask: list[[n, H, W]] - name_mask = f'loss_mask{postfix}' - name_dice = f'loss_dice{postfix}' + # This function is for future RT-DETR Segment models + # def _get_loss_mask(self, masks, gt_mask, match_indices, postfix=''): + # # masks: [b, query, h, w], gt_mask: list[[n, H, W]] + # name_mask = f'loss_mask{postfix}' + # name_dice = f'loss_dice{postfix}' + # + # loss = {} + # if sum(len(a) for a in gt_mask) == 0: + # loss[name_mask] = torch.tensor(0., device=self.device) + # loss[name_dice] = torch.tensor(0., device=self.device) + # return loss + # + # num_gts = len(gt_mask) + # src_masks, target_masks = self._get_assigned_bboxes(masks, gt_mask, match_indices) + # src_masks = F.interpolate(src_masks.unsqueeze(0), size=target_masks.shape[-2:], mode='bilinear')[0] + # # TODO: torch does not have `sigmoid_focal_loss`, but it's not urgent since we don't use mask branch for now. + # loss[name_mask] = self.loss_gain['mask'] * F.sigmoid_focal_loss(src_masks, target_masks, + # torch.tensor([num_gts], dtype=torch.float32)) + # loss[name_dice] = self.loss_gain['dice'] * self._dice_loss(src_masks, target_masks, num_gts) + # return loss - loss = {} - if sum(len(a) for a in gt_mask) == 0: - loss[name_mask] = torch.tensor(0., device=self.device) - loss[name_dice] = torch.tensor(0., device=self.device) - return loss - - num_gts = len(gt_mask) - src_masks, target_masks = self._get_assigned_bboxes(masks, gt_mask, match_indices) - src_masks = F.interpolate(src_masks.unsqueeze(0), size=target_masks.shape[-2:], mode='bilinear')[0] - # TODO: torch does not have `sigmoid_focal_loss`, but it's not urgent since we don't use mask branch for now. 
- loss[name_mask] = self.loss_gain['mask'] * F.sigmoid_focal_loss(src_masks, target_masks, - torch.tensor([num_gts], dtype=torch.float32)) - loss[name_dice] = self.loss_gain['dice'] * self._dice_loss(src_masks, target_masks, num_gts) - return loss - - @staticmethod - def _dice_loss(inputs, targets, num_gts): - inputs = F.sigmoid(inputs) - inputs = inputs.flatten(1) - targets = targets.flatten(1) - numerator = 2 * (inputs * targets).sum(1) - denominator = inputs.sum(-1) + targets.sum(-1) - loss = 1 - (numerator + 1) / (denominator + 1) - return loss.sum() / num_gts + # This function is for future RT-DETR Segment models + # @staticmethod + # def _dice_loss(inputs, targets, num_gts): + # inputs = F.sigmoid(inputs).flatten(1) + # targets = targets.flatten(1) + # numerator = 2 * (inputs * targets).sum(1) + # denominator = inputs.sum(-1) + targets.sum(-1) + # loss = 1 - (numerator + 1) / (denominator + 1) + # return loss.sum() / num_gts def _get_loss_aux(self, pred_bboxes, diff --git a/ultralytics/models/utils/ops.py b/ultralytics/models/utils/ops.py index e7f829b6..0b5d8d86 100644 --- a/ultralytics/models/utils/ops.py +++ b/ultralytics/models/utils/ops.py @@ -110,34 +110,35 @@ class HungarianMatcher(nn.Module): return [(torch.tensor(i, dtype=torch.long), torch.tensor(j, dtype=torch.long) + gt_groups[k]) for k, (i, j) in enumerate(indices)] - def _cost_mask(self, bs, num_gts, masks=None, gt_mask=None): - assert masks is not None and gt_mask is not None, 'Make sure the input has `mask` and `gt_mask`' - # all masks share the same set of points for efficient matching - sample_points = torch.rand([bs, 1, self.num_sample_points, 2]) - sample_points = 2.0 * sample_points - 1.0 - - out_mask = F.grid_sample(masks.detach(), sample_points, align_corners=False).squeeze(-2) - out_mask = out_mask.flatten(0, 1) - - tgt_mask = torch.cat(gt_mask).unsqueeze(1) - sample_points = torch.cat([a.repeat(b, 1, 1, 1) for a, b in zip(sample_points, num_gts) if b > 0]) - tgt_mask = F.grid_sample(tgt_mask, sample_points, align_corners=False).squeeze([1, 2]) - - with torch.cuda.amp.autocast(False): - # binary cross entropy cost - pos_cost_mask = F.binary_cross_entropy_with_logits(out_mask, torch.ones_like(out_mask), reduction='none') - neg_cost_mask = F.binary_cross_entropy_with_logits(out_mask, torch.zeros_like(out_mask), reduction='none') - cost_mask = torch.matmul(pos_cost_mask, tgt_mask.T) + torch.matmul(neg_cost_mask, 1 - tgt_mask.T) - cost_mask /= self.num_sample_points - - # dice cost - out_mask = F.sigmoid(out_mask) - numerator = 2 * torch.matmul(out_mask, tgt_mask.T) - denominator = out_mask.sum(-1, keepdim=True) + tgt_mask.sum(-1).unsqueeze(0) - cost_dice = 1 - (numerator + 1) / (denominator + 1) - - C = self.cost_gain['mask'] * cost_mask + self.cost_gain['dice'] * cost_dice - return C + # This function is for future RT-DETR Segment models + # def _cost_mask(self, bs, num_gts, masks=None, gt_mask=None): + # assert masks is not None and gt_mask is not None, 'Make sure the input has `mask` and `gt_mask`' + # # all masks share the same set of points for efficient matching + # sample_points = torch.rand([bs, 1, self.num_sample_points, 2]) + # sample_points = 2.0 * sample_points - 1.0 + # + # out_mask = F.grid_sample(masks.detach(), sample_points, align_corners=False).squeeze(-2) + # out_mask = out_mask.flatten(0, 1) + # + # tgt_mask = torch.cat(gt_mask).unsqueeze(1) + # sample_points = torch.cat([a.repeat(b, 1, 1, 1) for a, b in zip(sample_points, num_gts) if b > 0]) + # tgt_mask = F.grid_sample(tgt_mask, 
sample_points, align_corners=False).squeeze([1, 2]) + # + # with torch.cuda.amp.autocast(False): + # # binary cross entropy cost + # pos_cost_mask = F.binary_cross_entropy_with_logits(out_mask, torch.ones_like(out_mask), reduction='none') + # neg_cost_mask = F.binary_cross_entropy_with_logits(out_mask, torch.zeros_like(out_mask), reduction='none') + # cost_mask = torch.matmul(pos_cost_mask, tgt_mask.T) + torch.matmul(neg_cost_mask, 1 - tgt_mask.T) + # cost_mask /= self.num_sample_points + # + # # dice cost + # out_mask = F.sigmoid(out_mask) + # numerator = 2 * torch.matmul(out_mask, tgt_mask.T) + # denominator = out_mask.sum(-1, keepdim=True) + tgt_mask.sum(-1).unsqueeze(0) + # cost_dice = 1 - (numerator + 1) / (denominator + 1) + # + # C = self.cost_gain['mask'] * cost_mask + self.cost_gain['dice'] * cost_dice + # return C def get_cdn_group(batch, diff --git a/ultralytics/models/yolo/classify/val.py b/ultralytics/models/yolo/classify/val.py index 80606292..272c98e8 100644 --- a/ultralytics/models/yolo/classify/val.py +++ b/ultralytics/models/yolo/classify/val.py @@ -55,7 +55,7 @@ class ClassificationValidator(BaseValidator): def update_metrics(self, preds, batch): """Updates running metrics with model predictions and batch targets.""" - n5 = min(len(self.model.names), 5) + n5 = min(len(self.names), 5) self.pred.append(preds.argsort(1, descending=True)[:, :n5]) self.targets.append(batch['cls']) diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py index 1c23af4e..0fc67a2e 100644 --- a/ultralytics/utils/__init__.py +++ b/ultralytics/utils/__init__.py @@ -325,6 +325,7 @@ def yaml_load(file='data.yaml', append_filename=False): Returns: (dict): YAML data and file name. """ + assert Path(file).suffix in ('.yaml', '.yml'), f'Attempting to load non-YAML file {file} with yaml_load()' with open(file, errors='ignore', encoding='utf-8') as f: s = f.read() # string diff --git a/ultralytics/utils/callbacks/clearml.py b/ultralytics/utils/callbacks/clearml.py index 5460aca6..ee251dd4 100644 --- a/ultralytics/utils/callbacks/clearml.py +++ b/ultralytics/utils/callbacks/clearml.py @@ -9,14 +9,14 @@ from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING from ultralytics.utils.torch_utils import model_info_for_loggers try: + assert not TESTS_RUNNING # do not log pytest + assert SETTINGS['clearml'] is True # verify integration is enabled import clearml from clearml import Task from clearml.binding.frameworks.pytorch_bind import PatchPyTorchModelIO from clearml.binding.matplotlib_bind import PatchedMatplotlib assert hasattr(clearml, '__version__') # verify package is not directory - assert not TESTS_RUNNING # do not log pytest - assert SETTINGS['clearml'] is True # verify integration is enabled except (ImportError, AssertionError): clearml = None diff --git a/ultralytics/utils/callbacks/comet.py b/ultralytics/utils/callbacks/comet.py index a6f6c403..c065b836 100644 --- a/ultralytics/utils/callbacks/comet.py +++ b/ultralytics/utils/callbacks/comet.py @@ -7,11 +7,11 @@ from ultralytics.utils import LOGGER, RANK, SETTINGS, TESTS_RUNNING, ops from ultralytics.utils.torch_utils import model_info_for_loggers try: + assert not TESTS_RUNNING # do not log pytest + assert SETTINGS['comet'] is True # verify integration is enabled import comet_ml - assert not TESTS_RUNNING # do not log pytest assert hasattr(comet_ml, '__version__') # verify package is not directory - assert SETTINGS['comet'] is True # verify integration is enabled except (ImportError, AssertionError): comet_ml = None diff --git 
a/ultralytics/utils/callbacks/dvc.py b/ultralytics/utils/callbacks/dvc.py index bf7498bf..90f6f444 100644 --- a/ultralytics/utils/callbacks/dvc.py +++ b/ultralytics/utils/callbacks/dvc.py @@ -10,13 +10,12 @@ from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING from ultralytics.utils.torch_utils import model_info_for_loggers try: + assert not TESTS_RUNNING # do not log pytest + assert SETTINGS['dvc'] is True # verify integration is enabled from importlib.metadata import version import dvclive - assert not TESTS_RUNNING # do not log pytest - assert SETTINGS['dvc'] is True # verify integration is enabled - ver = version('dvclive') if pkg.parse_version(ver) < pkg.parse_version('2.11.0'): LOGGER.debug(f'DVCLive is detected but version {ver} is incompatible (>=2.11 required).') diff --git a/ultralytics/utils/callbacks/mlflow.py b/ultralytics/utils/callbacks/mlflow.py index b3256d6f..1d9a0462 100644 --- a/ultralytics/utils/callbacks/mlflow.py +++ b/ultralytics/utils/callbacks/mlflow.py @@ -7,11 +7,11 @@ from pathlib import Path from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, colorstr try: + assert not TESTS_RUNNING # do not log pytest + assert SETTINGS['mlflow'] is True # verify integration is enabled import mlflow - assert not TESTS_RUNNING # do not log pytest assert hasattr(mlflow, '__version__') # verify package is not directory - assert SETTINGS['mlflow'] is True # verify integration is enabled except (ImportError, AssertionError): mlflow = None diff --git a/ultralytics/utils/callbacks/neptune.py b/ultralytics/utils/callbacks/neptune.py index f72a63b2..3488c3e9 100644 --- a/ultralytics/utils/callbacks/neptune.py +++ b/ultralytics/utils/callbacks/neptune.py @@ -7,12 +7,12 @@ from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING from ultralytics.utils.torch_utils import model_info_for_loggers try: + assert not TESTS_RUNNING # do not log pytest + assert SETTINGS['neptune'] is True # verify integration is enabled import neptune from neptune.types import File - assert not TESTS_RUNNING # do not log pytest assert hasattr(neptune, '__version__') - assert SETTINGS['neptune'] is True # verify integration is enabled except (ImportError, AssertionError): neptune = None diff --git a/ultralytics/utils/callbacks/raytune.py b/ultralytics/utils/callbacks/raytune.py index 0ca1d2b3..f73c48cc 100644 --- a/ultralytics/utils/callbacks/raytune.py +++ b/ultralytics/utils/callbacks/raytune.py @@ -3,11 +3,10 @@ from ultralytics.utils import SETTINGS try: + assert SETTINGS['raytune'] is True # verify integration is enabled import ray from ray import tune from ray.air import session - - assert SETTINGS['raytune'] is True # verify integration is enabled except (ImportError, AssertionError): tune = None diff --git a/ultralytics/utils/callbacks/tensorboard.py b/ultralytics/utils/callbacks/tensorboard.py index 4cd5708b..fd13b0ef 100644 --- a/ultralytics/utils/callbacks/tensorboard.py +++ b/ultralytics/utils/callbacks/tensorboard.py @@ -3,11 +3,9 @@ from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, colorstr try: - from torch.utils.tensorboard import SummaryWriter - assert not TESTS_RUNNING # do not log pytest assert SETTINGS['tensorboard'] is True # verify integration is enabled - + from torch.utils.tensorboard import SummaryWriter # TypeError for handling 'Descriptors cannot not be created directly.' 
protobuf errors in Windows except (ImportError, AssertionError, TypeError): SummaryWriter = None diff --git a/ultralytics/utils/callbacks/wb.py b/ultralytics/utils/callbacks/wb.py index 4a8ee242..0a1b0c4e 100644 --- a/ultralytics/utils/callbacks/wb.py +++ b/ultralytics/utils/callbacks/wb.py @@ -4,11 +4,11 @@ from ultralytics.utils import SETTINGS, TESTS_RUNNING from ultralytics.utils.torch_utils import model_info_for_loggers try: + assert not TESTS_RUNNING # do not log pytest + assert SETTINGS['wandb'] is True # verify integration is enabled import wandb as wb assert hasattr(wb, '__version__') - assert not TESTS_RUNNING # do not log pytest - assert SETTINGS['wandb'] is True # verify integration is enabled except (ImportError, AssertionError): wb = None diff --git a/ultralytics/utils/downloads.py b/ultralytics/utils/downloads.py index b088562e..4e494eba 100644 --- a/ultralytics/utils/downloads.py +++ b/ultralytics/utils/downloads.py @@ -160,7 +160,7 @@ def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX'), exist_ok=Fals # Check if destination directory already exists and contains files if path.exists() and any(path.iterdir()) and not exist_ok: # If it exists and is not empty, return the path without unzipping - LOGGER.info(f'Skipping {file} unzip (already unzipped)') + LOGGER.warning(f'WARNING ⚠️ Skipping {file} unzip as destination directory {path} is not empty.') return path for f in tqdm(files, @@ -185,22 +185,25 @@ def check_disk_space(url='https://ultralytics.com/assets/coco128.zip', sf=1.5, h Returns: (bool): True if there is sufficient disk space, False otherwise. """ - with contextlib.suppress(Exception): - gib = 1 << 30 # bytes per GiB - data = int(requests.head(url).headers['Content-Length']) / gib # file size (GB) - total, used, free = (x / gib for x in shutil.disk_usage('/')) # bytes - if data * sf < free: - return True # sufficient space + r = requests.head(url) # response - # Insufficient space - text = (f'WARNING ⚠️ Insufficient free disk space {free:.1f} GB < {data * sf:.3f} GB required, ' - f'Please free {data * sf - free:.1f} GB additional disk space and try again.') - if hard: - raise MemoryError(text) - LOGGER.warning(text) - return False + # Check response + assert r.status_code < 400, f'URL error for {url}: {r.status_code} {r.reason}' - return True + # Check file size + gib = 1 << 30 # bytes per GiB + data = int(r.headers.get('Content-Length', 0)) / gib # file size (GB) + total, used, free = (x / gib for x in shutil.disk_usage('/')) # bytes + if data * sf < free: + return True # sufficient space + + # Insufficient space + text = (f'WARNING ⚠️ Insufficient free disk space {free:.1f} GB < {data * sf:.3f} GB required, ' + f'Please free {data * sf - free:.1f} GB additional disk space and try again.') + if hard: + raise MemoryError(text) + LOGGER.warning(text) + return False def get_google_drive_file_info(link):
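
Note: the block below is an illustrative, standalone sketch only — it is not part of this patch and not the Ultralytics API. It mirrors the gain-scaled mutation strategy this release introduces in `Tuner._mutate`: pick a parent from the top-n rows of evolve.csv with fitness-weighted selection, perturb each hyperparameter by a Gaussian step scaled by its optional per-key gain, and clip back to the search-space bounds. The reduced `SPACE` dict, the `mutate` helper and the sample rows are assumptions for demonstration.

# Illustrative sketch only -- not part of this patch or the Ultralytics API.
import random

import numpy as np

SPACE = {  # key: (min, max, gain) -- gain defaults to 1.0 when omitted
    'lr0': (1e-5, 1e-1),
    'momentum': (0.6, 0.98, 0.3),
    'weight_decay': (0.0, 0.001)}


def mutate(rows, n=5, mutation=0.8, sigma=0.2):
    """Propose a new hyperparameter set from prior (fitness, *hyps) rows."""
    x = np.array(sorted(rows, key=lambda r: -r[0])[:n])  # keep top-n rows by fitness
    w = x[:, 0] - x[:, 0].min() + 1e-6  # selection weights (sum > 0)
    parent = x[random.choices(range(len(x)), weights=w)[0]][1:]  # fitness-weighted parent
    g = np.array([v[2] if len(v) == 3 else 1.0 for v in SPACE.values()])  # per-key gains
    v = np.ones(len(SPACE))
    while all(v == 1):  # mutate until at least one value changes (prevent duplicates)
        v = (g * (np.random.random(len(SPACE)) < mutation) * np.random.randn(len(SPACE)) * sigma + 1).clip(0.3, 3.0)
    hyp = {k: float(parent[i] * v[i]) for i, k in enumerate(SPACE)}
    return {k: round(min(max(hyp[k], SPACE[k][0]), SPACE[k][1]), 5) for k in hyp}  # clip to bounds


# Example: two prior tuning iterations -> propose a candidate for the next one
print(mutate([(0.41, 0.01, 0.90, 5e-4), (0.44, 0.02, 0.85, 1e-4)]))

The optional third element of each search-space tuple (e.g. 0.3 for momentum in the patch) shrinks the mutation step for sensitive hyperparameters, which is why the new `self.space` comment reads `key: (min, max, gain)`.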