diff --git a/MANIFEST.in b/MANIFEST.in
index 56f106b8..d25a2f50 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,7 +2,6 @@ include *.md
 include requirements.txt
 include LICENSE
 include setup.py
-recursive-exclude __pycache__ *
 include ultralytics/assets/bus.jpg
 include ultralytics/assets/zidane.jpg
 recursive-include ultralytics *.yaml
diff --git a/docs/guides/kfold-cross-validation.md b/docs/guides/kfold-cross-validation.md
index 423836c8..edcce0f8 100644
--- a/docs/guides/kfold-cross-validation.md
+++ b/docs/guides/kfold-cross-validation.md
@@ -83,6 +83,7 @@ Without further ado, let's dive in!
 3. Now, read the contents of the dataset YAML file and extract the indices of the class labels.
 
     ```python
+    yaml_file = 'path/to/data.yaml'  # your data YAML with data directories and names dictionary
     with open(yaml_file, 'r', encoding="utf8") as y:
         classes = yaml.safe_load(y)['names']
     cls_idx = sorted(classes.keys())
@@ -177,10 +178,18 @@ The ideal scenario is for all class ratios to be reasonably similar for each spl
 4. Next, we create the directories and dataset YAML files for each split.
 
     ```python
+    supported_extensions = ['.jpg', '.jpeg', '.png']
+
+    # Initialize an empty list to store image file paths
+    images = []
+
+    # Loop through supported extensions and gather image files
+    for ext in supported_extensions:
+        images.extend(sorted((dataset_path / 'images').rglob(f"*{ext}")))
+
+    # Create the necessary directories and dataset YAML files (unchanged)
     save_path = Path(dataset_path / f'{datetime.date.today().isoformat()}_{ksplit}-Fold_Cross-val')
     save_path.mkdir(parents=True, exist_ok=True)
-
-    images = sorted((dataset_path / 'images').rglob("*.jpg"))  # change file extension as needed
     ds_yamls = []
 
     for split in folds_df.columns:
@@ -216,8 +225,7 @@ The ideal scenario is for all class ratios to be reasonably similar for each spl
             img_to_path = save_path / split / k_split / 'images'
             lbl_to_path = save_path / split / k_split / 'labels'
 
-            # Copy image and label files to new directory
-            # Might throw a SamefileError if file already exists
+            # Copy image and label files to new directory (SamefileError if file already exists)
             shutil.copy(image, img_to_path / image.name)
             shutil.copy(label, lbl_to_path / label.name)
     ```
@@ -244,9 +252,15 @@ fold_lbl_distrb.to_csv(save_path / "kfold_label_distribution.csv")
 
 ```python
 results = {}
+
+# Define your additional arguments here
+batch = 16
+project = 'kfold_demo'
+epochs = 100
+
 for k in range(ksplit):
     dataset_yaml = ds_yamls[k]
-    model.train(data=dataset_yaml, *args, **kwargs)  # Include any training arguments
+    model.train(data=dataset_yaml,epochs=epochs, batch=batch, project=project)  # include any train arguments
     results[k] = model.metrics  # save output metrics for further analysis
 ```
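Taken together, the two documentation edits above change how images are gathered (multi-extension scan instead of a single `.jpg` glob) and how each fold is trained. The following is a consolidated sketch of that flow; `dataset_path`, the checkpoint name, and the training arguments are illustrative placeholders rather than values mandated by this PR:

```python
from pathlib import Path

from ultralytics import YOLO

dataset_path = Path('path/to/dataset')  # placeholder root containing 'images' and 'labels'
supported_extensions = ['.jpg', '.jpeg', '.png']

# Gather images across all supported extensions, as in the updated step 4
images = []
for ext in supported_extensions:
    images.extend(sorted((dataset_path / 'images').rglob(f'*{ext}')))

# Train one run per split and keep the metrics, as in the updated training loop
model = YOLO('yolov8n.pt')  # any detection checkpoint works for this sketch
ds_yamls = []  # in the guide this is populated with the per-split dataset YAMLs
results = {}
for k, dataset_yaml in enumerate(ds_yamls):
    model.train(data=dataset_yaml, epochs=100, batch=16, project='kfold_demo')
    results[k] = model.metrics  # metrics for fold k, inspected after all folds finish
```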
diff --git a/requirements.txt b/requirements.txt
index ed1093f7..83c99182 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@
 # Example: pip install -r requirements.txt
 
 # Base ----------------------------------------
-matplotlib>=3.2.2
+matplotlib>=3.3.0
 numpy>=1.22.2  # pinned by Snyk to avoid a vulnerability
 opencv-python>=4.6.0
 pillow>=7.1.2
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 73710139..caa37cec 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -6,7 +6,10 @@ from pathlib import Path
 import pytest
 
 from ultralytics.utils import ASSETS, SETTINGS
+from ultralytics.utils.checks import cuda_device_count, cuda_is_available
 
+CUDA_IS_AVAILABLE = cuda_is_available()
+CUDA_DEVICE_COUNT = cuda_device_count()
 WEIGHTS_DIR = Path(SETTINGS['weights_dir'])
 TASK_ARGS = [
     ('detect', 'yolov8n', 'coco8.yaml'),
@@ -117,6 +120,8 @@ def test_mobilesam():
 # Slow Tests -----------------------------------------------------------------------------------------------------------
 @pytest.mark.slow
 @pytest.mark.parametrize('task,model,data', TASK_ARGS)
+@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
+@pytest.mark.skipif(CUDA_DEVICE_COUNT < 2, reason='DDP is not available')
 def test_train_gpu(task, model, data):
     run(f'yolo train {task} model={model}.yaml data={data} imgsz=32 epochs=1 device=0')  # single GPU
     run(f'yolo train {task} model={model}.pt data={data} imgsz=32 epochs=1 device=0,1')  # multi GPU
diff --git a/tests/test_cuda.py b/tests/test_cuda.py
index 36d19fa4..7f3d87df 100644
--- a/tests/test_cuda.py
+++ b/tests/test_cuda.py
@@ -7,9 +7,10 @@ import torch
 
 from ultralytics import YOLO, download
 from ultralytics.utils import ASSETS, SETTINGS
+from ultralytics.utils.checks import cuda_device_count, cuda_is_available
 
-CUDA_IS_AVAILABLE = torch.cuda.is_available()
-CUDA_DEVICE_COUNT = torch.cuda.device_count()
+CUDA_IS_AVAILABLE = cuda_is_available()
+CUDA_DEVICE_COUNT = cuda_device_count()
 DATASETS_DIR = Path(SETTINGS['datasets_dir'])
 WEIGHTS_DIR = Path(SETTINGS['weights_dir'])
@@ -18,10 +19,8 @@ DATA = 'coco8.yaml'
 
 
 def test_checks():
-    from ultralytics.utils.checks import cuda_device_count, cuda_is_available
-
-    assert cuda_device_count() == CUDA_DEVICE_COUNT
-    assert cuda_is_available() == CUDA_IS_AVAILABLE
+    assert torch.cuda.is_available() == CUDA_IS_AVAILABLE
+    assert torch.cuda.device_count() == CUDA_DEVICE_COUNT
 
 
 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
diff --git a/tests/test_python.py b/tests/test_python.py
index ad3ed096..a4299bf8 100644
--- a/tests/test_python.py
+++ b/tests/test_python.py
@@ -14,7 +14,7 @@ from torchvision.transforms import ToTensor
 from ultralytics import RTDETR, YOLO
 from ultralytics.cfg import TASK2DATA
 from ultralytics.data.build import load_inference_source
-from ultralytics.utils import ASSETS, DEFAULT_CFG, LINUX, MACOS, ONLINE, ROOT, SETTINGS, WINDOWS
+from ultralytics.utils import ASSETS, DEFAULT_CFG, LINUX, MACOS, ONLINE, ROOT, SETTINGS, WINDOWS, is_dir_writeable
 from ultralytics.utils.downloads import download
 from ultralytics.utils.torch_utils import TORCH_1_9
 
@@ -23,6 +23,7 @@ MODEL = WEIGHTS_DIR / 'path with spaces' / 'yolov8n.pt'  # test spaces in path
 CFG = 'yolov8n.yaml'
 SOURCE = ASSETS / 'bus.jpg'
 TMP = (ROOT / '../tests/tmp').resolve()  # temp directory for test files
+IS_TMP_WRITEABLE = is_dir_writeable(TMP)
 
 
 def test_model_forward():
@@ -58,6 +59,7 @@ def test_model_profile():
     _ = model.predict(im, profile=True)
 
 
+@pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable')
 def test_predict_txt():
     # Write a list of sources (file, dir, glob, recursive glob) to a txt file
     txt_file = TMP / 'sources.txt'
@@ -128,6 +130,7 @@ def test_predict_grey_and_4ch():
 
 
 @pytest.mark.skipif(not ONLINE, reason='environment is offline')
+@pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable')
 def test_track_stream():
     # Test YouTube streaming inference (short 10 frame video) with non-default ByteTrack tracker
     # imgsz=160 required for tracking for higher confidence and better matches
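The test updates above compute CUDA availability and TMP writeability once at import time and gate the relevant tests with `pytest.mark.skipif`. Below is a minimal, self-contained sketch of that pattern; `probe_dir_writeable` is a hypothetical stand-in for the `is_dir_writeable` helper imported from `ultralytics.utils`, not its actual implementation:

```python
import tempfile
from pathlib import Path

import pytest


def probe_dir_writeable(directory):
    """Hypothetical stand-in: return True if a temporary file can be created inside `directory`."""
    try:
        with tempfile.TemporaryFile(dir=directory):
            pass
        return True
    except OSError:
        return False


TMP = (Path(__file__).parent / 'tmp').resolve()  # placeholder temp directory for test files
TMP.mkdir(parents=True, exist_ok=True)
IS_TMP_WRITEABLE = probe_dir_writeable(TMP)  # evaluated once at import, like the module constants above


@pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable')
def test_can_write_a_file():
    (TMP / 'example.txt').write_text('ok')
```

Evaluating the probes at module level keeps the skip decision cheap and consistent across all tests in the file, which is the same reason the CUDA constants moved out of the individual test bodies.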
diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py
index fbe135b2..a84d2558 100644
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
-__version__ = '8.0.180'
+__version__ = '8.0.181'
 
 from ultralytics.models import RTDETR, SAM, YOLO
 from ultralytics.models.fastsam import FastSAM
diff --git a/ultralytics/models/utils/ops.py b/ultralytics/models/utils/ops.py
index eb1ebfbb..abce97a6 100644
--- a/ultralytics/models/utils/ops.py
+++ b/ultralytics/models/utils/ops.py
@@ -103,6 +103,9 @@ class HungarianMatcher(nn.Module):
         if self.with_mask:
             C += self._cost_mask(bs, gt_groups, masks, gt_mask)
 
+        # Set invalid values (NaNs and infinities) to 0 (fixes ValueError: matrix contains invalid numeric entries)
+        C[C.isnan() | C.isinf()] = 0.0
+
         C = C.view(bs, nq, -1).cpu()
         indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(gt_groups, -1))]
         gt_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)
diff --git a/ultralytics/utils/callbacks/mlflow.py b/ultralytics/utils/callbacks/mlflow.py
index 8d4501b3..efd580b3 100644
--- a/ultralytics/utils/callbacks/mlflow.py
+++ b/ultralytics/utils/callbacks/mlflow.py
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
-from ultralytics.utils import LOGGER, ROOT, SETTINGS, TESTS_RUNNING, colorstr
+from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, colorstr
 
 try:
     assert not TESTS_RUNNING  # do not log pytest
@@ -8,7 +8,7 @@ try:
     import mlflow
 
     assert hasattr(mlflow, '__version__')  # verify package is not directory
-
+    PREFIX = colorstr('MLFlow:')
     import os
     import re
@@ -25,15 +25,13 @@ def on_pretrain_routine_end(trainer):
     if mlflow:
         mlflow_location = os.environ['MLFLOW_TRACKING_URI']  # "http://192.168.xxx.xxx:5000"
+        LOGGER.debug(f'{PREFIX} tracking uri: {mlflow_location}')
         mlflow.set_tracking_uri(mlflow_location)
-
         experiment_name = os.environ.get('MLFLOW_EXPERIMENT_NAME') or trainer.args.project or '/Shared/YOLOv8'
         run_name = os.environ.get('MLFLOW_RUN') or trainer.args.name
-        experiment = mlflow.get_experiment_by_name(experiment_name)
-        if experiment is None:
-            mlflow.create_experiment(experiment_name)
-        mlflow.set_experiment(experiment_name)
+        experiment = mlflow.set_experiment(experiment_name)  # change since mlflow does this now by default
+        mlflow.autolog()
         prefix = colorstr('MLFlow: ')
         try:
             run, active_run = mlflow, mlflow.active_run()
@@ -58,10 +56,9 @@ def on_train_end(trainer):
     if mlflow:
         run.log_artifact(trainer.last)
         run.log_artifact(trainer.best)
-        run.pyfunc.log_model(artifact_path=experiment_name,
-                             code_path=[str(ROOT.parent)],
-                             artifacts={'model_path': str(trainer.save_dir)},
-                             python_model=run.pyfunc.PythonModel())
+        run.log_artifact(trainer.save_dir)
+        mlflow.end_run()
+        LOGGER.debug(f'{PREFIX} ending run')
 
 
 callbacks = {
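The ops.py change zeroes NaN and inf entries in the HungarianMatcher cost matrix before handing it to scipy. The standalone snippet below reproduces the failure mode and applies the same masking; the cost values are made up for illustration:

```python
import torch
from scipy.optimize import linear_sum_assignment

# Toy cost matrix with invalid entries, e.g. from a cost term that divided by zero
C = torch.tensor([[0.5, float('nan'), 0.2],
                  [0.1, 0.4, float('inf')],
                  [0.3, 0.2, 0.6]])

try:
    linear_sum_assignment(C.numpy())
except ValueError as e:
    print(f'before masking: {e}')  # "matrix contains invalid numeric entries"

# Same one-liner as the patched HungarianMatcher.forward(): neutralize invalid entries
C[C.isnan() | C.isinf()] = 0.0
rows, cols = linear_sum_assignment(C.numpy())
print(f'after masking: {list(zip(rows.tolist(), cols.tolist()))}')
```

Setting the invalid entries to 0 mirrors the upstream one-liner; a large finite penalty would be the alternative if those pairs should be actively discouraged rather than treated as neutral.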
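The MLflow callback now relies on `mlflow.set_experiment` creating the experiment on demand, turns on autologging, logs artifacts directly, and ends the run explicitly. A minimal sketch of that flow outside the trainer, using a local file store and placeholder names (the real callback reads `MLFLOW_TRACKING_URI` and trainer attributes instead):

```python
import os
from pathlib import Path

import mlflow

mlflow.set_tracking_uri(os.environ.get('MLFLOW_TRACKING_URI', 'file:./mlruns'))
experiment = mlflow.set_experiment('yolov8-demo')  # created automatically if it does not exist
mlflow.autolog()

mlflow.start_run(run_name='demo')
mlflow.log_params({'epochs': 3, 'batch': 16})

artifact = Path('demo_artifact.txt')  # placeholder for trainer.last / trainer.best / save_dir
artifact.write_text('stand-in for a trained weights file')
mlflow.log_artifact(str(artifact))

mlflow.end_run()  # the callback closes the run the same way in on_train_end
```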