diff --git a/MANIFEST.in b/MANIFEST.in
index 56f106b8..d25a2f50 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,7 +2,6 @@ include *.md
 include requirements.txt
 include LICENSE
 include setup.py
-recursive-exclude __pycache__ *
 include ultralytics/assets/bus.jpg
 include ultralytics/assets/zidane.jpg
 recursive-include ultralytics *.yaml
diff --git a/docs/guides/kfold-cross-validation.md b/docs/guides/kfold-cross-validation.md
index 423836c8..edcce0f8 100644
--- a/docs/guides/kfold-cross-validation.md
+++ b/docs/guides/kfold-cross-validation.md
@@ -83,6 +83,7 @@ Without further ado, let's dive in!
 3. Now, read the contents of the dataset YAML file and extract the indices of the class labels.
 
     ```python
+    yaml_file = 'path/to/data.yaml'  # your data YAML with data directories and names dictionary
     with open(yaml_file, 'r', encoding="utf8") as y:
         classes = yaml.safe_load(y)['names']
     cls_idx = sorted(classes.keys())
@@ -177,10 +178,18 @@ The ideal scenario is for all class ratios to be reasonably similar for each spl
 4. Next, we create the directories and dataset YAML files for each split.
 
     ```python
+    supported_extensions = ['.jpg', '.jpeg', '.png']
+
+    # Initialize an empty list to store image file paths
+    images = []
+
+    # Loop through supported extensions and gather image files
+    for ext in supported_extensions:
+        images.extend(sorted((dataset_path / 'images').rglob(f"*{ext}")))
+
+    # Create the necessary directories and dataset YAML files (unchanged)
     save_path = Path(dataset_path / f'{datetime.date.today().isoformat()}_{ksplit}-Fold_Cross-val')
     save_path.mkdir(parents=True, exist_ok=True)
-
-    images = sorted((dataset_path / 'images').rglob("*.jpg"))  # change file extension as needed
     ds_yamls = []
 
     for split in folds_df.columns:
@@ -216,8 +225,7 @@ The ideal scenario is for all class ratios to be reasonably similar for each spl
             img_to_path = save_path / split / k_split / 'images'
             lbl_to_path = save_path / split / k_split / 'labels'
 
-            # Copy image and label files to new directory
-            # Might throw a SamefileError if file already exists
+            # Copy image and label files to new directory (SamefileError if file already exists)
             shutil.copy(image, img_to_path / image.name)
             shutil.copy(label, lbl_to_path / label.name)
     ```
@@ -244,9 +252,15 @@ fold_lbl_distrb.to_csv(save_path / "kfold_label_distribution.csv")
 
 ```python
 results = {}
+
+# Define your additional arguments here
+batch = 16
+project = 'kfold_demo'
+epochs = 100
+
 for k in range(ksplit):
     dataset_yaml = ds_yamls[k]
-    model.train(data=dataset_yaml, *args, **kwargs)  # Include any training arguments
+    model.train(data=dataset_yaml,epochs=epochs, batch=batch, project=project)  # include any train arguments
     results[k] = model.metrics  # save output metrics for further analysis
 ```
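Taken together, the two documentation edits above change how images are gathered (multi-extension scan instead of a single `.jpg` glob) and how each fold is trained. The following is a consolidated sketch of that flow; `dataset_path`, the checkpoint name, and the training arguments are illustrative placeholders rather than values mandated by this PR:

```python
from pathlib import Path

from ultralytics import YOLO

dataset_path = Path('path/to/dataset')  # placeholder root containing 'images' and 'labels'
supported_extensions = ['.jpg', '.jpeg', '.png']

# Gather images across all supported extensions, as in the updated step 4
images = []
for ext in supported_extensions:
    images.extend(sorted((dataset_path / 'images').rglob(f'*{ext}')))

# Train one run per split and keep the metrics, as in the updated training loop
model = YOLO('yolov8n.pt')  # any detection checkpoint works for this sketch
ds_yamls = []  # in the guide this is populated with the per-split dataset YAMLs
results = {}
for k, dataset_yaml in enumerate(ds_yamls):
    model.train(data=dataset_yaml, epochs=100, batch=16, project='kfold_demo')
    results[k] = model.metrics  # metrics for fold k, inspected after all folds finish
```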
diff --git a/requirements.txt b/requirements.txt
index ed1093f7..83c99182 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@
 # Example: pip install -r requirements.txt
 
 # Base ----------------------------------------
-matplotlib>=3.2.2
+matplotlib>=3.3.0
 numpy>=1.22.2  # pinned by Snyk to avoid a vulnerability
 opencv-python>=4.6.0
 pillow>=7.1.2
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 73710139..caa37cec 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -6,7 +6,10 @@ from pathlib import Path
 import pytest
 
 from ultralytics.utils import ASSETS, SETTINGS
+from ultralytics.utils.checks import cuda_device_count, cuda_is_available
 
+CUDA_IS_AVAILABLE = cuda_is_available()
+CUDA_DEVICE_COUNT = cuda_device_count()
 WEIGHTS_DIR = Path(SETTINGS['weights_dir'])
 TASK_ARGS = [
     ('detect', 'yolov8n', 'coco8.yaml'),
@@ -117,6 +120,8 @@ def test_mobilesam():
 # Slow Tests -----------------------------------------------------------------------------------------------------------
 @pytest.mark.slow
 @pytest.mark.parametrize('task,model,data', TASK_ARGS)
+@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
+@pytest.mark.skipif(CUDA_DEVICE_COUNT < 2, reason='DDP is not available')
 def test_train_gpu(task, model, data):
     run(f'yolo train {task} model={model}.yaml data={data} imgsz=32 epochs=1 device=0')  # single GPU
     run(f'yolo train {task} model={model}.pt data={data} imgsz=32 epochs=1 device=0,1')  # multi GPU
diff --git a/tests/test_cuda.py b/tests/test_cuda.py
index 36d19fa4..7f3d87df 100644
--- a/tests/test_cuda.py
+++ b/tests/test_cuda.py
@@ -7,9 +7,10 @@ import torch
 
 from ultralytics import YOLO, download
 from ultralytics.utils import ASSETS, SETTINGS
+from ultralytics.utils.checks import cuda_device_count, cuda_is_available
 
-CUDA_IS_AVAILABLE = torch.cuda.is_available()
-CUDA_DEVICE_COUNT = torch.cuda.device_count()
+CUDA_IS_AVAILABLE = cuda_is_available()
+CUDA_DEVICE_COUNT = cuda_device_count()
 DATASETS_DIR = Path(SETTINGS['datasets_dir'])
 WEIGHTS_DIR = Path(SETTINGS['weights_dir'])
@@ -18,10 +19,8 @@ DATA = 'coco8.yaml'
 
 
 def test_checks():
-    from ultralytics.utils.checks import cuda_device_count, cuda_is_available
-
-    assert cuda_device_count() == CUDA_DEVICE_COUNT
-    assert cuda_is_available() == CUDA_IS_AVAILABLE
+    assert torch.cuda.is_available() == CUDA_IS_AVAILABLE
+    assert torch.cuda.device_count() == CUDA_DEVICE_COUNT
 
 
 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
diff --git a/tests/test_python.py b/tests/test_python.py
index ad3ed096..a4299bf8 100644
--- a/tests/test_python.py
+++ b/tests/test_python.py
@@ -14,7 +14,7 @@ from torchvision.transforms import ToTensor
 from ultralytics import RTDETR, YOLO
 from ultralytics.cfg import TASK2DATA
 from ultralytics.data.build import load_inference_source
-from ultralytics.utils import ASSETS, DEFAULT_CFG, LINUX, MACOS, ONLINE, ROOT, SETTINGS, WINDOWS
+from ultralytics.utils import ASSETS, DEFAULT_CFG, LINUX, MACOS, ONLINE, ROOT, SETTINGS, WINDOWS, is_dir_writeable
 from ultralytics.utils.downloads import download
 from ultralytics.utils.torch_utils import TORCH_1_9
 
@@ -23,6 +23,7 @@ MODEL = WEIGHTS_DIR / 'path with spaces' / 'yolov8n.pt'  # test spaces in path
 CFG = 'yolov8n.yaml'
 SOURCE = ASSETS / 'bus.jpg'
 TMP = (ROOT / '../tests/tmp').resolve()  # temp directory for test files
+IS_TMP_WRITEABLE = is_dir_writeable(TMP)
 
 
 def test_model_forward():
@@ -58,6 +59,7 @@ def test_model_profile():
     _ = model.predict(im, profile=True)
 
 
+@pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable')
 def test_predict_txt():
     # Write a list of sources (file, dir, glob, recursive glob) to a txt file
     txt_file = TMP / 'sources.txt'
@@ -128,6 +130,7 @@ def test_predict_grey_and_4ch():
 
 
 @pytest.mark.skipif(not ONLINE, reason='environment is offline')
+@pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable')
 def test_track_stream():
     # Test YouTube streaming inference (short 10 frame video) with non-default ByteTrack tracker
     # imgsz=160 required for tracking for higher confidence and better matches
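The test updates above compute CUDA availability and TMP writeability once at import time and gate the relevant tests with `pytest.mark.skipif`. Below is a minimal, self-contained sketch of that pattern; `probe_dir_writeable` is a hypothetical stand-in for the `is_dir_writeable` helper imported from `ultralytics.utils`, not its actual implementation:

```python
import tempfile
from pathlib import Path

import pytest


def probe_dir_writeable(directory):
    """Hypothetical stand-in: return True if a temporary file can be created inside `directory`."""
    try:
        with tempfile.TemporaryFile(dir=directory):
            pass
        return True
    except OSError:
        return False


TMP = (Path(__file__).parent / 'tmp').resolve()  # placeholder temp directory for test files
TMP.mkdir(parents=True, exist_ok=True)
IS_TMP_WRITEABLE = probe_dir_writeable(TMP)  # evaluated once at import, like the module constants above


@pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable')
def test_can_write_a_file():
    (TMP / 'example.txt').write_text('ok')
```

Evaluating the probes at module level keeps the skip decision cheap and consistent across all tests in the file, which is the same reason the CUDA constants moved out of the individual test bodies.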
diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py
index fbe135b2..a84d2558 100644
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
-__version__ = '8.0.180'
+__version__ = '8.0.181'
 
 from ultralytics.models import RTDETR, SAM, YOLO
 from ultralytics.models.fastsam import FastSAM
diff --git a/ultralytics/models/utils/ops.py b/ultralytics/models/utils/ops.py
index eb1ebfbb..abce97a6 100644
--- a/ultralytics/models/utils/ops.py
+++ b/ultralytics/models/utils/ops.py
@@ -103,6 +103,9 @@ class HungarianMatcher(nn.Module):
         if self.with_mask:
             C += self._cost_mask(bs, gt_groups, masks, gt_mask)
 
+        # Set invalid values (NaNs and infinities) to 0 (fixes ValueError: matrix contains invalid numeric entries)
+        C[C.isnan() | C.isinf()] = 0.0
+
         C = C.view(bs, nq, -1).cpu()
         indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(gt_groups, -1))]
         gt_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)
diff --git a/ultralytics/utils/callbacks/mlflow.py b/ultralytics/utils/callbacks/mlflow.py
index 8d4501b3..efd580b3 100644
--- a/ultralytics/utils/callbacks/mlflow.py
+++ b/ultralytics/utils/callbacks/mlflow.py
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
-from ultralytics.utils import LOGGER, ROOT, SETTINGS, TESTS_RUNNING, colorstr
+from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, colorstr
 
 try:
     assert not TESTS_RUNNING  # do not log pytest
@@ -8,7 +8,7 @@ try:
     import mlflow
 
     assert hasattr(mlflow, '__version__')  # verify package is not directory
-
+    PREFIX = colorstr('MLFlow:')
     import os
     import re
@@ -25,15 +25,13 @@ def on_pretrain_routine_end(trainer):
     if mlflow:
         mlflow_location = os.environ['MLFLOW_TRACKING_URI']  # "http://192.168.xxx.xxx:5000"
+        LOGGER.debug(f'{PREFIX} tracking uri: {mlflow_location}')
         mlflow.set_tracking_uri(mlflow_location)
-
         experiment_name = os.environ.get('MLFLOW_EXPERIMENT_NAME') or trainer.args.project or '/Shared/YOLOv8'
         run_name = os.environ.get('MLFLOW_RUN') or trainer.args.name
-        experiment = mlflow.get_experiment_by_name(experiment_name)
-        if experiment is None:
-            mlflow.create_experiment(experiment_name)
-        mlflow.set_experiment(experiment_name)
+        experiment = mlflow.set_experiment(experiment_name)  # change since mlflow does this now by default
+        mlflow.autolog()
         prefix = colorstr('MLFlow: ')
         try:
             run, active_run = mlflow, mlflow.active_run()
@@ -58,10 +56,9 @@ def on_train_end(trainer):
     if mlflow:
         run.log_artifact(trainer.last)
         run.log_artifact(trainer.best)
-        run.pyfunc.log_model(artifact_path=experiment_name,
-                             code_path=[str(ROOT.parent)],
-                             artifacts={'model_path': str(trainer.save_dir)},
-                             python_model=run.pyfunc.PythonModel())
+        run.log_artifact(trainer.save_dir)
+        mlflow.end_run()
+        LOGGER.debug(f'{PREFIX} ending run')
 
 
 callbacks = {
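The ops.py change zeroes NaN and inf entries in the HungarianMatcher cost matrix before handing it to scipy. The standalone snippet below reproduces the failure mode and applies the same masking; the cost values are made up for illustration:

```python
import torch
from scipy.optimize import linear_sum_assignment

# Toy cost matrix with invalid entries, e.g. from a cost term that divided by zero
C = torch.tensor([[0.5, float('nan'), 0.2],
                  [0.1, 0.4, float('inf')],
                  [0.3, 0.2, 0.6]])

try:
    linear_sum_assignment(C.numpy())
except ValueError as e:
    print(f'before masking: {e}')  # "matrix contains invalid numeric entries"

# Same one-liner as the patched HungarianMatcher.forward(): neutralize invalid entries
C[C.isnan() | C.isinf()] = 0.0
rows, cols = linear_sum_assignment(C.numpy())
print(f'after masking: {list(zip(rows.tolist(), cols.tolist()))}')
```

Setting the invalid entries to 0 mirrors the upstream one-liner; a large finite penalty would be the alternative if those pairs should be actively discouraged rather than treated as neutral.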
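The MLflow callback now relies on `mlflow.set_experiment` creating the experiment on demand, turns on autologging, logs artifacts directly, and ends the run explicitly. A minimal sketch of that flow outside the trainer, using a local file store and placeholder names (the real callback reads `MLFLOW_TRACKING_URI` and trainer attributes instead):

```python
import os
from pathlib import Path

import mlflow

mlflow.set_tracking_uri(os.environ.get('MLFLOW_TRACKING_URI', 'file:./mlruns'))
experiment = mlflow.set_experiment('yolov8-demo')  # created automatically if it does not exist
mlflow.autolog()

mlflow.start_run(run_name='demo')
mlflow.log_params({'epochs': 3, 'batch': 16})

artifact = Path('demo_artifact.txt')  # placeholder for trainer.last / trainer.best / save_dir
artifact.write_text('stand-in for a trained weights file')
mlflow.log_artifact(str(artifact))

mlflow.end_run()  # the callback closes the run the same way in on_train_end
```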