diff --git a/docs/en/modes/train.md b/docs/en/modes/train.md index 820ff4ba..5c97f6cf 100644 --- a/docs/en/modes/train.md +++ b/docs/en/modes/train.md @@ -242,6 +242,7 @@ Augmentation techniques are essential for improving the robustness and performan | `perspective` | `float` | `0.0` | `0.0 - 0.001` | Applies a random perspective transformation to the image, enhancing the model's ability to understand objects in 3D space. | | `flipud` | `float` | `0.0` | `0.0 - 1.0` | Flips the image upside down with the specified probability, increasing the data variability without affecting the object's characteristics. | | `fliplr` | `float` | `0.5` | `0.0 - 1.0` | Flips the image left to right with the specified probability, useful for learning symmetrical objects and increasing dataset diversity. | +| `bgr` | `float` | `0.0` | `0.0 - 1.0` | Flips the image channels from RGB to BGR with the specified probability, useful for increasing robustness to incorrect channel ordering. | | `mosaic` | `float` | `1.0` | `0.0 - 1.0` | Combines four training images into one, simulating different scene compositions and object interactions. Highly effective for complex scene understanding. | | `mixup` | `float` | `0.0` | `0.0 - 1.0` | Blends two images and their labels, creating a composite image. Enhances the model's ability to generalize by introducing label noise and visual variability. | | `copy_paste` | `float` | `0.0` | `0.0 - 1.0` | Copies objects from one image and pastes them onto another, useful for increasing object instances and learning object occlusion. 
| diff --git a/docs/en/usage/cfg.md b/docs/en/usage/cfg.md index 062c5bac..07d6db54 100644 --- a/docs/en/usage/cfg.md +++ b/docs/en/usage/cfg.md @@ -147,7 +147,7 @@ Inference arguments: |-----------------|----------------|------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `source` | `str` | `'ultralytics/assets'` | Specifies the data source for inference. Can be an image path, video file, directory, URL, or device ID for live feeds. Supports a wide range of formats and sources, enabling flexible application across different types of input. | | `conf` | `float` | `0.25` | Sets the minimum confidence threshold for detections. Objects detected with confidence below this threshold will be disregarded. Adjusting this value can help reduce false positives. | -| `iou` | `float` | `0.7` | Intersection Over Union (IoU) threshold for Non-Maximum Suppression (NMS). Lower values result in fewer detections by eliminating overlapping boxes, useful for reducing duplicates. | +| `iou` | `float` | `0.7` | Intersection Over Union (IoU) threshold for Non-Maximum Suppression (NMS). Lower values result in fewer detections by eliminating overlapping boxes, useful for reducing duplicates. | | `imgsz` | `int or tuple` | `640` | Defines the image size for inference. Can be a single integer `640` for square resizing or a (height, width) tuple. Proper sizing can improve detection accuracy and processing speed. | | `half` | `bool` | `False` | Enables half-precision (FP16) inference, which can speed up model inference on supported GPUs with minimal impact on accuracy. | | `device` | `str` | `None` | Specifies the device for inference (e.g., `cpu`, `cuda:0` or `0`). Allows users to select between CPU, a specific GPU, or other compute devices for model execution. 
| @@ -241,6 +241,7 @@ Augmentation techniques are essential for improving the robustness and performan | `perspective` | `float` | `0.0` | `0.0 - 0.001` | Applies a random perspective transformation to the image, enhancing the model's ability to understand objects in 3D space. | | `flipud` | `float` | `0.0` | `0.0 - 1.0` | Flips the image upside down with the specified probability, increasing the data variability without affecting the object's characteristics. | | `fliplr` | `float` | `0.5` | `0.0 - 1.0` | Flips the image left to right with the specified probability, useful for learning symmetrical objects and increasing dataset diversity. | +| `bgr` | `float` | `0.0` | `0.0 - 1.0` | Flips the image channels from RGB to BGR with the specified probability, useful for increasing robustness to incorrect channel ordering. | | `mosaic` | `float` | `1.0` | `0.0 - 1.0` | Combines four training images into one, simulating different scene compositions and object interactions. Highly effective for complex scene understanding. | | `mixup` | `float` | `0.0` | `0.0 - 1.0` | Blends two images and their labels, creating a composite image. Enhances the model's ability to generalize by introducing label noise and visual variability. | | `copy_paste` | `float` | `0.0` | `0.0 - 1.0` | Copies objects from one image and pastes them onto another, useful for increasing object instances and learning object occlusion. 
| diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py index e6a8469a..4dab8102 100644 --- a/ultralytics/cfg/__init__.py +++ b/ultralytics/cfg/__init__.py @@ -112,6 +112,7 @@ CFG_FRACTION_KEYS = { "perspective", "flipud", "fliplr", + "bgr", "mosaic", "mixup", "copy_paste", diff --git a/ultralytics/cfg/default.yaml b/ultralytics/cfg/default.yaml index 5ddd2573..2e165845 100644 --- a/ultralytics/cfg/default.yaml +++ b/ultralytics/cfg/default.yaml @@ -111,6 +111,7 @@ shear: 0.0 # (float) image shear (+/- deg) perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001 flipud: 0.0 # (float) image flip up-down (probability) fliplr: 0.5 # (float) image flip left-right (probability) +bgr: 0.0 # (float) image channel BGR (probability) mosaic: 1.0 # (float) image mosaic (probability) mixup: 0.0 # (float) image mixup (probability) copy_paste: 0.0 # (float) segment copy-paste (probability) diff --git a/ultralytics/data/augment.py b/ultralytics/data/augment.py index a16f60b0..aab3e626 100644 --- a/ultralytics/data/augment.py +++ b/ultralytics/data/augment.py @@ -886,6 +886,7 @@ class Format: mask_ratio (int): Downsample ratio for masks. Default is 4. mask_overlap (bool): Whether to overlap masks. Default is True. batch_idx (bool): Keep batch indexes. Default is True. + bgr (float): The probability to return BGR images. Default is 0.0. 
""" def __init__( @@ -898,6 +899,7 @@ class Format: mask_ratio=4, mask_overlap=True, batch_idx=True, + bgr=0.0, ): """Initializes the Format class with given parameters.""" self.bbox_format = bbox_format @@ -908,6 +910,7 @@ class Format: self.mask_ratio = mask_ratio self.mask_overlap = mask_overlap self.batch_idx = batch_idx # keep the batch indexes + self.bgr = bgr def __call__(self, labels): """Return formatted image, classes, bounding boxes & keypoints to be used by 'collate_fn'.""" @@ -948,7 +951,8 @@ class Format: """Format the image for YOLO from Numpy array to PyTorch tensor.""" if len(img.shape) < 3: img = np.expand_dims(img, -1) - img = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1]) + img = img.transpose(2, 0, 1) + img = np.ascontiguousarray(img[::-1] if random.uniform(0, 1) > self.bgr else img) img = torch.from_numpy(img) return img diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py index f358b9b0..42b7cc1d 100644 --- a/ultralytics/data/dataset.py +++ b/ultralytics/data/dataset.py @@ -167,6 +167,7 @@ class YOLODataset(BaseDataset): batch_idx=True, mask_ratio=hyp.mask_ratio, mask_overlap=hyp.overlap_mask, + bgr=hyp.bgr if self.augment else 0.0, # only affect training. 
) ) return transforms diff --git a/ultralytics/engine/tuner.py b/ultralytics/engine/tuner.py index a009e73a..f4fe57e7 100644 --- a/ultralytics/engine/tuner.py +++ b/ultralytics/engine/tuner.py @@ -95,6 +95,7 @@ class Tuner: "perspective": (0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 "flipud": (0.0, 1.0), # image flip up-down (probability) "fliplr": (0.0, 1.0), # image flip left-right (probability) + "bgr": (0.0, 1.0), # image channel BGR (probability) "mosaic": (0.0, 1.0), # image mixup (probability) "mixup": (0.0, 1.0), # image mixup (probability) "copy_paste": (0.0, 1.0), # segment copy-paste (probability) diff --git a/ultralytics/utils/tuner.py b/ultralytics/utils/tuner.py index efcf6766..305c60a4 100644 --- a/ultralytics/utils/tuner.py +++ b/ultralytics/utils/tuner.py @@ -78,6 +78,7 @@ def run_ray_tune( "perspective": tune.uniform(0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 "flipud": tune.uniform(0.0, 1.0), # image flip up-down (probability) "fliplr": tune.uniform(0.0, 1.0), # image flip left-right (probability) + "bgr": tune.uniform(0.0, 1.0), # image channel BGR (probability) "mosaic": tune.uniform(0.0, 1.0), # image mixup (probability) "mixup": tune.uniform(0.0, 1.0), # image mixup (probability) "copy_paste": tune.uniform(0.0, 1.0), # segment copy-paste (probability)