Merge 1ef6cbcec52d28b0a9fe67276115d4ff45ddffed into 453c6e38a51e9d1d5a2aa5fb7f1014a711913397

Update README.md
added pgt_coeff to argparser
2025-07-19 05:55:39 +08:00 · 2025-03-14 00:35:45 -04:00 · 2025-03-14 10:53:57 +08:00 · 2024-11-01 11:19:22 -04:00 · 2024-11-01 11:04:12 -04:00 · 2024-10-23 20:33:28 -04:00
28 changed files with 3455 additions and 25 deletions
--- a/.gitignore
+++ b/.gitignore
@ -51,6 +51,7 @@ coverage.xml
 .hypothesis/
 .pytest_cache/
 mlruns/
 figures/
 # Translations
 *.mo
--- a/README.md
+++ b/README.md
@ -10,7 +10,7 @@ Please check out our new release on [**YOLOE**](https://github.com/THU-MIG/yoloe
  Comparison of performance, training cost, and inference efficiency between YOLOE (Ours) and YOLO-Worldv2 in terms of open text prompts.
 </p>
-**YOLOE(ye)** is a highly **efficient**, **unified**, and **open** object detection and segmentation model for real-time seeing anything, like human eye, under different prompt mechanisms, like *texts*, *visual inputs*, and *prompt-free paradigm*.
+**YOLOE(ye)** is a highly **efficient**, **unified**, and **open** object detection and segmentation model for real-time seeing anything, like human eye, under different prompt mechanisms, like *texts*, *visual inputs*, and *prompt-free paradigm*, with **zero inference and transferring overhead** compared with closed-set YOLOs.
 <p align="center">
  <img src="https://github.com/THU-MIG/yoloe/blob/main/figures/visualization.svg" width=96%> <br>
--- a/run_attribution.py
+++ b/run_attribution.py
@ -0,0 +1,49 @@
 from ultralytics import YOLOv10, YOLO
 # from ultralytics.engine.pgt_trainer import PGTTrainer
 # from ultralytics import BaseTrainer
 # from ultralytics.engine.trainer import BaseTrainer
 import os
 from ultralytics.models.yolo.segment import PGTSegmentationTrainer
 # Set CUDA device (only needed for multi-gpu machines) 
 os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 
 os.environ["CUDA_VISIBLE_DEVICES"] = "4" 
 # model = YOLOv10()
 # model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt')  # build from YAML and transfer weights
 # model = YOLO()
 # If you want to finetune the model with pretrained weights, you could load the 
 # pretrained weights like below
 # model = YOLOv10.from_pretrained('jameslahm/yolov10{n/s/m/b/l/x}')
 # or
 # wget https://github.com/THU-MIG/yolov10/releases/download/v1.1/yolov10{n/s/m/b/l/x}.pt
 model = YOLOv10('yolov10n.pt', task='segment')
 args = dict(model='yolov10n.pt', data='coco128-seg.yaml')
 trainer = PGTSegmentationTrainer(overrides=args)
 trainer.train(
            # debug=True, 
            #   args = dict(pgt_coeff=0.1),
              )
 # model.train(
 #             # data='coco.yaml', 
 #             data='coco128-seg.yaml', 
 #             trainer=model._smart_load("pgt_trainer"), # This is needed to generate attributions (will be used later to train via PGT)
 #             # Add return_images as input parameter
 #             epochs=500, batch=16, imgsz=640,
 #             debug=True, # If debug = True, the attributions will be saved in the figures folder
 #             # cfg='/home/nielseni6/PythonScripts/yolov10/ultralytics/cfg/models/v8/yolov8-seg.yaml',
 #             # overrides=dict(task="segment"),
 #             )
 # Save the trained model
 model.save('yolov10_coco_trained.pt')
 # Evaluate the model on the validation set
 results = model.val(data='coco.yaml')
 # Print the evaluation results
 print(results)
--- a/run_pgt_train.py
+++ b/run_pgt_train.py
@ -0,0 +1,63 @@
 from ultralytics import YOLOv10, YOLO, YOLOv10PGT
 # from ultralytics.engine.pgt_trainer import PGTTrainer
 import os
 from ultralytics.models.yolo.segment import PGTSegmentationTrainer
 import argparse
 from datetime import datetime
 import torch
 # nohup python run_pgt_train.py --device 7 > ./output_logs/gpu7_yolov10_pgt_train.log 2>&1 & 
 def main(args):
    model = YOLOv10PGT('yolov10n.pt')
    if args.pgt_coeff is None:
        model.train(data=args.data_yaml, epochs=args.epochs, batch=args.batch_size)
    else:
        model.train(    
                    data=args.data_yaml, 
                    epochs=args.epochs, 
                    batch=args.batch_size,
                    # amp=False,
                    pgt_coeff=args.pgt_coeff,
                    # cfg='pgt_train.yaml',  # Load and train model with the config file
                    )
    # If you want to finetune the model with pretrained weights, you could load the 
    # pretrained weights like below 
    # model = YOLOv10.from_pretrained('jameslahm/yolov10{n/s/m/b/l/x}')
    # or
    # wget https://github.com/THU-MIG/yolov10/releases/download/v1.1/yolov10{n/s/m/b/l/x}.pt
    # model = YOLOv10('yolov10n.pt', task='segment')
    # Create a directory to save model weights if it doesn't exist
    model_weights_dir = 'model_weights'
    if not os.path.exists(model_weights_dir):
        os.makedirs(model_weights_dir)
    # Save the trained model with a unique name based on the current date and time
    current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
    data_yaml_base = os.path.splitext(os.path.basename(args.data_yaml))[0]
    model_save_path = os.path.join(model_weights_dir, f'yolov10_{data_yaml_base}_trained_{current_time}.pt')
    model.save(model_save_path)
    # torch.save(trainer.model.state_dict(), model_save_path)
    # Evaluate the model on the validation set
    results = model.val(data=args.data_yaml)
    # Print the evaluation results
    print(results)
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Train YOLOv10 model with PGT segmentation.')
    parser.add_argument('--device', type=str, default='0', help='CUDA device number')
    parser.add_argument('--batch_size', type=int, default=32, help='Batch size for training')
    parser.add_argument('--epochs', type=int, default=100, help='Number of epochs for training')
    parser.add_argument('--data_yaml', type=str, default='coco.yaml', help='Path to the data YAML file')
    parser.add_argument('--pgt_coeff', type=float, default=None, help='Coefficient for PGT')
    args = parser.parse_args()
    # Set CUDA device (only needed for multi-gpu machines)
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.device
    main(args)
--- a/run_train.py
+++ b/run_train.py
@ -0,0 +1,83 @@
 from ultralytics import YOLOv10, YOLO
 # from ultralytics.engine.pgt_trainer import PGTTrainer
 # from ultralytics import BaseTrainer
 # from ultralytics.engine.trainer import BaseTrainer
 import os
 # Set CUDA device (only needed for multi-gpu machines) 
 os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 
 os.environ["CUDA_VISIBLE_DEVICES"] = "4" 
 model = YOLOv10()
 # model = YOLO()
 # If you want to finetune the model with pretrained weights, you could load the 
 # pretrained weights like below
 # model = YOLOv10.from_pretrained('jameslahm/yolov10{n/s/m/b/l/x}')
 # or
 # wget https://github.com/THU-MIG/yolov10/releases/download/v1.1/yolov10{n/s/m/b/l/x}.pt
 # model = YOLOv10('yolov10m.pt')
 model.train(data='coco.yaml', 
            # Add return_images as input parameter
            epochs=500, batch=16, imgsz=640,
            )
 # Save the trained model
 model.save('yolov10_coco_trained.pt')
 # Evaluate the model on the validation set
 results = model.val(data='coco.yaml')
 # Print the evaluation results
 print(results)
 # import torch
 # from torch.utils.data import DataLoader
 # from torchvision import datasets, transforms
 # # Define the transformation for the dataset
 # transform = transforms.Compose([
 #     transforms.Resize((640, 640)),
 #     transforms.ToTensor()
 # ])
 # # Load the COCO dataset
 # train_dataset = datasets.CocoDetection(root='data/nielseni6/coco/train2017', annFile='/data/nielseni6/coco/annotations/instances_train2017.json', transform=transform)
 # val_dataset = datasets.CocoDetection(root='data/nielseni6/coco/val2017', annFile='/data/nielseni6/coco/annotations/instances_val2017.json', transform=transform)
 # # Create data loaders
 # train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, num_workers=4)
 # val_loader = DataLoader(val_dataset, batch_size=256, shuffle=False, num_workers=4)
 # model = YOLOv10()
 # # Define the optimizer
 # optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
 # # Training loop
 # for epoch in range(500):
 #     model.train()
 #     for images, targets in train_loader:
 #         images = images.to('cuda')
 #         targets = [{k: v.to('cuda') for k, v in t.items()} for t in targets]
 #         loss = model(images, targets)
 #         loss.backward()
 #         optimizer.step()
 #         optimizer.zero_grad()
 #     # Validation loop
 #     model.eval()
 #     with torch.no_grad():
 #         for images, targets in val_loader:
 #             images = images.to('cuda')
 #             targets = [{k: v.to('cuda') for k, v in t.items()} for t in targets]
 #             results = model(images, targets)
 # # Save the trained model
 # model.save('yolov10_coco_trained.pt')
 # # Evaluate the model on the validation set
 # results = model.val(data='coco.yaml')
 # # Print the evaluation results
 # print(results)
--- a/run_val.py
+++ b/run_val.py
@ -0,0 +1,32 @@
 from ultralytics import YOLOv10, YOLO, YOLOv10PGT
 # from ultralytics.engine.pgt_trainer import PGTTrainer
 import os
 from ultralytics.models.yolo.segment import PGTSegmentationTrainer
 import argparse
 from datetime import datetime
 # nohup python run_pgt_train.py --device 1 > ./output_logs/gpu1_yolov10_pgt_train.log 2>&1 & 
 def main(args):
    model = YOLOv10PGT(args.model_path)
    # Evaluate the model on the validation set
    results = model.val(data=args.data_yaml)
    # Print the evaluation results
    print(results)
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Train YOLOv10 model with PGT segmentation.')
    parser.add_argument('--device', type=str, default='1', help='CUDA device number')
    parser.add_argument('--batch_size', type=int, default=64, help='Batch size for training')
    parser.add_argument('--epochs', type=int, default=100, help='Number of epochs for training')
    parser.add_argument('--data_yaml', type=str, default='coco.yaml', help='Path to the data YAML file')
    parser.add_argument('--model_path', type=str, default='yolov10n.pt', help='Path to the model file')
    args = parser.parse_args()
    # Set CUDA device (only needed for multi-gpu machines)
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.device
    main(args)
--- a/ultralytics/init.py
+++ b/ultralytics/init.py
@ -3,7 +3,7 @@
 __version__ = "8.1.34"
 from ultralytics.data.explorer.explorer import Explorer
-from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld, YOLOv10
+from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld, YOLOv10, YOLOv10PGT
 from ultralytics.models.fastsam import FastSAM
 from ultralytics.models.nas import NAS
 from ultralytics.utils import ASSETS, SETTINGS as settings
@ -23,5 +23,6 @@ __all__ = (
    "download",
    "settings",
    "Explorer",
-    "YOLOv10"
+    "YOLOv10",
    "YOLOv10PGT",
 )
--- a/ultralytics/cfg/default.yaml
+++ b/ultralytics/cfg/default.yaml
@ -41,6 +41,7 @@ overlap_mask: True # (bool) masks should overlap during training (segment train
 mask_ratio: 4 # (int) mask downsample ratio (segment train only)
 # Classification
 dropout: 0.0 # (float) use dropout regularization (classify train only)
 pgt_coeff: 2.0 # (float) PGT loss coefficient 
 # Val/Test settings ----------------------------------------------------------------------------------------------------
 val: True # (bool) validate/test during training
--- a/ultralytics/cfg/pgt_train.yaml
+++ b/ultralytics/cfg/pgt_train.yaml
@ -0,0 +1,128 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Default training settings and hyperparameters for medium-augmentation COCO training
 task: detect # (str) YOLO task, i.e. detect, segment, classify, pose
 mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
 # Train settings -------------------------------------------------------------------------------------------------------
 model: # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
 data: # (str, optional) path to data file, i.e. coco128.yaml
 epochs: 100 # (int) number of epochs to train for
 time: # (float, optional) number of hours to train for, overrides epochs if supplied
 patience: 100 # (int) epochs to wait for no observable improvement for early stopping of training
 batch: 16 # (int) number of images per batch (-1 for AutoBatch)
 imgsz: 640 # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
 save: True # (bool) save train checkpoints and predict results
 save_period: -1 # (int) Save checkpoint every x epochs (disabled if < 1)
 val_period: 1 # (int) Validation every x epochs
 cache: False # (bool) True/ram, disk or False. Use cache for data loading
 device: # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
 workers: 8 # (int) number of worker threads for data loading (per RANK if DDP)
 project: # (str, optional) project name
 name: # (str, optional) experiment name, results saved to 'project/name' directory
 exist_ok: False # (bool) whether to overwrite existing experiment
 pretrained: True # (bool | str) whether to use a pretrained model (bool) or a model to load weights from (str)
 optimizer: auto # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
 verbose: True # (bool) whether to print verbose output
 seed: 0 # (int) random seed for reproducibility
 deterministic: True # (bool) whether to enable deterministic mode
 single_cls: False # (bool) train multi-class data as single-class
 rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
 cos_lr: False # (bool) use cosine learning rate scheduler
 close_mosaic: 10 # (int) disable mosaic augmentation for final epochs (0 to disable)
 resume: False # (bool) resume training from last checkpoint
 amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
 fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set)
 profile: False # (bool) profile ONNX and TensorRT speeds during training for loggers
 freeze: None # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
 multi_scale: False # (bool) Whether to use multiscale during training
 # Segmentation
 overlap_mask: True # (bool) masks should overlap during training (segment train only)
 mask_ratio: 4 # (int) mask downsample ratio (segment train only)
 # Classification
 dropout: 0.0 # (float) use dropout regularization (classify train only)
 pgt_coeff: 1.0 # (float) PGT loss coefficient 
 # Val/Test settings ----------------------------------------------------------------------------------------------------
 val: True # (bool) validate/test during training
 split: val # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
 save_json: False # (bool) save results to JSON file
 save_hybrid: False # (bool) save hybrid version of labels (labels + additional predictions)
 conf: # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
 iou: 0.7 # (float) intersection over union (IoU) threshold for NMS
 max_det: 300 # (int) maximum number of detections per image
 half: False # (bool) use half precision (FP16)
 dnn: False # (bool) use OpenCV DNN for ONNX inference
 plots: True # (bool) save plots and images during train/val
 # Predict settings -----------------------------------------------------------------------------------------------------
 source: # (str, optional) source directory for images or videos
 vid_stride: 1 # (int) video frame-rate stride
 stream_buffer: False # (bool) buffer all streaming frames (True) or return the most recent frame (False)
 visualize: False # (bool) visualize model features
 augment: False # (bool) apply image augmentation to prediction sources
 agnostic_nms: False # (bool) class-agnostic NMS
 classes: # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
 retina_masks: False # (bool) use high-resolution segmentation masks
 embed: # (list[int], optional) return feature vectors/embeddings from given layers
 # Visualize settings ---------------------------------------------------------------------------------------------------
 show: False # (bool) show predicted images and videos if environment allows
 save_frames: False # (bool) save predicted individual video frames
 save_txt: False # (bool) save results as .txt file
 save_conf: False # (bool) save results with confidence scores
 save_crop: False # (bool) save cropped images with results
 show_labels: True # (bool) show prediction labels, i.e. 'person'
 show_conf: True # (bool) show prediction confidence, i.e. '0.99'
 show_boxes: True # (bool) show prediction boxes
 line_width: # (int, optional) line width of the bounding boxes. Scaled to image size if None.
 # Export settings ------------------------------------------------------------------------------------------------------
 format: torchscript # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
 keras: False # (bool) use Kera=s
 optimize: False # (bool) TorchScript: optimize for mobile
 int8: False # (bool) CoreML/TF INT8 quantization
 dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
 simplify: False # (bool) ONNX: simplify model using `onnxslim`
 opset: # (int, optional) ONNX: opset version
 workspace: 4 # (int) TensorRT: workspace size (GB)
 nms: False # (bool) CoreML: add NMS
 # Hyperparameters ------------------------------------------------------------------------------------------------------
 lr0: 0.01 # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
 lrf: 0.01 # (float) final learning rate (lr0 * lrf)
 momentum: 0.937 # (float) SGD momentum/Adam beta1
 weight_decay: 0.0005 # (float) optimizer weight decay 5e-4
 warmup_epochs: 3.0 # (float) warmup epochs (fractions ok)
 warmup_momentum: 0.8 # (float) warmup initial momentum
 warmup_bias_lr: 0.1 # (float) warmup initial bias lr
 box: 7.5 # (float) box loss gain
 cls: 0.5 # (float) cls loss gain (scale with pixels)
 dfl: 1.5 # (float) dfl loss gain
 pose: 12.0 # (float) pose loss gain
 kobj: 1.0 # (float) keypoint obj loss gain
 label_smoothing: 0.0 # (float) label smoothing (fraction)
 nbs: 64 # (int) nominal batch size
 hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
 hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
 hsv_v: 0.4 # (float) image HSV-Value augmentation (fraction)
 degrees: 0.0 # (float) image rotation (+/- deg)
 translate: 0.1 # (float) image translation (+/- fraction)
 scale: 0.5 # (float) image scale (+/- gain)
 shear: 0.0 # (float) image shear (+/- deg)
 perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001
 flipud: 0.0 # (float) image flip up-down (probability)
 fliplr: 0.5 # (float) image flip left-right (probability)
 bgr: 0.0 # (float) image channel BGR (probability)
 mosaic: 1.0 # (float) image mosaic (probability)
 mixup: 0.0 # (float) image mixup (probability)
 copy_paste: 0.0 # (float) segment copy-paste (probability)
 auto_augment: randaugment # (str) auto augmentation policy for classification (randaugment, autoaugment, augmix)
 erasing: 0.4 # (float) probability of random erasing during classification training (0-1)
 crop_fraction: 1.0 # (float) image crop fraction for classification evaluation/inference (0-1)
 # Custom config.yaml ---------------------------------------------------------------------------------------------------
 cfg: # (str, optional) for overriding defaults.yaml
 # Tracker settings ------------------------------------------------------------------------------------------------------
 tracker: botsort.yaml # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
--- a/ultralytics/engine/model.py
+++ b/ultralytics/engine/model.py
@ -387,6 +387,7 @@ class Model(nn.Module):
        source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None,
        stream: bool = False,
        predictor=None,
        return_images: bool = False,
        **kwargs,
    ) -> list:
        """
@ -438,7 +439,7 @@ class Model(nn.Module):
                self.predictor.save_dir = get_save_dir(self.predictor.args)
        if prompts and hasattr(self.predictor, "set_prompts"):  # for SAM-type models
            self.predictor.set_prompts(prompts)
-        return self.predictor.predict_cli(source=source) if is_cli else self.predictor(source=source, stream=stream)
+        return self.predictor.predict_cli(source=source) if is_cli else self.predictor(source=source, stream=stream, return_images=return_images)
    def track(
        self,
@ -592,6 +593,7 @@ class Model(nn.Module):
    def train(
        self,
        trainer=None,
        debug=False,
        **kwargs,
    ):
        """
@ -654,6 +656,87 @@ class Model(nn.Module):
                    pass
        self.trainer.hub_session = self.session  # attach optional HUB session
        if debug:
            self.trainer.train(debug=debug)
        else:
            self.trainer.train()
        # Update model and cfg after training
        if RANK in (-1, 0):
            ckpt = self.trainer.best if self.trainer.best.exists() else self.trainer.last
            self.model, _ = attempt_load_one_weight(ckpt)
            self.overrides = self.model.args
            self.metrics = getattr(self.trainer.validator, "metrics", None)  # TODO: no metrics returned by DDP
        return self.metrics
    def train_pgt( # Currently unused, but should be considered if changes to the train function are made
        self,
        trainer=None,
        debug=False,
        **kwargs,
    ):
        """
        Trains the model using the specified dataset and training configuration.
        This method facilitates model training with a range of customizable settings and configurations. It supports
        training with a custom trainer or the default training approach defined in the method. The method handles
        different scenarios, such as resuming training from a checkpoint, integrating with Ultralytics HUB, and
        updating model and configuration after training.
        When using Ultralytics HUB, if the session already has a loaded model, the method prioritizes HUB training
        arguments and issues a warning if local arguments are provided. It checks for pip updates and combines default
        configurations, method-specific defaults, and user-provided arguments to configure the training process. After
        training, it updates the model and its configurations, and optionally attaches metrics.
        Args:
            trainer (BaseTrainer, optional): An instance of a custom trainer class for training the model. If None, the
                method uses a default trainer. Defaults to None.
            **kwargs (any): Arbitrary keyword arguments representing the training configuration. These arguments are
                used to customize various aspects of the training process.
        Returns:
            (dict | None): Training metrics if available and training is successful; otherwise, None.
        Raises:
            AssertionError: If the model is not a PyTorch model.
            PermissionError: If there is a permission issue with the HUB session.
            ModuleNotFoundError: If the HUB SDK is not installed.
        """
        self._check_is_pytorch_model()
        if hasattr(self.session, "model") and self.session.model.id:  # Ultralytics HUB session with loaded model
            if any(kwargs):
                LOGGER.warning("WARNING ⚠️ using HUB training arguments, ignoring local training arguments.")
            kwargs = self.session.train_args  # overwrite kwargs
        checks.check_pip_update_available()
        overrides = yaml_load(checks.check_yaml(kwargs["cfg"])) if kwargs.get("cfg") else self.overrides
        custom = {"data": DEFAULT_CFG_DICT["data"] or TASK2DATA[self.task]}  # method defaults
        args = {**overrides, **custom, **kwargs, "mode": "train"}  # highest priority args on the right
        if args.get("resume"):
            args["resume"] = self.ckpt_path
        self.trainer = (trainer or self._smart_load("trainer"))(overrides=args, _callbacks=self.callbacks)
        if not args.get("resume"):  # manually set model only if not resuming
            self.trainer.model = self.trainer.get_model(weights=self.model if self.ckpt else None, cfg=self.model.yaml)
            self.model = self.trainer.model
            if SETTINGS["hub"] is True and not self.session:
                # Create a model in HUB
                try:
                    self.session = self._get_hub_session(self.model_name)
                    if self.session:
                        self.session.create_model(args)
                        # Check model was created
                        if not getattr(self.session.model, "id", None):
                            self.session = None
                except (PermissionError, ModuleNotFoundError):
                    # Ignore PermissionError and ModuleNotFoundError which indicates hub-sdk not installed
                    pass
        self.trainer.hub_session = self.session  # attach optional HUB session
        if debug:
            self.trainer.train(debug=debug)
        else:
            self.trainer.train()
        # Update model and cfg after training
        if RANK in (-1, 0):
--- a/ultralytics/engine/pgt_trainer.py
+++ b/ultralytics/engine/pgt_trainer.py
@ -0,0 +1,850 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 """
 Train a model on a dataset.
 Usage:
    $ yolo mode=train model=yolov8n.pt data=coco128.yaml imgsz=640 epochs=100 batch=16
 """
 import math
 import os
 import subprocess
 import time
 import warnings
 from copy import deepcopy
 from datetime import datetime, timedelta
 from pathlib import Path
 import numpy as np
 import torch
 from torch import distributed as dist
 from torch import nn, optim
 import matplotlib.pyplot as plt
 import torchvision.transforms as T
 from ultralytics.cfg import get_cfg, get_save_dir
 from ultralytics.data.utils import check_cls_dataset, check_det_dataset
 from ultralytics.nn.tasks import attempt_load_one_weight, attempt_load_weights
 from ultralytics.utils import (
    DEFAULT_CFG,
    LOGGER,
    RANK,
    TQDM,
    __version__,
    callbacks,
    clean_url,
    colorstr,
    emojis,
    yaml_save,
 )
 from ultralytics.utils.autobatch import check_train_batch_size
 from ultralytics.utils.checks import check_amp, check_file, check_imgsz, check_model_file_from_stem, print_args
 from ultralytics.utils.dist import ddp_cleanup, generate_ddp_command
 from ultralytics.utils.files import get_latest_run
 from ultralytics.utils.torch_utils import (
    EarlyStopping,
    ModelEMA,
    de_parallel,
    init_seeds,
    one_cycle,
    select_device,
    strip_optimizer,
 )
 from ultralytics.utils.loss import v8DetectionLoss
 from ultralytics.utils.plaus_functs import get_dist_reg, plaus_loss_fn
 import matplotlib.path as matplotlib_path
 class PGTTrainer:
    """
    BaseTrainer.
    A base class for creating trainers.
    Attributes:
        args (SimpleNamespace): Configuration for the trainer.
        validator (BaseValidator): Validator instance.
        model (nn.Module): Model instance.
        callbacks (defaultdict): Dictionary of callbacks.
        save_dir (Path): Directory to save results.
        wdir (Path): Directory to save weights.
        last (Path): Path to the last checkpoint.
        best (Path): Path to the best checkpoint.
        save_period (int): Save checkpoint every x epochs (disabled if < 1).
        batch_size (int): Batch size for training.
        epochs (int): Number of epochs to train for.
        start_epoch (int): Starting epoch for training.
        device (torch.device): Device to use for training.
        amp (bool): Flag to enable AMP (Automatic Mixed Precision).
        scaler (amp.GradScaler): Gradient scaler for AMP.
        data (str): Path to data.
        trainset (torch.utils.data.Dataset): Training dataset.
        testset (torch.utils.data.Dataset): Testing dataset.
        ema (nn.Module): EMA (Exponential Moving Average) of the model.
        resume (bool): Resume training from a checkpoint.
        lf (nn.Module): Loss function.
        scheduler (torch.optim.lr_scheduler._LRScheduler): Learning rate scheduler.
        best_fitness (float): The best fitness value achieved.
        fitness (float): Current fitness value.
        loss (float): Current loss value.
        tloss (float): Total loss value.
        loss_names (list): List of loss names.
        csv (Path): Path to results CSV file.
    """
    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """
        Initializes the BaseTrainer class.
        Args:
            cfg (str, optional): Path to a configuration file. Defaults to DEFAULT_CFG.
            overrides (dict, optional): Configuration overrides. Defaults to None.
        """
        self.args = get_cfg(cfg, overrides)
        self.check_resume(overrides)
        self.device = select_device(self.args.device, self.args.batch)
        self.validator = None
        self.metrics = None
        self.plots = {}
        init_seeds(self.args.seed + 1 + RANK, deterministic=self.args.deterministic)
        # Dirs
        self.save_dir = get_save_dir(self.args)
        self.args.name = self.save_dir.name  # update name for loggers
        self.wdir = self.save_dir / "weights"  # weights dir
        if RANK in (-1, 0):
            self.wdir.mkdir(parents=True, exist_ok=True)  # make dir
            self.args.save_dir = str(self.save_dir)
            yaml_save(self.save_dir / "args.yaml", vars(self.args))  # save run args
        self.last, self.best = self.wdir / "last.pt", self.wdir / "best.pt"  # checkpoint paths
        self.save_period = self.args.save_period
        self.batch_size = self.args.batch
        self.epochs = self.args.epochs
        self.start_epoch = 0
        if RANK == -1:
            print_args(vars(self.args))
        # Device
        if self.device.type in ("cpu", "mps"):
            self.args.workers = 0  # faster CPU training as time dominated by inference, not dataloading
        # Model and Dataset
        self.model = check_model_file_from_stem(self.args.model)  # add suffix, i.e. yolov8n -> yolov8n.pt
        try:
            if self.args.task == "classify":
                self.data = check_cls_dataset(self.args.data)
            elif self.args.data.split(".")[-1] in ("yaml", "yml") or self.args.task in (
                "detect",
                "segment",
                "pose",
                "obb",
            ):
                self.data = check_det_dataset(self.args.data)
                if "yaml_file" in self.data:
                    self.args.data = self.data["yaml_file"]  # for validating 'yolo train data=url.zip' usage
        except Exception as e:
            raise RuntimeError(emojis(f"Dataset '{clean_url(self.args.data)}' error ❌ {e}")) from e
        self.trainset, self.testset = self.get_dataset(self.data)
        self.ema = None
        # Optimization utils init
        self.lf = None
        self.scheduler = None
        # Epoch level metrics
        self.best_fitness = None
        self.fitness = None
        self.loss = None
        self.tloss = None
        self.loss_names = ["Loss"]
        self.csv = self.save_dir / "results.csv"
        self.plot_idx = [0, 1, 2]
        self.num = int(time.time())
        # Callbacks
        self.callbacks = _callbacks or callbacks.get_default_callbacks()
        if RANK in (-1, 0):
            callbacks.add_integration_callbacks(self)
    def add_callback(self, event: str, callback):
        """Appends the given callback."""
        self.callbacks[event].append(callback)
    def set_callback(self, event: str, callback):
        """Overrides the existing callbacks with the given callback."""
        self.callbacks[event] = [callback]
    def run_callbacks(self, event: str):
        """Run all existing callbacks associated with a particular event."""
        for callback in self.callbacks.get(event, []):
            callback(self)
    def train(self, debug=False):
        """Allow device='', device=None on Multi-GPU systems to default to device=0."""
        if isinstance(self.args.device, str) and len(self.args.device):  # i.e. device='0' or device='0,1,2,3'
            world_size = len(self.args.device.split(","))
        elif isinstance(self.args.device, (tuple, list)):  # i.e. device=[0, 1, 2, 3] (multi-GPU from CLI is list)
            world_size = len(self.args.device)
        elif torch.cuda.is_available():  # i.e. device=None or device='' or device=number
            world_size = 1  # default to device 0
        else:  # i.e. device='cpu' or 'mps'
            world_size = 0
        # Run subprocess if DDP training, else train normally
        if world_size > 1 and "LOCAL_RANK" not in os.environ:
            # Argument checks
            if self.args.rect:
                LOGGER.warning("WARNING ⚠️ 'rect=True' is incompatible with Multi-GPU training, setting 'rect=False'")
                self.args.rect = False
            if self.args.batch == -1:
                LOGGER.warning(
                    "WARNING ⚠️ 'batch=-1' for AutoBatch is incompatible with Multi-GPU training, setting "
                    "default 'batch=16'"
                )
                self.args.batch = 16
            # Command
            cmd, file = generate_ddp_command(world_size, self)
            try:
                LOGGER.info(f'{colorstr("DDP:")} debug command {" ".join(cmd)}')
                subprocess.run(cmd, check=True)
            except Exception as e:
                raise e
            finally:
                ddp_cleanup(self, str(file))
        else:
            self._do_train(world_size, debug=debug)
    def _setup_scheduler(self):
        """Initialize training learning rate scheduler."""
        if self.args.cos_lr:
            self.lf = one_cycle(1, self.args.lrf, self.epochs)  # cosine 1->hyp['lrf']
        else:
            self.lf = lambda x: max(1 - x / self.epochs, 0) * (1.0 - self.args.lrf) + self.args.lrf  # linear
        self.scheduler = optim.lr_scheduler.LambdaLR(self.optimizer, lr_lambda=self.lf)
    def _setup_ddp(self, world_size):
        """Initializes and sets the DistributedDataParallel parameters for training."""
        torch.cuda.set_device(RANK)
        self.device = torch.device("cuda", RANK)
        # LOGGER.info(f'DDP info: RANK {RANK}, WORLD_SIZE {world_size}, DEVICE {self.device}')
        os.environ["NCCL_BLOCKING_WAIT"] = "1"  # set to enforce timeout
        dist.init_process_group(
            backend="nccl" if dist.is_nccl_available() else "gloo",
            timeout=timedelta(seconds=10800),  # 3 hours
            rank=RANK,
            world_size=world_size,
        )
    def _setup_train(self, world_size):
        """Builds dataloaders and optimizer on correct rank process."""
        # Model
        self.run_callbacks("on_pretrain_routine_start")
        ckpt = self.setup_model()
        self.model = self.model.to(self.device)
        self.set_model_attributes()
        # Freeze layers
        freeze_list = (
            self.args.freeze
            if isinstance(self.args.freeze, list)
            else range(self.args.freeze)
            if isinstance(self.args.freeze, int)
            else []
        )
        always_freeze_names = [".dfl"]  # always freeze these layers
        freeze_layer_names = [f"model.{x}." for x in freeze_list] + always_freeze_names
        for k, v in self.model.named_parameters():
            # v.register_hook(lambda x: torch.nan_to_num(x))  # NaN to 0 (commented for erratic training results)
            if any(x in k for x in freeze_layer_names):
                LOGGER.info(f"Freezing layer '{k}'")
                v.requires_grad = False
            elif not v.requires_grad and v.dtype.is_floating_point:  # only floating point Tensor can require gradients
                LOGGER.info(
                    f"WARNING ⚠️ setting 'requires_grad=True' for frozen layer '{k}'. "
                    "See ultralytics.engine.trainer for customization of frozen layers."
                )
                v.requires_grad = True
        # Check AMP
        self.amp = torch.tensor(self.args.amp).to(self.device)  # True or False
        if self.amp and RANK in (-1, 0):  # Single-GPU and DDP
            callbacks_backup = callbacks.default_callbacks.copy()  # backup callbacks as check_amp() resets them
            self.amp = torch.tensor(check_amp(self.model), device=self.device)
            callbacks.default_callbacks = callbacks_backup  # restore callbacks
        if RANK > -1 and world_size > 1:  # DDP
            dist.broadcast(self.amp, src=0)  # broadcast the tensor from rank 0 to all other ranks (returns None)
        self.amp = bool(self.amp)  # as boolean
        self.scaler = torch.cuda.amp.GradScaler(enabled=self.amp)
        if world_size > 1:
            self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[RANK])
        # Check imgsz
        gs = max(int(self.model.stride.max() if hasattr(self.model, "stride") else 32), 32)  # grid size (max stride)
        self.args.imgsz = check_imgsz(self.args.imgsz, stride=gs, floor=gs, max_dim=1)
        self.stride = gs  # for multiscale training
        # Batch size
        if self.batch_size == -1 and RANK == -1:  # single-GPU only, estimate best batch size
            self.args.batch = self.batch_size = check_train_batch_size(self.model, self.args.imgsz, self.amp)
        # Dataloaders
        batch_size = self.batch_size // max(world_size, 1)
        self.train_loader = self.get_dataloader(self.trainset, batch_size=batch_size, rank=RANK, mode="train")
        if RANK in (-1, 0):
            # Note: When training DOTA dataset, double batch size could get OOM on images with >2000 objects.
            self.test_loader = self.get_dataloader(
                self.testset, batch_size=batch_size if self.args.task == "obb" else batch_size * 2, rank=-1, mode="val"
            )
            self.validator = self.get_validator()
            metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix="val")
            self.metrics = dict(zip(metric_keys, [0] * len(metric_keys)))
            self.ema = ModelEMA(self.model)
            if self.args.plots:
                self.plot_training_labels()
        # Optimizer
        self.accumulate = max(round(self.args.nbs / self.batch_size), 1)  # accumulate loss before optimizing
        weight_decay = self.args.weight_decay * self.batch_size * self.accumulate / self.args.nbs  # scale weight_decay
        iterations = math.ceil(len(self.train_loader.dataset) / max(self.batch_size, self.args.nbs)) * self.epochs
        self.optimizer = self.build_optimizer(
            model=self.model,
            name=self.args.optimizer,
            lr=self.args.lr0,
            momentum=self.args.momentum,
            decay=weight_decay,
            iterations=iterations,
        )
        # Scheduler
        self._setup_scheduler()
        self.stopper, self.stop = EarlyStopping(patience=self.args.patience), False
        self.resume_training(ckpt)
        self.scheduler.last_epoch = self.start_epoch - 1  # do not move
        self.run_callbacks("on_pretrain_routine_end")
    def _do_train(self, world_size=1, debug=False):
        """Train completed, evaluate and plot if specified by arguments."""
        if world_size > 1:
            self._setup_ddp(world_size)
        self._setup_train(world_size)
        nb = len(self.train_loader)  # number of batches
        nw = max(round(self.args.warmup_epochs * nb), 100) if self.args.warmup_epochs > 0 else -1  # warmup iterations
        last_opt_step = -1
        self.epoch_time = None
        self.epoch_time_start = time.time()
        self.train_time_start = time.time()
        self.run_callbacks("on_train_start")
        LOGGER.info(
            f'Image sizes {self.args.imgsz} train, {self.args.imgsz} val\n'
            f'Using {self.train_loader.num_workers * (world_size or 1)} dataloader workers\n'
            f"Logging results to {colorstr('bold', self.save_dir)}\n"
            f'Starting training for ' + (f"{self.args.time} hours..." if self.args.time else f"{self.epochs} epochs...")
        )
        if self.args.close_mosaic:
            base_idx = (self.epochs - self.args.close_mosaic) * nb
            self.plot_idx.extend([base_idx, base_idx + 1, base_idx + 2])
        epoch = self.start_epoch
        while True:
            self.epoch = epoch
            self.run_callbacks("on_train_epoch_start")
            self.model.train()
            if RANK != -1:
                self.train_loader.sampler.set_epoch(epoch)
            pbar = enumerate(self.train_loader)
            # Update dataloader attributes (optional)
            if epoch == (self.epochs - self.args.close_mosaic):
                self._close_dataloader_mosaic()
                self.train_loader.reset()
            if RANK in (-1, 0):
                LOGGER.info(self.progress_string())
                pbar = TQDM(enumerate(self.train_loader), total=nb)
            self.tloss = None
            self.optimizer.zero_grad()
            for i, batch in pbar:
                self.run_callbacks("on_train_batch_start")
                # Warmup
                ni = i + nb * epoch
                if ni <= nw:
                    xi = [0, nw]  # x interp
                    self.accumulate = max(1, int(np.interp(ni, xi, [1, self.args.nbs / self.batch_size]).round()))
                    for j, x in enumerate(self.optimizer.param_groups):
                        # Bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                        x["lr"] = np.interp(
                            ni, xi, [self.args.warmup_bias_lr if j == 0 else 0.0, x["initial_lr"] * self.lf(epoch)]
                        )
                        if "momentum" in x:
                            x["momentum"] = np.interp(ni, xi, [self.args.warmup_momentum, self.args.momentum])
                # Forward
                with torch.cuda.amp.autocast(self.amp):
                    batch = self.preprocess_batch(batch)
                    batch['img'] = batch['img'].requires_grad_(True)
                    self.loss, self.loss_items = self.model(batch)
                    # (self.loss, self.loss_items), images = self.model(batch, return_images=True)
                # smask = get_dist_reg(images, batch['masks'])
                # grad = torch.autograd.grad(self.loss, images, retain_graph=True)[0]
                # grad = torch.abs(grad)
                # self.args.pgt_coeff = 1.1
                # plaus_loss = plaus_loss_fn(grad, smask, self.args.pgt_coeff)
                # self.loss_items = torch.cat((self.loss_items, plaus_loss.unsqueeze(0)))
                # self.loss += plaus_loss
                debug_ = debug
                if debug_ and (i % 25 == 0):
                    debug_ = False
                    plot_grads(batch, self, i)
                if RANK != -1:
                    self.loss *= world_size
                self.tloss = (
                    (self.tloss * i + self.loss_items) / (i + 1) if self.tloss is not None else self.loss_items
                )
                # Backward
                self.scaler.scale(self.loss).backward()
                # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
                if ni - last_opt_step >= self.accumulate:
                    self.optimizer_step()
                    last_opt_step = ni
                    # Timed stopping
                    if self.args.time:
                        self.stop = (time.time() - self.train_time_start) > (self.args.time * 3600)
                        if RANK != -1:  # if DDP training
                            broadcast_list = [self.stop if RANK == 0 else None]
                            dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
                            self.stop = broadcast_list[0]
                        if self.stop:  # training time exceeded
                            break
                # Log
                mem = f"{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G"  # (GB)
                loss_len = self.tloss.shape[0] if len(self.tloss.shape) else 1
                losses = self.tloss if loss_len > 1 else torch.unsqueeze(self.tloss, 0)
                if RANK in (-1, 0):
                    pbar.set_description(
                        ("%11s" * 2 + "%11.4g" * (2 + loss_len))
                        % (f"{epoch + 1}/{self.epochs}", mem, *losses, batch["cls"].shape[0], batch["img"].shape[-1])
                    )
                    self.run_callbacks("on_batch_end")
                    if self.args.plots and ni in self.plot_idx:
                        self.plot_training_samples(batch, ni)
                        plot_grads(batch, self, ni)
                self.run_callbacks("on_train_batch_end")
            self.lr = {f"lr/pg{ir}": x["lr"] for ir, x in enumerate(self.optimizer.param_groups)}  # for loggers
            self.run_callbacks("on_train_epoch_end")
            if RANK in (-1, 0):
                final_epoch = epoch + 1 == self.epochs
                self.ema.update_attr(self.model, include=["yaml", "nc", "args", "names", "stride", "class_weights"])
                # Validation
                if (self.args.val and (((epoch+1) % self.args.val_period == 0) or (self.epochs - epoch) <= 10)) \
                    or final_epoch or self.stopper.possible_stop or self.stop:
                    self.metrics, self.fitness = self.validate()
                self.save_metrics(metrics={**self.label_loss_items(self.tloss), **self.metrics, **self.lr})
                self.stop |= self.stopper(epoch + 1, self.fitness) or final_epoch
                if self.args.time:
                    self.stop |= (time.time() - self.train_time_start) > (self.args.time * 3600)
                # Save model
                if self.args.save or final_epoch:
                    self.save_model()
                    self.run_callbacks("on_model_save")
            # Scheduler
            t = time.time()
            self.epoch_time = t - self.epoch_time_start
            self.epoch_time_start = t
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")  # suppress 'Detected lr_scheduler.step() before optimizer.step()'
                if self.args.time:
                    mean_epoch_time = (t - self.train_time_start) / (epoch - self.start_epoch + 1)
                    self.epochs = self.args.epochs = math.ceil(self.args.time * 3600 / mean_epoch_time)
                    self._setup_scheduler()
                    self.scheduler.last_epoch = self.epoch  # do not move
                    self.stop |= epoch >= self.epochs  # stop if exceeded epochs
                self.scheduler.step()
            self.run_callbacks("on_fit_epoch_end")
            torch.cuda.empty_cache()  # clear GPU memory at end of epoch, may help reduce CUDA out of memory errors
            # Early Stopping
            if RANK != -1:  # if DDP training
                broadcast_list = [self.stop if RANK == 0 else None]
                dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
                self.stop = broadcast_list[0]
            if self.stop:
                break  # must break all DDP ranks
            epoch += 1
        if RANK in (-1, 0):
            # Do final val with best.pt
            LOGGER.info(
                f"\n{epoch - self.start_epoch + 1} epochs completed in "
                f"{(time.time() - self.train_time_start) / 3600:.3f} hours."
            )
            self.final_eval()
            if self.args.plots:
                self.plot_metrics()
            self.run_callbacks("on_train_end")
        torch.cuda.empty_cache()
        self.run_callbacks("teardown")
    def save_model(self):
        """Save model training checkpoints with additional metadata."""
        import pandas as pd  # scope for faster startup
        metrics = {**self.metrics, **{"fitness": self.fitness}}
        results = {k.strip(): v for k, v in pd.read_csv(self.csv).to_dict(orient="list").items()}
        ckpt = {
            "epoch": self.epoch,
            "best_fitness": self.best_fitness,
            "model": deepcopy(de_parallel(self.model)).half(),
            "ema": deepcopy(self.ema.ema).half(),
            "updates": self.ema.updates,
            "optimizer": self.optimizer.state_dict(),
            "train_args": vars(self.args),  # save as dict
            "train_metrics": metrics,
            "train_results": results,
            "date": datetime.now().isoformat(),
            "version": __version__,
            "license": "AGPL-3.0 (https://ultralytics.com/license)",
            "docs": "https://docs.ultralytics.com",
        }
        # Save last and best
        torch.save(ckpt, self.last)
        if self.best_fitness == self.fitness:
            torch.save(ckpt, self.best)
        if (self.save_period > 0) and (self.epoch > 0) and (self.epoch % self.save_period == 0):
            torch.save(ckpt, self.wdir / f"epoch{self.epoch}.pt")
    @staticmethod
    def get_dataset(data):
        """
        Get train, val path from data dict if it exists.
        Returns None if data format is not recognized.
        """
        return data["train"], data.get("val") or data.get("test")
    def setup_model(self):
        """Load/create/download model for any task."""
        if isinstance(self.model, torch.nn.Module):  # if model is loaded beforehand. No setup needed
            return
        model, weights = self.model, None
        ckpt = None
        if str(model).endswith(".pt"):
            weights, ckpt = attempt_load_one_weight(model)
            cfg = ckpt["model"].yaml
        else:
            cfg = model
        self.model = self.get_model(cfg=cfg, weights=weights, verbose=RANK == -1)  # calls Model(cfg, weights)
        return ckpt
    def optimizer_step(self):
        """Perform a single step of the training optimizer with gradient clipping and EMA update."""
        self.scaler.unscale_(self.optimizer)  # unscale gradients
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=10.0)  # clip gradients
        self.scaler.step(self.optimizer)
        self.scaler.update()
        self.optimizer.zero_grad()
        if self.ema:
            self.ema.update(self.model)
    def preprocess_batch(self, batch):
        """Allows custom preprocessing model inputs and ground truths depending on task type."""
        return batch
    def validate(self):
        """
        Runs validation on test set using self.validator.
        The returned dict is expected to contain "fitness" key.
        """
        metrics = self.validator(self)
        fitness = metrics.pop("fitness", -self.loss.detach().cpu().numpy())  # use loss as fitness measure if not found
        if not self.best_fitness or self.best_fitness < fitness:
            self.best_fitness = fitness
        return metrics, fitness
    def get_model(self, cfg=None, weights=None, verbose=True):
        """Get model and raise NotImplementedError for loading cfg files."""
        raise NotImplementedError("This task trainer doesn't support loading cfg files")
    def get_validator(self):
        """Returns a NotImplementedError when the get_validator function is called."""
        raise NotImplementedError("get_validator function not implemented in trainer")
    def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode="train"):
        """Returns dataloader derived from torch.data.Dataloader."""
        raise NotImplementedError("get_dataloader function not implemented in trainer")
    def build_dataset(self, img_path, mode="train", batch=None):
        """Build dataset."""
        raise NotImplementedError("build_dataset function not implemented in trainer")
    def label_loss_items(self, loss_items=None, prefix="train"):
        """
        Returns a loss dict with labelled training loss items tensor.
        Note:
            This is not needed for classification but necessary for segmentation & detection
        """
        return {"loss": loss_items} if loss_items is not None else ["loss"]
    def set_model_attributes(self):
        """To set or update model parameters before training."""
        self.model.names = self.data["names"]
    def build_targets(self, preds, targets):
        """Builds target tensors for training YOLO model."""
        pass
    def progress_string(self):
        """Returns a string describing training progress."""
        return ""
    # TODO: may need to put these following functions into callback
    def plot_training_samples(self, batch, ni):
        """Plots training samples during YOLO training."""
        pass
    def plot_training_labels(self):
        """Plots training labels for YOLO model."""
        pass
    def save_metrics(self, metrics):
        """Saves training metrics to a CSV file."""
        keys, vals = list(metrics.keys()), list(metrics.values())
        n = len(metrics) + 1  # number of cols
        s = "" if self.csv.exists() else (("%23s," * n % tuple(["epoch"] + keys)).rstrip(",") + "\n")  # header
        with open(self.csv, "a") as f:
            f.write(s + ("%23.5g," * n % tuple([self.epoch + 1] + vals)).rstrip(",") + "\n")
    def plot_metrics(self):
        """Plot and display metrics visually."""
        pass
    def on_plot(self, name, data=None):
        """Registers plots (e.g. to be consumed in callbacks)"""
        path = Path(name)
        self.plots[path] = {"data": data, "timestamp": time.time()}
    def final_eval(self):
        """Performs final evaluation and validation for object detection YOLO model."""
        for f in self.last, self.best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is self.best:
                    LOGGER.info(f"\nValidating {f}...")
                    self.validator.args.plots = self.args.plots
                    self.metrics = self.validator(model=f)
                    self.metrics.pop("fitness", None)
                    self.run_callbacks("on_fit_epoch_end")
    def check_resume(self, overrides):
        """Check if resume checkpoint exists and update arguments accordingly."""
        resume = self.args.resume
        if resume:
            try:
                exists = isinstance(resume, (str, Path)) and Path(resume).exists()
                last = Path(check_file(resume) if exists else get_latest_run())
                # Check that resume data YAML exists, otherwise strip to force re-download of dataset
                ckpt_args = attempt_load_weights(last).args
                if not Path(ckpt_args["data"]).exists():
                    ckpt_args["data"] = self.args.data
                resume = True
                self.args = get_cfg(ckpt_args)
                self.args.model = self.args.resume = str(last)  # reinstate model
                for k in "imgsz", "batch", "device":  # allow arg updates to reduce memory or update device on resume
                    if k in overrides:
                        setattr(self.args, k, overrides[k])
            except Exception as e:
                raise FileNotFoundError(
                    "Resume checkpoint not found. Please pass a valid checkpoint to resume from, "
                    "i.e. 'yolo train resume model=path/to/last.pt'"
                ) from e
        self.resume = resume
    def resume_training(self, ckpt):
        """Resume YOLO training from given epoch and best fitness."""
        if ckpt is None or not self.resume:
            return
        best_fitness = 0.0
        start_epoch = ckpt["epoch"] + 1
        if ckpt["optimizer"] is not None:
            self.optimizer.load_state_dict(ckpt["optimizer"])  # optimizer
            best_fitness = ckpt["best_fitness"]
        if self.ema and ckpt.get("ema"):
            self.ema.ema.load_state_dict(ckpt["ema"].float().state_dict())  # EMA
            self.ema.updates = ckpt["updates"]
        assert start_epoch > 0, (
            f"{self.args.model} training to {self.epochs} epochs is finished, nothing to resume.\n"
            f"Start a new training without resuming, i.e. 'yolo train model={self.args.model}'"
        )
        LOGGER.info(f"Resuming training {self.args.model} from epoch {start_epoch + 1} to {self.epochs} total epochs")
        if self.epochs < start_epoch:
            LOGGER.info(
                f"{self.model} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {self.epochs} more epochs."
            )
            self.epochs += ckpt["epoch"]  # finetune additional epochs
        self.best_fitness = best_fitness
        self.start_epoch = start_epoch
        if start_epoch > (self.epochs - self.args.close_mosaic):
            self._close_dataloader_mosaic()
    def _close_dataloader_mosaic(self):
        """Update dataloaders to stop using mosaic augmentation."""
        if hasattr(self.train_loader.dataset, "mosaic"):
            self.train_loader.dataset.mosaic = False
        if hasattr(self.train_loader.dataset, "close_mosaic"):
            LOGGER.info("Closing dataloader mosaic")
            self.train_loader.dataset.close_mosaic(hyp=self.args)
    def build_optimizer(self, model, name="auto", lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5):
        """
        Constructs an optimizer for the given model, based on the specified optimizer name, learning rate, momentum,
        weight decay, and number of iterations.
        Args:
            model (torch.nn.Module): The model for which to build an optimizer.
            name (str, optional): The name of the optimizer to use. If 'auto', the optimizer is selected
                based on the number of iterations. Default: 'auto'.
            lr (float, optional): The learning rate for the optimizer. Default: 0.001.
            momentum (float, optional): The momentum factor for the optimizer. Default: 0.9.
            decay (float, optional): The weight decay for the optimizer. Default: 1e-5.
            iterations (float, optional): The number of iterations, which determines the optimizer if
                name is 'auto'. Default: 1e5.
        Returns:
            (torch.optim.Optimizer): The constructed optimizer.
        """
        g = [], [], []  # optimizer parameter groups
        bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k)  # normalization layers, i.e. BatchNorm2d()
        if name == "auto":
            LOGGER.info(
                f"{colorstr('optimizer:')} 'optimizer=auto' found, "
                f"ignoring 'lr0={self.args.lr0}' and 'momentum={self.args.momentum}' and "
                f"determining best 'optimizer', 'lr0' and 'momentum' automatically... "
            )
            nc = getattr(model, "nc", 10)  # number of classes
            lr_fit = round(0.002 * 5 / (4 + nc), 6)  # lr0 fit equation to 6 decimal places
            name, lr, momentum = ("SGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)
            self.args.warmup_bias_lr = 0.0  # no higher than 0.01 for Adam
        for module_name, module in model.named_modules():
            for param_name, param in module.named_parameters(recurse=False):
                fullname = f"{module_name}.{param_name}" if module_name else param_name
                if "bias" in fullname:  # bias (no decay)
                    g[2].append(param)
                elif isinstance(module, bn):  # weight (no decay)
                    g[1].append(param)
                else:  # weight (with decay)
                    g[0].append(param)
        if name in ("Adam", "Adamax", "AdamW", "NAdam", "RAdam"):
            optimizer = getattr(optim, name, optim.Adam)(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
        elif name == "RMSProp":
            optimizer = optim.RMSprop(g[2], lr=lr, momentum=momentum)
        elif name == "SGD":
            optimizer = optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)
        else:
            raise NotImplementedError(
                f"Optimizer '{name}' not found in list of available optimizers "
                f"[Adam, AdamW, NAdam, RAdam, RMSProp, SGD, auto]."
                "To request support for addition optimizers please visit https://github.com/ultralytics/ultralytics."
            )
        optimizer.add_param_group({"params": g[0], "weight_decay": decay})  # add g0 with weight_decay
        optimizer.add_param_group({"params": g[1], "weight_decay": 0.0})  # add g1 (BatchNorm2d weights)
        LOGGER.info(
            f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
            f'{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias(decay=0.0)'
        )
        return optimizer
 def plot_grads(batch, obj, i, nsamples=16):
    # Create a tensor of zeros with the same size as images
    images = batch['img'].requires_grad_(True)
    mask = torch.zeros_like(images, dtype=torch.float32)
    smask = get_dist_reg(images, batch['masks'])
    loss, loss_items = obj.model(batch)
    grad = torch.autograd.grad(loss, images, retain_graph=True)[0]
    grad = torch.abs(grad)
    batch_size = images.shape[0]
    imgsz = torch.tensor(batch['resized_shape'][0]).to(obj.device)
    targets = torch.cat((batch["batch_idx"].view(-1, 1), batch["cls"].view(-1, 1), batch["bboxes"]), 1)
    targets = v8DetectionLoss.preprocess(obj, targets=targets.to(obj.device), batch_size=batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
    gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
    mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)
    # Iterate over each bounding box and set the corresponding pixels to 1
    for irx, bboxes in enumerate(gt_bboxes):
        for idx in range(len(bboxes)):
            x1, y1, x2, y2 = bboxes[idx]
            x1, y1, x2, y2 = int(torch.round(x1)), int(torch.round(y1)), int(torch.round(x2)), int(torch.round(y2))
            mask[irx, :, y1:y2, x1:x2] = 1.0
    save_imgs = True
    if save_imgs:
        # Convert tensors to numpy arrays
        images_np = images.detach().cpu().numpy().transpose(0, 2, 3, 1)
        grad_np = grad.detach().cpu().numpy().transpose(0, 2, 3, 1)
        mask_np = mask.detach().cpu().numpy().transpose(0, 2, 3, 1)
        seg_mask_np = smask.detach().cpu().numpy().transpose(0, 2, 3, 1)
        # Normalize grad for visualization
        grad_np = (grad_np - grad_np.min()) / (grad_np.max() - grad_np.min())
        range_val = min(nsamples, images_np.shape[0])
        for ix in range(range_val):
            fig, ax = plt.subplots(1, 6, figsize=(30, 5))
            ax[0].imshow(images_np[ix])
            ax[0].set_title('Image')
            ax[1].imshow(grad_np[ix], cmap='jet')
            ax[1].set_title('Gradient')
            ax[2].imshow(images_np[ix])
            ax[2].imshow(grad_np[ix], cmap='jet', alpha=0.5)
            ax[2].set_title('Overlay')
            ax[3].imshow(mask_np[ix], cmap='gray')
            ax[3].set_title('Mask')
            ax[4].imshow(seg_mask_np[ix], cmap='gray')
            ax[4].set_title('Segmentation Mask')
            # Plot image with bounding boxes
            ax[5].imshow(images_np[ix])
            for bbox, cls in zip(gt_bboxes[ix], gt_labels[ix]):
                x1, y1, x2, y2 = bbox
                x1, y1, x2, y2 = int(torch.round(x1)), int(torch.round(y1)), int(torch.round(x2)), int(torch.round(y2))
                rect = plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor=np.random.rand(3,), linewidth=2)
                ax[5].add_patch(rect)
                ax[5].text(x1, y1, f'{int(cls)}', color='white', fontsize=12, bbox=dict(facecolor='black', alpha=0.5))
            ax[5].set_title('Bounding Boxes')
            save_dir_attr = f"{obj.save_dir._str}/attributions"
            if not os.path.exists(save_dir_attr):
                os.makedirs(save_dir_attr)
            plt.savefig(f'{save_dir_attr}/debug_epoch_{obj.epoch}_batch_{i}_image_{ix}.png')
            plt.close(fig)
--- a/ultralytics/engine/pgt_validator.py
+++ b/ultralytics/engine/pgt_validator.py
@ -0,0 +1,347 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 """
 Check a model's accuracy on a test or val split of a dataset.
 Usage:
    $ yolo mode=val model=yolov8n.pt data=coco128.yaml imgsz=640
 Usage - formats:
    $ yolo mode=val model=yolov8n.pt                 # PyTorch
                          yolov8n.torchscript        # TorchScript
                          yolov8n.onnx               # ONNX Runtime or OpenCV DNN with dnn=True
                          yolov8n_openvino_model     # OpenVINO
                          yolov8n.engine             # TensorRT
                          yolov8n.mlpackage          # CoreML (macOS-only)
                          yolov8n_saved_model        # TensorFlow SavedModel
                          yolov8n.pb                 # TensorFlow GraphDef
                          yolov8n.tflite             # TensorFlow Lite
                          yolov8n_edgetpu.tflite     # TensorFlow Edge TPU
                          yolov8n_paddle_model       # PaddlePaddle
                          yolov8n_ncnn_model         # NCNN
 """
 import json
 import time
 from pathlib import Path
 import numpy as np
 import torch
 from ultralytics.cfg import get_cfg, get_save_dir
 from ultralytics.data.utils import check_cls_dataset, check_det_dataset
 from ultralytics.nn.autobackend import AutoBackend
 from ultralytics.utils import LOGGER, TQDM, callbacks, colorstr, emojis
 from ultralytics.utils.checks import check_imgsz
 from ultralytics.utils.ops import Profile
 from ultralytics.utils.torch_utils import de_parallel, select_device, smart_inference_mode
 class PGTValidator:
    """
    BaseValidator.
    A base class for creating validators.
    Attributes:
        args (SimpleNamespace): Configuration for the validator.
        dataloader (DataLoader): Dataloader to use for validation.
        pbar (tqdm): Progress bar to update during validation.
        model (nn.Module): Model to validate.
        data (dict): Data dictionary.
        device (torch.device): Device to use for validation.
        batch_i (int): Current batch index.
        training (bool): Whether the model is in training mode.
        names (dict): Class names.
        seen: Records the number of images seen so far during validation.
        stats: Placeholder for statistics during validation.
        confusion_matrix: Placeholder for a confusion matrix.
        nc: Number of classes.
        iouv: (torch.Tensor): IoU thresholds from 0.50 to 0.95 in spaces of 0.05.
        jdict (dict): Dictionary to store JSON validation results.
        speed (dict): Dictionary with keys 'preprocess', 'inference', 'loss', 'postprocess' and their respective
                      batch processing times in milliseconds.
        save_dir (Path): Directory to save results.
        plots (dict): Dictionary to store plots for visualization.
        callbacks (dict): Dictionary to store various callback functions.
    """
    def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
        """
        Initializes a BaseValidator instance.
        Args:
            dataloader (torch.utils.data.DataLoader): Dataloader to be used for validation.
            save_dir (Path, optional): Directory to save results.
            pbar (tqdm.tqdm): Progress bar for displaying progress.
            args (SimpleNamespace): Configuration for the validator.
            _callbacks (dict): Dictionary to store various callback functions.
        """
        self.args = get_cfg(overrides=args)
        self.dataloader = dataloader
        self.pbar = pbar
        self.stride = None
        self.data = None
        self.device = None
        self.batch_i = None
        self.training = True
        self.names = None
        self.seen = None
        self.stats = None
        self.confusion_matrix = None
        self.nc = None
        self.iouv = None
        self.jdict = None
        self.speed = {"preprocess": 0.0, "inference": 0.0, "loss": 0.0, "postprocess": 0.0}
        self.save_dir = save_dir or get_save_dir(self.args)
        (self.save_dir / "labels" if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)
        if self.args.conf is None:
            self.args.conf = 0.001  # default conf=0.001
        self.args.imgsz = check_imgsz(self.args.imgsz, max_dim=1)
        self.plots = {}
        self.callbacks = _callbacks or callbacks.get_default_callbacks()
    # @smart_inference_mode()
    def __call__(self, trainer=None, model=None):
        """Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer
        gets priority).
        """
        self.training = trainer is not None
        augment = self.args.augment and (not self.training)
        if self.training:
            self.device = trainer.device
            self.data = trainer.data
            # self.args.half = self.device.type != "cpu"  # force FP16 val during training
            model = trainer.ema.ema or trainer.model
            model = model.half() if self.args.half else model.float()
            # self.model = model
            self.loss = torch.zeros_like(trainer.loss_items, device=trainer.device)
            self.args.plots &= trainer.stopper.possible_stop or (trainer.epoch == trainer.epochs - 1)
            model.eval()
        else:
            callbacks.add_integration_callbacks(self)
            model = AutoBackend(
                weights=model or self.args.model,
                device=select_device(self.args.device, self.args.batch),
                dnn=self.args.dnn,
                data=self.args.data,
                fp16=self.args.half,
            )
            # self.model = model
            self.device = model.device  # update device
            self.args.half = model.fp16  # update half
            stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
            imgsz = check_imgsz(self.args.imgsz, stride=stride)
            if engine:
                self.args.batch = model.batch_size
            elif not pt and not jit:
                self.args.batch = 1  # export.py models default to batch-size 1
                LOGGER.info(f"Forcing batch=1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models")
            if str(self.args.data).split(".")[-1] in ("yaml", "yml"):
                self.data = check_det_dataset(self.args.data)
            elif self.args.task == "classify":
                self.data = check_cls_dataset(self.args.data, split=self.args.split)
            else:
                raise FileNotFoundError(emojis(f"Dataset '{self.args.data}' for task={self.args.task} not found ❌"))
            if self.device.type in ("cpu", "mps"):
                self.args.workers = 0  # faster CPU val as time dominated by inference, not dataloading
            if not pt:
                self.args.rect = False
            self.stride = model.stride  # used in get_dataloader() for padding
            self.dataloader = self.dataloader or self.get_dataloader(self.data.get(self.args.split), self.args.batch)
            model.eval()
            model.warmup(imgsz=(1 if pt else self.args.batch, 3, imgsz, imgsz))  # warmup
        self.run_callbacks("on_val_start")
        dt = (
            Profile(device=self.device),
            Profile(device=self.device),
            Profile(device=self.device),
            Profile(device=self.device),
        )
        bar = TQDM(self.dataloader, desc=self.get_desc(), total=len(self.dataloader))
        self.init_metrics(de_parallel(model))
        self.jdict = []  # empty before each val
        for batch_i, batch in enumerate(bar):
            self.run_callbacks("on_val_batch_start")
            self.batch_i = batch_i
            # Preprocess
            with dt[0]:
                batch = self.preprocess(batch)
            # Inference
            with dt[1]:
                model.zero_grad()
                preds = model(batch["img"].requires_grad_(True), augment=augment)
            # Loss
            with dt[2]:
                if self.training:
                    self.loss += model.loss(batch, preds)[1]
                    model.zero_grad()
            # Postprocess
            with dt[3]:
                preds = self.postprocess(preds)
            self.update_metrics(preds, batch)
            if self.args.plots and batch_i < 3:
                self.plot_val_samples(batch, batch_i)
                self.plot_predictions(batch, preds, batch_i)
            self.run_callbacks("on_val_batch_end")
        stats = self.get_stats()
        self.check_stats(stats)
        self.speed = dict(zip(self.speed.keys(), (x.t / len(self.dataloader.dataset) * 1e3 for x in dt)))
        self.finalize_metrics()
        if not (self.args.save_json and self.is_coco and len(self.jdict)):
            self.print_results()
        self.run_callbacks("on_val_end")
        if self.training:
            model.float()
            if self.args.save_json and self.jdict:
                with open(str(self.save_dir / "predictions.json"), "w") as f:
                    LOGGER.info(f"Saving {f.name}...")
                    json.dump(self.jdict, f)  # flatten and save
                stats = self.eval_json(stats)  # update stats
                stats['fitness'] = stats['metrics/mAP50-95(B)']
            results = {**stats, **trainer.label_loss_items(self.loss.cpu() / len(self.dataloader), prefix="val")}
            return {k: round(float(v), 5) for k, v in results.items()}  # return results as 5 decimal place floats
        else:
            LOGGER.info(
                "Speed: %.1fms preprocess, %.1fms inference, %.1fms loss, %.1fms postprocess per image"
                % tuple(self.speed.values())
            )
            if self.args.save_json and self.jdict:
                with open(str(self.save_dir / "predictions.json"), "w") as f:
                    LOGGER.info(f"Saving {f.name}...")
                    json.dump(self.jdict, f)  # flatten and save
                stats = self.eval_json(stats)  # update stats
            if self.args.plots or self.args.save_json:
                LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}")
            return stats
    def match_predictions(self, pred_classes, true_classes, iou, use_scipy=False):
        """
        Matches predictions to ground truth objects (pred_classes, true_classes) using IoU.
        Args:
            pred_classes (torch.Tensor): Predicted class indices of shape(N,).
            true_classes (torch.Tensor): Target class indices of shape(M,).
            iou (torch.Tensor): An NxM tensor containing the pairwise IoU values for predictions and ground of truth
            use_scipy (bool): Whether to use scipy for matching (more precise).
        Returns:
            (torch.Tensor): Correct tensor of shape(N,10) for 10 IoU thresholds.
        """
        # Dx10 matrix, where D - detections, 10 - IoU thresholds
        correct = np.zeros((pred_classes.shape[0], self.iouv.shape[0])).astype(bool)
        # LxD matrix where L - labels (rows), D - detections (columns)
        correct_class = true_classes[:, None] == pred_classes
        iou = iou * correct_class  # zero out the wrong classes
        iou = iou.cpu().numpy()
        for i, threshold in enumerate(self.iouv.cpu().tolist()):
            if use_scipy:
                # WARNING: known issue that reduces mAP in https://github.com/ultralytics/ultralytics/pull/4708
                import scipy  # scope import to avoid importing for all commands
                cost_matrix = iou * (iou >= threshold)
                if cost_matrix.any():
                    labels_idx, detections_idx = scipy.optimize.linear_sum_assignment(cost_matrix, maximize=True)
                    valid = cost_matrix[labels_idx, detections_idx] > 0
                    if valid.any():
                        correct[detections_idx[valid], i] = True
            else:
                matches = np.nonzero(iou >= threshold)  # IoU > threshold and classes match
                matches = np.array(matches).T
                if matches.shape[0]:
                    if matches.shape[0] > 1:
                        matches = matches[iou[matches[:, 0], matches[:, 1]].argsort()[::-1]]
                        matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                        # matches = matches[matches[:, 2].argsort()[::-1]]
                        matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
                    correct[matches[:, 1].astype(int), i] = True
        return torch.tensor(correct, dtype=torch.bool, device=pred_classes.device)
    def add_callback(self, event: str, callback):
        """Appends the given callback."""
        self.callbacks[event].append(callback)
    def run_callbacks(self, event: str):
        """Runs all callbacks associated with a specified event."""
        for callback in self.callbacks.get(event, []):
            callback(self)
    def get_dataloader(self, dataset_path, batch_size):
        """Get data loader from dataset path and batch size."""
        raise NotImplementedError("get_dataloader function not implemented for this validator")
    def build_dataset(self, img_path):
        """Build dataset."""
        raise NotImplementedError("build_dataset function not implemented in validator")
    def preprocess(self, batch):
        """Preprocesses an input batch."""
        return batch
    def postprocess(self, preds):
        """Describes and summarizes the purpose of 'postprocess()' but no details mentioned."""
        return preds
    def init_metrics(self, model):
        """Initialize performance metrics for the YOLO model."""
        pass
    def update_metrics(self, preds, batch):
        """Updates metrics based on predictions and batch."""
        pass
    def finalize_metrics(self, *args, **kwargs):
        """Finalizes and returns all metrics."""
        pass
    def get_stats(self):
        """Returns statistics about the model's performance."""
        return {}
    def check_stats(self, stats):
        """Checks statistics."""
        pass
    def print_results(self):
        """Prints the results of the model's predictions."""
        pass
    def get_desc(self):
        """Get description of the YOLO model."""
        pass
    @property
    def metric_keys(self):
        """Returns the metric keys used in YOLO training/validation."""
        return []
    def on_plot(self, name, data=None):
        """Registers plots (e.g. to be consumed in callbacks)"""
        self.plots[Path(name)] = {"data": data, "timestamp": time.time()}
    # TODO: may need to put these following functions into callback
    def plot_val_samples(self, batch, ni):
        """Plots validation samples during training."""
        pass
    def plot_predictions(self, batch, preds, ni):
        """Plots YOLO model predictions on batch images."""
        pass
    def pred_to_json(self, preds, batch):
        """Convert predictions to JSON format."""
        pass
    def eval_json(self, stats):
        """Evaluate and return JSON format of prediction statistics."""
        pass
--- a/ultralytics/engine/predictor.py
+++ b/ultralytics/engine/predictor.py
@ -206,7 +206,7 @@ class BasePredictor:
        self.vid_writer = {}
    @smart_inference_mode()
-    def stream_inference(self, source=None, model=None, *args, **kwargs):
+    def stream_inference(self, source=None, model=None, return_images = False, *args, **kwargs):
        """Streams real-time inference on camera feed and saves results to file."""
        if self.args.verbose:
            LOGGER.info("")
@ -243,6 +243,9 @@ class BasePredictor:
                with profilers[0]:
                    im = self.preprocess(im0s)
                if return_images:
                    im = im.requires_grad_(True)
                # Inference
                with profilers[1]:
                    preds = self.inference(im, *args, **kwargs)
@ -272,7 +275,7 @@ class BasePredictor:
                    LOGGER.info("\n".join(s))
                self.run_callbacks("on_predict_batch_end")
-                yield from self.results
+                yield from (self.results, im)
        # Release assets
        for v in self.vid_writer.values():
--- a/ultralytics/models/init.py
+++ b/ultralytics/models/init.py
@ -3,6 +3,6 @@
 from .rtdetr import RTDETR
 from .sam import SAM
 from .yolo import YOLO, YOLOWorld
-from .yolov10 import YOLOv10
+from .yolov10 import YOLOv10, YOLOv10PGT
-__all__ = "YOLO", "RTDETR", "SAM", "YOLOWorld", "YOLOv10"  # allow simpler import
+__all__ = "YOLO", "RTDETR", "SAM", "YOLOWorld", "YOLOv10", "YOLOv10PGT"  # allow simpler import
--- a/ultralytics/models/yolo/detect/init.py
+++ b/ultralytics/models/yolo/detect/init.py
@ -1,7 +1,9 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 from .predict import DetectionPredictor
 from .pgt_train import PGTDetectionTrainer
 from .train import DetectionTrainer
 from .val import DetectionValidator
 from .pgt_val import PGTDetectionValidator
-__all__ = "DetectionPredictor", "DetectionTrainer", "DetectionValidator"
+__all__ = "DetectionPredictor", "DetectionTrainer", "DetectionValidator", "PGTDetectionTrainer", "PGTDetectionValidator"
--- a/ultralytics/models/yolo/detect/pgt_train.py
+++ b/ultralytics/models/yolo/detect/pgt_train.py
@ -0,0 +1,144 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 import math
 import random
 from copy import copy
 import numpy as np
 import torch.nn as nn
 from ultralytics.data import build_dataloader, build_yolo_dataset
 from ultralytics.engine.trainer import BaseTrainer
 from ultralytics.engine.pgt_trainer import PGTTrainer
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import DetectionModel
 from ultralytics.utils import LOGGER, RANK
 from ultralytics.utils.plotting import plot_images, plot_labels, plot_results
 from ultralytics.utils.torch_utils import de_parallel, torch_distributed_zero_first
 class PGTDetectionTrainer(PGTTrainer):
    """
    A class extending the BaseTrainer class for training based on a detection model.
    Example:
        ```python
        from ultralytics.models.yolo.detect import DetectionTrainer
        args = dict(model='yolov8n.pt', data='coco8.yaml', epochs=3)
        trainer = DetectionTrainer(overrides=args)
        trainer.train()
        ```
    """
    def build_dataset(self, img_path, mode="train", batch=None):
        """
        Build YOLO Dataset.
        Args:
            img_path (str): Path to the folder containing images.
            mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
            batch (int, optional): Size of batches, this is for `rect`. Defaults to None.
        """
        gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
        return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs)
    def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode="train"):
        """Construct and return dataloader."""
        assert mode in ["train", "val"]
        with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
            dataset = self.build_dataset(dataset_path, mode, batch_size)
        shuffle = mode == "train"
        if getattr(dataset, "rect", False) and shuffle:
            LOGGER.warning("WARNING ⚠️ 'rect=True' is incompatible with DataLoader shuffle, setting shuffle=False")
            shuffle = False
        workers = self.args.workers if mode == "train" else self.args.workers * 2
        return build_dataloader(dataset, batch_size, workers, shuffle, rank)  # return dataloader
    def preprocess_batch(self, batch):
        """Preprocesses a batch of images by scaling and converting to float."""
        batch["img"] = batch["img"].to(self.device, non_blocking=True).float() / 255
        if self.args.multi_scale:
            imgs = batch["img"]
            sz = (
                random.randrange(self.args.imgsz * 0.5, self.args.imgsz * 1.5 + self.stride)
                // self.stride
                * self.stride
            )  # size
            sf = sz / max(imgs.shape[2:])  # scale factor
            if sf != 1:
                ns = [
                    math.ceil(x * sf / self.stride) * self.stride for x in imgs.shape[2:]
                ]  # new shape (stretched to gs-multiple)
                imgs = nn.functional.interpolate(imgs, size=ns, mode="bilinear", align_corners=False)
            batch["img"] = imgs
        return batch
    def set_model_attributes(self):
        """Nl = de_parallel(self.model).model[-1].nl  # number of detection layers (to scale hyps)."""
        # self.args.box *= 3 / nl  # scale to layers
        # self.args.cls *= self.data["nc"] / 80 * 3 / nl  # scale to classes and layers
        # self.args.cls *= (self.args.imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
        self.model.nc = self.data["nc"]  # attach number of classes to model
        self.model.names = self.data["names"]  # attach class names to model
        self.model.args = self.args  # attach hyperparameters to model
        # TODO: self.model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc
    def get_model(self, cfg=None, weights=None, verbose=True):
        """Return a YOLO detection model."""
        model = DetectionModel(cfg, nc=self.data["nc"], verbose=verbose and RANK == -1)
        if weights:
            model.load(weights)
        return model
    def get_validator(self):
        """Returns a DetectionValidator for YOLO model validation."""
        self.loss_names = "box_loss", "cls_loss", "dfl_loss"
        return yolo.detect.DetectionValidator(
            self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
        )
    def label_loss_items(self, loss_items=None, prefix="train"):
        """
        Returns a loss dict with labelled training loss items tensor.
        Not needed for classification but necessary for segmentation & detection
        """
        keys = [f"{prefix}/{x}" for x in self.loss_names]
        if loss_items is not None:
            loss_items = [round(float(x), 5) for x in loss_items]  # convert tensors to 5 decimal place floats
            return dict(zip(keys, loss_items))
        else:
            return keys
    def progress_string(self):
        """Returns a formatted string of training progress with epoch, GPU memory, loss, instances and size."""
        return ("\n" + "%11s" * (4 + len(self.loss_names))) % (
            "Epoch",
            "GPU_mem",
            *self.loss_names,
            "Instances",
            "Size",
        )
    def plot_training_samples(self, batch, ni):
        """Plots training samples with their annotations."""
        plot_images(
            images=batch["img"],
            batch_idx=batch["batch_idx"],
            cls=batch["cls"].squeeze(-1),
            bboxes=batch["bboxes"],
            paths=batch["im_file"],
            fname=self.save_dir / f"train_batch{ni}.jpg",
            on_plot=self.on_plot,
        )
    def plot_metrics(self):
        """Plots metrics from a CSV file."""
        plot_results(file=self.csv, on_plot=self.on_plot)  # save results.png
    def plot_training_labels(self):
        """Create a labeled training plot of the YOLO model."""
        boxes = np.concatenate([lb["bboxes"] for lb in self.train_loader.dataset.labels], 0)
        cls = np.concatenate([lb["cls"] for lb in self.train_loader.dataset.labels], 0)
        plot_labels(boxes, cls.squeeze(), names=self.data["names"], save_dir=self.save_dir, on_plot=self.on_plot)
--- a/ultralytics/models/yolo/detect/pgt_val.py
+++ b/ultralytics/models/yolo/detect/pgt_val.py
@ -0,0 +1,300 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 import os
 from pathlib import Path
 import numpy as np
 import torch
 from ultralytics.data import build_dataloader, build_yolo_dataset, converter
 from ultralytics.engine.validator import BaseValidator
 from ultralytics.engine.pgt_validator import PGTValidator
 from ultralytics.utils import LOGGER, ops
 from ultralytics.utils.checks import check_requirements
 from ultralytics.utils.metrics import ConfusionMatrix, DetMetrics, box_iou
 from ultralytics.utils.plotting import output_to_target, plot_images
 class PGTDetectionValidator(PGTValidator):
    """
    A class extending the BaseValidator class for validation based on a detection model.
    Example:
        ```python
        from ultralytics.models.yolo.detect import DetectionValidator
        args = dict(model='yolov8n.pt', data='coco8.yaml')
        validator = DetectionValidator(args=args)
        validator()
        ```
    """
    def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
        """Initialize detection model with necessary variables and settings."""
        super().__init__(dataloader, save_dir, pbar, args, _callbacks)
        self.nt_per_class = None
        self.is_coco = False
        self.class_map = None
        self.args.task = "detect"
        self.metrics = DetMetrics(save_dir=self.save_dir, on_plot=self.on_plot)
        self.iouv = torch.linspace(0.5, 0.95, 10)  # IoU vector for mAP@0.5:0.95
        self.niou = self.iouv.numel()
        self.lb = []  # for autolabelling
    def preprocess(self, batch):
        """Preprocesses batch of images for YOLO training."""
        batch["img"] = batch["img"].to(self.device, non_blocking=True)
        batch["img"] = (batch["img"].half() if self.args.half else batch["img"].float()) / 255
        for k in ["batch_idx", "cls", "bboxes"]:
            batch[k] = batch[k].to(self.device)
        if self.args.save_hybrid:
            height, width = batch["img"].shape[2:]
            nb = len(batch["img"])
            bboxes = batch["bboxes"] * torch.tensor((width, height, width, height), device=self.device)
            self.lb = (
                [
                    torch.cat([batch["cls"][batch["batch_idx"] == i], bboxes[batch["batch_idx"] == i]], dim=-1)
                    for i in range(nb)
                ]
                if self.args.save_hybrid
                else []
            )  # for autolabelling
        return batch
    def init_metrics(self, model):
        """Initialize evaluation metrics for YOLO."""
        val = self.data.get(self.args.split, "")  # validation path
        self.is_coco = isinstance(val, str) and "coco" in val and val.endswith(f"{os.sep}val2017.txt")  # is COCO
        self.class_map = converter.coco80_to_coco91_class() if self.is_coco else list(range(1000))
        self.args.save_json |= self.is_coco  # run on final val if training COCO
        self.names = model.names
        self.nc = len(model.names)
        self.metrics.names = self.names
        self.metrics.plot = self.args.plots
        self.confusion_matrix = ConfusionMatrix(nc=self.nc, conf=self.args.conf)
        self.seen = 0
        self.jdict = []
        self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[])
    def get_desc(self):
        """Return a formatted string summarizing class metrics of YOLO model."""
        return ("%22s" + "%11s" * 6) % ("Class", "Images", "Instances", "Box(P", "R", "mAP50", "mAP50-95)")
    def postprocess(self, preds):
        """Apply Non-maximum suppression to prediction outputs."""
        return ops.non_max_suppression(
            preds,
            self.args.conf,
            self.args.iou,
            labels=self.lb,
            multi_label=True,
            agnostic=self.args.single_cls,
            max_det=self.args.max_det,
        )
    def _prepare_batch(self, si, batch):
        """Prepares a batch of images and annotations for validation."""
        idx = batch["batch_idx"] == si
        cls = batch["cls"][idx].squeeze(-1)
        bbox = batch["bboxes"][idx]
        ori_shape = batch["ori_shape"][si]
        imgsz = batch["img"].shape[2:]
        ratio_pad = batch["ratio_pad"][si]
        if len(cls):
            bbox = ops.xywh2xyxy(bbox) * torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]]  # target boxes
            ops.scale_boxes(imgsz, bbox, ori_shape, ratio_pad=ratio_pad)  # native-space labels
        return dict(cls=cls, bbox=bbox, ori_shape=ori_shape, imgsz=imgsz, ratio_pad=ratio_pad)
    def _prepare_pred(self, pred, pbatch):
        """Prepares a batch of images and annotations for validation."""
        predn = pred.clone()
        ops.scale_boxes(
            pbatch["imgsz"], predn[:, :4], pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"]
        )  # native-space pred
        return predn
    def update_metrics(self, preds, batch):
        """Metrics."""
        for si, pred in enumerate(preds):
            self.seen += 1
            npr = len(pred)
            stat = dict(
                conf=torch.zeros(0, device=self.device),
                pred_cls=torch.zeros(0, device=self.device),
                tp=torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device),
            )
            pbatch = self._prepare_batch(si, batch)
            cls, bbox = pbatch.pop("cls"), pbatch.pop("bbox")
            nl = len(cls)
            stat["target_cls"] = cls
            if npr == 0:
                if nl:
                    for k in self.stats.keys():
                        self.stats[k].append(stat[k])
                    if self.args.plots:
                        self.confusion_matrix.process_batch(detections=None, gt_bboxes=bbox, gt_cls=cls)
                continue
            # Predictions
            if self.args.single_cls:
                pred[:, 5] = 0
            predn = self._prepare_pred(pred, pbatch)
            stat["conf"] = predn[:, 4]
            stat["pred_cls"] = predn[:, 5]
            # Evaluate
            if nl:
                stat["tp"] = self._process_batch(predn, bbox, cls)
                if self.args.plots:
                    self.confusion_matrix.process_batch(predn, bbox, cls)
            for k in self.stats.keys():
                self.stats[k].append(stat[k])
            # Save
            if self.args.save_json:
                self.pred_to_json(predn, batch["im_file"][si])
            if self.args.save_txt:
                file = self.save_dir / "labels" / f'{Path(batch["im_file"][si]).stem}.txt'
                self.save_one_txt(predn, self.args.save_conf, pbatch["ori_shape"], file)
    def finalize_metrics(self, *args, **kwargs):
        """Set final values for metrics speed and confusion matrix."""
        self.metrics.speed = self.speed
        self.metrics.confusion_matrix = self.confusion_matrix
    def get_stats(self):
        """Returns metrics statistics and results dictionary."""
        stats = {k: torch.cat(v, 0).cpu().numpy() for k, v in self.stats.items()}  # to numpy
        if len(stats) and stats["tp"].any():
            self.metrics.process(**stats)
        self.nt_per_class = np.bincount(
            stats["target_cls"].astype(int), minlength=self.nc
        )  # number of targets per class
        return self.metrics.results_dict
    def print_results(self):
        """Prints training/validation set metrics per class."""
        pf = "%22s" + "%11i" * 2 + "%11.3g" * len(self.metrics.keys)  # print format
        LOGGER.info(pf % ("all", self.seen, self.nt_per_class.sum(), *self.metrics.mean_results()))
        if self.nt_per_class.sum() == 0:
            LOGGER.warning(f"WARNING ⚠️ no labels found in {self.args.task} set, can not compute metrics without labels")
        # Print results per class
        if self.args.verbose and not self.training and self.nc > 1 and len(self.stats):
            for i, c in enumerate(self.metrics.ap_class_index):
                LOGGER.info(pf % (self.names[c], self.seen, self.nt_per_class[c], *self.metrics.class_result(i)))
        if self.args.plots:
            for normalize in True, False:
                self.confusion_matrix.plot(
                    save_dir=self.save_dir, names=self.names.values(), normalize=normalize, on_plot=self.on_plot
                )
    def _process_batch(self, detections, gt_bboxes, gt_cls):
        """
        Return correct prediction matrix.
        Args:
            detections (torch.Tensor): Tensor of shape [N, 6] representing detections.
                Each detection is of the format: x1, y1, x2, y2, conf, class.
            labels (torch.Tensor): Tensor of shape [M, 5] representing labels.
                Each label is of the format: class, x1, y1, x2, y2.
        Returns:
            (torch.Tensor): Correct prediction matrix of shape [N, 10] for 10 IoU levels.
        """
        iou = box_iou(gt_bboxes, detections[:, :4])
        return self.match_predictions(detections[:, 5], gt_cls, iou)
    def build_dataset(self, img_path, mode="val", batch=None):
        """
        Build YOLO Dataset.
        Args:
            img_path (str): Path to the folder containing images.
            mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
            batch (int, optional): Size of batches, this is for `rect`. Defaults to None.
        """
        return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, stride=self.stride)
    def get_dataloader(self, dataset_path, batch_size):
        """Construct and return dataloader."""
        dataset = self.build_dataset(dataset_path, batch=batch_size, mode="val")
        return build_dataloader(dataset, batch_size, self.args.workers, shuffle=False, rank=-1)  # return dataloader
    def plot_val_samples(self, batch, ni):
        """Plot validation image samples."""
        plot_images(
            batch["img"],
            batch["batch_idx"],
            batch["cls"].squeeze(-1),
            batch["bboxes"],
            paths=batch["im_file"],
            fname=self.save_dir / f"val_batch{ni}_labels.jpg",
            names=self.names,
            on_plot=self.on_plot,
        )
    def plot_predictions(self, batch, preds, ni):
        """Plots predicted bounding boxes on input images and saves the result."""
        plot_images(
            batch["img"],
            *output_to_target(preds, max_det=self.args.max_det),
            paths=batch["im_file"],
            fname=self.save_dir / f"val_batch{ni}_pred.jpg",
            names=self.names,
            on_plot=self.on_plot,
        )  # pred
    def save_one_txt(self, predn, save_conf, shape, file):
        """Save YOLO detections to a txt file in normalized coordinates in a specific format."""
        gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
        for *xyxy, conf, cls in predn.tolist():
            xywh = (ops.xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
            line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
            with open(file, "a") as f:
                f.write(("%g " * len(line)).rstrip() % line + "\n")
    def pred_to_json(self, predn, filename):
        """Serialize YOLO predictions to COCO json format."""
        stem = Path(filename).stem
        image_id = int(stem) if stem.isnumeric() else stem
        box = ops.xyxy2xywh(predn[:, :4])  # xywh
        box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
        for p, b in zip(predn.tolist(), box.tolist()):
            self.jdict.append(
                {
                    "image_id": image_id,
                    "category_id": self.class_map[int(p[5])],
                    "bbox": [round(x, 3) for x in b],
                    "score": round(p[4], 5),
                }
            )
    def eval_json(self, stats):
        """Evaluates YOLO output in JSON format and returns performance statistics."""
        if self.args.save_json and self.is_coco and len(self.jdict):
            anno_json = self.data["path"] / "annotations/instances_val2017.json"  # annotations
            pred_json = self.save_dir / "predictions.json"  # predictions
            LOGGER.info(f"\nEvaluating pycocotools mAP using {pred_json} and {anno_json}...")
            try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
                check_requirements("pycocotools>=2.0.6")
                from pycocotools.coco import COCO  # noqa
                from pycocotools.cocoeval import COCOeval  # noqa
                for x in anno_json, pred_json:
                    assert x.is_file(), f"{x} file not found"
                anno = COCO(str(anno_json))  # init annotations api
                pred = anno.loadRes(str(pred_json))  # init predictions api (must pass string, not Path)
                eval = COCOeval(anno, pred, "bbox")
                if self.is_coco:
                    eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files]  # images to eval
                eval.evaluate()
                eval.accumulate()
                eval.summarize()
                stats[self.metrics.keys[-1]], stats[self.metrics.keys[-2]] = eval.stats[:2]  # update mAP50-95 and mAP50
            except Exception as e:
                LOGGER.warning(f"pycocotools unable to run: {e}")
        return stats
--- a/ultralytics/models/yolo/segment/init.py
+++ b/ultralytics/models/yolo/segment/init.py
@ -3,5 +3,6 @@
 from .predict import SegmentationPredictor
 from .train import SegmentationTrainer
 from .val import SegmentationValidator
 from .pgt_train import PGTSegmentationTrainer
-__all__ = "SegmentationPredictor", "SegmentationTrainer", "SegmentationValidator"
+__all__ = "SegmentationPredictor", "SegmentationTrainer", "SegmentationValidator", "PGTSegmentationTrainer"
--- a/ultralytics/models/yolo/segment/pgt_train.py
+++ b/ultralytics/models/yolo/segment/pgt_train.py
@ -0,0 +1,74 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 from copy import copy
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import SegmentationModel, DetectionModel
 from ultralytics.utils import DEFAULT_CFG, RANK
 # from ultralytics.utils import yaml_load, IterableSimpleNamespace, ROOT
 from ultralytics.utils.plotting import plot_images, plot_results
 from ultralytics.models.yolov10.model import YOLOv10PGTDetectionModel
 from ultralytics.models.yolov10.val import YOLOv10PGTDetectionValidator
 # # Default configuration
 # DEFAULT_CFG_DICT = yaml_load(ROOT / "cfg/pgt_train.yaml")
 # for k, v in DEFAULT_CFG_DICT.items():
 #     if isinstance(v, str) and v.lower() == "none":
 #         DEFAULT_CFG_DICT[k] = None
 # DEFAULT_CFG_KEYS = DEFAULT_CFG_DICT.keys()
 # DEFAULT_CFG = IterableSimpleNamespace(**DEFAULT_CFG_DICT)
 class PGTSegmentationTrainer(yolo.detect.PGTDetectionTrainer):
    """
    A class extending the DetectionTrainer class for training based on a segmentation model.
    Example:
        ```python
        from ultralytics.models.yolo.segment import SegmentationTrainer
        args = dict(model='yolov8n-seg.pt', data='coco8-seg.yaml', epochs=3)
        trainer = SegmentationTrainer(overrides=args)
        trainer.train()
        ```
    """
    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        """Initialize a SegmentationTrainer object with given arguments."""
        if overrides is None:
            overrides = {}
        overrides["task"] = "segment"
        super().__init__(cfg, overrides, _callbacks)
    def get_model(self, cfg=None, weights=None, verbose=True):
        """Return SegmentationModel initialized with specified config and weights."""
        model = YOLOv10PGTDetectionModel(cfg, nc=self.data["nc"], verbose=verbose and RANK == -1)
        if weights:
            model.load(weights)
        return model
    def get_validator(self):
        """Return an instance of SegmentationValidator for validation of YOLO model."""
        self.loss_names = "box_om", "cls_om", "dfl_om", "box_oo", "cls_oo", "dfl_oo", "pgt_loss",
        return YOLOv10PGTDetectionValidator(
            self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
        )
    def plot_training_samples(self, batch, ni):
        """Creates a plot of training sample images with labels and box coordinates."""
        plot_images(
            batch["img"],
            batch["batch_idx"],
            batch["cls"].squeeze(-1),
            batch["bboxes"],
            masks=batch["masks"],
            paths=batch["im_file"],
            fname=self.save_dir / f"train_batch{ni}.jpg",
            on_plot=self.on_plot,
        )
    def plot_metrics(self):
        """Plots training/val metrics."""
        plot_results(file=self.csv, segment=True, on_plot=self.on_plot)  # save results.png
--- a/ultralytics/models/yolov10/init.py
+++ b/ultralytics/models/yolov10/init.py
@ -1,5 +1,5 @@
-from .model import YOLOv10
+from .model import YOLOv10, YOLOv10PGT
 from .predict import YOLOv10DetectionPredictor
 from .val import YOLOv10DetectionValidator
-__all__ = "YOLOv10DetectionPredictor", "YOLOv10DetectionValidator", "YOLOv10"
+__all__ = "YOLOv10DetectionPredictor", "YOLOv10DetectionValidator", "YOLOv10", "YOLOv10PGT"
--- a/ultralytics/models/yolov10/model.py
+++ b/ultralytics/models/yolov10/model.py
@ -1,8 +1,11 @@
 from ultralytics.engine.model import Model
-from ultralytics.nn.tasks import YOLOv10DetectionModel
+from ultralytics.nn.tasks import YOLOv10DetectionModel, YOLOv10PGTDetectionModel
 from .val import YOLOv10DetectionValidator
 from .predict import YOLOv10DetectionPredictor
 from .train import YOLOv10DetectionTrainer
 from .pgt_train import YOLOv10PGTDetectionTrainer
 # from ..yolo.segment import PGTSegmentationTrainer
 # from .pgt_trainer import YOLOv10DetectionTrainer
 from huggingface_hub import PyTorchModelHubMixin
 from .card import card_template_text
@ -30,6 +33,39 @@ class YOLOv10(Model, PyTorchModelHubMixin, model_card_template=card_template_tex
            "detect": {
                "model": YOLOv10DetectionModel,
                "trainer": YOLOv10DetectionTrainer,
                "pgt_trainer": YOLOv10PGTDetectionTrainer,
                "validator": YOLOv10DetectionValidator,
                "predictor": YOLOv10DetectionPredictor,
            },
        }
 def _get_pgt_segmentation_trainer():
    from ..yolo.segment import PGTSegmentationTrainer
    return PGTSegmentationTrainer
 class YOLOv10PGT(Model, PyTorchModelHubMixin, model_card_template=card_template_text):
    def __init__(self, model="yolov10n.pt", task=None, verbose=False, 
                 names=None):
        super().__init__(model=model, task=task, verbose=verbose)
        if names is not None:
            setattr(self.model, 'names', names)
    def push_to_hub(self, repo_name, **kwargs):
        config = kwargs.get('config', {})
        config['names'] = self.names
        config['model'] = self.model.yaml['yaml_file']
        config['task'] = self.task
        kwargs['config'] = config
        super().push_to_hub(repo_name, **kwargs)
    @property
    def task_map(self):
        """Map head to model, trainer, validator, and predictor classes."""
        return {
            "detect": {
                "model": YOLOv10DetectionModel,
                "trainer": _get_pgt_segmentation_trainer(),
                "validator": YOLOv10DetectionValidator,
                "predictor": YOLOv10DetectionPredictor,
            },
--- a/ultralytics/models/yolov10/pgt_train.py
+++ b/ultralytics/models/yolov10/pgt_train.py
@ -0,0 +1,21 @@
 from ultralytics.models.yolo.detect import DetectionTrainer
 from ultralytics.models.yolo.detect import PGTDetectionTrainer
 from .val import YOLOv10DetectionValidator
 from .model import YOLOv10DetectionModel
 from copy import copy
 from ultralytics.utils import RANK
 class YOLOv10PGTDetectionTrainer(PGTDetectionTrainer):
    def get_validator(self):
        """Returns a DetectionValidator for YOLO model validation."""
        self.loss_names = "box_om", "cls_om", "dfl_om", "box_oo", "cls_oo", "dfl_oo", 
        return YOLOv10DetectionValidator(
            self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
        )
    def get_model(self, cfg=None, weights=None, verbose=True):
        """Return a YOLO detection model."""
        model = YOLOv10DetectionModel(cfg, nc=self.data["nc"], verbose=verbose and RANK == -1)
        if weights:
            model.load(weights)
        return model
--- a/ultralytics/models/yolov10/val.py
+++ b/ultralytics/models/yolov10/val.py
@ -1,4 +1,4 @@
-from ultralytics.models.yolo.detect import DetectionValidator
+from ultralytics.models.yolo.detect import DetectionValidator, PGTDetectionValidator
 from ultralytics.utils import ops
 import torch
@ -22,3 +22,24 @@ class YOLOv10DetectionValidator(DetectionValidator):
            boxes, scores, labels = ops.v10postprocess(preds, self.args.max_det, self.nc)
            bboxes = ops.xywh2xyxy(boxes)
            return torch.cat([bboxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
 class YOLOv10PGTDetectionValidator(PGTDetectionValidator):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.args.save_json |= self.is_coco
    def postprocess(self, preds):
        if isinstance(preds, dict):
            preds = preds["one2one"]
        if isinstance(preds, (list, tuple)):
            preds = preds[0]
        # Acknowledgement: Thanks to sanha9999 in #190 and #181!
        if preds.shape[-1] == 6:
            return preds
        else:
            preds = preds.transpose(-1, -2)
            boxes, scores, labels = ops.v10postprocess(preds, self.args.max_det, self.nc)
            bboxes = ops.xywh2xyxy(boxes)
            return torch.cat([bboxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
--- a/ultralytics/nn/tasks.py
+++ b/ultralytics/nn/tasks.py
@ -57,7 +57,7 @@ from ultralytics.nn.modules import (
 )
 from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
 from ultralytics.utils.checks import check_requirements, check_suffix, check_yaml
-from ultralytics.utils.loss import v8ClassificationLoss, v8DetectionLoss, v8OBBLoss, v8PoseLoss, v8SegmentationLoss, v10DetectLoss
+from ultralytics.utils.loss import v8ClassificationLoss, v8DetectionLoss, v8OBBLoss, v8PoseLoss, v8SegmentationLoss, v10DetectLoss, v10PGTDetectLoss
 from ultralytics.utils.plotting import feature_visualization
 from ultralytics.utils.torch_utils import (
    fuse_conv_and_bn,
@ -93,7 +93,7 @@ class BaseModel(nn.Module):
            return self.loss(x, *args, **kwargs)
        return self.predict(x, *args, **kwargs)
-    def predict(self, x, profile=False, visualize=False, augment=False, embed=None):
+    def predict(self, x, profile=False, visualize=False, augment=False, embed=None, return_images=False):
        """
        Perform a forward pass through the network.
@ -107,9 +107,12 @@ class BaseModel(nn.Module):
        Returns:
            (torch.Tensor): The last output of the model.
        """
        if return_images:
            x = x.requires_grad_(True)
        if augment:
            return self._predict_augment(x)
-        return self._predict_once(x, profile, visualize, embed)
+        out = self._predict_once(x, profile, visualize, embed)
        return (out, x) if return_images else out
    def _predict_once(self, x, profile=False, visualize=False, embed=None):
        """
@ -140,13 +143,13 @@ class BaseModel(nn.Module):
                    return torch.unbind(torch.cat(embeddings, 1), dim=0)
        return x
-    def _predict_augment(self, x):
+    def _predict_augment(self, x, *args, **kwargs):
        """Perform augmentations on input image x and return augmented inference."""
        LOGGER.warning(
            f"WARNING ⚠️ {self.__class__.__name__} does not support augmented inference yet. "
            f"Reverting to single-scale inference instead."
        )
-        return self._predict_once(x)
+        return self._predict_once(x, *args, **kwargs)
    def _profile_one_layer(self, m, x, dt):
        """
@ -260,7 +263,7 @@ class BaseModel(nn.Module):
        if verbose:
            LOGGER.info(f"Transferred {len(csd)}/{len(self.model.state_dict())} items from pretrained weights")
-    def loss(self, batch, preds=None):
+    def loss(self, batch, preds=None, return_images=False):
        """
        Compute loss.
@ -271,8 +274,12 @@ class BaseModel(nn.Module):
        if not hasattr(self, "criterion"):
            self.criterion = self.init_criterion()
-        preds = self.forward(batch["img"]) if preds is None else preds
+        preds = self.forward(batch["img"], return_images=return_images) if preds is None else preds
-        return self.criterion(preds, batch)
+        if return_images:
            preds, im = preds
        loss = self.criterion(preds, batch)
        out = loss if not return_images else (loss, im)
        return out
    def init_criterion(self):
        """Initialize the loss criterion for the BaseModel."""
@ -645,6 +652,10 @@ class YOLOv10DetectionModel(DetectionModel):
    def init_criterion(self):
        return v10DetectLoss(self)
 class YOLOv10PGTDetectionModel(DetectionModel):
    def init_criterion(self):
        return v10PGTDetectLoss(self, pgt_coeff=self.args.pgt_coeff if hasattr(self.args, 'pgt_coeff') else None)
 class Ensemble(nn.ModuleList):
    """Ensemble of models."""
--- a/ultralytics/utils/loss.py
+++ b/ultralytics/utils/loss.py
@ -9,7 +9,7 @@ from ultralytics.utils.ops import crop_mask, xywh2xyxy, xyxy2xywh
 from ultralytics.utils.tal import RotatedTaskAlignedAssigner, TaskAlignedAssigner, dist2bbox, dist2rbox, make_anchors
 from .metrics import bbox_iou, probiou
 from .tal import bbox2dist
-
+from ultralytics.utils.plaus_functs import get_dist_reg, plaus_loss_fn
 class VarifocalLoss(nn.Module):
    """
@ -725,3 +725,46 @@ class v10DetectLoss:
        one2one = preds["one2one"]
        loss_one2one = self.one2one(one2one, batch)
        return loss_one2many[0] + loss_one2one[0], torch.cat((loss_one2many[1], loss_one2one[1]))
 class v10PGTDetectLoss:
    def __init__(self, model, pgt_coeff):
        self.one2many = v8DetectionLoss(model, tal_topk=10)
        self.one2one = v8DetectionLoss(model, tal_topk=1)
        self.pgt_coeff = pgt_coeff if pgt_coeff is not None else 2.0
    def __call__(self, preds, batch, return_plaus=True, pgt_coeff=None):
        if pgt_coeff is not None:
            self.pgt_coeff = pgt_coeff
        batch['img'] = batch['img'].requires_grad_(True)
        one2many = preds["one2many"]
        loss_one2many = self.one2many(one2many, batch)
        one2one = preds["one2one"]
        loss_one2one = self.one2one(one2one, batch)
        loss = loss_one2many[0] + loss_one2one[0]
        if return_plaus:
            smask = get_dist_reg(batch['img'], batch['masks'])#.requires_grad_(True)
            try:
                grad = torch.autograd.grad(loss, batch['img'], 
                                           retain_graph=True, 
                                           create_graph=True,
                                           )[0]
            except:
                grad = torch.autograd.grad(loss, batch['img'], 
                                           retain_graph=True, 
                                           create_graph=False,
                                           )[0]        
            grad = grad ** 2
            plaus_loss = plaus_loss_fn(grad, smask, self.pgt_coeff)
            # self.loss_items = torch.cat((self.loss_items, plaus_loss.unsqueeze(0)))
            loss += plaus_loss
            return loss, torch.cat((loss_one2many[1], loss_one2one[1], plaus_loss.unsqueeze(0)))
        else:
            return loss, torch.cat((loss_one2many[1], loss_one2one[1]))
--- a/ultralytics/utils/plaus_functs.py
+++ b/ultralytics/utils/plaus_functs.py
@ -0,0 +1,844 @@
 import torch
 import numpy as np
 # from plot_functs import * 
 from .plot_functs import normalize_tensor, overlay_mask, imshow
 import math   
 import time
 import matplotlib.path as mplPath
 from matplotlib.path import Path
 # from utils.general import non_max_suppression, xyxy2xywh, scale_coords
 from ultralytics.utils.ops import crop_mask, xywh2xyxy, xyxy2xywh, non_max_suppression
 from .metrics import bbox_iou
 import torchvision.transforms as T
 def plaus_loss_fn(grad, smask, pgt_coeff, square=True):
    ################## Compute the PGT Loss ##################
    # Positive regularization term for incentivizing pixels near the target to have high attribution
    dist_attr_pos = attr_reg(grad, (1.0 - smask)) # dist_reg = seg_mask
    # Negative regularization term for incentivizing pixels far from the target to have low attribution
    dist_attr_neg = attr_reg(grad, smask)
    # Calculate plausibility regularization term
    # dist_reg = dist_attr_pos - dist_attr_neg
    dist_reg = ((dist_attr_pos / torch.mean(grad)) - (dist_attr_neg / torch.mean(grad)))
    plaus_reg = (((1.0 + dist_reg) / 2.0))
    # Calculate plausibility loss
    plaus_loss = ((1 - plaus_reg) ** 2 if square else (1 - plaus_reg)) * pgt_coeff
    return plaus_loss
 def get_dist_reg(images, seg_mask):
    seg_mask = T.Resize((images.shape[2], images.shape[3]), antialias=True)(seg_mask).to(images.device)
    seg_mask = seg_mask.to(dtype=torch.float32).unsqueeze(1).repeat(1, 3, 1, 1)
    seg_mask[seg_mask > 0] = 1.0
    smask = torch.zeros_like(seg_mask)
    sigmas = [20.0 + (i_sig * 20.0) for i_sig in range(8)]
    for k_it, sigma in enumerate(sigmas):
        # Apply Gaussian blur to the mask
        kernel_size = int(sigma + 50)
        if kernel_size % 2 == 0:
            kernel_size += 1
        seg_mask1 = T.GaussianBlur(kernel_size=(kernel_size, kernel_size), sigma=sigma)(seg_mask)
        if torch.max(seg_mask1) > 1.0:
            # seg_mask1 = (seg_mask1 - seg_mask1.min()) / (seg_mask1.max() - seg_mask1.min())
            seg_mask1 = normalize_tensor(seg_mask1)
        smask = torch.max(smask, seg_mask1)
    smask = normalize_tensor(smask)
    return smask
 def get_gradient(img, grad_wrt, norm=False, absolute=True, grayscale=False, keepmean=False):
    """
    Compute the gradient of an image with respect to a given tensor.
    Args:
        img (torch.Tensor): The input image tensor.
        grad_wrt (torch.Tensor): The tensor with respect to which the gradient is computed.
        norm (bool, optional): Whether to normalize the gradient. Defaults to True.
        absolute (bool, optional): Whether to take the absolute values of the gradients. Defaults to True.
        grayscale (bool, optional): Whether to convert the gradient to grayscale. Defaults to True.
        keepmean (bool, optional): Whether to keep the mean value of the attribution map. Defaults to False.
    Returns:
        torch.Tensor: The computed attribution map.
    """
    if (grad_wrt.shape != torch.Size([1])) and (grad_wrt.shape != torch.Size([])):
        grad_wrt_outputs = torch.ones_like(grad_wrt).clone().detach()#.requires_grad_(True)#.retains_grad_(True)
    else:
        grad_wrt_outputs = None
    attribution_map = torch.autograd.grad(grad_wrt, img, 
                                    grad_outputs=grad_wrt_outputs, 
                                    create_graph=True, # Create graph to allow for higher order derivatives but slows down computation significantly
                                    )[0]
    if absolute:
        attribution_map = torch.abs(attribution_map) # attribution_map ** 2 # Take absolute values of gradients
    if grayscale: # Convert to grayscale, saves vram and computation time for plaus_eval
        attribution_map = torch.sum(attribution_map, 1, keepdim=True)
    if norm:
        if keepmean:
            attmean = torch.mean(attribution_map)
            attmin = torch.min(attribution_map)
            attmax = torch.max(attribution_map)
        attribution_map = normalize_batch(attribution_map) # Normalize attribution maps per image in batch
        if keepmean:
            attribution_map -= attribution_map.mean()
            attribution_map += (attmean / (attmax - attmin))
    return attribution_map
 def get_gaussian(img, grad_wrt, norm=True, absolute=True, grayscale=True, keepmean=False):
    """
    Generate Gaussian noise based on the input image.
    Args:
        img (torch.Tensor): Input image.
        grad_wrt: Gradient with respect to the input image.
        norm (bool, optional): Whether to normalize the generated noise. Defaults to True.
        absolute (bool, optional): Whether to take the absolute values of the gradients. Defaults to True.
        grayscale (bool, optional): Whether to convert the noise to grayscale. Defaults to True.
        keepmean (bool, optional): Whether to keep the mean of the noise. Defaults to False.
    Returns:
        torch.Tensor: Generated Gaussian noise.
    """
    gaussian_noise = torch.randn_like(img)
    if absolute:
        gaussian_noise = torch.abs(gaussian_noise) # Take absolute values of gradients
    if grayscale: # Convert to grayscale, saves vram and computation time for plaus_eval
        gaussian_noise = torch.sum(gaussian_noise, 1, keepdim=True)
    if norm:
        if keepmean:
            attmean = torch.mean(gaussian_noise)
            attmin = torch.min(gaussian_noise)
            attmax = torch.max(gaussian_noise)
        gaussian_noise = normalize_batch(gaussian_noise) # Normalize attribution maps per image in batch
        if keepmean:
            gaussian_noise -= gaussian_noise.mean()
            gaussian_noise += (attmean / (attmax - attmin))
    return gaussian_noise
 def get_plaus_score(targets_out, attr, debug=False, corners=False, imgs=None, eps = 1e-7):
    # TODO: Remove imgs from this function and only take it as input if debug is True
    """
    Calculates the plausibility score based on the given inputs.
    Args:
        imgs (torch.Tensor): The input images.
        targets_out (torch.Tensor): The output targets.
        attr (torch.Tensor): The attribute tensor.
        debug (bool, optional): Whether to enable debug mode. Defaults to False.
    Returns:
        torch.Tensor: The plausibility score.
    """
    # # if imgs is None:
    # #     imgs = torch.zeros_like(attr)
    # # with torch.no_grad():
    # target_inds = targets_out[:, 0].int()
    # xyxy_batch = targets_out[:, 2:6]# * pre_gen_gains[out_num]
    # num_pixels = torch.tile(torch.tensor([attr.shape[2], attr.shape[3], attr.shape[2], attr.shape[3]], device=attr.device), (xyxy_batch.shape[0], 1))
    # # num_pixels = torch.tile(torch.tensor([1.0, 1.0, 1.0, 1.0], device=imgs.device), (xyxy_batch.shape[0], 1))
    # xyxy_corners = (corners_coords_batch(xyxy_batch) * num_pixels).int()
    # co = xyxy_corners
    # if corners:
    #     co = targets_out[:, 2:6].int()
    # coords_map = torch.zeros_like(attr, dtype=torch.bool)
    # # rows = np.arange(co.shape[0])
    # x1, x2 = co[:,1], co[:,3]
    # y1, y2 = co[:,0], co[:,2]
    # for ic in range(co.shape[0]): # potential for speedup here with torch indexing instead of for loop
    #     coords_map[target_inds[ic], :,x1[ic]:x2[ic],y1[ic]:y2[ic]] = True
    if torch.isnan(attr).any():
        attr = torch.nan_to_num(attr, nan=0.0)
    coords_map = get_bbox_map(targets_out, attr)
    plaus_score = ((torch.sum((attr * coords_map))) / (torch.sum(attr)))
    if debug:
        for i in range(len(coords_map)):
            coords_map3ch = torch.cat([coords_map[i][:1], coords_map[i][:1], coords_map[i][:1]], dim=0)
            test_bbox = torch.zeros_like(imgs[i])
            test_bbox[coords_map3ch] = imgs[i][coords_map3ch]
            imshow(test_bbox, save_path='figs/test_bbox')
            if imgs is None:
                imgs = torch.zeros_like(attr)
            imshow(imgs[i], save_path='figs/im0')
            imshow(attr[i], save_path='figs/attr')
    # with torch.no_grad():
    # # att_select = attr[coords_map]
    # att_select = attr * coords_map.to(torch.float32)
    # att_total = attr
    # IoU_num = torch.sum(att_select)
    # IoU_denom = torch.sum(att_total)
    # IoU_ = (IoU_num / IoU_denom)
    # plaus_score = IoU_
    # # plaus_score = ((torch.sum(attr[coords_map])) / (torch.sum(attr)))
    return plaus_score
 def get_attr_corners(targets_out, attr, debug=False, corners=False, imgs=None, eps = 1e-7):
    # TODO: Remove imgs from this function and only take it as input if debug is True
    """
    Calculates the plausibility score based on the given inputs.
    Args:
        imgs (torch.Tensor): The input images.
        targets_out (torch.Tensor): The output targets.
        attr (torch.Tensor): The attribute tensor.
        debug (bool, optional): Whether to enable debug mode. Defaults to False.
    Returns:
        torch.Tensor: The plausibility score.
    """
    # if imgs is None:
    #     imgs = torch.zeros_like(attr)
    # with torch.no_grad():
    target_inds = targets_out[:, 0].int()
    xyxy_batch = targets_out[:, 2:6]# * pre_gen_gains[out_num]
    num_pixels = torch.tile(torch.tensor([attr.shape[2], attr.shape[3], attr.shape[2], attr.shape[3]], device=attr.device), (xyxy_batch.shape[0], 1))
    # num_pixels = torch.tile(torch.tensor([1.0, 1.0, 1.0, 1.0], device=imgs.device), (xyxy_batch.shape[0], 1))
    xyxy_corners = (corners_coords_batch(xyxy_batch) * num_pixels).int()
    co = xyxy_corners
    if corners:
        co = targets_out[:, 2:6].int()
    coords_map = torch.zeros_like(attr, dtype=torch.bool)
    # rows = np.arange(co.shape[0])
    x1, x2 = co[:,1], co[:,3]
    y1, y2 = co[:,0], co[:,2]
    for ic in range(co.shape[0]): # potential for speedup here with torch indexing instead of for loop
        coords_map[target_inds[ic], :,x1[ic]:x2[ic],y1[ic]:y2[ic]] = True
    if torch.isnan(attr).any():
        attr = torch.nan_to_num(attr, nan=0.0)
    if debug:
        for i in range(len(coords_map)):
            coords_map3ch = torch.cat([coords_map[i][:1], coords_map[i][:1], coords_map[i][:1]], dim=0)
            test_bbox = torch.zeros_like(imgs[i])
            test_bbox[coords_map3ch] = imgs[i][coords_map3ch]
            imshow(test_bbox, save_path='figs/test_bbox')
            imshow(imgs[i], save_path='figs/im0')
            imshow(attr[i], save_path='figs/attr')
    # att_select = attr[coords_map]
    # with torch.no_grad():
    # IoU_num = (torch.sum(attr[coords_map]))
    # IoU_denom = torch.sum(attr)
    # IoU_ = (IoU_num / (IoU_denom))
    # IoU_ = torch.max(attr[coords_map]) - torch.max(attr[~coords_map])
    co = (xyxy_batch * num_pixels).int()
    x1 = co[:,1] + 1
    y1 = co[:,0] + 1
    # with torch.no_grad():
    attr_ = torch.sum(attr, 1, keepdim=True)
    corners_attr = None #torch.zeros(len(xyxy_batch), 4, device=attr.device)
    for ic in range(co.shape[0]):
        attr0 = attr_[target_inds[ic], :,:x1[ic],:y1[ic]]
        attr1 = attr_[target_inds[ic], :,:x1[ic],y1[ic]:]
        attr2 = attr_[target_inds[ic], :,x1[ic]:,:y1[ic]]
        attr3 = attr_[target_inds[ic], :,x1[ic]:,y1[ic]:]
        x_0, y_0 = max_indices_2d(attr0[0])
        x_1, y_1 = max_indices_2d(attr1[0])
        x_2, y_2 = max_indices_2d(attr2[0])
        x_3, y_3 = max_indices_2d(attr3[0])
        y_1 += y1[ic]
        x_2 += x1[ic]
        x_3 += x1[ic]
        y_3 += y1[ic]
        max_corners = torch.cat([torch.min(x_0, x_2).unsqueeze(0) / attr_.shape[2],
                                    torch.min(y_0, y_1).unsqueeze(0) / attr_.shape[3],
                                    torch.max(x_1, x_3).unsqueeze(0) / attr_.shape[2],
                                    torch.max(y_2, y_3).unsqueeze(0) / attr_.shape[3]])
        if corners_attr is None:
            corners_attr = max_corners
        else:
            corners_attr = torch.cat([corners_attr, max_corners], dim=0)
        # corners_attr[ic] = max_corners
        # corners_attr = attr[:,0,:4,0]
    corners_attr = corners_attr.view(-1, 4)
    # corners_attr = torch.stack(corners_attr, dim=0)
    IoU_ = bbox_iou(corners_attr.T, xyxy_batch, x1y1x2y2=False, metric='CIoU')
    plaus_score = IoU_.mean()
    return plaus_score
 def max_indices_2d(x_inp):
    # values, indices = x.reshape(x.size(0), -1).max(dim=-1)
    torch.max(x_inp,)
    index = torch.argmax(x_inp)
    x = index // x_inp.shape[1]
    y = index % x_inp.shape[1]
    # x, y = divmod(index.item(), x_inp.shape[1])
    return torch.cat([x.unsqueeze(0), y.unsqueeze(0)])
 def point_in_polygon(poly, grid):
    # t0 = time.time()
    num_points = poly.shape[0]
    j = num_points - 1
    oddNodes = torch.zeros_like(grid[..., 0], dtype=torch.bool)
    for i in range(num_points):
        cond1 = (poly[i, 1] < grid[..., 1]) & (poly[j, 1] >= grid[..., 1])
        cond2 = (poly[j, 1] < grid[..., 1]) & (poly[i, 1] >= grid[..., 1])
        cond3 = (grid[..., 0] - poly[i, 0]) < (poly[j, 0] - poly[i, 0]) * (grid[..., 1] - poly[i, 1]) / (poly[j, 1] - poly[i, 1])
        oddNodes = oddNodes ^ (cond1 | cond2) & cond3
        j = i
    # t1 = time.time()
    # print(f'point in polygon time: {t1-t0}')
    return oddNodes
 def point_in_polygon_gpu(poly, grid):
    num_points = poly.shape[0]
    i = torch.arange(num_points)
    j = (i - 1) % num_points
    # Expand dimensions
    # t0 = time.time()
    poly_expanded = poly.unsqueeze(-1).unsqueeze(-1).expand(-1, -1, grid.shape[0], grid.shape[0])
    # t1 = time.time()
    cond1 = (poly_expanded[i, 1] < grid[..., 1]) & (poly_expanded[j, 1] >= grid[..., 1])
    cond2 = (poly_expanded[j, 1] < grid[..., 1]) & (poly_expanded[i, 1] >= grid[..., 1])
    cond3 = (grid[..., 0] - poly_expanded[i, 0]) < (poly_expanded[j, 0] - poly_expanded[i, 0]) * (grid[..., 1] - poly_expanded[i, 1]) / (poly_expanded[j, 1] - poly_expanded[i, 1])
    # t2 = time.time()
    oddNodes = torch.zeros_like(grid[..., 0], dtype=torch.bool)
    cond = (cond1 | cond2) & cond3
    # t3 = time.time()
    # efficiently perform xor using gpu and avoiding cpu as much as possible
    c = []
    while len(cond) > 1: 
        if len(cond) % 2 == 1: # odd number of elements
            c.append(cond[-1])
            cond = cond[:-1]
        cond = torch.bitwise_xor(cond[:int(len(cond)/2)], cond[int(len(cond)/2):])
    for c_ in c:
        cond = torch.bitwise_xor(cond, c_)
    oddNodes = cond
    # t4 = time.time()
    # for c in cond:
    #     oddNodes = oddNodes ^ c
    # print(f'expand time: {t1-t0} | cond123 time: {t2-t1} | cond logic time: {t3-t2} |  bitwise xor time: {t4-t3}')
    # print(f'point in polygon time gpu: {t4-t0}')
    # oddNodes = oddNodes ^ (cond1 | cond2) & cond3
    return oddNodes
 def bitmap_for_polygon(poly, h, w):
    y = torch.arange(h).to(poly.device).float()
    x = torch.arange(w).to(poly.device).float()
    grid_y, grid_x = torch.meshgrid(y, x)
    grid = torch.stack((grid_x, grid_y), dim=-1)
    bitmap = point_in_polygon(poly, grid)
    return bitmap.unsqueeze(0)
 def corners_coords(center_xywh):
    center_x, center_y, w, h = center_xywh
    x = center_x - w/2
    y = center_y - h/2
    return torch.tensor([x, y, x+w, y+h])
 def corners_coords_batch(center_xywh):
    center_x, center_y = center_xywh[:,0], center_xywh[:,1]
    w, h = center_xywh[:,2], center_xywh[:,3]
    x = center_x - w/2
    y = center_y - h/2
    return torch.stack([x, y, x+w, y+h], dim=1)
 def normalize_batch(x):
    """
    Normalize a batch of tensors along each channel.
    Args:
        x (torch.Tensor): Input tensor of shape (batch_size, channels, height, width).
    Returns:
        torch.Tensor: Normalized tensor of the same shape as the input.
    """
    mins = torch.zeros((x.shape[0], *(1,)*len(x.shape[1:])), device=x.device)
    maxs = torch.zeros((x.shape[0], *(1,)*len(x.shape[1:])), device=x.device)
    for i in range(x.shape[0]):
        mins[i] = x[i].min()
        maxs[i] = x[i].max()
    x_ = (x - mins) / (maxs - mins)
    return x_
 def normalize_batch_nonan(x):
    """
    Normalize a batch of tensors along each channel.
    Args:
        x (torch.Tensor): Input tensor of shape (batch_size, channels, height, width).
    Returns:
        torch.Tensor: Normalized tensor of the same shape as the input.
    """
    mins = torch.zeros((x.shape[0], *(1,)*len(x.shape[1:])), device=x.device)
    maxs = torch.zeros((x.shape[0], *(1,)*len(x.shape[1:])), device=x.device)
    x_ = torch.zeros_like(x)
    for i in range(x.shape[0]):
        if torch.all(x[i] == 0):
            x_[i] = x[i]
        else:
            mins[i] = x[i].min()
            maxs[i] = x[i].max()
            x_[i] = (x[i] - mins[i]) / (maxs[i] - mins[i])
    return x_
 def get_detections(model_clone, img):
    """
    Get detections from a model given an input image and targets.
    Args:
        model (nn.Module): The model to use for detection.
        img (torch.Tensor): The input image tensor.
    Returns:
        torch.Tensor: The detected bounding boxes.
    """
    model_clone.eval() # Set model to evaluation mode
    # Run inference
    with torch.no_grad():
        det_out, out = model_clone(img)
    # model_.train()
    del img 
    return det_out, out
 def get_labels(det_out, imgs, targets, opt):
    ###################### Get predicted labels ###################### 
    nb, _, height, width = imgs.shape  # batch size, channels, height, width 
    targets_ = targets.clone() 
    targets_[:, 2:] = targets_[:, 2:] * torch.Tensor([width, height, width, height]).to(imgs.device)  # to pixels
    lb = [targets_[targets_[:, 0] == i, 1:] for i in range(nb)] if opt.save_hybrid else []  # for autolabelling
    o = non_max_suppression(det_out, conf_thres=0.001, iou_thres=0.6, labels=lb, multi_label=True)
    pred_labels = [] 
    for si, pred in enumerate(o):
        labels = targets_[targets_[:, 0] == si, 1:]
        nl = len(labels) 
        predn = pred.clone()
        # Get the indices that sort the values in column 5 in ascending order
        sort_indices = torch.argsort(pred[:, 4], dim=0, descending=True)
        # Apply the sorting indices to the tensor
        sorted_pred = predn[sort_indices]
        # Remove predictions with less than 0.1 confidence
        n_conf = int(torch.sum(sorted_pred[:,4]>0.1)) + 1
        sorted_pred = sorted_pred[:n_conf]
        new_col = torch.ones((sorted_pred.shape[0], 1), device=imgs.device) * si
        preds = torch.cat((new_col, sorted_pred[:, [5, 0, 1, 2, 3]]), dim=1)
        preds[:, 2:] = xyxy2xywh(preds[:, 2:])  # xywh
        gn = torch.tensor([width, height])[[1, 0, 1, 0]]  # normalization gain whwh
        preds[:, 2:] /= gn.to(imgs.device)  # from pixels
        pred_labels.append(preds)
    pred_labels = torch.cat(pred_labels, 0).to(imgs.device)
    return pred_labels
    ##################################################################
 from torchvision.utils import make_grid
 def get_center_coords(attr):
    img_tensor = img_tensor / img_tensor.max()
    # Define a brightness threshold
    threshold = 0.95
    # Create a binary mask of the bright pixels
    mask = img_tensor > threshold
    # Get the coordinates of the bright pixels
    y_coords, x_coords = torch.where(mask)
    # Calculate the centroid of the bright pixels
    centroid_x = x_coords.float().mean().item()
    centroid_y = y_coords.float().mean().item()
    print(f'The central bright point is at ({centroid_x}, {centroid_y})')
    return
 def get_distance_grids(attr, targets, imgs=None, focus_coeff=0.5, debug=False):
    """
    Compute the distance grids from each pixel to the target coordinates.
    Args:
        attr (torch.Tensor): Attribution maps.
        targets (torch.Tensor): Target coordinates.
        focus_coeff (float, optional): Focus coefficient, smaller means more focused. Defaults to 0.5.
        debug (bool, optional): Whether to visualize debug information. Defaults to False.
    Returns:
        torch.Tensor: Distance grids.
    """
    # Assign the height and width of the input tensor to variables
    height, width = attr.shape[-1], attr.shape[-2]
    # attr = torch.abs(attr) # Take absolute values of gradients
    # attr = normalize_batch(attr) # Normalize attribution maps per image in batch
    # Create a grid of indices
    xx, yy = torch.stack(torch.meshgrid(torch.arange(height), torch.arange(width))).to(attr.device)
    idx_grid = torch.stack((xx, yy), dim=-1).float()
    # Expand the grid to match the batch size
    idx_batch_grid = idx_grid.expand(attr.shape[0], -1, -1, -1)
    # Initialize a list to store the distance grids
    dist_grids_ = [[]] * attr.shape[0]
    # Loop over batches
    for j in range(attr.shape[0]):
        # Get the rows where the first column is the current unique value
        rows = targets[targets[:, 0] == j]
        if len(rows) != 0: 
            # Create a tensor for the target coordinates
            xy = rows[:,2:4] # y, x
            # Flip the x and y coordinates and scale them to the image size
            xy[:, 0], xy[:, 1] = xy[:, 1] * width, xy[:, 0] * height # y, x to x, y
            xy_center = xy.unsqueeze(1).unsqueeze(1)#.requires_grad_(True) 
            # Compute the Euclidean distance from each pixel to the target coordinates
            dists = torch.norm(idx_batch_grid[j].expand(len(xy_center), -1, -1, -1) - xy_center, dim=-1)
            # Pick the closest distance to any target for each pixel 
            dist_grid_ = torch.min(dists, dim=0)[0].unsqueeze(0) 
            dist_grid = torch.cat([dist_grid_, dist_grid_, dist_grid_], dim=0) if attr.shape[1] == 3 else dist_grid_
        else:
            # Set grid to zero if no targets are present
            dist_grid = torch.zeros_like(attr[j])
        dist_grids_[j] = dist_grid
    # Convert the list of distance grids to a tensor for faster computation
    dist_grids = normalize_batch(torch.stack(dist_grids_)) ** focus_coeff
    if torch.isnan(dist_grids).any():
        dist_grids = torch.nan_to_num(dist_grids, nan=0.0)
    if debug:
        for i in range(len(dist_grids)):
            if ((i % 8) == 0):
                grid_show = torch.cat([dist_grids[i][:1], dist_grids[i][:1], dist_grids[i][:1]], dim=0)
                imshow(grid_show, save_path='figs/dist_grids')
                if imgs is None:
                    imgs = torch.zeros_like(attr)
                imshow(imgs[i], save_path='figs/im0')
                img_overlay = (overlay_mask(imgs[i], dist_grids[i][0], alpha = 0.75))
                imshow(img_overlay, save_path='figs/dist_grid_overlay')
                weighted_attr = (dist_grids[i] * attr[i])
                imshow(weighted_attr, save_path='figs/weighted_attr')
                imshow(attr[i], save_path='figs/attr')
    return dist_grids
 def attr_reg(attribution_map, distance_map):
    # dist_attr = distance_map * attribution_map 
    dist_attr = torch.mean(distance_map * attribution_map)#, dim=(1, 2, 3)) 
    # del distance_map, attribution_map
    return dist_attr
 def get_bbox_map(targets_out, attr, corners=False):
    target_inds = targets_out[:, 0].int()
    xyxy_batch = targets_out[:, 2:6]# * pre_gen_gains[out_num]
    num_pixels = torch.tile(torch.tensor([attr.shape[2], attr.shape[3], attr.shape[2], attr.shape[3]], device=attr.device), (xyxy_batch.shape[0], 1))
    # num_pixels = torch.tile(torch.tensor([1.0, 1.0, 1.0, 1.0], device=imgs.device), (xyxy_batch.shape[0], 1))
    xyxy_corners = (corners_coords_batch(xyxy_batch) * num_pixels).int()
    co = xyxy_corners
    if corners:
        co = targets_out[:, 2:6].int()
    coords_map = torch.zeros_like(attr, dtype=torch.bool)
    # rows = np.arange(co.shape[0])
    x1, x2 = co[:,1], co[:,3]
    y1, y2 = co[:,0], co[:,2]
    for ic in range(co.shape[0]): # potential for speedup here with torch indexing instead of for loop
        coords_map[target_inds[ic], :,x1[ic]:x2[ic],y1[ic]:y2[ic]] = True
    bbox_map = coords_map.to(torch.float32)
    return bbox_map
 ######################################## BCE #######################################
 def get_plaus_loss(targets, attribution_map, opt, imgs=None, debug=False, only_loss=False):
    # if imgs is None:
    #     imgs = torch.zeros_like(attribution_map)
    # Calculate Plausibility IoU with attribution maps
    # attribution_map.retains_grad = True
    if not only_loss:
        plaus_score = get_plaus_score(targets_out = targets, attr = attribution_map.clone().detach().requires_grad_(True), imgs = imgs)
    else:
        plaus_score = torch.tensor(0.0)
    # attribution_map = normalize_batch(attribution_map) # Normalize attribution maps per image in batch
    # Calculate distance regularization
    distance_map = get_distance_grids(attribution_map, targets, imgs, opt.focus_coeff)
    # distance_map = torch.ones_like(attribution_map)
    if opt.dist_x_bbox:
        bbox_map = get_bbox_map(targets, attribution_map).to(torch.bool)
        distance_map[bbox_map] = 0.0
        # distance_map = distance_map * (1 - bbox_map)
    # Positive regularization term for incentivizing pixels near the target to have high attribution
    dist_attr_pos = attr_reg(attribution_map, (1.0 - distance_map))
    # Negative regularization term for incentivizing pixels far from the target to have low attribution
    dist_attr_neg = attr_reg(attribution_map, distance_map)
    # Calculate plausibility regularization term
    # dist_reg = dist_attr_pos - dist_attr_neg
    dist_reg = ((dist_attr_pos / torch.mean(attribution_map)) - (dist_attr_neg / torch.mean(attribution_map)))
    # dist_reg = torch.mean((dist_attr_pos / torch.mean(attribution_map, dim=(1, 2, 3))) - (dist_attr_neg / torch.mean(attribution_map, dim=(1, 2, 3)))) 
    # dist_reg = (torch.mean(torch.exp((dist_attr_pos / torch.mean(attribution_map, dim=(1, 2, 3)))) + \
    #                             torch.exp(1 - (dist_attr_neg / torch.mean(attribution_map, dim=(1, 2, 3)))))) \
    #                             / 2.5
    if opt.bbox_coeff != 0.0:
        bbox_map = get_bbox_map(targets, attribution_map)
        attr_bbox_pos = attr_reg(attribution_map, bbox_map)
        attr_bbox_neg = attr_reg(attribution_map, (1.0 - bbox_map))
        bbox_reg = attr_bbox_pos - attr_bbox_neg
        # bbox_reg = (attr_bbox_pos / torch.mean(attribution_map)) - (attr_bbox_neg / torch.mean(attribution_map))
    else:
        bbox_reg = 0.0
    bbox_map = get_bbox_map(targets, attribution_map)
    plaus_score = ((torch.sum((attribution_map * bbox_map))) / (torch.sum(attribution_map)))
    # iou_loss = (1.0 - plaus_score)
    if not opt.dist_reg_only:
        dist_reg_loss = (((1.0 + dist_reg) / 2.0))
        plaus_reg = (plaus_score * opt.iou_coeff) + \
                    (((dist_reg_loss * opt.dist_coeff) + \
                      (bbox_reg * opt.bbox_coeff))\
                    # ((((((1.0 + dist_reg) / 2.0) - 1.0) * opt.dist_coeff) + ((((1.0 + bbox_reg) / 2.0) - 1.0) * opt.bbox_coeff))\
                    # / (plaus_score) \
                    )
    else:
        plaus_reg = (((1.0 + dist_reg) / 2.0))
        # plaus_reg = dist_reg 
    # Calculate plausibility loss
    plaus_loss = (1 - plaus_reg) * opt.pgt_coeff
    # plaus_loss = (plaus_reg) * opt.pgt_coeff
    if only_loss:
        return plaus_loss
    if not debug:
        return plaus_loss, (plaus_score, dist_reg, plaus_reg,)
    else:
        return plaus_loss, (plaus_score, dist_reg, plaus_reg,), distance_map
 ####################################################################################
 #### ALL FUNCTIONS BELOW ARE DEPRECIATED AND WILL BE REMOVED IN FUTURE VERSIONS ####
 ####################################################################################
 def generate_vanilla_grad(model, input_tensor, loss_func = None, 
                          targets_list=None, targets=None, metric=None, out_num = 1, 
                          n_max_labels=3, norm=True, abs=True, grayscale=True, 
                          class_specific_attr = True, device='cpu'):    
    """
    Generate vanilla gradients for the given model and input tensor.
    Args:
        model (nn.Module): The model to generate gradients for.
        input_tensor (torch.Tensor): The input tensor for which gradients are computed.
        loss_func (callable, optional): The loss function to compute gradients with respect to. Defaults to None.
        targets_list (list, optional): The list of target tensors. Defaults to None.
        metric (callable, optional): The metric function to evaluate the loss. Defaults to None.
        out_num (int, optional): The index of the output tensor to compute gradients with respect to. Defaults to 1.
        n_max_labels (int, optional): The maximum number of labels to consider. Defaults to 3.
        norm (bool, optional): Whether to normalize the attribution map. Defaults to True.
        abs (bool, optional): Whether to take the absolute values of gradients. Defaults to True.
        grayscale (bool, optional): Whether to convert the attribution map to grayscale. Defaults to True.
        class_specific_attr (bool, optional): Whether to compute class-specific attribution maps. Defaults to True.
        device (str, optional): The device to use for computation. Defaults to 'cpu'.
    Returns:
        torch.Tensor: The generated vanilla gradients.
    """
    # Set model.train() at the beginning and revert back to original mode (model.eval() or model.train()) at the end
    train_mode = model.training
    if not train_mode:
        model.train()
    input_tensor.requires_grad = True # Set requires_grad attribute of tensor. Important for computing gradients
    model.zero_grad() # Zero gradients
    inpt = input_tensor
    # Forward pass
    train_out = model(inpt) # training outputs (no inference outputs in train mode)
    # train_out[1] = torch.Size([4, 3, 80, 80, 7]) HxWx(#anchorxC) cls (class probabilities)
    # train_out[0] = torch.Size([4, 3, 160, 160, 7]) HxWx(#anchorx4) box or reg (location and scaling)
    # train_out[2] = torch.Size([4, 3, 40, 40, 7]) HxWx(#anchorx1) obj (objectness score or confidence)
    if class_specific_attr:
        n_attr_list, index_classes = [], []
        for i in range(len(input_tensor)):
            if len(targets_list[i]) > n_max_labels:
                targets_list[i] = targets_list[i][:n_max_labels]
            if targets_list[i].numel() != 0:
                # unique_classes = torch.unique(targets_list[i][:,1])
                class_numbers = targets_list[i][:,1]
                index_classes.append([[0, 1, 2, 3, 4, int(uc)] for uc in class_numbers])
                num_attrs = len(targets_list[i])
                # index_classes.append([0, 1, 2, 3, 4] + [int(uc + 5) for uc in unique_classes])
                # num_attrs = 1 #len(unique_classes)# if loss_func else len(targets_list[i])
                n_attr_list.append(num_attrs)
            else:
                index_classes.append([0, 1, 2, 3, 4])
                n_attr_list.append(0)
        targets_list_filled = [targ.clone().detach() for targ in targets_list]
        labels_len = [len(targets_list[ih]) for ih in range(len(targets_list))]
        max_labels = np.max(labels_len)
        max_index = np.argmax(labels_len)
        for i in range(len(targets_list)):
            # targets_list_filled[i] = targets_list[i]
            if len(targets_list_filled[i]) < max_labels:
                tlist = [targets_list_filled[i]] * math.ceil(max_labels / len(targets_list_filled[i]))
                targets_list_filled[i] = torch.cat(tlist)[:max_labels].unsqueeze(0)
            else:
                targets_list_filled[i] = targets_list_filled[i].unsqueeze(0)
        for i in range(len(targets_list_filled)-1,-1,-1):
            if targets_list_filled[i].numel() == 0:
                targets_list_filled.pop(i)
        targets_list_filled = torch.cat(targets_list_filled)
    n_img_attrs = len(input_tensor) if class_specific_attr else 1
    n_img_attrs = 1 if loss_func else n_img_attrs
    attrs_batch = []
    for i_batch in range(n_img_attrs):
        if loss_func and class_specific_attr:
            i_batch = max_index
        # inpt = input_tensor[i_batch].unsqueeze(0)
        # ##################################################################
        # model.zero_grad() # Zero gradients
        # train_out = model(inpt)  # training outputs (no inference outputs in train mode)
        # ##################################################################
        n_label_attrs = n_attr_list[i_batch] if class_specific_attr else 1
        n_label_attrs = 1 if not class_specific_attr else n_label_attrs
        attrs_img = []
        for i_attr in range(n_label_attrs):
            if loss_func is None:
                grad_wrt = train_out[out_num]
                if class_specific_attr:
                    grad_wrt = train_out[out_num][:,:,:,:,index_classes[i_batch][i_attr]]
                grad_wrt_outputs = torch.ones_like(grad_wrt)
            else:
                # if class_specific_attr:
                #     targets = targets_list[:][i_attr]
                # n_targets = len(targets_list[i_batch])
                if class_specific_attr:
                    target_indiv = targets_list_filled[:,i_attr] # batch image input
                else:
                    target_indiv = targets
                # target_indiv = targets_list[i_batch][i_attr].unsqueeze(0) # single image input
                # target_indiv[:,0] = 0 # this indicates the batch index of the target, should be 0 since we are only doing one image at a time
                try:
                    loss, loss_items = loss_func(train_out, target_indiv, inpt, metric=metric)  # loss scaled by batch_size
                except:
                    target_indiv = target_indiv.to(device)
                    inpt = inpt.to(device)
                    for tro in train_out:
                        tro = tro.to(device)
                    print("Error in loss function, trying again with device specified")
                    loss, loss_items = loss_func(train_out, target_indiv, inpt, metric=metric)
                grad_wrt = loss
                grad_wrt_outputs = None
            model.zero_grad() # Zero gradients
            gradients = torch.autograd.grad(grad_wrt, inpt, 
                                                grad_outputs=grad_wrt_outputs, 
                                                retain_graph=True, 
                                                # create_graph=True, # Create graph to allow for higher order derivatives but slows down computation significantly
                                                )
            # Convert gradients to numpy array and back to ensure full separation from graph
            # attribution_map = torch.tensor(torch.sum(gradients[0], 1, keepdim=True).clone().detach().cpu().numpy())
            attribution_map = gradients[0]#.clone().detach() # without converting to numpy
            if grayscale: # Convert to grayscale, saves vram and computation time for plaus_eval
                attribution_map = torch.sum(attribution_map, 1, keepdim=True)
            if abs:
                attribution_map = torch.abs(attribution_map) # Take absolute values of gradients
            if norm:
                attribution_map = normalize_batch(attribution_map) # Normalize attribution maps per image in batch
            attrs_img.append(attribution_map)
        if len(attrs_img) == 0:
            attrs_batch.append((torch.zeros_like(inpt).unsqueeze(0)).to(device))
        else:
            attrs_batch.append(torch.stack(attrs_img).to(device))
    # out_attr = torch.tensor(attribution_map).unsqueeze(0).to(device) if ((loss_func) or (not class_specific_attr)) else torch.stack(attrs_batch).to(device)
    # out_attr = [attrs_batch[0]] * len(input_tensor) if ((loss_func) or (not class_specific_attr)) else attrs_batch
    out_attr = attrs_batch
    # Set model back to original mode
    if not train_mode:
        model.eval()
    return out_attr
 class RVNonLinearFunc(torch.nn.Module):
    """
    Custom Bayesian ReLU activation function for random variables.
    Attributes:
        None
    """
    def __init__(self, func):
        super(RVNonLinearFunc, self).__init__()
        self.func = func
    def forward(self, mu_in, Sigma_in):
        """
        Forward pass of the Bayesian ReLU activation function.
        Args:
            mu_in (torch.Tensor): A tensor of shape (batch_size, input_size),
                representing the mean input to the ReLU activation function.
            Sigma_in (torch.Tensor): A tensor of shape (batch_size, input_size, input_size),
                representing the covariance input to the ReLU activation function.
        Returns:
            Tuple[torch.Tensor, torch.Tensor]: A tuple of two tensors,
                including the mean of the output and the covariance of the output.
        """
        # Collect stats
        batch_size = mu_in.size(0)
        # Mean
        mu_out = self.func(mu_in)
        # Compute the derivative of the ReLU activation function with respect to the input mean
        gradi = torch.autograd.grad(mu_out, mu_in, grad_outputs=torch.ones_like(mu_out), create_graph=True)[0].view(batch_size,-1)
        # add an extra dimension to gradi at position 2 and 1
        grad1 = gradi.unsqueeze(dim=2)
        grad2 = gradi.unsqueeze(dim=1)
        # compute the outer product of grad1 and grad2
        outer_product = torch.bmm(grad1, grad2)
        # element-wise multiply Sigma_in with the outer product
        # and return the result
        Sigma_out = torch.mul(Sigma_in, outer_product)
        return mu_out, Sigma_out
--- a/ultralytics/utils/plot_functs.py
+++ b/ultralytics/utils/plot_functs.py
@ -0,0 +1,154 @@
 import numpy as np
 import matplotlib.pyplot as plt
 import torch
 import torch.nn as nn
 class Subplots:
    def __init__(self, figsize = (40, 5)):
        self.fig = plt.figure(figsize=figsize)
    def plot_img_list(self, img_list, savedir='figs/test', 
                    nrows = 1, rownum = 0, 
                    hold = False, coltitles=[], rowtitle=''):
        for i, img in enumerate(img_list):
            try:
                npimg = img.clone().detach().cpu().numpy()
            except:
                npimg = img
            tpimg = np.transpose(npimg, (1, 2, 0))
            lenrow = int((len(img_list)))
            ax = self.fig.add_subplot(nrows, lenrow, i+1+(rownum*lenrow))
            if len(coltitles) > i:
                ax.set_title(coltitles[i])
            if i == 0:
                ax.annotate(rowtitle, xy=((-0.06 * len(rowtitle)), 0.4),# xytext=(-ax.yaxis.labelpad - pad, 0),
                xycoords='axes fraction', textcoords='offset points',
                size='large', ha='center', va='baseline')
                # ax.set_ylabel(rowtitle, rotation=90)
            ax.imshow(tpimg)
            ax.axis('off')
        if not hold:
            self.fig.tight_layout()
            plt.savefig(f'{savedir}.png')
            plt.clf()
            plt.close('all')
 def VisualizeNumpyImageGrayscale(image_3d):
    r"""Returns a 3D tensor as a grayscale normalized between 0 and 1 2D tensor.
    """
    vmin = np.min(image_3d)
    image_2d = image_3d - vmin
    vmax = np.max(image_2d)
    return (image_2d / vmax)
 def normalize_numpy(image_3d):
    r"""Returns a 3D tensor as a grayscale normalized between 0 and 1 2D tensor.
    """
    vmin = np.min(image_3d)
    image_2d = image_3d - vmin
    vmax = np.max(image_2d)
    return (image_2d / vmax)
 # def normalize_tensor(image_3d): 
 #     r"""Returns a 3D tensor as a grayscale normalized between 0 and 1 2D tensor.
 #     """
 #     vmin = torch.min(image_3d)
 #     image_2d = image_3d - vmin
 #     vmax = torch.max(image_2d)
 #     return (image_2d / vmax)
 def normalize_tensor(image_3d): 
    r"""Returns a 3D tensor as a grayscale normalized between 0 and 1 2D tensor.
    """
    image_2d = (image_3d - torch.min(image_3d))
    return (image_2d / torch.max(image_2d))
 def format_img(img_):
    np_img = img_.numpy()
    tp_img = np.transpose(np_img, (1, 2, 0))
    return tp_img
 def imshow(img, save_path=None):
    try:
        npimg = img.clone().detach().cpu().numpy()
    except:
        npimg = img
    tpimg = np.transpose(npimg, (1, 2, 0))
    plt.imshow(tpimg)
    # plt.axis('off')
    plt.tight_layout()
    if save_path != None:
        plt.savefig(str(str(save_path) + ".png"))
    #plt.show()a
 def imshow_img(img, imsave_path):
    # works for tensors and numpy arrays
    try:
        npimg = VisualizeNumpyImageGrayscale(img.numpy())
    except:
        npimg = VisualizeNumpyImageGrayscale(img)
    npimg = np.transpose(npimg, (2, 0, 1))
    imshow(npimg, save_path=imsave_path)
    print("Saving image as ", imsave_path)
 def returnGrad(img, labels, model, compute_loss, loss_metric, augment=None, device = 'cpu'):
    model.train()
    model.to(device)
    img = img.to(device)
    img.requires_grad_(True)
    labels.to(device).requires_grad_(True)
    model.requires_grad_(True)
    cuda = device.type != 'cpu'
    scaler = amp.GradScaler(enabled=cuda)
    pred = model(img)
    # out, train_out = model(img, augment=augment)  # inference and training outputs
    loss, loss_items = compute_loss(pred, labels, metric=loss_metric)#[1][:3]  # box, obj, cls
    # loss = criterion(pred, torch.tensor([int(torch.max(pred[0], 0)[1])]).to(device))
    # loss = torch.sum(loss).requires_grad_(True)
    with torch.autograd.set_detect_anomaly(True):
        scaler.scale(loss).backward(inputs=img)
    # loss.backward()
 #    S_c = torch.max(pred[0].data, 0)[0]
    Sc_dx = img.grad
    model.eval()
    Sc_dx = torch.tensor(Sc_dx, dtype=torch.float32)
    return Sc_dx
 def calculate_snr(img, attr, dB=True):
    try:
        img_np = img.detach().cpu().numpy()
        attr_np = attr.detach().cpu().numpy()
    except:
        img_np = img
        attr_np = attr
    # Calculate the signal power
    signal_power = np.mean(img_np**2)
    # Calculate the noise power
    noise_power = np.mean(attr_np**2)
    if dB == True:
        # Calculate SNR in dB
        snr = 10 * np.log10(signal_power / noise_power)
    else:
        # Calculate SNR
        snr = signal_power / noise_power
    return snr
 def overlay_mask(img, mask, colormap: str = "jet", alpha: float = 0.7):
    cmap = plt.get_cmap(colormap)
    npmask = np.array(mask.clone().detach().cpu().squeeze(0))
    # cmpmask = ((255 * cmap(npmask)[:, :, :3]).astype(np.uint8)).transpose((2, 0, 1))
    cmpmask = (cmap(npmask)[:, :, :3]).transpose((2, 0, 1))
    overlayed_imgnp = ((alpha * (np.asarray(img.clone().detach().cpu())) + (1 - alpha) * cmpmask))
    overlayed_tensor = torch.tensor(overlayed_imgnp, device=img.device)
    return overlayed_tensor
--- a/ultralytics/utils/plotting.py
+++ b/ultralytics/utils/plotting.py
@ -717,7 +717,7 @@ def plot_images(
 ):
    """Plot image grid with labels."""
    if isinstance(images, torch.Tensor):
-        images = images.cpu().float().numpy()
+        images = images.detach().cpu().float().numpy()
    if isinstance(cls, torch.Tensor):
        cls = cls.cpu().numpy()
    if isinstance(bboxes, torch.Tensor):
@ -837,6 +837,144 @@ def plot_images(
    if on_plot:
        on_plot(fname)
@threaded
 def plot_gradients(
    images,
    batch_idx,
    cls,
    bboxes=np.zeros(0, dtype=np.float32),
    confs=None,
    masks=np.zeros(0, dtype=np.uint8),
    kpts=np.zeros((0, 51), dtype=np.float32),
    paths=None,
    fname="images.jpg",
    names=None,
    on_plot=None,
    max_subplots=16,
    save=True,
    conf_thres=0.25,
 ):
    """Plot image grid with labels."""
    if isinstance(images, torch.Tensor):
        images = images.detach().cpu().float().numpy()
    if isinstance(cls, torch.Tensor):
        cls = cls.cpu().numpy()
    if isinstance(bboxes, torch.Tensor):
        bboxes = bboxes.cpu().numpy()
    if isinstance(masks, torch.Tensor):
        masks = masks.cpu().numpy().astype(int)
    if isinstance(kpts, torch.Tensor):
        kpts = kpts.cpu().numpy()
    if isinstance(batch_idx, torch.Tensor):
        batch_idx = batch_idx.cpu().numpy()
    max_size = 1920  # max image size
    bs, _, h, w = images.shape  # batch size, _, height, width
    bs = min(bs, max_subplots)  # limit plot images
    ns = np.ceil(bs**0.5)  # number of subplots (square)
    if np.max(images[0]) <= 1:
        images *= 255  # de-normalise (optional)
    # Build Image
    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
    for i in range(bs):
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
        mosaic[y : y + h, x : x + w, :] = images[i].transpose(1, 2, 0)
    # Resize (optional)
    scale = max_size / ns / max(h, w)
    if scale < 1:
        h = math.ceil(scale * h)
        w = math.ceil(scale * w)
        mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h)))
    # Annotate
    fs = int((h + w) * ns * 0.01)  # font size
    annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
    for i in range(bs):
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
        annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2)  # borders
        if paths:
            annotator.text((x + 5, y + 5), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220))  # filenames
        if len(cls) > 0:
            idx = batch_idx == i
            classes = cls[idx].astype("int")
            labels = confs is None
            if len(bboxes):
                boxes = bboxes[idx]
                conf = confs[idx] if confs is not None else None  # check for confidence presence (label vs pred)
                is_obb = boxes.shape[-1] == 5  # xywhr
                boxes = ops.xywhr2xyxyxyxy(boxes) if is_obb else ops.xywh2xyxy(boxes)
                if len(boxes):
                    if boxes[:, :4].max() <= 1.1:  # if normalized with tolerance 0.1
                        boxes[..., 0::2] *= w  # scale to pixels
                        boxes[..., 1::2] *= h
                    elif scale < 1:  # absolute coords need scale if image scales
                        boxes[..., :4] *= scale
                boxes[..., 0::2] += x
                boxes[..., 1::2] += y
                for j, box in enumerate(boxes.astype(np.int64).tolist()):
                    c = classes[j]
                    color = colors(c)
                    c = names.get(c, c) if names else c
                    if labels or conf[j] > conf_thres:
                        label = f"{c}" if labels else f"{c} {conf[j]:.1f}"
                        annotator.box_label(box, label, color=color, rotated=is_obb)
            elif len(classes):
                for c in classes:
                    color = colors(c)
                    c = names.get(c, c) if names else c
                    annotator.text((x, y), f"{c}", txt_color=color, box_style=True)
            # Plot keypoints
            if len(kpts):
                kpts_ = kpts[idx].copy()
                if len(kpts_):
                    if kpts_[..., 0].max() <= 1.01 or kpts_[..., 1].max() <= 1.01:  # if normalized with tolerance .01
                        kpts_[..., 0] *= w  # scale to pixels
                        kpts_[..., 1] *= h
                    elif scale < 1:  # absolute coords need scale if image scales
                        kpts_ *= scale
                kpts_[..., 0] += x
                kpts_[..., 1] += y
                for j in range(len(kpts_)):
                    if labels or conf[j] > conf_thres:
                        annotator.kpts(kpts_[j])
            # Plot masks
            if len(masks):
                if idx.shape[0] == masks.shape[0]:  # overlap_masks=False
                    image_masks = masks[idx]
                else:  # overlap_masks=True
                    image_masks = masks[[i]]  # (1, 640, 640)
                    nl = idx.sum()
                    index = np.arange(nl).reshape((nl, 1, 1)) + 1
                    image_masks = np.repeat(image_masks, nl, axis=0)
                    image_masks = np.where(image_masks == index, 1.0, 0.0)
                im = np.asarray(annotator.im).copy()
                for j in range(len(image_masks)):
                    if labels or conf[j] > conf_thres:
                        color = colors(classes[j])
                        mh, mw = image_masks[j].shape
                        if mh != h or mw != w:
                            mask = image_masks[j].astype(np.uint8)
                            mask = cv2.resize(mask, (w, h))
                            mask = mask.astype(bool)
                        else:
                            mask = image_masks[j].astype(bool)
                        with contextlib.suppress(Exception):
                            im[y : y + h, x : x + w, :][mask] = (
                                im[y : y + h, x : x + w, :][mask] * 0.4 + np.array(color) * 0.6
                            )
                annotator.fromarray(im)
    if not save:
        return np.asarray(annotator.im)
    annotator.im.save(fname)  # save
    if on_plot:
        on_plot(fname)
@plt_settings()
 def plot_results(file="path/to/results.csv", dir="", segment=False, pose=False, classify=False, on_plot=None):
Author	SHA1	Message	Date
Ian E. Nielsen	37f38bc9a2	Merge 1ef6cbcec52d28b0a9fe67276115d4ff45ddffed into 453c6e38a51e9d1d5a2aa5fb7f1014a711913397	2025-03-14 00:35:45 -04:00
Wang Ao	453c6e38a5	Update README.md	2025-03-14 10:53:57 +08:00
nielseni6	1ef6cbcec5	added pgt_coeff to argparser	2024-11-01 11:19:22 -04:00
nielseni6	21e660fde9	Fixed issues with pgt training, adding the pgt loss to the YOLO loss function. PGT loss can now be tracked during training.	2024-11-01 11:04:12 -04:00
nielseni6	44c912647e	added working validation	2024-10-23 20:33:28 -04:00
nielseni6	9524af3bfe	Model loading using YOLOv10PGT added, and pgt_coeff is now a cfg parameter	2024-10-23 20:28:51 -04:00
nielseni6	943a333fae	Merge branch 'PGT_and_attribution' of https://github.com/nielseni6/yolov10 into PGT_and_attribution	2024-10-22 23:41:53 -04:00
nielseni6	0efbfe7f4d	updated grad to create_graph during training, enabling better pgt_loss optimization	2024-10-22 23:40:46 -04:00
nielseni6	2ccec65edc	fixed val and saving weights	2024-10-21 13:56:29 -04:00
nielseni6	411157c18a	fixed yaml default	2024-10-21 13:05:25 -04:00
nielseni6	5953d3c9c6	changed save path to not override previous training	2024-10-21 13:04:57 -04:00
nielseni6	3a449d5a6c	Fixed PGT by including it in the loss function	2024-10-21 12:20:24 -04:00
nielseni6	38fa59edf2	PGT Training now functioning. Run run_pgt_train.py to train the model.	2024-10-16 19:26:41 -04:00
nielseni6	2a95a652bd	fixed run_val	2024-10-15 11:53:27 -04:00
nielseni6	b02bf58de6	added attribution visualization with run_attribution.py	2024-10-15 11:51:37 -04:00