Update docs (#71)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-07-19 05:55:39 +08:00 · 2022-12-12 09:21:00 +05:30 · 2022-12-12 09:21:00 +05:30 · d85b44f259
commit d85b44f259
parent e629335f6d
11 changed files with 286 additions and 35 deletions
--- a/docs/conf.md
+++ b/docs/conf.md
@ -0,0 +1,109 @@
+## Ultralytics YOLO
+
+Default training settings and hyperparameters for medium-augmentation COCO training
+
+### Setting the operation type
+???+ note "Operation"
+
+    | Key    | Value    | Description                                                                                                                                                                                 |
+    |--------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+    | task  | `detect` | Set the task via CLI. See Tasks for all supported tasks like - `detect`, `segment`, `classify`.<br> - `init` is a special case that creates a copy of default.yaml configs to the current working dir |
+    | mode  | `train`  | Set the mode via CLI. It can be `train`, `val`, `predict`   |
+    | resume  | `False`  | Resume last given task when set to `True`. <br> Resume from a given checkpoint if `model.pt` is passed  |
+    | model | null     | Set the model. Format can differ for task type. Supports `model_name`, `model.yaml` & `model.pt`                                                                                            |
+    | data  | null     | Set the data. Format can differ for task type. Supports `data.yaml`, `data_folder`, `dataset_name`|
+
+### Training settings
+??? note "Train"
+    | Key              | Value  | Description                                                                     |
+    |------------------|--------|---------------------------------------------------------------------------------|
+    | device          | ''      | cuda device, i.e. 0 or 0,1,2,3 or cpu. `''` selects available cuda 0 device    |
+    | epochs          | 100    | Number of epochs to train                                                       |
+    | workers         | 8      | Number of cpu workers used per process. Scales automatically with DDP           |
+    | batch_size      | 16     | Batch size of the dataloader                                                    |
+    | img_size        | 640    | Image size of data in dataloader                                                |
+    | optimizer       | SGD    | Optimizer used. Supported optimizers are: `Adam`, `SGD`, `RMSProp`              |
+    | single_cls      | False  | Train on multi-class data as single-class                                       |
+    | image_weights   | False  | Use weighted image selection for training                                       |
+    | rect            | False  | Enable rectangular training                                                     |
+    | cos_lr          | False  | Use cosine LR scheduler                        |
+    | lr0             | 0.01   | Initial learning rate                          |
+    | lrf             | 0.01   | Final OneCycleLR learning rate                 |
+    | momentum        | 0.937  | Use as `momentum` for SGD and `beta1` for Adam |
+    | weight_decay    | 0.0005 | Optimizer weight decay                         |
+    | warmup_epochs   | 3.0    | Warmup epochs. Fractions are ok.               |
+    | warmup_momentum | 0.8    | Warmup initial momentum                        |
+    | warmup_bias_lr  | 0.1    | Warmup initial bias lr                         |
+    | box             | 0.05   | Box loss gain                                  |
+    | cls             | 0.5    | cls loss gain                                  |
+    | cls_pw          | 1.0    | cls BCELoss positive_weight                    |
+    | obj             | 1.0    | obj loss gain (scale with pixels)              |
+    | obj_pw          | 1.0    | obj BCELoss positive_weight                    |
+    | iou_t           | 0.20   | IOU training threshold                         |
+    | anchor_t        | 4.0    | anchor-multiple threshold                      |
+    | fl_gamma        | 0.0    | focal loss gamma                               |
+    | label_smoothing | 0.0    |                                                |
+    | nbs             | 64     | nominal batch size                             |
+    | overlap_mask    | `True` | **Segmentation**: Use mask overlapping during training |
+    | mask_ratio      | 4      | **Segmentation**: Set mask downsampling         |
+    | dropout         | `False`| **Classification**: Use dropout while training   |
+### Prediction Settings
+??? note "Prediction"
+    | Key            | Value                | Description                                        |
+    |----------------|----------------------|----------------------------------------------------|
+    | source         | `ultralytics/assets` | Input source. Accepts image, folder, video, url    |
+    | view_img       | `False`              | View the prediction images                         |
+    | save_txt       | `False`              | Save the results in a txt file                     |
+    | save_conf      | `False`              | Save the confidence scores                         |
+    | save_crop      | `False`              |                                                    |
+    | hide_labels    | `False`              | Hide the labels                                    |
+    | hide_conf      | `False`              | Hide the confidence scores                         |
+    | vid_stride     | `False`              | Input video frame-rate stride                      |
+    | line_thickness | `3`                  | Bounding-box thickness (pixels)                    |
+    | visualize      | `False`              | Visualize model features                           |
+    | augment        | `False`              | Augmented inference                                |
+    | agnostic_nms   | `False`              | Class-agnostic NMS                                 |
+    | retina_masks   | `False`              | **Segmentation:** High resolution masks            |
+
+
+### Validation settings
+??? note "Validation"
+    | Key         | Value   | Description                       |
+    |-------------|---------|-----------------------------------|
+    | noval       | `False` | ???                               |
+    | save_json   | `False` |                                   |
+    | save_hybrid | `False` |                                   |
+    | conf_thres  | `0.001` | Confidence threshold              |
+    | iou_thres   | `0.6`   | IoU threshold                     |
+    | max_det     | `300`   | Maximum number of detections      |
+    | half        | `True`  | Use .half() mode.                 |
+    | dnn         | `False` | Use OpenCV DNN for ONNX inference |
+    | plots       | `False` |                                   |
+
+### Augmentation settings
+??? note "Augmentation"
+
+    | Key         | Value | Description                                     |
+    |-------------|-------|-------------------------------------------------|
+    | hsv_h       | 0.015 | Image HSV-Hue augmentation (fraction)           |
+    | hsv_s       | 0.7   | Image HSV-Saturation augmentation (fraction)    |
+    | hsv_v       | 0.4   | Image HSV-Value augmentation (fraction)         |
+    | degrees     | 0.0   | Image rotation (+/- deg)                        |
+    | translate   | 0.1   | Image translation (+/- fraction)                |
+    | scale       | 0.5   | Image scale (+/- gain)                          |
+    | shear       | 0.0   | Image shear (+/- deg)                           |
+    | perspective | 0.0   | Image perspective (+/- fraction), range 0-0.001 |
+    | flipud      | 0.0   | Image flip up-down (probability)                |
+    | fliplr      | 0.5   | Image flip left-right (probability)             |
+    | mosaic      | 1.0   | Image mosaic (probability)                      |
+    | mixup       | 0.0   | Image mixup (probability)                       |
+    | copy_paste  | 0.0   | Segment copy-paste (probability)                |
+
+### Logging, checkpoints, plotting and file management
+??? note "files"
+    | Key       | Value   | Description                                                                                 |
+    |-----------|---------|---------------------------------------------------------------------------------------------|
+    | project   | 'runs'  | The project name                                                                            |
+    | name      | 'exp'   | The run name. `exp` gets automatically incremented if not specified, i.e. `exp`, `exp2` ... |
+    | exist_ok  | `False` | ???                                                                                         |
+    | plots     | `False` | **Validation**: Save plots during validation                                                |
+    | nosave    | `False` | Don't save any plots, models or files                                                       |
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@ -37,6 +37,7 @@ Ultralytics YOLO comes with pythonic Model and Trainer interface.
    ```python
    import ultralytics
    from ultralytics import YOLO
+
    model = YOLO()
    model.new("s-seg.yaml") # automatically detects task type
    model.load("s-seg.pt") # load checkpoint
--- a/docs/reference/base_trainer.md
+++ b/docs/reference/base_trainer.md
@ -0,0 +1,5 @@
+All task Trainers inherit from the `BaseTrainer` class, which contains the model training and optimization routine boilerplate. You can override any function of these Trainers to suit your needs.
+
+---
+### BaseTrainer API Reference
+:::ultralytics.yolo.engine.trainer.BaseTrainer
--- a/docs/reference/model.md
+++ b/docs/reference/model.md
@ -0,0 +1 @@
+::: ultralytics.yolo.engine.model
--- a/docs/reference/ref.md
+++ b/docs/reference/ref.md
--- a/docs/sdk.md
+++ b/docs/sdk.md
@ -1,11 +1,70 @@
-# Python SDK
+## Using YOLO models
+This is the simplest way to use YOLO models in a Python environment. The `YOLO` class can be imported from the `ultralytics` module.

-We provide 2 pythonic interfaces for YOLO models:
+!!! example "Usage"
+    === "Training"
+        ```python
+        from ultralytics import YOLO

-<b> Model Interface </b> - To simply build, load, train or run inference on a model in a python application
+        model = YOLO()
+        model.new("n.yaml") # pass any model type
+        model.train(data="coco128.yaml", epochs=5)
+        ```

-<b> Trainer Interface </b> - To customize trainier elements depending on the task. Suitable for R&D ideas like architecutres.
+    === "Training pretrained"
+        ```python
+        from ultralytics import YOLO

-______________________________________________________________________
+        model = YOLO()
+        model.load("n.pt") # pass any model type
+        model(...) # inference
+        model.train(data="coco128.yaml", epochs=5)
+        ```

-### Model Interface
+    === "Resume Training"
+        ```python
+        from ultralytics import YOLO
+
+        model = YOLO()
+        model.resume(task="detect") # resume last detection training
+        model.resume(task="detect", model="last.pt") # resume from a given model
+        ```
+
+    More functionality coming soon
+
+To learn more about using `YOLO` models, refer to the Model class reference
+
+[Model reference](#){ .md-button .md-button--primary}
+
+---
+### Customizing Tasks with Trainers
+`YOLO` model class is a high-level wrapper on the Trainer classes. Each YOLO task has its own trainer that inherits from `BaseTrainer`. 
+You can easily customize Trainers to support custom tasks or explore R&D ideas.
+
+!!! tip "Trainer Examples"
+    === "DetectionTrainer"
+        ```python
+        from ultralytics import yolo
+
+        trainer = yolo.DetectionTrainer(data=..., epochs=1) # override default configs
+        trainer.train()
+        ```
+
+    === "SegmentationTrainer"
+        ```python
+        from ultralytics import yolo
+
+        trainer = yolo.SegmentationTrainer(data=..., epochs=1) # override default configs
+        trainer.train()
+        ```
+    === "ClassificationTrainer"
+        ```python
+        from ultralytics import yolo
+
+        trainer = yolo.ClassificationTrainer(data=..., epochs=1) # override default configs
+        trainer.train()
+        ```
+
+Learn more about customizing `Trainers`, `Validators` and `Predictors` to suit your project needs in the Customization section. More details about the base engine classes are available in the Reference section.
+
+[Customization tutorials](#){ .md-button .md-button--primary}
--- a/docs/stylesheets/style.css
+++ b/docs/stylesheets/style.css
@ -0,0 +1,31 @@
+th, td {
+    border: 1px solid var(--md-typeset-table-color);
+    border-spacing: 0px;
+    border-bottom: none;
+    border-left: none;
+    border-top: none;
+}
+
+.md-typeset__table {
+    line-height: 1;
+}
+
+.md-typeset__table table:not([class]) {
+    font-size: .74rem;
+    border-right: none;
+}
+
+.md-typeset__table table:not([class]) td,
+.md-typeset__table table:not([class]) th {
+    padding: 15px;
+}
+
+/* light mode alternating table bg colors */
+.md-typeset__table tr:nth-child(2n) {
+    background-color: #f8f8f8;
+}
+
+/* dark mode alternating table bg colors */
+[data-md-color-scheme="slate"] .md-typeset__table tr:nth-child(2n) {
+    background-color: hsla(var(--md-hue),25%,25%,1)
+}
--- a/docs/trainer.md
+++ b/docs/trainer.md
--- a/mkdocs.yml
+++ b/mkdocs.yml
@ -41,13 +41,16 @@ theme:
    - search.suggest
    - toc.follow

+extra_css:
+  - stylesheets/style.css
+
 markdown_extensions:
  # Div text decorators
  - admonition
  - pymdownx.details
  - pymdownx.superfences
  - tables
-
+  - attr_list
  # Syntax highlight
  - pymdownx.highlight:
      anchor_linenums: true
@ -75,11 +78,18 @@ plugins:
 nav:
  - Quickstart: quickstart.md
  - CLI: cli.md
-  - Python SDK: sdk.md
-  - Trainer: trainer.md
+  - Python Interface: sdk.md
  - Configuration: conf.md
  - Tasks:
      - Detection: tasks/detection.md
      - Segmentation: tasks/segmentation.md
      - Classification: tasks/classification.md
-  - Reference: reference/ref.md
+  - Customization Tutorials:
+      - Customize Trainer: customize/train.md
+      - Customize Validator: customize/val.md
+      - Customize Predictor: customize/predict.md
+  - Reference:
+      - YOLO Models: reference/model.md
+      - Trainer :
+          - BaseTrainer: reference/base_trainer.md
+
--- a/ultralytics/yolo/engine/model.py
+++ b/ultralytics/yolo/engine/model.py
@ -1,26 +1,31 @@
-"""
-Top-level YOLO model interface. First principle usage example - https://github.com/ultralytics/ultralytics/issues/13
-"""
 import torch
 import yaml

 from ultralytics import yolo
 from ultralytics.yolo.utils import LOGGER
 from ultralytics.yolo.utils.checks import check_yaml
+from ultralytics.yolo.utils.files import yaml_load
 from ultralytics.yolo.utils.modeling import attempt_load_weights
 from ultralytics.yolo.utils.modeling.tasks import ClassificationModel, DetectionModel, SegmentationModel

 # map head: [model, trainer]
 MODEL_MAP = {
-    "classify": [ClassificationModel, 'yolo.VERSION.classify.ClassificationTrainer'],
-    "detect": [DetectionModel, 'yolo.VERSION.detect.DetectionTrainer'],
-    "segment": [SegmentationModel, 'yolo.VERSION.segment.SegmentationTrainer']}
+    "classify": [ClassificationModel, 'yolo.TYPE.classify.ClassificationTrainer'],
+    "detect": [DetectionModel, 'yolo.TYPE.detect.DetectionTrainer'],
+    "segment": [SegmentationModel, 'yolo.TYPE.segment.SegmentationTrainer']}


 class YOLO:
+    """
+    Python interface which emulates a model-like behaviour by wrapping trainers.
+    """

-    def __init__(self, version=8) -> None:
-        self.version = version
+    def __init__(self, type="v8") -> None:
+        """
+        Args:
+            type (str): Type/version of models to use
+        """
+        self.type = type
        self.ModelClass = None
        self.TrainerClass = None
        self.model = None
@ -29,20 +34,36 @@ class YOLO:
        self.ckpt = None

    def new(self, cfg: str):
+        """
+        Initializes a new model and infers the task type from the model definitions
+
+        Args:
+            cfg (str): model configuration file
+        """
        cfg = check_yaml(cfg)  # check YAML
        with open(cfg, encoding='ascii', errors='ignore') as f:
            cfg = yaml.safe_load(f)  # model dict
        self.ModelClass, self.TrainerClass, self.task = self._guess_model_trainer_and_task(cfg["head"][-1][-2])
        self.model = self.ModelClass(cfg)  # initialize

-    def load(self, weights):
+    def load(self, weights: str):
+        """
+        Loads a model from a checkpoint and reads the task type from the checkpoint's `train_args`
+
+        Args:
+            weights (str): model checkpoint to be loaded
+
+        """
        self.ckpt = torch.load(weights, map_location="cpu")
        self.task = self.ckpt["train_args"]["task"]
        _, trainer_class_literal = MODEL_MAP[self.task]
-        self.TrainerClass = eval(trainer_class_literal.replace("VERSION", f"v{self.version}"))
+        self.TrainerClass = eval(trainer_class_literal.replace("TYPE", f"{self.type}"))  # self.type already includes the "v" prefix (default "v8")
        self.model = attempt_load_weights(weights)

    def reset(self):
+        """
+        Resets the model modules .
+        """
        for m in self.model.modules():
            if hasattr(m, 'reset_parameters'):
                m.reset_parameters()
@ -50,32 +71,46 @@ class YOLO:
            p.requires_grad = True

    def train(self, **kwargs):
-        if 'data' not in kwargs:
-            raise Exception("data is required to train")
+        """
+        Trains the model on given dataset.
+
+        Args:
+            **kwargs (Any): Any number of arguments representing the training configuration. List of all args can be found in 'config' section.
+                            You can pass all arguments as a yaml file in `cfg`. Other args are ignored if `cfg` file is passed
+        """
        if not self.model and not self.ckpt:
            raise Exception("model not initialized. Use .new() or .load()")

-        kwargs["task"] = self.task
-        kwargs["mode"] = "train"
-        self.trainer = self.TrainerClass(overrides=kwargs)
+        overrides = kwargs
+        if kwargs.get("cfg"):
+            LOGGER.info(f"cfg file passed. Overriding default params with {kwargs['cfg']}.")
+            overrides = yaml_load(check_yaml(kwargs["cfg"]))
+        overrides["task"] = self.task
+        overrides["mode"] = "train"
+        if not overrides.get("data"):
+            raise Exception("dataset not provided! Please check if you have defined `data` in you configs")
+
+        self.trainer = self.TrainerClass(overrides=overrides)
        # load pre-trained weights if found, else use the loaded model
        self.trainer.model = self.trainer.load_model(weights=self.ckpt) if self.ckpt else self.model
        self.trainer.train()

-    def resume(self, task=None, model=None):
-        if not task:
-            raise Exception(
-                "pass the task type and/or model(optional) from which you want to resume: `model.resume(task="
-                ")`")
+    def resume(self, task, model=None):
+        """
+        Resume a training task.
+
+        Args:
+            task (str): The task type you want to resume. Automatically finds the last run to resume if `model` is not specified.
+            model (str): [Optional] The model checkpoint to resume from. If not found, the last run of the given task type is resumed.
+        """
        if task.lower() not in MODEL_MAP:
            raise Exception(f"unrecognised task - {task}. Supported tasks are {MODEL_MAP.keys()}")
        _, trainer_class_literal = MODEL_MAP[task.lower()]
-        self.TrainerClass = eval(trainer_class_literal.replace("VERSION", f"v{self.version}"))
+        self.TrainerClass = eval(trainer_class_literal.replace("TYPE", f"{self.type}"))  # self.type already includes the "v" prefix (default "v8")
        self.trainer = self.TrainerClass(overrides={"task": task.lower(), "resume": model if model else True})
        self.trainer.train()

    def _guess_model_trainer_and_task(self, head):
-        # TODO: warn
        task = None
        if head.lower() in ["classify", "classifier", "cls", "fc"]:
            task = "classify"
@ -85,7 +120,7 @@ class YOLO:
            task = "segment"
        model_class, trainer_class = MODEL_MAP[task]
        # warning: eval is unsafe. Use with caution
-        trainer_class = eval(trainer_class.replace("VERSION", f"v{self.version}"))
+        trainer_class = eval(trainer_class.replace("TYPE", f"{self.type}"))

        return model_class, trainer_class, task

--- a/ultralytics/yolo/engine/trainer.py
+++ b/ultralytics/yolo/engine/trainer.py
@ -35,8 +35,8 @@ RANK = int(os.getenv('RANK', -1))

 class BaseTrainer:

-    def __init__(self, config=DEFAULT_CONFIG, overrides={}):
-        self.args = get_config(config, overrides)
+    def __init__(self, cfg=DEFAULT_CONFIG, overrides={}):
+        self.args = get_config(cfg, overrides)
        self.check_resume()
        init_seeds(self.args.seed + 1 + RANK, deterministic=self.args.deterministic)