Mirror of https://github.com/THU-MIG/yolov10.git (synced 2025-05-24 06:14:55 +08:00)

[Docs]: Link buttons, add autobackend, BaseModel and ops (#130)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>

This commit is contained in: parent af6e3c536b, commit 8996c5c6cf
@@ -34,11 +34,10 @@ To use pythonic interface of Ultralytics YOLO model
 ```python
 from ultralytics import YOLO

-model = YOLO.new("yolov8n.yaml")  # create a new model from scratch
-model = YOLO.load(
+model = YOLO("yolov8n.yaml")  # create a new model from scratch
+model = YOLO(
     "yolov8n.pt"
 )  # load a pretrained model (recommended for best training results)

 results = model.train(data="coco128.yaml", epochs=100, imgsz=640, ...)
 results = model.val()
 results = model.predict(source="bus.jpg")
@@ -33,7 +33,7 @@ CLI requires no customization or code. You can simply run all tasks from the ter
 ```bash
 yolo task=detect mode=train model=s.yaml device=\'0,1,2,3\'
 ```
-[CLI Guide](#){ .md-button .md-button--primary}
+[CLI Guide](cli.md){ .md-button .md-button--primary}

 ## Python API
 Ultralytics YOLO comes with pythonic Model and Trainer interface.
@@ -42,10 +42,9 @@ Ultralytics YOLO comes with pythonic Model and Trainer interface.
 import ultralytics
 from ultralytics import YOLO

-model = YOLO()
-model.new("s-seg.yaml")  # automatically detects task type
-model.load("s-seg.pt")  # load checkpoint
+model = YOLO("s-seg.yaml")  # automatically detects task type
+model = YOLO("s-seg.pt")  # load checkpoint
 model.train(data="coco128-segments", epochs=1, lr0=0.01, ...)
 model.train(data="coco128-segments", epochs=1, lr0=0.01, device="0,1,2,3")  # DDP mode
 ```
-[API Guide](#){ .md-button .md-button--primary}
+[API Guide](sdk.md){ .md-button .md-button--primary}
docs/reference/nn.md (new file, 15 lines)
@@ -0,0 +1,15 @@
# nn Module
Ultralytics nn module contains 3 main components:

1. **AutoBackend**: A module that can run inference on all popular model formats
2. **BaseModel**: `BaseModel` class defines the operations supported by tasks like Detection and Segmentation
3. **modules**: Optimized and reusable neural network blocks built on PyTorch.

## AutoBackend
:::ultralytics.nn.autobackend.AutoBackend

## BaseModel
:::ultralytics.nn.tasks.BaseModel

## Modules
TODO
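For orientation, a minimal usage sketch of the `AutoBackend` component documented above. This example is illustrative rather than part of the commit; the `yolov8n.pt` checkpoint path and the 640x640 input size are assumptions, and any of the formats listed in the AutoBackend docstring should load the same way.

```python
# Illustrative sketch (not part of this commit): inference through AutoBackend
import torch
from ultralytics.nn.autobackend import AutoBackend

model = AutoBackend(weights="yolov8n.pt", device=torch.device("cpu"), fp16=False, fuse=True)  # assumed local checkpoint
model.warmup(imgsz=(1, 3, 640, 640))  # one dummy pass to initialize the chosen backend

im = torch.zeros(1, 3, 640, 640)  # BCHW tensor; a real image would be letterboxed and scaled to [0, 1]
preds = model(im)                 # equivalent to model.forward(im)
```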
docs/reference/ops.md (new file, 162 lines)
@@ -0,0 +1,162 @@
This module contains optimized deep learning related operations used in the Ultralytics YOLO framework

## Non-max suppression
:::ultralytics.ops.non_max_suppression
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## Scale boxes
:::ultralytics.ops.scale_boxes
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## Scale image
:::ultralytics.ops.scale_image
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## Clip boxes
:::ultralytics.ops.clip_boxes
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

# Box Format Conversion

## xyxy2xywh
:::ultralytics.ops.xyxy2xywh
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## xywh2xyxy
:::ultralytics.ops.xywh2xyxy
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## xywhn2xyxy
:::ultralytics.ops.xywhn2xyxy
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## xyxy2xywhn
:::ultralytics.ops.xyxy2xywhn
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## xyn2xy
:::ultralytics.ops.xyn2xy
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## xywh2ltwh
:::ultralytics.ops.xywh2ltwh
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## xyxy2ltwh
:::ultralytics.ops.xyxy2ltwh
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## ltwh2xywh
:::ultralytics.ops.ltwh2xywh
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## ltwh2xyxy
:::ultralytics.ops.ltwh2xyxy
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## segment2box
:::ultralytics.ops.segment2box
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

# Mask Operations

## resample_segments
:::ultralytics.ops.resample_segments
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## crop_mask
:::ultralytics.ops.crop_mask
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## process_mask_upsample
:::ultralytics.ops.process_mask_upsample
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## process_mask
:::ultralytics.ops.process_mask
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## process_mask_native
:::ultralytics.ops.process_mask_native
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## scale_segments
:::ultralytics.ops.scale_segments
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## masks2segments
:::ultralytics.ops.masks2segments
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---

## clip_segments
:::ultralytics.ops.clip_segments
    handler: python
    options:
        show_source: false
        show_root_toc_entry: false
---
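As a quick illustration of the box-format helpers referenced above, a small example follows. It is illustrative only and not part of the new page; it imports the functions from `ultralytics.yolo.utils.ops`, the module path used elsewhere in this commit.

```python
import numpy as np
from ultralytics.yolo.utils.ops import xyxy2xywh, xywh2xyxy

# One box as [x1, y1, x2, y2]: top-left (10, 20), bottom-right (110, 220)
xyxy = np.array([[10.0, 20.0, 110.0, 220.0]])

xywh = xyxy2xywh(xyxy)  # [[60., 120., 100., 200.]] -> center x/y, width, height
back = xywh2xyxy(xywh)  # converts back to the original corner format

assert np.allclose(back, xyxy)
```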
docs/sdk.md (15 lines changed)
@@ -6,8 +6,7 @@ This is the simplest way of simply using yolo models in a python environment. It
 ```python
 from ultralytics import YOLO

-model = YOLO()
-model.new("n.yaml")  # pass any model type
+model = YOLO("yolov8n.yaml")
 model(img_tensor)  # Or model.forward(). inference.
 model.train(data="coco128.yaml", epochs=5)
 ```
@@ -16,10 +15,9 @@ This is the simplest way of simply using yolo models in a python environment. It
 ```python
 from ultralytics import YOLO

-model = YOLO()
-model.load("n.pt")  # pass any model type
+model = YOLO("yolov8n.pt")  # pass any model type
 model(...)  # inference
-model.train(data="coco128.yaml", epochs=5)
+model.train(epochs=5)
 ```

 === "Resume Training"
@@ -35,8 +33,7 @@ This is the simplest way of simply using yolo models in a python environment. It
 ```python
 from ultralytics import YOLO

-model = YOLO()
-model.load("model.pt")
+model = YOLO("model.pt")
 model.predict(source="0")  # accepts all formats - img/folder/vid.*(mp4/format). 0 for webcam
 model.predict(source="folder", view_img=True)  # Display preds. Accepts all yolo predict arguments

@@ -48,7 +45,7 @@ This is the simplest way of simply using yolo models in a python environment. It
 ```python
 from ultralytics import YOLO

-model = YOLO()
+model = YOLO("model.pt")
 model.fuse()
 model.info(verbose=True)  # Print model information
 model.export(format=)  # TODO:
@@ -61,7 +58,7 @@ This is the simplest way of simply using yolo models in a python environment. It

 To know more about using `YOLO` models, refer to the Model class reference

-[Model reference](#){ .md-button .md-button--primary}
+[Model reference](reference/model.md){ .md-button .md-button--primary}

 ---
 ### Customizing Tasks with Trainers
@@ -51,6 +51,7 @@ markdown_extensions:
   - pymdownx.superfences
   - tables
   - attr_list
+  - def_list
   # Syntax highlight
   - pymdownx.highlight:
       anchor_linenums: true
@@ -84,14 +85,16 @@ nav:
    - Detection: tasks/detection.md
    - Segmentation: tasks/segmentation.md
    - Classification: tasks/classification.md
-  - Customization Tutorials:
+  - Advanced Tutorials:
    - Customize Trainer: customize/train.md
    - Customize Validator: customize/val.md
    - Customize Predictor: customize/predict.md
  - Reference:
-  - YOLO Models: reference/model.md
+  - Python Model interface: reference/model.md
   - Engine:
    - Trainer: reference/base_trainer.md
    - Validator: reference/base_val.md
    - Predictor: reference/base_pred.md
    - Exporter: reference/exporter.md
+  - nn Module: reference/nn.md
+  - operations: reference/ops.md
@@ -1,5 +1,6 @@
 __version__ = "8.0.0.dev0"

 from ultralytics.yolo.engine.model import YOLO
+from ultralytics.yolo.utils import ops

 __all__ = ["__version__", "YOLO", "hub"]  # allow simpler import
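A short note on what the added import enables (illustrative, not part of the diff): importing `ops` into the package namespace is what lets the new `reference/ops.md` page address the utilities as `ultralytics.ops.*`.

```python
import numpy as np
import ultralytics  # the added import makes ultralytics.ops reachable from the package root

xyxy = ultralytics.ops.xywh2xyxy(np.array([[60.0, 120.0, 100.0, 200.0]]))
```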
@@ -17,22 +17,36 @@ from ultralytics.yolo.utils.ops import xywh2xyxy
 class AutoBackend(nn.Module):
-    # YOLOv5 MultiBackend class for python inference on various backends
-    def __init__(self, weights='yolov8n.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
-        # Usage:
-        #   PyTorch:               weights = *.pt
-        #   TorchScript:           *.torchscript
-        #   ONNX Runtime:          *.onnx
-        #   ONNX OpenCV DNN:       *.onnx --dnn
-        #   OpenVINO:              *.xml
-        #   CoreML:                *.mlmodel
-        #   TensorRT:              *.engine
-        #   TensorFlow SavedModel: *_saved_model
-        #   TensorFlow GraphDef:   *.pb
-        #   TensorFlow Lite:       *.tflite
-        #   TensorFlow Edge TPU:   *_edgetpu.tflite
-        #   PaddlePaddle:          *_paddle_model
+    def __init__(self, weights='yolov8n.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
+        """
+        Ultralytics YOLO MultiBackend class for python inference on various backends
+
+        Args:
+            weights: the path to the weights file. Defaults to yolov8n.pt
+            device: The device to run the model on.
+            dnn: If you want to use OpenCV's DNN module to run the inference, set this to True. Defaults to
+                False
+            data: a dictionary containing the following keys:
+            fp16: If true, will use half precision. Defaults to False
+            fuse: whether to fuse the model or not. Defaults to True
+
+        Supported format and their usage:
+            | Platform              | weights          |
+            |-----------------------|------------------|
+            | PyTorch               | *.pt             |
+            | TorchScript           | *.torchscript    |
+            | ONNX Runtime          | *.onnx           |
+            | ONNX OpenCV DNN       | *.onnx --dnn     |
+            | OpenVINO              | *.xml            |
+            | CoreML                | *.mlmodel        |
+            | TensorRT              | *.engine         |
+            | TensorFlow SavedModel | *_saved_model    |
+            | TensorFlow GraphDef   | *.pb             |
+            | TensorFlow Lite       | *.tflite         |
+            | TensorFlow Edge TPU   | *_edgetpu.tflite |
+            | PaddlePaddle          | *_paddle_model   |
+        """
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         nn_module = isinstance(weights, torch.nn.Module)
@@ -215,6 +229,15 @@ class AutoBackend(nn.Module):
         self.__dict__.update(locals())  # assign all variables to self

     def forward(self, im, augment=False, visualize=False):
+        """
+        Runs inference on the given model
+
+        Args:
+            im: the image tensor
+            augment: whether to augment the image. Defaults to False
+            visualize: if True, then the network will output the feature maps of the last convolutional layer.
+                Defaults to False
+        """
         # YOLOv5 MultiBackend inference
         b, ch, h, w = im.shape  # batch, channel, height, width
         if self.fp16 and im.dtype != torch.float16:
@@ -297,10 +320,21 @@ class AutoBackend(nn.Module):
         return self.from_numpy(y)

     def from_numpy(self, x):
+        """
+        `from_numpy` converts a numpy array to a tensor
+
+        Args:
+            x: the numpy array to convert
+        """
         return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x

     def warmup(self, imgsz=(1, 3, 640, 640)):
-        # Warmup model by running inference once
+        """
+        Warmup model by running inference once
+
+        Args:
+            imgsz: the size of the image you want to run inference on.
+        """
         warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module
         if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
             im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
@@ -309,6 +343,12 @@ class AutoBackend(nn.Module):

     @staticmethod
     def _model_type(p='path/to/model.pt'):
+        """
+        This function takes a path to a model file and returns the model type
+
+        Args:
+            p: path to the model file. Defaults to path/to/model.pt
+        """
         # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
         # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
         from ultralytics.yolo.engine.exporter import export_formats
@@ -323,6 +363,12 @@ class AutoBackend(nn.Module):

     @staticmethod
     def _load_metadata(f=Path('path/to/meta.yaml')):
+        """
+        > Loads the metadata from a yaml file
+
+        Args:
+            f: The path to the metadata file.
+        """
         from ultralytics.yolo.utils.files import yaml_load

         # Load metadata from meta.yaml if it exists
@@ -17,11 +17,36 @@ from ultralytics.yolo.utils.torch_utils import (fuse_conv_and_bn, initialize_wei


 class BaseModel(nn.Module):
-    # YOLOv5 base model
+    '''
+    The BaseModel class is a base class for all the models in the Ultralytics YOLO family.
+    '''
+
     def forward(self, x, profile=False, visualize=False):
-        return self._forward_once(x, profile, visualize)  # single-scale inference, train
+        """
+        > `forward` is a wrapper for `_forward_once` that runs the model on a single scale
+
+        Args:
+            x: the input image
+            profile: whether to profile the model. Defaults to False
+            visualize: if True, will return the intermediate feature maps. Defaults to False
+
+        Returns:
+            The output of the network.
+        """
+        return self._forward_once(x, profile, visualize)

     def _forward_once(self, x, profile=False, visualize=False):
+        """
+        > Forward pass of the network
+
+        Args:
+            x: input to the model
+            profile: if True, the time taken for each layer will be printed. Defaults to False
+            visualize: If True, it will save the feature maps of the model. Defaults to False
+
+        Returns:
+            The last layer of the model.
+        """
         y, dt = [], []  # outputs
         for m in self.model:
             if m.f != -1:  # if not from previous layer
@@ -36,6 +61,15 @@ class BaseModel(nn.Module):
         return x

     def _profile_one_layer(self, m, x, dt):
+        """
+        It takes a model, an input, and a list of times, and it profiles the model on the input, appending
+        the time to the list
+
+        Args:
+            m: the model
+            x: the input image
+            dt: list of time taken for each layer
+        """
         c = m == self.model[-1]  # is final layer, copy input as inplace fix
         o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPs
         t = time_sync()
@@ -48,7 +82,13 @@ class BaseModel(nn.Module):
         if c:
             LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s}  Total")

-    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
+    def fuse(self):
+        """
+        > It takes a model and fuses the Conv2d() and BatchNorm2d() layers into a single layer
+
+        Returns:
+            The model is being returned.
+        """
         LOGGER.info('Fusing layers... ')
         for m in self.model.modules():
             if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
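For context, the fusion that `fuse()` delegates to `fuse_conv_and_bn` folds each BatchNorm into the convolution that precedes it. A standalone sketch of the underlying arithmetic (illustrative, not the library implementation):

```python
import torch

def fuse_conv_bn_weights(conv_w, conv_b, bn_mean, bn_var, bn_eps, bn_gamma, bn_beta):
    # Fold y = gamma * (conv(x) - mean) / sqrt(var + eps) + beta into a single convolution.
    scale = bn_gamma / torch.sqrt(bn_var + bn_eps)   # per output channel
    fused_w = conv_w * scale.reshape(-1, 1, 1, 1)    # rescale each output filter
    fused_b = (conv_b - bn_mean) * scale + bn_beta   # fold the shift into the bias
    return fused_w, fused_b
```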
@@ -58,11 +98,27 @@ class BaseModel(nn.Module):
         self.info()
         return self

-    def info(self, verbose=False, imgsz=640):  # print model information
+    def info(self, verbose=False, imgsz=640):
+        """
+        Prints model information
+
+        Args:
+            verbose: if True, prints out the model information. Defaults to False
+            imgsz: the size of the image that the model will be trained on. Defaults to 640
+        """
         model_info(self, verbose, imgsz)

     def _apply(self, fn):
-        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
+        """
+        `_apply()` is a function that applies a function to all the tensors in the model that are not
+        parameters or registered buffers
+
+        Args:
+            fn: the function to apply to the model
+
+        Returns:
+            A model that is a Detect() object.
+        """
         self = super()._apply(fn)
         m = self.model[-1]  # Detect()
         if isinstance(m, (Detect, Segment)):
@@ -72,6 +128,12 @@ class BaseModel(nn.Module):
         return self

     def load(self, weights):
+        """
+        > This function loads the weights of the model from a file
+
+        Args:
+            weights: The weights to load into the model.
+        """
         # Force all tasks to implement this function
         raise NotImplementedError("This function needs to be implemented by derived classes!")
@@ -47,6 +47,17 @@ def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)


 def segment2box(segment, width=640, height=640):
+    """
+    > Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to
+    (xyxy)
+    Args:
+        segment: the segment label
+        width: the width of the image. Defaults to 640
+        height: The height of the image. Defaults to 640
+
+    Returns:
+        the minimum and maximum x and y values of the segment.
+    """
     # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy)
     x, y = segment.T  # segment xy
     inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
@@ -55,7 +66,18 @@ def segment2box(segment, width=640, height=640):


 def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
-    # Rescale boxes (xyxy) from img1_shape to img0_shape
+    """
+    > Rescale boxes (xyxy) from img1_shape to img0_shape
+    Args:
+        img1_shape: The shape of the image that the bounding boxes are for.
+        boxes: the bounding boxes of the objects in the image
+        img0_shape: the shape of the original image
+        ratio_pad: a tuple of (ratio, pad)
+
+    Returns:
+        The boxes are being returned.
+    """
+    #
     if ratio_pad is None:  # calculate from img0_shape
         gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
         pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
@@ -70,18 +92,6 @@ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
     return boxes


-def clip_boxes(boxes, shape):
-    # Clip boxes (xyxy) to image shape (height, width)
-    if isinstance(boxes, torch.Tensor):  # faster individually
-        boxes[..., 0].clamp_(0, shape[1])  # x1
-        boxes[..., 1].clamp_(0, shape[0])  # y1
-        boxes[..., 2].clamp_(0, shape[1])  # x2
-        boxes[..., 3].clamp_(0, shape[0])  # y2
-    else:  # np.array (faster grouped)
-        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
-        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
-
-
 def make_divisible(x, divisor):
     # Returns nearest x divisible by divisor
     if isinstance(divisor, torch.Tensor):
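To make the `scale_boxes` rescaling concrete, a small worked example of the gain/pad arithmetic shown above (illustrative numbers; the rest of the function, as in the YOLOv5-style implementation, subtracts the padding and divides by the gain):

```python
# Letterboxed model input 640x640, original image 480x640 (h, w)
img1_shape, img0_shape = (640, 640), (480, 640)

gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # min(1.333, 1.0) = 1.0
pad_w = (img1_shape[1] - img0_shape[1] * gain) / 2                        # (640 - 640) / 2 = 0.0
pad_h = (img1_shape[0] - img0_shape[0] * gain) / 2                        # (640 - 480) / 2 = 80.0

# A box (100, 180, 300, 420) predicted in the 640x640 input maps back to
# ((100 - 0) / 1.0, (180 - 80) / 1.0, (300 - 0) / 1.0, (420 - 80) / 1.0) = (100, 100, 300, 340)
```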
@@ -101,7 +111,7 @@ def non_max_suppression(
         nm=0,  # number of masks
 ):
     """
-    Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.
+    > Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.

     Arguments:
         prediction (torch.Tensor): A tensor of shape (batch_size, num_boxes, num_classes + 4 + num_masks)
@@ -217,6 +227,25 @@ def non_max_suppression(
     return output


+def clip_boxes(boxes, shape):
+    """
+    > It takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the
+    shape
+
+    Args:
+        boxes: the bounding boxes to clip
+        shape: the shape of the image
+    """
+    if isinstance(boxes, torch.Tensor):  # faster individually
+        boxes[..., 0].clamp_(0, shape[1])  # x1
+        boxes[..., 1].clamp_(0, shape[0])  # y1
+        boxes[..., 2].clamp_(0, shape[1])  # x2
+        boxes[..., 3].clamp_(0, shape[0])  # y2
+    else:  # np.array (faster grouped)
+        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
+        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
+
+
 def clip_coords(boxes, shape):
     # Clip bounding xyxy bounding boxes to image shape (height, width)
     if isinstance(boxes, torch.Tensor):  # faster individually
@@ -231,9 +260,16 @@ def clip_coords(boxes, shape):

 def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
     """
-    img1_shape: model input shape, [h, w]
-    img0_shape: origin pic shape, [h, w, 3]
-    masks: [h, w, num]
+    > It takes a mask, and resizes it to the original image size
+
+    Args:
+        im1_shape: model input shape, [h, w]
+        masks: [h, w, num]
+        im0_shape: the original image shape
+        ratio_pad: the ratio of the padding to the original image.
+
+    Returns:
+        The masks are being returned.
     """
     # Rescale coordinates (xyxy) from im1_shape to im0_shape
     if ratio_pad is None:  # calculate from im0_shape
@@ -258,7 +294,16 @@ def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):


 def xyxy2xywh(x):
-    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
+    """
+    > It takes a list of bounding boxes, and converts them from the format [x1, y1, x2, y2] to [x, y, w,
+    h] where xy1=top-left, xy2=bottom-right
+
+    Args:
+        x: the input tensor
+
+    Returns:
+        the center of the box, the width and the height of the box.
+    """
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[..., 0] = (x[..., 0] + x[..., 2]) / 2  # x center
     y[..., 1] = (x[..., 1] + x[..., 3]) / 2  # y center
@@ -268,7 +313,15 @@ def xyxy2xywh(x):


 def xywh2xyxy(x):
-    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+    """
+    > It converts the bounding box from x,y,w,h to x1,y1,x2,y2 where xy1=top-left, xy2=bottom-right
+
+    Args:
+        x: the input tensor
+
+    Returns:
+        the top left and bottom right coordinates of the bounding box.
+    """
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
     y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
@@ -278,7 +331,19 @@ def xywh2xyxy(x):


 def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
-    # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+    """
+    > It converts the normalized coordinates to the actual coordinates [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+
+    Args:
+        x: the bounding box coordinates
+        w: width of the image. Defaults to 640
+        h: height of the image. Defaults to 640
+        padw: padding width. Defaults to 0
+        padh: height of the padding. Defaults to 0
+
+    Returns:
+        the xyxy coordinates of the bounding box.
+    """
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw  # top left x
     y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh  # top left y
@@ -288,7 +353,20 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):


 def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
-    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
+    """
+    > It takes in a list of bounding boxes, and returns a list of bounding boxes, but with the x and y
+    coordinates normalized to the width and height of the image
+
+    Args:
+        x: the bounding box coordinates
+        w: width of the image. Defaults to 640
+        h: height of the image. Defaults to 640
+        clip: If True, the boxes will be clipped to the image boundaries. Defaults to False
+        eps: the minimum value of the box's width and height.
+
+    Returns:
+        the xywhn format of the bounding boxes.
+    """
     if clip:
         clip_boxes(x, (h - eps, w - eps))  # warning: inplace clip
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
@@ -300,7 +378,19 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):


 def xyn2xy(x, w=640, h=640, padw=0, padh=0):
-    # Convert normalized segments into pixel segments, shape (n,2)
+    """
+    > It converts normalized segments into pixel segments of shape (n,2)
+
+    Args:
+        x: the normalized coordinates of the bounding box
+        w: width of the image. Defaults to 640
+        h: height of the image. Defaults to 640
+        padw: padding width. Defaults to 0
+        padh: padding height. Defaults to 0
+
+    Returns:
+        the x and y coordinates of the top left corner of the bounding box.
+    """
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[..., 0] = w * x[..., 0] + padw  # top left x
     y[..., 1] = h * x[..., 1] + padh  # top left y
@@ -308,7 +398,15 @@ def xyn2xy(x, w=640, h=640, padw=0, padh=0):


 def xywh2ltwh(x):
-    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, w, h] where xy1=top-left
+    """
+    > It converts the bounding box from [x, y, w, h] to [x1, y1, w, h] where xy1=top-left
+
+    Args:
+        x: the x coordinate of the center of the bounding box
+
+    Returns:
+        the top left x and y coordinates of the bounding box.
+    """
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
     y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
@@ -316,7 +414,15 @@ def xywh2ltwh(x):


 def xyxy2ltwh(x):
-    # Convert nx4 boxes from [x1, y1, x2, y2] to [x1, y1, w, h] where xy1=top-left, xy2=bottom-right
+    """
+    > Convert nx4 boxes from [x1, y1, x2, y2] to [x1, y1, w, h] where xy1=top-left, xy2=bottom-right
+
+    Args:
+        x: the input tensor
+
+    Returns:
+        the xyxy2ltwh function.
+    """
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[:, 2] = x[:, 2] - x[:, 0]  # width
     y[:, 3] = x[:, 3] - x[:, 1]  # height
@@ -324,7 +430,12 @@ def xyxy2ltwh(x):


 def ltwh2xywh(x):
-    # Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center
+    """
+    > Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center
+
+    Args:
+        x: the input tensor
+    """
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[:, 0] = x[:, 0] + x[:, 2] / 2  # center x
     y[:, 1] = x[:, 1] + x[:, 3] / 2  # center y
@@ -332,7 +443,16 @@ def ltwh2xywh(x):


 def ltwh2xyxy(x):
-    # Convert nx4 boxes from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+    """
+    > It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left,
+    xy2=bottom-right
+
+    Args:
+        x: the input image
+
+    Returns:
+        the xyxy coordinates of the bounding boxes.
+    """
     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
     y[:, 2] = x[:, 2] + x[:, 0]  # width
     y[:, 3] = x[:, 3] + x[:, 1]  # height
@@ -340,7 +460,16 @@ def ltwh2xyxy(x):


 def segments2boxes(segments):
-    # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh)
+    """
+    > It converts segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh)
+
+    Args:
+        segments: list of segments, each segment is a list of points, each point is a list of x, y
+            coordinates
+
+    Returns:
+        the xywh coordinates of the bounding boxes.
+    """
     boxes = []
     for s in segments:
         x, y = s.T  # segment xy
@@ -349,7 +478,17 @@ def segments2boxes(segments):


 def resample_segments(segments, n=1000):
-    # Up-sample an (n,2) segment
+    """
+    > It takes a list of segments (n,2) and returns a list of segments (n,2) where each segment has been
+    up-sampled to n points
+
+    Args:
+        segments: a list of (n,2) arrays, where n is the number of points in the segment.
+        n: number of points to resample the segment to. Defaults to 1000
+
+    Returns:
+        the resampled segments.
+    """
     for i, s in enumerate(segments):
         s = np.concatenate((s, s[0:1, :]), axis=0)
         x = np.linspace(0, len(s) - 1, n)
@@ -360,13 +499,15 @@ def resample_segments(segments, n=1000):

 def crop_mask(masks, boxes):
     """
-    "Crop" predicted masks by zeroing out everything not in the predicted bbox.
-    Vectorized by Chong (thanks Chong).
-    Args:
-    - masks should be a size [h, w, n] tensor of masks
-    - boxes should be a size [n, 4] tensor of bbox coords in relative point form
-    """
+    > It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box
+
+    Args:
+        masks: [h, w, n] tensor of masks
+        boxes: [n, 4] tensor of bbox coords in relative point form
+
+    Returns:
+        The masks are being cropped to the bounding box.
+    """
     n, h, w = masks.shape
     x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(1,1,n)
     r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # rows shape(1,w,1)
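A self-contained sketch of the cropping idea behind `crop_mask`, mirroring the broadcasting pattern in the context lines above (illustrative; the tensor shapes used below are assumptions):

```python
import torch

def crop_mask_sketch(masks, boxes):
    # masks: (n, h, w) float masks, boxes: (n, 4) pixel xyxy boxes
    n, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)                    # each (n, 1, 1)
    r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # column indices (1, 1, w)
    c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # row indices (1, h, 1)
    # keep mask values only where the pixel falls inside its box
    return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))

masks = torch.rand(2, 160, 160)
boxes = torch.tensor([[10.0, 20.0, 80.0, 90.0], [40.0, 40.0, 120.0, 150.0]])
cropped = crop_mask_sketch(masks, boxes)  # (2, 160, 160), zeroed outside each box
```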
@@ -377,14 +518,18 @@ def crop_mask(masks, boxes):

 def process_mask_upsample(protos, masks_in, bboxes, shape):
     """
-    Crop after upsample.
-    proto_out: [mask_dim, mask_h, mask_w]
-    out_masks: [n, mask_dim], n is number of masks after nms
-    bboxes: [n, 4], n is number of masks after nms
-    shape:input_image_size, (h, w)
-    return: h, w, n
-    """
+    > It takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher
+    quality but is slower.
+
+    Args:
+        protos: [mask_dim, mask_h, mask_w]
+        masks_in: [n, mask_dim], n is number of masks after nms
+        bboxes: [n, 4], n is number of masks after nms
+        shape: the size of the input image
+
+    Returns:
+        mask
+    """
     c, mh, mw = protos.shape  # CHW
     masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
     masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
@@ -394,12 +539,17 @@ def process_mask_upsample(protos, masks_in, bboxes, shape):

 def process_mask(protos, masks_in, bboxes, shape, upsample=False):
     """
-    Crop before upsample.
-    proto_out: [mask_dim, mask_h, mask_w]
-    out_masks: [n, mask_dim], n is number of masks after nms
-    bboxes: [n, 4], n is number of masks after nms
-    shape:input_image_size, (h, w)
-    return: h, w, n
+    > It takes the output of the mask head, and applies the mask to the bounding boxes. This is faster but produces
+    downsampled quality of mask
+
+    Args:
+        protos: [mask_dim, mask_h, mask_w]
+        masks_in: [n, mask_dim], n is number of masks after nms
+        bboxes: [n, 4], n is number of masks after nms
+        shape: the size of the input image
+
+    Returns:
+        mask
     """

     c, mh, mw = protos.shape  # CHW
@@ -420,12 +570,16 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False):

 def process_mask_native(protos, masks_in, bboxes, shape):
     """
-    Crop after upsample.
-    protos: [mask_dim, mask_h, mask_w]
-    masks_in: [n, mask_dim], n is number of masks after nms
-    bboxes: [n, 4], n is number of masks after nms
-    shape: input_image_size, (h, w)
-    return: h, w, n
+    > It takes the output of the mask head, and crops it after upsampling to the bounding boxes.
+
+    Args:
+        protos: [mask_dim, mask_h, mask_w]
+        masks_in: [n, mask_dim], n is number of masks after nms
+        bboxes: [n, 4], n is number of masks after nms
+        shape: input_image_size, (h, w)
+
+    Returns:
+        masks: [h, w, n]
     """
     c, mh, mw = protos.shape  # CHW
     masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
@@ -441,7 +595,19 @@ def process_mask_native(protos, masks_in, bboxes, shape):


 def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False):
-    # Rescale coords (xyxy) from img1_shape to img0_shape
+    """
+    > Rescale segment coords (xyxy) from img1_shape to img0_shape
+
+    Args:
+        img1_shape: The shape of the image that the segments are from.
+        segments: the segments to be scaled
+        img0_shape: the shape of the image that the segmentation is being applied to
+        ratio_pad: the ratio of the image size to the padded image size.
+        normalize: If True, the coordinates will be normalized to the range [0, 1]. Defaults to False
+
+    Returns:
+        the segmented image.
+    """
     if ratio_pad is None:  # calculate from img0_shape
         gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
         pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
@@ -460,7 +626,16 @@ def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=F


 def masks2segments(masks, strategy='largest'):
-    # Convert masks(n,160,160) into segments(n,xy)
+    """
+    > It takes a list of masks(n,h,w) and returns a list of segments(n,xy)
+
+    Args:
+        masks: the output of the model, which is a tensor of shape (batch_size, 160, 160)
+        strategy: 'concat' or 'largest'. Defaults to largest
+
+    Returns:
+        segments (List): list of segment masks
+    """
     segments = []
     for x in masks.int().cpu().numpy().astype('uint8'):
         c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
@@ -476,7 +651,14 @@ def masks2segments(masks, strategy='largest'):


 def clip_segments(segments, shape):
-    # Clip segments (xy1,xy2,...) to image shape (height, width)
+    """
+    > It takes a list of line segments (x1,y1,x2,y2) and clips them to the image shape (height, width)
+
+    Args:
+        segments: a list of segments, each segment is a list of points, each point is a list of x,y
+            coordinates
+        shape: the shape of the image
+    """
     if isinstance(segments, torch.Tensor):  # faster individually
         segments[:, 0].clamp_(0, shape[1])  # x
         segments[:, 1].clamp_(0, shape[0])  # y