Mirror of https://github.com/THU-MIG/yolov10.git, synced 2025-05-23 21:44:22 +08:00
ultralytics 8.0.65 YOLOv8 Pose models (#1347)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Mert Can Demir <validatedev@gmail.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
Co-authored-by: Fabian Greavu <fabiangreavu@gmail.com>
Co-authored-by: Yonghye Kwon <developer.0hye@gmail.com>
Co-authored-by: Eric Pedley <ericpedley@gmail.com>
Co-authored-by: JustasBart <40023722+JustasBart@users.noreply.github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Aarni Koskela <akx@iki.fi>
Co-authored-by: Sergio Sanchez <sergio.ssm.97@gmail.com>
Co-authored-by: Bogdan Gheorghe <112427971+bogdan-galileo@users.noreply.github.com>
Co-authored-by: Jaap van de Loosdrecht <jaap@vdlmv.nl>
Co-authored-by: Noobtoss <96134731+Noobtoss@users.noreply.github.com>
Co-authored-by: nerdyespresso <106761627+nerdyespresso@users.noreply.github.com>
Co-authored-by: Farid Inawan <frdteknikelektro@gmail.com>
Co-authored-by: Laughing-q <1185102784@qq.com>
Co-authored-by: Alexander Duda <Alexander.Duda@me.com>
Co-authored-by: Mehran Ghandehari <mehran.maps@gmail.com>
Co-authored-by: Snyk bot <snyk-bot@snyk.io>
Co-authored-by: majid nasiri <majnasai@gmail.com>
This commit is contained in: parent 9af3e69b1a, commit 1cb92d7f42
.github/workflows/ci.yaml (vendored): 95 changes
@@ -43,21 +43,7 @@ jobs:
           python --version
           pip --version
           pip list
-      - name: Test HUB training (Python Usage 1)
-        shell: python
-        env:
-          APIKEY: ${{ secrets.ULTRALYTICS_HUB_APIKEY }}
-        run: |
-          import os
-          from pathlib import Path
-          from ultralytics import YOLO, hub
-          from ultralytics.yolo.utils import USER_CONFIG_DIR
-          Path(USER_CONFIG_DIR / 'settings.yaml').unlink()
-          key = os.environ['APIKEY']
-          hub.reset_model(key)
-          model = YOLO('https://hub.ultralytics.com/models/' + key)
-          model.train()
-      - name: Test HUB training (Python Usage 2)
+      - name: Test HUB training
         shell: python
         env:
           APIKEY: ${{ secrets.ULTRALYTICS_HUB_APIKEY }}
@@ -73,36 +59,6 @@ jobs:
           hub.login(key)
           model = YOLO('https://hub.ultralytics.com/models/' + model_id)
           model.train()
-      - name: Test HUB training (Python Usage 3)
-        shell: python
-        env:
-          APIKEY: ${{ secrets.ULTRALYTICS_HUB_APIKEY }}
-        run: |
-          import os
-          from pathlib import Path
-          from ultralytics import YOLO, hub
-          from ultralytics.yolo.utils import USER_CONFIG_DIR
-          Path(USER_CONFIG_DIR / 'settings.yaml').unlink()
-          key = os.environ['APIKEY']
-          hub.reset_model(key)
-          model = YOLO(key)
-          model.train()
-      - name: Test HUB training (Python Usage 4)
-        shell: python
-        env:
-          APIKEY: ${{ secrets.ULTRALYTICS_HUB_APIKEY }}
-        run: |
-          import os
-          from pathlib import Path
-          from ultralytics import YOLO, hub
-          from ultralytics.yolo.utils import USER_CONFIG_DIR
-          Path(USER_CONFIG_DIR / 'settings.yaml').unlink()
-          key = os.environ['APIKEY']
-          hub.reset_model(key)
-          key, model_id = key.split('_')
-          hub.login(key)
-          model = YOLO(model_id)
-          model.train()

   Benchmarks:
     runs-on: ${{ matrix.os }}
@@ -154,6 +110,11 @@ jobs:
         run: |
           from ultralytics.yolo.utils.benchmarks import benchmark
          benchmark(model='${{ matrix.model }}-cls.pt', imgsz=160, half=False, hard_fail=0.61)
+      - name: Benchmark PoseModel
+        shell: python
+        run: |
+          from ultralytics.yolo.utils.benchmarks import benchmark
+          benchmark(model='${{ matrix.model }}-pose.pt', imgsz=160, half=False, hard_fail=0.0)
       - name: Benchmark Summary
         run: |
           cat benchmarks.log
@@ -200,30 +161,38 @@ jobs:
           python --version
           pip --version
           pip list
-      - name: Test detection
+      - name: Test Detect
         shell: bash # for Windows compatibility
         run: |
-          yolo task=detect mode=train data=coco8.yaml model=yolov8n.yaml epochs=1 imgsz=32
+          yolo detect train data=coco8.yaml model=yolov8n.yaml epochs=1 imgsz=32
-          yolo task=detect mode=train data=coco8.yaml model=yolov8n.pt epochs=1 imgsz=32
+          yolo detect train data=coco8.yaml model=yolov8n.pt epochs=1 imgsz=32
-          yolo task=detect mode=val data=coco8.yaml model=runs/detect/train/weights/last.pt imgsz=32
+          yolo detect val data=coco8.yaml model=runs/detect/train/weights/last.pt imgsz=32
-          yolo task=detect mode=predict model=runs/detect/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
+          yolo detect predict model=runs/detect/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
-          yolo mode=export model=runs/detect/train/weights/last.pt imgsz=32 format=torchscript
+          yolo export model=runs/detect/train/weights/last.pt imgsz=32 format=torchscript
-      - name: Test segmentation
+      - name: Test Segment
         shell: bash # for Windows compatibility
         run: |
-          yolo task=segment mode=train data=coco8-seg.yaml model=yolov8n-seg.yaml epochs=1 imgsz=32
+          yolo segment train data=coco8-seg.yaml model=yolov8n-seg.yaml epochs=1 imgsz=32
-          yolo task=segment mode=train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=1 imgsz=32
+          yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=1 imgsz=32
-          yolo task=segment mode=val data=coco8-seg.yaml model=runs/segment/train/weights/last.pt imgsz=32
+          yolo segment val data=coco8-seg.yaml model=runs/segment/train/weights/last.pt imgsz=32
-          yolo task=segment mode=predict model=runs/segment/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
+          yolo segment predict model=runs/segment/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
-          yolo mode=export model=runs/segment/train/weights/last.pt imgsz=32 format=torchscript
+          yolo export model=runs/segment/train/weights/last.pt imgsz=32 format=torchscript
-      - name: Test classification
+      - name: Test Classify
         shell: bash # for Windows compatibility
         run: |
-          yolo task=classify mode=train data=imagenet10 model=yolov8n-cls.yaml epochs=1 imgsz=32
+          yolo classify train data=imagenet10 model=yolov8n-cls.yaml epochs=1 imgsz=32
-          yolo task=classify mode=train data=imagenet10 model=yolov8n-cls.pt epochs=1 imgsz=32
+          yolo classify train data=imagenet10 model=yolov8n-cls.pt epochs=1 imgsz=32
-          yolo task=classify mode=val data=imagenet10 model=runs/classify/train/weights/last.pt imgsz=32
+          yolo classify val data=imagenet10 model=runs/classify/train/weights/last.pt imgsz=32
-          yolo task=classify mode=predict model=runs/classify/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
+          yolo classify predict model=runs/classify/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
-          yolo mode=export model=runs/classify/train/weights/last.pt imgsz=32 format=torchscript
+          yolo export model=runs/classify/train/weights/last.pt imgsz=32 format=torchscript
+      - name: Test Pose
+        shell: bash # for Windows compatibility
+        run: |
+          yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=1 imgsz=32
+          yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=1 imgsz=32
+          yolo pose val data=coco8-pose.yaml model=runs/pose/train/weights/last.pt imgsz=32
+          yolo pose predict model=runs/pose/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
+          yolo export model=runs/pose/train/weights/last.pt imgsz=32 format=torchscript
       - name: Pytest tests
         shell: bash # for Windows compatibility
         run: pytest tests
README.md: 27 changes

@@ -109,7 +109,10 @@ YOLOv8 [Python Docs](https://docs.ultralytics.com/usage/python) for more example

 ## <div align="center">Models</div>

-All YOLOv8 pretrained models are available here. Detect, Segment and Pose models are pretrained on the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco.yaml) dataset, while Classify models are pretrained on the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/ImageNet.yaml) dataset.
+All YOLOv8 pretrained models are available here. Detect, Segment and Pose models are pretrained on
+the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco.yaml) dataset, while Classify
+models are pretrained on
+the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/ImageNet.yaml) dataset.

 [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models) download automatically from the latest
 Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use.

@@ -174,6 +177,28 @@ See [Classification Docs](https://docs.ultralytics.com/tasks/classify/) for usag

 </details>

+<details><summary>Pose</summary>
+
+See [Pose Docs](https://docs.ultralytics.com/tasks/) for usage examples with these models.
+
+| Model | size<br><sup>(pixels) | mAP<sup>box<br>50-95 | mAP<sup>pose<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>A100 TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | - | 49.7 | - | - | 3.3 | 9.2 |
+| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | - | 59.2 | - | - | 11.6 | 30.2 |
+| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | - | 63.6 | - | - | 26.4 | 81.0 |
+| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | - | 67.0 | - | - | 44.4 | 168.6 |
+| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | - | 68.9 | - | - | 69.4 | 263.2 |
+| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | - | 71.5 | - | - | 99.1 | 1066.4 |
+
+- **mAP<sup>val</sup>** values are for single-model single-scale on [COCO Keypoints val2017](http://cocodataset.org)
+  dataset.
+  <br>Reproduce by `yolo val pose data=coco-pose.yaml device=0`
+- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/)
+  instance.
+  <br>Reproduce by `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu`
+
+</details>

 ## <div align="center">Integrations</div>

 <br>
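The Models section above notes that pretrained checkpoints download automatically from the latest Ultralytics release on first use. A minimal Python sketch of that behavior, reusing the model name and sample image shown in the new pose docs later in this commit:

```python
from ultralytics import YOLO

# First use of a pretrained checkpoint name triggers the automatic download
# from the latest Ultralytics assets release described above.
model = YOLO('yolov8n-pose.pt')

# Quick smoke test on a sample image.
results = model('https://ultralytics.com/images/bus.jpg')
```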
@@ -163,6 +163,28 @@ Ultralytics [发布页](https://github.com/ultralytics/ultralytics/releases) 自

 </details>

+<details><summary>Pose</summary>
+
+See [Pose Docs](https://docs.ultralytics.com/tasks/) for usage examples with these models.
+
+| Model | size<br><sup>(pixels) | mAP<sup>box<br>50-95 | mAP<sup>pose<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>A100 TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | - | 49.7 | - | - | 3.3 | 9.2 |
+| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | - | 59.2 | - | - | 11.6 | 30.2 |
+| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | - | 63.6 | - | - | 26.4 | 81.0 |
+| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | - | 67.0 | - | - | 44.4 | 168.6 |
+| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | - | 68.9 | - | - | 69.4 | 263.2 |
+| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | - | 71.5 | - | - | 99.1 | 1066.4 |
+
+- **mAP<sup>val</sup>** values are for single-model single-scale on [COCO Keypoints val2017](http://cocodataset.org)
+  dataset.
+  <br>Reproduce by `yolo val pose data=coco-pose.yaml device=0`
+- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/)
+  instance.
+  <br>Reproduce by `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu`
+
+</details>

 ## <div align="center">模块集成</div>

 <br>
@@ -2,7 +2,7 @@
 # Builds ultralytics/ultralytics:latest image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics
 # Image is CUDA-optimized for YOLOv8 single/multi-GPU training and inference

-# Start FROM PyTorch image https://hub.docker.com/r/pytorch/pytorch
+# Start FROM PyTorch image https://hub.docker.com/r/pytorch/pytorch or nvcr.io/nvidia/pytorch:23.03-py3
 FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime

 # Downloads to user config dir
@@ -42,4 +42,4 @@ Since its launch YOLO has been employed in various applications, including auton

 YOLOv8 is designed with a strong focus on speed, size, and accuracy, making it a compelling choice for various vision AI tasks. It outperforms previous versions by incorporating innovations like a new backbone network, a new anchor-free split head, and new loss functions. These improvements enable YOLOv8 to deliver superior results, while maintaining a compact size and exceptional speed.

-Additionally, YOLOv8 supports a full range of vision AI tasks, including [detection](tasks/detect.md), [segmentation](tasks/segment.md), [pose estimation](tasks/keypoints.md), [tracking](modes/track.md), and [classification](tasks/classify.md). This versatility allows users to leverage YOLOv8's capabilities across diverse applications and domains.
+Additionally, YOLOv8 supports a full range of vision AI tasks, including [detection](tasks/detect.md), [segmentation](tasks/segment.md), [pose estimation](tasks/pose.md), [tracking](modes/track.md), and [classification](tasks/classify.md). This versatility allows users to leverage YOLOv8's capabilities across diverse applications and domains.
@@ -1,7 +1,7 @@
 <img width="1024" src="https://github.com/ultralytics/assets/raw/main/yolov8/banner-integrations.png">

 **Benchmark mode** is used to profile the speed and accuracy of various export formats for YOLOv8. The benchmarks
-provide information on the size of the exported format, its `mAP50-95` metrics (for object detection and segmentation)
+provide information on the size of the exported format, its `mAP50-95` metrics (for object detection, segmentation and pose)
 or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various export
 formats like ONNX, OpenVINO, TensorRT and others. This information can help users choose the optimal export format for
 their specific use case based on their requirements for speed and accuracy.
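To run such a benchmark locally, the CI workflow earlier in this commit calls the helper below; this sketch simply mirrors that call with a concrete model name (imgsz=160 and hard_fail=0.0 are the CI smoke-test values, not recommended defaults):

```python
from ultralytics.yolo.utils.benchmarks import benchmark

# Profile yolov8n-pose.pt across the supported export formats, as the CI job does.
# hard_fail sets a minimum accuracy floor; half=False keeps FP32 inference.
benchmark(model='yolov8n-pose.pt', imgsz=160, half=False, hard_fail=0.0)
```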
@@ -54,7 +54,7 @@ for applications such as surveillance systems or self-driving cars.
 ## [Benchmark](benchmark.md)

 Benchmark mode is used to profile the speed and accuracy of various export formats for YOLOv8. The benchmarks provide
-information on the size of the exported format, its `mAP50-95` metrics (for object detection and segmentation)
+information on the size of the exported format, its `mAP50-95` metrics (for object detection, segmentation and pose)
 or `accuracy_top5` metrics (for classification), and the inference time in milliseconds per image across various export
 formats like ONNX, OpenVINO, TensorRT and others. This information can help users choose the optimal export format for
 their specific use case based on their requirements for speed and accuracy.
@@ -88,6 +88,8 @@ task.
 | `box`             | `7.5`  | box loss gain                                     |
 | `cls`             | `0.5`  | cls loss gain (scale with pixels)                 |
 | `dfl`             | `1.5`  | dfl loss gain                                     |
+| `pose`            | `12.0` | pose loss gain (pose-only)                        |
+| `kobj`            | `2.0`  | keypoint obj loss gain (pose-only)                |
 | `fl_gamma`        | `0.0`  | focal loss gamma (efficientDet default gamma=1.5) |
 | `label_smoothing` | `0.0`  | label smoothing (fraction)                        |
 | `nbs`             | `64`   | nominal batch size                                |
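The two new rows are pose-specific loss weights. A hedged sketch of setting them from Python: the argument names come from the table above, and passing them as keyword overrides to `model.train()` is assumed to behave like the other training settings listed here.

```python
from ultralytics import YOLO

# Train a pose model with explicit pose-specific loss gains.
# 'pose' weights the keypoint regression loss, 'kobj' the keypoint objectness loss.
model = YOLO('yolov8n-pose.pt')
model.train(data='coco8-pose.yaml', epochs=1, imgsz=640, pose=12.0, kobj=2.0)
```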
@@ -175,9 +175,9 @@ show_source: false
 show_root_toc_entry: false
 ---

-## scale_segments
+## scale_coords

-:::ultralytics.yolo.utils.ops.scale_segments
+:::ultralytics.yolo.utils.ops.scale_coords
 handler: python
 options:
 show_source: false

@@ -193,9 +193,9 @@ show_source: false
 show_root_toc_entry: false
 ---

-## clip_segments
+## clip_coords

-:::ultralytics.yolo.utils.ops.clip_segments
+:::ultralytics.yolo.utils.ops.clip_coords
 handler: python
 options:
 show_source: false
@@ -122,7 +122,7 @@ Use a trained YOLOv8n-cls model to run predictions on images.
         yolo classify predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg'  # predict with custom model
         ```

-Read more details of `predict` in our [Predict](https://docs.ultralytics.com/modes/predict/) page.
+See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page.

 ## Export

@@ -150,7 +150,7 @@ Export a YOLOv8n-cls model to a different format like ONNX, CoreML, etc.
         ```

 Available YOLOv8-cls export formats are in the table below. You can predict or validate directly on exported models,
-i.e. `yolo predict model=yolov8n-cls.onnx`.
+i.e. `yolo predict model=yolov8n-cls.onnx`. Usage examples are shown for your model after export completes.

 | Format | `format` Argument | Model | Metadata |
 |--------------------------------------------------------------------|-------------------|-------------------------------|----------|

@@ -167,3 +167,4 @@ i.e. `yolo predict model=yolov8n-cls.onnx`.
 | [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ |
 | [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ |

+See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.
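The text above says exported models can be used for prediction directly. A short Python sketch of that round trip; the output filename `yolov8n-cls.onnx` is assumed from the table's naming pattern rather than checked:

```python
from ultralytics import YOLO

# Export the classification model to ONNX ...
model = YOLO('yolov8n-cls.pt')
model.export(format='onnx')  # expected to write yolov8n-cls.onnx

# ... then predict directly on the exported file, mirroring
# `yolo predict model=yolov8n-cls.onnx` from the docs text above.
onnx_model = YOLO('yolov8n-cls.onnx')
results = onnx_model('https://ultralytics.com/images/bus.jpg')
```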
@@ -123,7 +123,7 @@ Use a trained YOLOv8n model to run predictions on images.
         yolo detect predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg'  # predict with custom model
         ```

-Read more details of `predict` in our [Predict](https://docs.ultralytics.com/modes/predict/) page.
+See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page.

 ## Export

@@ -151,7 +151,7 @@ Export a YOLOv8n model to a different format like ONNX, CoreML, etc.
         ```

 Available YOLOv8 export formats are in the table below. You can predict or validate directly on exported models,
-i.e. `yolo predict model=yolov8n.onnx`.
+i.e. `yolo predict model=yolov8n.onnx`. Usage examples are shown for your model after export completes.

 | Format | `format` Argument | Model | Metadata |
 |--------------------------------------------------------------------|-------------------|---------------------------|----------|

@@ -167,3 +167,5 @@ i.e. `yolo predict model=yolov8n.onnx`.
 | [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ |
 | [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ |
 | [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ |
+
+See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.
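For completeness, a Python counterpart of the predict command shown above, including a look at the returned detections; reading the `boxes` attribute on each result is an assumption based on the referenced Python usage docs, not something shown in this diff:

```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
results = model('https://ultralytics.com/images/bus.jpg')  # same image as the CLI example

# Each result is assumed to expose detected boxes with class, confidence and xyxy coordinates.
for r in results:
    print(r.boxes)
```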
@@ -2,7 +2,7 @@

 YOLOv8 is an AI framework that supports multiple computer vision **tasks**. The framework can be used to
 perform [detection](detect.md), [segmentation](segment.md), [classification](classify.md),
-and [keypoints](keypoints.md) detection. Each of these tasks has a different objective and use case.
+and [pose](pose.md) estimation. Each of these tasks has a different objective and use case.

 <img width="1024" src="https://user-images.githubusercontent.com/26833433/212094133-6bb8c21c-3d47-41df-a512-81c5931054ae.png">

@@ -29,15 +29,13 @@ images based on their content. It uses a variant of the EfficientNet architectur

 [Classification Examples](classify.md){ .md-button .md-button--primary}

-<!--
-## [Keypoints](keypoints.md)
+## [Pose](pose.md)

-Keypoints detection is a task that involves detecting specific points in an image or video frame. These points are
+Pose/keypoint detection is a task that involves detecting specific points in an image or video frame. These points are
 referred to as keypoints and are used to track movement or pose estimation. YOLOv8 can detect keypoints in an image or
 video frame with high accuracy and speed.

-[Keypoints Examples](keypoints.md){ .md-button .md-button--primary}
--->
+[Pose Examples](pose.md){ .md-button .md-button--primary}

 ## Conclusion
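Since the task index now links to pose, here is a hedged sketch of inspecting keypoints from a prediction; the `keypoints` attribute and its layout are assumptions based on the `[x, y]` / `[x, y, visible]` convention described in the new pose page below, not something spelled out in this diff:

```python
from ultralytics import YOLO

model = YOLO('yolov8n-pose.pt')
results = model('https://ultralytics.com/images/bus.jpg')

# Assumed layout: one set of keypoints per detected person, each point given as
# [x, y] or [x, y, visible], matching the coordinate convention in the pose docs.
for r in results:
    print(r.keypoints)
```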
@@ -1,149 +0,0 @@ (file removed)
Key Point Estimation is a task that involves identifying the location of specific points in an image, usually referred
to as keypoints. The keypoints can represent various parts of the object such as joints, landmarks, or other distinctive
features. The locations of the keypoints are usually represented as a set of 2D `[x, y]` or 3D `[x, y, visible]`
coordinates.

<img width="1024" src="https://user-images.githubusercontent.com/26833433/212094133-6bb8c21c-3d47-41df-a512-81c5931054ae.png">

The output of a keypoint detector is a set of points that represent the keypoints on the object in the image, usually
along with the confidence scores for each point. Keypoint estimation is a good choice when you need to identify specific
parts of an object in a scene, and their location in relation to each other.

!!! tip "Tip"

    YOLOv8 _keypoints_ models use the `-kpts` suffix, i.e. `yolov8n-kpts.pt`. These models are trained on the COCO dataset and are suitable for a variety of keypoint estimation tasks.

[Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models/v8){ .md-button .md-button--primary}

## Train TODO

Train an OpenPose model on a custom dataset of keypoints using the OpenPose framework. For more information on how to
train an OpenPose model on a custom dataset, see the OpenPose Training page.

!!! example ""

    === "Python"

        ```python
        from ultralytics import YOLO

        # Load a model
        model = YOLO('yolov8n.yaml')  # build a new model from YAML
        model = YOLO('yolov8n.pt')  # load a pretrained model (recommended for training)
        model = YOLO('yolov8n.yaml').load('yolov8n.pt')  # build from YAML and transfer weights

        # Train the model
        model.train(data='coco128.yaml', epochs=100, imgsz=640)
        ```
    === "CLI"

        ```bash
        # Build a new model from YAML and start training from scratch
        yolo detect train data=coco128.yaml model=yolov8n.yaml epochs=100 imgsz=640

        # Start training from a pretrained *.pt model
        yolo detect train data=coco128.yaml model=yolov8n.pt epochs=100 imgsz=640

        # Build a new model from YAML, transfer pretrained weights to it and start training
        yolo detect train data=coco128.yaml model=yolov8n.yaml pretrained=yolov8n.pt epochs=100 imgsz=640
        ```

## Val TODO

Validate trained YOLOv8n model accuracy on the COCO128 dataset. No argument need to passed as the `model` retains it's
training `data` and arguments as model attributes.

!!! example ""

    === "Python"

        ```python
        from ultralytics import YOLO

        # Load a model
        model = YOLO('yolov8n.pt')  # load an official model
        model = YOLO('path/to/best.pt')  # load a custom model

        # Validate the model
        metrics = model.val()  # no arguments needed, dataset and settings remembered
        metrics.box.map    # map50-95
        metrics.box.map50  # map50
        metrics.box.map75  # map75
        metrics.box.maps   # a list contains map50-95 of each category
        ```
    === "CLI"

        ```bash
        yolo detect val model=yolov8n.pt  # val official model
        yolo detect val model=path/to/best.pt  # val custom model
        ```

## Predict TODO

Use a trained YOLOv8n model to run predictions on images.

!!! example ""

    === "Python"

        ```python
        from ultralytics import YOLO

        # Load a model
        model = YOLO('yolov8n.pt')  # load an official model
        model = YOLO('path/to/best.pt')  # load a custom model

        # Predict with the model
        results = model('https://ultralytics.com/images/bus.jpg')  # predict on an image
        ```
    === "CLI"

        ```bash
        yolo detect predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg'  # predict with official model
        yolo detect predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg'  # predict with custom model
        ```

Read more details of `predict` in our [Predict](https://docs.ultralytics.com/modes/predict/) page.

## Export TODO

Export a YOLOv8n model to a different format like ONNX, CoreML, etc.

!!! example ""

    === "Python"

        ```python
        from ultralytics import YOLO

        # Load a model
        model = YOLO('yolov8n.pt')  # load an official model
        model = YOLO('path/to/best.pt')  # load a custom trained

        # Export the model
        model.export(format='onnx')
        ```
    === "CLI"

        ```bash
        yolo export model=yolov8n.pt format=onnx  # export official model
        yolo export model=path/to/best.pt format=onnx  # export custom trained model
        ```

Available YOLOv8-pose export formats are in the table below. You can predict or validate directly on exported models,
i.e. `yolo predict model=yolov8n-pose.onnx`.

| Format | `format` Argument | Model | Metadata |
| --- | --- | --- | --- |
| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ |
| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n.torchscript` | ✅ |
| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n.onnx` | ✅ |
| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n_openvino_model/` | ✅ |
| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n.engine` | ✅ |
| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n.mlmodel` | ✅ |
| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n_saved_model/` | ✅ |
| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n.pb` | ❌ |
| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n.tflite` | ✅ |
| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ |
| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ |
| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ |
docs/tasks/pose.md (new file): 175 lines

@@ -0,0 +1,175 @@
Pose estimation is a task that involves identifying the location of specific points in an image, usually referred
to as keypoints. The keypoints can represent various parts of the object such as joints, landmarks, or other distinctive
features. The locations of the keypoints are usually represented as a set of 2D `[x, y]` or 3D `[x, y, visible]`
coordinates.

<img width="1024" src="https://user-images.githubusercontent.com/26833433/212094133-6bb8c21c-3d47-41df-a512-81c5931054ae.png">

The output of a pose estimation model is a set of points that represent the keypoints on an object in the image, usually
along with the confidence scores for each point. Pose estimation is a good choice when you need to identify specific
parts of an object in a scene, and their location in relation to each other.

!!! tip "Tip"

    YOLOv8 _pose_ models use the `-pose` suffix, i.e. `yolov8n-pose.pt`. These models are trained on the [COCO keypoints](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco-pose.yaml) dataset and are suitable for a variety of pose estimation tasks.

## [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models/v8)

YOLOv8 pretrained Pose models are shown here. Detect, Segment and Pose models are pretrained on
the [COCO](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco.yaml) dataset, while Classify
models are pretrained on
the [ImageNet](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/ImageNet.yaml) dataset.

[Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/models) download automatically from the latest
Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use.

| Model | size<br><sup>(pixels) | mAP<sup>box<br>50-95 | mAP<sup>pose<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>A100 TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |
| --- | --- | --- | --- | --- | --- | --- | --- |
| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | - | 49.7 | - | - | 3.3 | 9.2 |
| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | - | 59.2 | - | - | 11.6 | 30.2 |
| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | - | 63.6 | - | - | 26.4 | 81.0 |
| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | - | 67.0 | - | - | 44.4 | 168.6 |
| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | - | 68.9 | - | - | 69.4 | 263.2 |
| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | - | 71.5 | - | - | 99.1 | 1066.4 |

- **mAP<sup>val</sup>** values are for single-model single-scale on [COCO Keypoints val2017](http://cocodataset.org)
  dataset.
  <br>Reproduce by `yolo val pose data=coco-pose.yaml device=0`
- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/)
  instance.
  <br>Reproduce by `yolo val pose data=coco8-pose.yaml batch=1 device=0|cpu`

## Train

Train a YOLOv8-pose model on the COCO128-pose dataset.

!!! example ""

    === "Python"

        ```python
        from ultralytics import YOLO

        # Load a model
        model = YOLO('yolov8n-pose.yaml')  # build a new model from YAML
        model = YOLO('yolov8n-pose.pt')  # load a pretrained model (recommended for training)
        model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt')  # build from YAML and transfer weights

        # Train the model
        model.train(data='coco128-pose.yaml', epochs=100, imgsz=640)
        ```
    === "CLI"

        ```bash
        # Build a new model from YAML and start training from scratch
        yolo detect train data=coco128-pose.yaml model=yolov8n-pose.yaml epochs=100 imgsz=640

        # Start training from a pretrained *.pt model
        yolo detect train data=coco128-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640

        # Build a new model from YAML, transfer pretrained weights to it and start training
        yolo detect train data=coco128-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640
        ```

## Val

Validate trained YOLOv8n-pose model accuracy on the COCO128-pose dataset. No arguments need to be passed, as the `model`
retains its training `data` and arguments as model attributes.

!!! example ""

    === "Python"

        ```python
        from ultralytics import YOLO

        # Load a model
        model = YOLO('yolov8n-pose.pt')  # load an official model
        model = YOLO('path/to/best.pt')  # load a custom model

        # Validate the model
        metrics = model.val()  # no arguments needed, dataset and settings remembered
        metrics.box.map    # map50-95
        metrics.box.map50  # map50
        metrics.box.map75  # map75
        metrics.box.maps   # a list contains map50-95 of each category
        ```
    === "CLI"

        ```bash
        yolo pose val model=yolov8n-pose.pt  # val official model
        yolo pose val model=path/to/best.pt  # val custom model
        ```

## Predict

Use a trained YOLOv8n-pose model to run predictions on images.

!!! example ""

    === "Python"

        ```python
        from ultralytics import YOLO

        # Load a model
        model = YOLO('yolov8n-pose.pt')  # load an official model
        model = YOLO('path/to/best.pt')  # load a custom model

        # Predict with the model
        results = model('https://ultralytics.com/images/bus.jpg')  # predict on an image
        ```
    === "CLI"

        ```bash
        yolo pose predict model=yolov8n.pt source='https://ultralytics.com/images/bus.jpg'  # predict with official model
        yolo pose predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg'  # predict with custom model
        ```

See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page.

## Export

Export a YOLOv8n model to a different format like ONNX, CoreML, etc.

!!! example ""

    === "Python"

        ```python
        from ultralytics import YOLO

        # Load a model
        model = YOLO('yolov8n.pt')  # load an official model
        model = YOLO('path/to/best.pt')  # load a custom trained

        # Export the model
        model.export(format='onnx')
        ```
    === "CLI"

        ```bash
        yolo export model=yolov8n.pt format=onnx  # export official model
        yolo export model=path/to/best.pt format=onnx  # export custom trained model
        ```

Available YOLOv8-pose export formats are in the table below. You can predict or validate directly on exported models,
i.e. `yolo predict model=yolov8n-pose.onnx`. Usage examples are shown for your model after export completes.

| Format | `format` Argument | Model | Metadata |
| --- | --- | --- | --- |
| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ |
| [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov8n-pose.torchscript` | ✅ |
| [ONNX](https://onnx.ai/) | `onnx` | `yolov8n-pose.onnx` | ✅ |
| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ |
| [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov8n-pose.engine` | ✅ |
| [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov8n-pose.mlmodel` | ✅ |
| [TF SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ |
| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb` | `yolov8n-pose.pb` | ❌ |
| [TF Lite](https://www.tensorflow.org/lite) | `tflite` | `yolov8n-pose.tflite` | ✅ |
| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ |
| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | ✅ |
| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ |

See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.
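The metrics notes in the new page say the pose mAP figures are reproduced with `yolo val pose data=coco-pose.yaml device=0`. A Python equivalent, reusing the `model.val()` pattern from the Val section; forwarding `data` and `device` this way is assumed to mirror the CLI arguments:

```python
from ultralytics import YOLO

# Validate a pretrained pose checkpoint on COCO Keypoints (GPU 0), as in the reproduce note.
model = YOLO('yolov8n-pose.pt')
metrics = model.val(data='coco-pose.yaml', device=0)
print(metrics.box.map)  # box mAP50-95, same accessor as the Val example above
```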
@@ -127,7 +127,7 @@ Use a trained YOLOv8n-seg model to run predictions on images.
         yolo segment predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg'  # predict with custom model
         ```

-Read more details of `predict` in our [Predict](https://docs.ultralytics.com/modes/predict/) page.
+See full `predict` mode details in the [Predict](https://docs.ultralytics.com/modes/predict/) page.

 ## Export

@@ -155,7 +155,7 @@ Export a YOLOv8n-seg model to a different format like ONNX, CoreML, etc.
         ```

 Available YOLOv8-seg export formats are in the table below. You can predict or validate directly on exported models,
-i.e. `yolo predict model=yolov8n-seg.onnx`.
+i.e. `yolo predict model=yolov8n-seg.onnx`. Usage examples are shown for your model after export completes.

 | Format | `format` Argument | Model | Metadata |
 |--------------------------------------------------------------------|-------------------|-------------------------------|----------|

@@ -172,4 +172,4 @@ i.e. `yolo predict model=yolov8n-seg.onnx`.
 | [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | ✅ |
 | [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ |

+See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.
@@ -110,6 +110,8 @@ The training settings for YOLO models encompass various hyperparameters and conf
 | `box`             | `7.5`  | box loss gain                                     |
 | `cls`             | `0.5`  | cls loss gain (scale with pixels)                 |
 | `dfl`             | `1.5`  | dfl loss gain                                     |
+| `pose`            | `12.0` | pose loss gain (pose-only)                        |
+| `kobj`            | `2.0`  | keypoint obj loss gain (pose-only)                |
 | `fl_gamma`        | `0.0`  | focal loss gamma (efficientDet default gamma=1.5) |
 | `label_smoothing` | `0.0`  | label smoothing (fraction)                        |
 | `nbs`             | `64`   | nominal batch size                                |
@@ -74,7 +74,7 @@ trainer.add_callback("on_train_epoch_end", log_model)  # Adds to existing callba
 trainer.train()
 ```

-To know more about Callback triggering events and entry point, checkout our Callbacks guide # TODO
+To know more about Callback triggering events and entry point, checkout our [Callbacks Guide](callbacks.md)

 ## Other engine components
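The hunk context shows `trainer.add_callback("on_train_epoch_end", log_model)` but never defines `log_model`. A hypothetical body to make the snippet concrete; the trainer attributes read here are assumptions for illustration, not part of this commit:

```python
# Hypothetical callback for the add_callback() call shown above.
def log_model(trainer):
    # Runs at the end of every training epoch; report which checkpoint was just written.
    # `trainer.epoch` and `trainer.last` are assumed attribute names, used only for illustration.
    print(f"Epoch {trainer.epoch} finished, last checkpoint: {trainer.last}")
```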
@@ -59,7 +59,6 @@ accurately predict the classes and locations of objects in an image.

     === "Resume"
         ```python
-        # TODO: Resume feature is under development and should be released soon.
         model = YOLO("last.pt")
         model.train(resume=True)
         ```
@ -32,11 +32,11 @@
|
|||||||
" <a href=\"https://www.kaggle.com/ultralytics/yolov8\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
|
" <a href=\"https://www.kaggle.com/ultralytics/yolov8\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
|
||||||
"<br>\n",
|
"<br>\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Welcome to the Ultralytics YOLOv8 🚀 notebook! <a href=\"https://github.com/ultralytics/ultralytics\">YOLOv8</a> is the latest version of the YOLO (You Only Look Once) object detection and image segmentation model developed by <a href=\"https://ultralytics.com\">Ultralytics</a>. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLOv8 and understand its features and capabilities.\n",
|
"Welcome to the Ultralytics YOLOv8 🚀 notebook! <a href=\"https://github.com/ultralytics/ultralytics\">YOLOv8</a> is the latest version of the YOLO (You Only Look Once) AI models developed by <a href=\"https://ultralytics.com\">Ultralytics</a>. This notebook serves as the starting point for exploring the various resources available to help you get started with YOLOv8 and understand its features and capabilities.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"The YOLOv8 models are designed to be fast, accurate, and easy to use, making them an excellent choice for a wide range of object detection and image segmentation tasks. They can be trained on large datasets and are capable of running on a variety of hardware platforms, from CPUs to GPUs.\n",
|
"YOLOv8 models are fast, accurate, and easy to use, making them ideal for various object detection and image segmentation tasks. They can be trained on large datasets and run on diverse hardware platforms, from CPUs to GPUs.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Whether you are a seasoned machine learning practitioner or new to the field, we hope that the resources in this notebook will help you get the most out of YOLOv8. Please feel free to browse the <a href=\"https://docs.ultralytics.com/\">YOLOv8 Docs</a> and reach out to us with any questions or feedback.\n",
|
"We hope that the resources in this notebook will help you get the most out of YOLOv8. Please browse the YOLOv8 <a href=\"https://docs.ultralytics.com/\">Docs</a> for details, raise an issue on <a href=\"https://github.com/ultralytics/ultralytics\">GitHub</a> for support, and join our <a href=\"https://discord.gg/n6cFeSPZdD\">Discord</a> community for questions and discussions!\n",
|
||||||
"\n",
|
"\n",
|
||||||
"</div>"
|
"</div>"
|
||||||
]
|
]
|
||||||
@ -66,7 +66,7 @@
|
|||||||
"import ultralytics\n",
|
"import ultralytics\n",
|
||||||
"ultralytics.checks()"
|
"ultralytics.checks()"
|
||||||
],
|
],
|
||||||
"execution_count": 1,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
@ -86,7 +86,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# 1. Predict\n",
|
"# 1. Predict\n",
|
||||||
"\n",
|
"\n",
|
||||||
"YOLOv8 may be used directly in the Command Line Interface (CLI) with a `yolo` command for a variety of tasks and modes and accepts additional arguments, i.e. `imgsz=640`. See a full list of available `yolo` [arguments](https://docs.ultralytics.com/usage/cfg/) in the YOLOv8 [Docs](https://docs.ultralytics.com).\n"
|
"YOLOv8 may be used directly in the Command Line Interface (CLI) with a `yolo` command for a variety of tasks and modes and accepts additional arguments, i.e. `imgsz=640`. See a full list of available `yolo` [arguments](https://docs.ultralytics.com/usage/cfg/) and other details in the [YOLOv8 Predict Docs](https://docs.ultralytics.com/modes/train/).\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -102,7 +102,7 @@
|
|||||||
"# Run inference on an image with YOLOv8n\n",
|
"# Run inference on an image with YOLOv8n\n",
|
||||||
"!yolo predict model=yolov8n.pt source='https://ultralytics.com/images/zidane.jpg'"
|
"!yolo predict model=yolov8n.pt source='https://ultralytics.com/images/zidane.jpg'"
|
||||||
],
|
],
|
||||||
"execution_count": 3,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
@ -135,7 +135,7 @@
|
|||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"# 2. Val\n",
|
"# 2. Val\n",
|
||||||
"Validate a model's accuracy on the [COCO](https://cocodataset.org/#home) dataset's `val` or `test` splits. The latest YOLOv8 [models](https://github.com/ultralytics/ultralytics#models) are downloaded automatically the first time they are used."
|
"Validate a model's accuracy on the [COCO](https://cocodataset.org/#home) dataset's `val` or `test` splits. The latest YOLOv8 [models](https://github.com/ultralytics/ultralytics#models) are downloaded automatically the first time they are used. See [YOLOv8 Val Docs](https://docs.ultralytics.com/modes/val/) for more information."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -165,7 +165,7 @@
|
|||||||
"# Validate YOLOv8n on COCO128 val\n",
|
"# Validate YOLOv8n on COCO128 val\n",
|
||||||
"!yolo val model=yolov8n.pt data=coco128.yaml"
|
"!yolo val model=yolov8n.pt data=coco128.yaml"
|
||||||
],
|
],
|
||||||
"execution_count": 4,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
@ -273,7 +273,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"<p align=\"\"><a href=\"https://roboflow.com/?ref=ultralytics\"><img width=\"1000\" src=\"https://github.com/ultralytics/assets/raw/main/yolov8/banner-integrations.png\"/></a></p>\n",
|
"<p align=\"\"><a href=\"https://roboflow.com/?ref=ultralytics\"><img width=\"1000\" src=\"https://github.com/ultralytics/assets/raw/main/yolov8/banner-integrations.png\"/></a></p>\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Train YOLOv8 on [Detection](https://docs.ultralytics.com/tasks/detect/), [Segmentation](https://docs.ultralytics.com/tasks/segment/) and [Classification](https://docs.ultralytics.com/tasks/classify/) datasets."
|
"Train YOLOv8 on [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/), [Classify](https://docs.ultralytics.com/tasks/classify/) and [Pose](https://docs.ultralytics.com/tasks/pose/) datasets. See [YOLOv8 Train Docs](https://docs.ultralytics.com/modes/train/) for more information."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -289,7 +289,7 @@
|
|||||||
"# Train YOLOv8n on COCO128 for 3 epochs\n",
|
"# Train YOLOv8n on COCO128 for 3 epochs\n",
|
||||||
"!yolo train model=yolov8n.pt data=coco128.yaml epochs=3 imgsz=640"
|
"!yolo train model=yolov8n.pt data=coco128.yaml epochs=3 imgsz=640"
|
||||||
],
|
],
|
||||||
"execution_count": 5,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
@ -449,7 +449,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# 4. Export\n",
|
"# 4. Export\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Export a YOLOv8 model to any supported format with the `format` argument, i.e. `format=onnx`.\n",
|
"Export a YOLOv8 model to any supported format below with the `format` argument, i.e. `format=onnx`. See [YOLOv8 Export Docs](https://docs.ultralytics.com/modes/export/) for more information.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"- 💡 ProTip: Export to [ONNX](https://onnx.ai/) or [OpenVINO](https://docs.openvino.ai/latest/index.html) for up to 3x CPU speedup. \n",
|
"- 💡 ProTip: Export to [ONNX](https://onnx.ai/) or [OpenVINO](https://docs.openvino.ai/latest/index.html) for up to 3x CPU speedup. \n",
|
||||||
"- 💡 ProTip: Export to [TensorRT](https://developer.nvidia.com/tensorrt) for up to 5x GPU speedup.\n",
|
"- 💡 ProTip: Export to [TensorRT](https://developer.nvidia.com/tensorrt) for up to 5x GPU speedup.\n",
|
||||||
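A short sketch of the export step in Python; ultralytics installs the format-specific extras on demand:

from ultralytics import YOLO

model = YOLO('yolov8n.pt')
model.export(format='onnx')  # the ONNX file is written next to the weights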
@ -487,7 +487,7 @@
|
|||||||
"id": "CYIjW4igCjqD",
|
"id": "CYIjW4igCjqD",
|
||||||
"outputId": "49b5bb9d-2c16-415b-c3e7-ec95c15a9e62"
|
"outputId": "49b5bb9d-2c16-415b-c3e7-ec95c15a9e62"
|
||||||
},
|
},
|
||||||
"execution_count": 6,
|
"execution_count": null,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
@ -515,7 +515,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# 5. Python Usage\n",
|
"# 5. Python Usage\n",
|
||||||
"\n",
|
"\n",
|
||||||
"YOLOv8 was reimagined using Python-first principles for the most seamless Python YOLO experience yet. YOLOv8 models can be loaded from a trained checkpoint or created from scratch. Then methods are used to train, val, predict, and export the model. See a detailed Python usage examples in the YOLOv8 [Docs](https://docs.ultralytics.com/usage/python/)."
|
"YOLOv8 was reimagined using Python-first principles for the most seamless Python YOLO experience yet. YOLOv8 models can be loaded from a trained checkpoint or created from scratch. Then methods are used to train, val, predict, and export the model. See detailed Python usage examples in the [YOLOv8 Python Docs](https://docs.ultralytics.com/usage/python/)."
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "kUMOQ0OeDBJG"
|
"id": "kUMOQ0OeDBJG"
|
||||||
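A condensed sketch of that Python-first workflow (build or load a model, then chain train/val/predict/export); all calls assume the documented YOLO class API:

from ultralytics import YOLO

model = YOLO('yolov8n.yaml')   # build a new model from a YAML definition
model = YOLO('yolov8n.pt')     # ...or load a pretrained checkpoint
model.train(data='coco128.yaml', epochs=1)        # train
model.val()                                       # validate on the dataset's val split
model('https://ultralytics.com/images/bus.jpg')   # predict
model.export(format='onnx')                       # export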
@ -547,7 +547,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# 6. Tasks\n",
|
"# 6. Tasks\n",
|
||||||
"\n",
|
"\n",
|
||||||
"YOLOv8 can train, val, predict and export models for the 3 primary tasks in vision AI: detection, segmentation and classification.\n",
|
"YOLOv8 can train, val, predict and export models for the most common tasks in vision AI: [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/), [Classify](https://docs.ultralytics.com/tasks/classify/) and [Pose](https://docs.ultralytics.com/tasks/pose/). See [YOLOv8 Tasks Docs](https://docs.ultralytics.com/tasks/) for more information.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"<img width=\"1024\" src=\"https://user-images.githubusercontent.com/26833433/212094133-6bb8c21c-3d47-41df-a512-81c5931054ae.png\">\n"
|
"<img width=\"1024\" src=\"https://user-images.githubusercontent.com/26833433/212094133-6bb8c21c-3d47-41df-a512-81c5931054ae.png\">\n"
|
||||||
],
|
],
|
||||||
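As a rough illustration of how the task follows the checkpoint suffix (the mapping is formalized in guess_model_task() further down this diff):

from ultralytics import YOLO

for weights in ('yolov8n.pt', 'yolov8n-seg.pt', 'yolov8n-cls.pt', 'yolov8n-pose.pt'):
    print(weights, YOLO(weights).task)  # detect, segment, classify, pose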
@ -636,6 +636,33 @@
|
|||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"## 4. Pose\n",
|
||||||
|
"\n",
|
||||||
|
"YOLOv8 _pose_ models use the `-pose` suffix, i.e. `yolov8n-pose.pt` and are pretrained on COCO Keypoints. See [Pose Docs](https://docs.ultralytics.com/tasks/pose/) for full details."
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "SpIaFLiO11TG"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# Load YOLOv8n-pose, train it on COCO8-pose for 3 epochs and predict an image with it\n",
|
||||||
|
"from ultralytics import YOLO\n",
|
||||||
|
"\n",
|
||||||
|
"model = YOLO('yolov8n-pose.pt') # load a pretrained YOLOv8n classification model\n",
|
||||||
|
"model.train(data='coco8-pose.yaml', epochs=3) # train the model\n",
|
||||||
|
"model('https://ultralytics.com/images/bus.jpg') # predict on an image"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "si4aKFNg19vX"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
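Once the pose cell above has run, keypoints can be read back from the results; the keypoints attribute and its (num_instances, 17, 3) layout are assumed from the kpt_shape used throughout this change:

from ultralytics import YOLO

model = YOLO('yolov8n-pose.pt')
results = model('https://ultralytics.com/images/bus.jpg')
kpts = results[0].keypoints                    # per-person keypoints: x, y, visibility
print(None if kpts is None else kpts.shape)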
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
@ -65,7 +65,7 @@ extra:
|
|||||||
data: 0
|
data: 0
|
||||||
note: >-
|
note: >-
|
||||||
Thanks for your feedback!<br>
|
Thanks for your feedback!<br>
|
||||||
<a href="https://github.com/ultralytics/ultralytics/issues/new?title=Docs+Feedback+for+{title}+page+at+{url}&labels=enhancement&template=feature-request.yml" target="_blank" rel="noopener">Tell us what we can improve.</a>
|
<a href="https://github.com/ultralytics/ultralytics/issues/new?title=Docs+Feedback+for+{title}+page+at+https://docs.ultralytics.com/{url}&labels=enhancement&template=feature-request.yml" target="_blank" rel="noopener">Tell us what we can improve.</a>
|
||||||
|
|
||||||
social:
|
social:
|
||||||
- icon: fontawesome/brands/github
|
- icon: fontawesome/brands/github
|
||||||
@ -134,7 +134,7 @@ nav:
|
|||||||
- Detect: tasks/detect.md
|
- Detect: tasks/detect.md
|
||||||
- Segment: tasks/segment.md
|
- Segment: tasks/segment.md
|
||||||
- Classify: tasks/classify.md
|
- Classify: tasks/classify.md
|
||||||
# - Keypoints: tasks/keypoints.md
|
- Pose: tasks/pose.md
|
||||||
- Usage:
|
- Usage:
|
||||||
- CLI: usage/cli.md
|
- CLI: usage/cli.md
|
||||||
- Python: usage/python.md
|
- Python: usage/python.md
|
||||||
|
@ -33,6 +33,10 @@ def test_train_cls():
|
|||||||
run(f'yolo train classify model={CFG}-cls.yaml data=imagenet10 imgsz=32 epochs=1')
|
run(f'yolo train classify model={CFG}-cls.yaml data=imagenet10 imgsz=32 epochs=1')
|
||||||
|
|
||||||
|
|
||||||
|
def test_train_pose():
|
||||||
|
run(f'yolo train pose model={CFG}-pose.yaml data=coco8-pose.yaml imgsz=32 epochs=1')
|
||||||
|
|
||||||
|
|
||||||
# Val checks -----------------------------------------------------------------------------------------------------------
|
# Val checks -----------------------------------------------------------------------------------------------------------
|
||||||
def test_val_detect():
|
def test_val_detect():
|
||||||
run(f'yolo val detect model={MODEL}.pt data=coco8.yaml imgsz=32')
|
run(f'yolo val detect model={MODEL}.pt data=coco8.yaml imgsz=32')
|
||||||
@ -46,6 +50,10 @@ def test_val_classify():
|
|||||||
run(f'yolo val classify model={MODEL}-cls.pt data=imagenet10 imgsz=32')
|
run(f'yolo val classify model={MODEL}-cls.pt data=imagenet10 imgsz=32')
|
||||||
|
|
||||||
|
|
||||||
|
def test_val_pose():
|
||||||
|
run(f'yolo val pose model={MODEL}-pose.pt data=coco8-pose.yaml imgsz=32')
|
||||||
|
|
||||||
|
|
||||||
# Predict checks -------------------------------------------------------------------------------------------------------
|
# Predict checks -------------------------------------------------------------------------------------------------------
|
||||||
def test_predict_detect():
|
def test_predict_detect():
|
||||||
run(f"yolo predict model={MODEL}.pt source={ROOT / 'assets'} imgsz=32 save save_crop save_txt")
|
run(f"yolo predict model={MODEL}.pt source={ROOT / 'assets'} imgsz=32 save save_crop save_txt")
|
||||||
@ -63,6 +71,10 @@ def test_predict_classify():
|
|||||||
run(f"yolo predict model={MODEL}-cls.pt source={ROOT / 'assets'} imgsz=32 save save_txt")
|
run(f"yolo predict model={MODEL}-cls.pt source={ROOT / 'assets'} imgsz=32 save save_txt")
|
||||||
|
|
||||||
|
|
||||||
|
def test_predict_pose():
|
||||||
|
run(f"yolo predict model={MODEL}-pose.pt source={ROOT / 'assets'} imgsz=32 save save_txt")
|
||||||
|
|
||||||
|
|
||||||
# Export checks --------------------------------------------------------------------------------------------------------
|
# Export checks --------------------------------------------------------------------------------------------------------
|
||||||
def test_export_detect_torchscript():
|
def test_export_detect_torchscript():
|
||||||
run(f'yolo export model={MODEL}.pt format=torchscript')
|
run(f'yolo export model={MODEL}.pt format=torchscript')
|
||||||
@ -76,6 +88,10 @@ def test_export_classify_torchscript():
|
|||||||
run(f'yolo export model={MODEL}-cls.pt format=torchscript')
|
run(f'yolo export model={MODEL}-cls.pt format=torchscript')
|
||||||
|
|
||||||
|
|
||||||
|
def test_export_pose_torchscript():
|
||||||
|
run(f'yolo export model={MODEL}-pose.pt format=torchscript')
|
||||||
|
|
||||||
|
|
||||||
def test_export_detect_edgetpu(enabled=False):
|
def test_export_detect_edgetpu(enabled=False):
|
||||||
if enabled and LINUX:
|
if enabled and LINUX:
|
||||||
run(f'yolo export model={MODEL}.pt format=edgetpu')
|
run(f'yolo export model={MODEL}.pt format=edgetpu')
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# Ultralytics YOLO 🚀, GPL-3.0 license
|
# Ultralytics YOLO 🚀, GPL-3.0 license
|
||||||
|
|
||||||
__version__ = '8.0.65'
|
__version__ = '8.0.66'
|
||||||
|
|
||||||
from ultralytics.hub import start
|
from ultralytics.hub import start
|
||||||
from ultralytics.yolo.engine.model import YOLO
|
from ultralytics.yolo.engine.model import YOLO
|
||||||
|
ultralytics/datasets/coco-pose.yaml (new file, 38 lines)
@ -0,0 +1,38 @@
|
|||||||
|
# Ultralytics YOLO 🚀, GPL-3.0 license
|
||||||
|
# COCO 2017 dataset http://cocodataset.org by Microsoft
|
||||||
|
# Example usage: yolo train data=coco-pose.yaml
|
||||||
|
# parent
|
||||||
|
# ├── ultralytics
|
||||||
|
# └── datasets
|
||||||
|
# └── coco-pose ← downloads here (20.1 GB)
|
||||||
|
|
||||||
|
|
||||||
|
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||||
|
path: ../datasets/coco-pose # dataset root dir
|
||||||
|
train: train2017.txt # train images (relative to 'path') 118287 images
|
||||||
|
val: val2017.txt # val images (relative to 'path') 5000 images
|
||||||
|
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
|
||||||
|
|
||||||
|
# Keypoints
|
||||||
|
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
|
||||||
|
flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
|
||||||
|
|
||||||
|
# Classes
|
||||||
|
names:
|
||||||
|
0: person
|
||||||
|
|
||||||
|
# Download script/URL (optional)
|
||||||
|
download: |
|
||||||
|
from ultralytics.yolo.utils.downloads import download
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Download labels
|
||||||
|
dir = Path(yaml['path']) # dataset root dir
|
||||||
|
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
|
||||||
|
urls = [url + 'coco2017labels-pose.zip'] # labels
|
||||||
|
download(urls, dir=dir.parent)
|
||||||
|
# Download data
|
||||||
|
urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images
|
||||||
|
'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images
|
||||||
|
'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional)
|
||||||
|
download(urls, dir=dir / 'images', threads=3)
|
ultralytics/datasets/coco8-pose.yaml (new file, 25 lines)
@ -0,0 +1,25 @@
|
|||||||
|
# Ultralytics YOLO 🚀, GPL-3.0 license
|
||||||
|
# COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics
|
||||||
|
# Example usage: yolo train data=coco8-pose.yaml
|
||||||
|
# parent
|
||||||
|
# ├── ultralytics
|
||||||
|
# └── datasets
|
||||||
|
# └── coco8-pose ← downloads here (1 MB)
|
||||||
|
|
||||||
|
|
||||||
|
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
||||||
|
path: ../datasets/coco8-pose # dataset root dir
|
||||||
|
train: images/train # train images (relative to 'path') 4 images
|
||||||
|
val: images/val # val images (relative to 'path') 4 images
|
||||||
|
test: # test images (optional)
|
||||||
|
|
||||||
|
# Keypoints
|
||||||
|
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
|
||||||
|
flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
|
||||||
|
|
||||||
|
# Classes
|
||||||
|
names:
|
||||||
|
0: person
|
||||||
|
|
||||||
|
# Download script/URL (optional)
|
||||||
|
download: https://ultralytics.com/assets/coco8-pose.zip
|
@ -44,13 +44,14 @@ Any of these models can be used by loading their configs or pretrained checkpoin
|
|||||||
|
|
||||||
### 1. YOLOv8
|
### 1. YOLOv8
|
||||||
|
|
||||||
**About** - Cutting edge Detection, Segmentation and Classification models developed by Ultralytics. </br>
|
**About** - Cutting edge Detection, Segmentation, Classification and Pose models developed by Ultralytics. </br>
|
||||||
|
|
||||||
Available Models:
|
Available Models:
|
||||||
|
|
||||||
- Detection - `yolov8n`, `yolov8s`, `yolov8m`, `yolov8l`, `yolov8x`
|
- Detection - `yolov8n`, `yolov8s`, `yolov8m`, `yolov8l`, `yolov8x`
|
||||||
- Instance Segmentation - `yolov8n-seg`, `yolov8s-seg`, `yolov8m-seg`, `yolov8l-seg`, `yolov8x-seg`
|
- Instance Segmentation - `yolov8n-seg`, `yolov8s-seg`, `yolov8m-seg`, `yolov8l-seg`, `yolov8x-seg`
|
||||||
- Classification - `yolov8n-cls`, `yolov8s-cls`, `yolov8m-cls`, `yolov8l-cls`, `yolov8x-cls`
|
- Classification - `yolov8n-cls`, `yolov8s-cls`, `yolov8m-cls`, `yolov8l-cls`, `yolov8x-cls`
|
||||||
|
- Pose - `yolov8n-pose`, `yolov8s-pose`, `yolov8m-pose`, `yolov8l-pose`, `yolov8x-pose`, `yolov8x-pose-p6`
|
||||||
|
|
||||||
<details><summary>Performance</summary>
|
<details><summary>Performance</summary>
|
||||||
|
|
||||||
@ -84,6 +85,17 @@ Available Models:
|
|||||||
| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 |
|
| [YOLOv8l-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-cls.pt) | 224 | 78.0 | 94.1 | 163.0 | 0.87 | 37.5 | 99.7 |
|
||||||
| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 |
|
| [YOLOv8x-cls](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-cls.pt) | 224 | 78.4 | 94.3 | 232.0 | 1.01 | 57.4 | 154.8 |
|
||||||
|
|
||||||
|
### Pose
|
||||||
|
|
||||||
|
| Model | size<br><sup>(pixels) | mAP<sup>box<br>50-95 | mAP<sup>pose<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>A100 TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |
|
||||||
|
| ---------------------------------------------------------------------------------------------------- | --------------------- | -------------------- | --------------------- | ------------------------------ | ----------------------------------- | ------------------ | ----------------- |
|
||||||
|
| [YOLOv8n-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt) | 640 | - | 49.7 | - | - | 3.3 | 9.2 |
|
||||||
|
| [YOLOv8s-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-pose.pt) | 640 | - | 59.2 | - | - | 11.6 | 30.2 |
|
||||||
|
| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | - | 63.6 | - | - | 26.4 | 81.0 |
|
||||||
|
| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | - | 67.0 | - | - | 44.4 | 168.6 |
|
||||||
|
| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | - | 68.9 | - | - | 69.4 | 263.2 |
|
||||||
|
| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | - | 71.5 | - | - | 99.1 | 1066.4 |
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
### 2. YOLOv5u
|
### 2. YOLOv5u
|
||||||
|
ultralytics/models/v8/yolov8-pose-p6.yaml (new file, 57 lines)
@ -0,0 +1,57 @@
|
|||||||
|
# Ultralytics YOLO 🚀, GPL-3.0 license
|
||||||
|
# YOLOv8-pose keypoints/pose estimation model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/pose
|
||||||
|
|
||||||
|
# Parameters
|
||||||
|
nc: 1 # number of classes
|
||||||
|
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
|
||||||
|
scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
|
||||||
|
# [depth, width, max_channels]
|
||||||
|
n: [0.33, 0.25, 1024]
|
||||||
|
s: [0.33, 0.50, 1024]
|
||||||
|
m: [0.67, 0.75, 768]
|
||||||
|
l: [1.00, 1.00, 512]
|
||||||
|
x: [1.00, 1.25, 512]
|
||||||
|
|
||||||
|
# YOLOv8.0x6 backbone
|
||||||
|
backbone:
|
||||||
|
# [from, repeats, module, args]
|
||||||
|
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
|
||||||
|
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
|
||||||
|
- [-1, 3, C2f, [128, True]]
|
||||||
|
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
|
||||||
|
- [-1, 6, C2f, [256, True]]
|
||||||
|
- [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
|
||||||
|
- [-1, 6, C2f, [512, True]]
|
||||||
|
- [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
|
||||||
|
- [-1, 3, C2f, [768, True]]
|
||||||
|
- [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
|
||||||
|
- [-1, 3, C2f, [1024, True]]
|
||||||
|
- [-1, 1, SPPF, [1024, 5]] # 11
|
||||||
|
|
||||||
|
# YOLOv8.0x6 head
|
||||||
|
head:
|
||||||
|
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
|
||||||
|
- [[-1, 8], 1, Concat, [1]] # cat backbone P5
|
||||||
|
- [-1, 3, C2, [768, False]] # 14
|
||||||
|
|
||||||
|
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
|
||||||
|
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
|
||||||
|
- [-1, 3, C2, [512, False]] # 17
|
||||||
|
|
||||||
|
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
|
||||||
|
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
|
||||||
|
- [-1, 3, C2, [256, False]] # 20 (P3/8-small)
|
||||||
|
|
||||||
|
- [-1, 1, Conv, [256, 3, 2]]
|
||||||
|
- [[-1, 17], 1, Concat, [1]] # cat head P4
|
||||||
|
- [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
|
||||||
|
|
||||||
|
- [-1, 1, Conv, [512, 3, 2]]
|
||||||
|
- [[-1, 14], 1, Concat, [1]] # cat head P5
|
||||||
|
- [-1, 3, C2, [768, False]] # 26 (P5/32-large)
|
||||||
|
|
||||||
|
- [-1, 1, Conv, [768, 3, 2]]
|
||||||
|
- [[-1, 11], 1, Concat, [1]] # cat head P6
|
||||||
|
- [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
|
||||||
|
|
||||||
|
- [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5, P6)
|
ultralytics/models/v8/yolov8-pose.yaml (new file, 47 lines)
@ -0,0 +1,47 @@
|
|||||||
|
# Ultralytics YOLO 🚀, GPL-3.0 license
|
||||||
|
# YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose
|
||||||
|
|
||||||
|
# Parameters
|
||||||
|
nc: 1 # number of classes
|
||||||
|
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
|
||||||
|
scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n'
|
||||||
|
# [depth, width, max_channels]
|
||||||
|
n: [0.33, 0.25, 1024]
|
||||||
|
s: [0.33, 0.50, 1024]
|
||||||
|
m: [0.67, 0.75, 768]
|
||||||
|
l: [1.00, 1.00, 512]
|
||||||
|
x: [1.00, 1.25, 512]
|
||||||
|
|
||||||
|
# YOLOv8.0n backbone
|
||||||
|
backbone:
|
||||||
|
# [from, repeats, module, args]
|
||||||
|
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
|
||||||
|
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
|
||||||
|
- [-1, 3, C2f, [128, True]]
|
||||||
|
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
|
||||||
|
- [-1, 6, C2f, [256, True]]
|
||||||
|
- [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
|
||||||
|
- [-1, 6, C2f, [512, True]]
|
||||||
|
- [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
|
||||||
|
- [-1, 3, C2f, [1024, True]]
|
||||||
|
- [-1, 1, SPPF, [1024, 5]] # 9
|
||||||
|
|
||||||
|
# YOLOv8.0n head
|
||||||
|
head:
|
||||||
|
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
|
||||||
|
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
|
||||||
|
- [-1, 3, C2f, [512]] # 12
|
||||||
|
|
||||||
|
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
|
||||||
|
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
|
||||||
|
- [-1, 3, C2f, [256]] # 15 (P3/8-small)
|
||||||
|
|
||||||
|
- [-1, 1, Conv, [256, 3, 2]]
|
||||||
|
- [[-1, 12], 1, Concat, [1]] # cat head P4
|
||||||
|
- [-1, 3, C2f, [512]] # 18 (P4/16-medium)
|
||||||
|
|
||||||
|
- [-1, 1, Conv, [512, 3, 2]]
|
||||||
|
- [[-1, 9], 1, Concat, [1]] # cat head P5
|
||||||
|
- [-1, 3, C2f, [1024]] # 21 (P5/32-large)
|
||||||
|
|
||||||
|
- [[15, 18, 21], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5)
|
@ -91,8 +91,10 @@ class AutoBackend(nn.Module):
|
|||||||
if nn_module:
|
if nn_module:
|
||||||
model = weights.to(device)
|
model = weights.to(device)
|
||||||
model = model.fuse(verbose=verbose) if fuse else model
|
model = model.fuse(verbose=verbose) if fuse else model
|
||||||
names = model.module.names if hasattr(model, 'module') else model.names # get class names
|
if hasattr(model, 'kpt_shape'):
|
||||||
|
kpt_shape = model.kpt_shape # pose-only
|
||||||
stride = max(int(model.stride.max()), 32) # model stride
|
stride = max(int(model.stride.max()), 32) # model stride
|
||||||
|
names = model.module.names if hasattr(model, 'module') else model.names # get class names
|
||||||
model.half() if fp16 else model.float()
|
model.half() if fp16 else model.float()
|
||||||
self.model = model # explicitly assign for to(), cpu(), cuda(), half()
|
self.model = model # explicitly assign for to(), cpu(), cuda(), half()
|
||||||
pt = True
|
pt = True
|
||||||
@ -102,6 +104,8 @@ class AutoBackend(nn.Module):
|
|||||||
device=device,
|
device=device,
|
||||||
inplace=True,
|
inplace=True,
|
||||||
fuse=fuse)
|
fuse=fuse)
|
||||||
|
if hasattr(model, 'kpt_shape'):
|
||||||
|
kpt_shape = model.kpt_shape # pose-only
|
||||||
stride = max(int(model.stride.max()), 32) # model stride
|
stride = max(int(model.stride.max()), 32) # model stride
|
||||||
names = model.module.names if hasattr(model, 'module') else model.names # get class names
|
names = model.module.names if hasattr(model, 'module') else model.names # get class names
|
||||||
model.half() if fp16 else model.float()
|
model.half() if fp16 else model.float()
|
||||||
@ -268,13 +272,14 @@ class AutoBackend(nn.Module):
|
|||||||
for k, v in metadata.items():
|
for k, v in metadata.items():
|
||||||
if k in ('stride', 'batch'):
|
if k in ('stride', 'batch'):
|
||||||
metadata[k] = int(v)
|
metadata[k] = int(v)
|
||||||
elif k in ('imgsz', 'names') and isinstance(v, str):
|
elif k in ('imgsz', 'names', 'kpt_shape') and isinstance(v, str):
|
||||||
metadata[k] = eval(v)
|
metadata[k] = eval(v)
|
||||||
stride = metadata['stride']
|
stride = metadata['stride']
|
||||||
task = metadata['task']
|
task = metadata['task']
|
||||||
batch = metadata['batch']
|
batch = metadata['batch']
|
||||||
imgsz = metadata['imgsz']
|
imgsz = metadata['imgsz']
|
||||||
names = metadata['names']
|
names = metadata['names']
|
||||||
|
kpt_shape = metadata.get('kpt_shape')
|
||||||
elif not (pt or triton or nn_module):
|
elif not (pt or triton or nn_module):
|
||||||
LOGGER.warning(f"WARNING ⚠️ Metadata not found for 'model={weights}'")
|
LOGGER.warning(f"WARNING ⚠️ Metadata not found for 'model={weights}'")
|
||||||
|
|
||||||
|
@ -378,7 +378,9 @@ class Ensemble(nn.ModuleList):
|
|||||||
return y, None # inference, train output
|
return y, None # inference, train output
|
||||||
|
|
||||||
|
|
||||||
# heads
|
# Model heads below ----------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
class Detect(nn.Module):
|
class Detect(nn.Module):
|
||||||
# YOLOv8 Detect head for detection models
|
# YOLOv8 Detect head for detection models
|
||||||
dynamic = False # force grid reconstruction
|
dynamic = False # force grid reconstruction
|
||||||
@ -394,7 +396,6 @@ class Detect(nn.Module):
|
|||||||
self.reg_max = 16 # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
|
self.reg_max = 16 # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
|
||||||
self.no = nc + self.reg_max * 4 # number of outputs per anchor
|
self.no = nc + self.reg_max * 4 # number of outputs per anchor
|
||||||
self.stride = torch.zeros(self.nl) # strides computed during build
|
self.stride = torch.zeros(self.nl) # strides computed during build
|
||||||
|
|
||||||
c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc) # channels
|
c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc) # channels
|
||||||
self.cv2 = nn.ModuleList(
|
self.cv2 = nn.ModuleList(
|
||||||
nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
|
nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
|
||||||
@ -454,6 +455,36 @@ class Segment(Detect):
|
|||||||
return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
|
return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
|
||||||
|
|
||||||
|
|
||||||
|
class Pose(Detect):
|
||||||
|
# YOLOv8 Pose head for keypoints models
|
||||||
|
def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
|
||||||
|
super().__init__(nc, ch)
|
||||||
|
self.kpt_shape = kpt_shape # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
|
||||||
|
self.nk = kpt_shape[0] * kpt_shape[1] # number of keypoints total
|
||||||
|
self.detect = Detect.forward
|
||||||
|
|
||||||
|
c4 = max(ch[0] // 4, self.nk)
|
||||||
|
self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
bs = x[0].shape[0] # batch size
|
||||||
|
kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1) # (bs, 17*3, h*w)
|
||||||
|
x = self.detect(self, x)
|
||||||
|
if self.training:
|
||||||
|
return x, kpt
|
||||||
|
pred_kpt = self.kpts_decode(kpt)
|
||||||
|
return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt))
|
||||||
|
|
||||||
|
def kpts_decode(self, kpts):
|
||||||
|
ndim = self.kpt_shape[1]
|
||||||
|
y = kpts.clone()
|
||||||
|
if ndim == 3:
|
||||||
|
y[:, 2::3].sigmoid_() # inplace sigmoid
|
||||||
|
y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (self.anchors[0] - 0.5)) * self.strides
|
||||||
|
y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (self.anchors[1] - 0.5)) * self.strides
|
||||||
|
return y
|
||||||
|
|
||||||
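To make the channel slicing in kpts_decode() above concrete, here is a tiny self-contained sketch of the (batch, nkpt*ndim, anchors) layout it operates on; the tensors are random stand-ins, not real model outputs:

import torch

nkpt, ndim = 17, 3
kpt = torch.rand(2, nkpt * ndim, 100)  # (batch, 17*3, num_anchors), as produced by the cv4 branch
x = kpt[:, 0::ndim]                    # x offsets: every ndim-th channel starting at 0
y = kpt[:, 1::ndim]                    # y offsets
v = kpt[:, 2::ndim].sigmoid()          # visibility logits -> probabilities
print(x.shape, y.shape, v.shape)       # each torch.Size([2, 17, 100])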
|
|
||||||
class Classify(nn.Module):
|
class Classify(nn.Module):
|
||||||
# YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)
|
# YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)
|
||||||
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
|
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
|
||||||
|
@ -10,7 +10,7 @@ import torch.nn as nn
|
|||||||
|
|
||||||
from ultralytics.nn.modules import (C1, C2, C3, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, Classify,
|
from ultralytics.nn.modules import (C1, C2, C3, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, Classify,
|
||||||
Concat, Conv, ConvTranspose, Detect, DWConv, DWConvTranspose2d, Ensemble, Focus,
|
Concat, Conv, ConvTranspose, Detect, DWConv, DWConvTranspose2d, Ensemble, Focus,
|
||||||
GhostBottleneck, GhostConv, Segment)
|
GhostBottleneck, GhostConv, Pose, Segment)
|
||||||
from ultralytics.yolo.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
|
from ultralytics.yolo.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
|
||||||
from ultralytics.yolo.utils.checks import check_requirements, check_suffix, check_yaml
|
from ultralytics.yolo.utils.checks import check_requirements, check_suffix, check_yaml
|
||||||
from ultralytics.yolo.utils.torch_utils import (fuse_conv_and_bn, fuse_deconv_and_bn, initialize_weights,
|
from ultralytics.yolo.utils.torch_utils import (fuse_conv_and_bn, fuse_deconv_and_bn, initialize_weights,
|
||||||
@ -183,10 +183,10 @@ class DetectionModel(BaseModel):
|
|||||||
|
|
||||||
# Build strides
|
# Build strides
|
||||||
m = self.model[-1] # Detect()
|
m = self.model[-1] # Detect()
|
||||||
if isinstance(m, (Detect, Segment)):
|
if isinstance(m, (Detect, Segment, Pose)):
|
||||||
s = 256 # 2x min stride
|
s = 256 # 2x min stride
|
||||||
m.inplace = self.inplace
|
m.inplace = self.inplace
|
||||||
forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
|
forward = lambda x: self.forward(x)[0] if isinstance(m, (Segment, Pose)) else self.forward(x)
|
||||||
m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward
|
m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward
|
||||||
self.stride = m.stride
|
self.stride = m.stride
|
||||||
m.bias_init() # only run once
|
m.bias_init() # only run once
|
||||||
@ -242,12 +242,23 @@ class DetectionModel(BaseModel):
|
|||||||
class SegmentationModel(DetectionModel):
|
class SegmentationModel(DetectionModel):
|
||||||
# YOLOv8 segmentation model
|
# YOLOv8 segmentation model
|
||||||
def __init__(self, cfg='yolov8n-seg.yaml', ch=3, nc=None, verbose=True):
|
def __init__(self, cfg='yolov8n-seg.yaml', ch=3, nc=None, verbose=True):
|
||||||
super().__init__(cfg, ch, nc, verbose)
|
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
|
||||||
|
|
||||||
def _forward_augment(self, x):
|
def _forward_augment(self, x):
|
||||||
raise NotImplementedError(emojis('WARNING ⚠️ SegmentationModel has not supported augment inference yet!'))
|
raise NotImplementedError(emojis('WARNING ⚠️ SegmentationModel has not supported augment inference yet!'))
|
||||||
|
|
||||||
|
|
||||||
|
class PoseModel(DetectionModel):
|
||||||
|
# YOLOv8 pose model
|
||||||
|
def __init__(self, cfg='yolov8n-pose.yaml', ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
|
||||||
|
if not isinstance(cfg, dict):
|
||||||
|
cfg = yaml_model_load(cfg) # load model YAML
|
||||||
|
if any(data_kpt_shape) and list(data_kpt_shape) != list(cfg['kpt_shape']):
|
||||||
|
LOGGER.info(f"Overriding model.yaml kpt_shape={cfg['kpt_shape']} with kpt_shape={data_kpt_shape}")
|
||||||
|
cfg['kpt_shape'] = data_kpt_shape
|
||||||
|
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
|
||||||
|
|
||||||
|
|
||||||
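A hedged usage sketch of the new PoseModel constructor, showing a dataset's kpt_shape taking precedence over the YAML default; the import path is assumed to be ultralytics.nn.tasks, the module being edited here:

from ultralytics.nn.tasks import PoseModel

model = PoseModel('yolov8n-pose.yaml', ch=3, nc=1, data_kpt_shape=(17, 3))
print(model.yaml['kpt_shape'])  # [17, 3]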
class ClassificationModel(BaseModel):
|
class ClassificationModel(BaseModel):
|
||||||
# YOLOv8 classification model
|
# YOLOv8 classification model
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@ -425,7 +436,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
|
|||||||
# Args
|
# Args
|
||||||
max_channels = float('inf')
|
max_channels = float('inf')
|
||||||
nc, act, scales = (d.get(x) for x in ('nc', 'act', 'scales'))
|
nc, act, scales = (d.get(x) for x in ('nc', 'act', 'scales'))
|
||||||
depth, width = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple'))
|
depth, width, kpt_shape = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple', 'kpt_shape'))
|
||||||
if scales:
|
if scales:
|
||||||
scale = d.get('scale')
|
scale = d.get('scale')
|
||||||
if not scale:
|
if not scale:
|
||||||
@ -464,7 +475,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
|
|||||||
args = [ch[f]]
|
args = [ch[f]]
|
||||||
elif m is Concat:
|
elif m is Concat:
|
||||||
c2 = sum(ch[x] for x in f)
|
c2 = sum(ch[x] for x in f)
|
||||||
elif m in (Detect, Segment):
|
elif m in (Detect, Segment, Pose):
|
||||||
args.append([ch[x] for x in f])
|
args.append([ch[x] for x in f])
|
||||||
if m is Segment:
|
if m is Segment:
|
||||||
args[2] = make_divisible(min(args[2], max_channels) * width, 8)
|
args[2] = make_divisible(min(args[2], max_channels) * width, 8)
|
||||||
@ -543,6 +554,8 @@ def guess_model_task(model):
|
|||||||
return 'detect'
|
return 'detect'
|
||||||
if m == 'segment':
|
if m == 'segment':
|
||||||
return 'segment'
|
return 'segment'
|
||||||
|
if m == 'pose':
|
||||||
|
return 'pose'
|
||||||
|
|
||||||
# Guess from model cfg
|
# Guess from model cfg
|
||||||
if isinstance(model, dict):
|
if isinstance(model, dict):
|
||||||
@ -565,6 +578,8 @@ def guess_model_task(model):
|
|||||||
return 'segment'
|
return 'segment'
|
||||||
elif isinstance(m, Classify):
|
elif isinstance(m, Classify):
|
||||||
return 'classify'
|
return 'classify'
|
||||||
|
elif isinstance(m, Pose):
|
||||||
|
return 'pose'
|
||||||
|
|
||||||
# Guess from model filename
|
# Guess from model filename
|
||||||
if isinstance(model, (str, Path)):
|
if isinstance(model, (str, Path)):
|
||||||
@ -573,10 +588,12 @@ def guess_model_task(model):
|
|||||||
return 'segment'
|
return 'segment'
|
||||||
elif '-cls' in model.stem or 'classify' in model.parts:
|
elif '-cls' in model.stem or 'classify' in model.parts:
|
||||||
return 'classify'
|
return 'classify'
|
||||||
|
elif '-pose' in model.stem or 'pose' in model.parts:
|
||||||
|
return 'pose'
|
||||||
elif 'detect' in model.parts:
|
elif 'detect' in model.parts:
|
||||||
return 'detect'
|
return 'detect'
|
||||||
|
|
||||||
# Unable to determine task from model
|
# Unable to determine task from model
|
||||||
LOGGER.warning("WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. "
|
LOGGER.warning("WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. "
|
||||||
"Explicitly define task for your model, i.e. 'task=detect', 'task=segment' or 'task=classify'.")
|
"Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify', or 'pose'.")
|
||||||
return 'detect' # assume detect
|
return 'detect' # assume detect
|
||||||
|
@ -33,10 +33,9 @@ def on_predict_postprocess_end(predictor):
|
|||||||
tracks = predictor.trackers[i].update(det, im0s[i])
|
tracks = predictor.trackers[i].update(det, im0s[i])
|
||||||
if len(tracks) == 0:
|
if len(tracks) == 0:
|
||||||
continue
|
continue
|
||||||
predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1]))
|
|
||||||
if predictor.results[i].masks is not None:
|
|
||||||
idx = tracks[:, -1].tolist()
|
idx = tracks[:, -1].tolist()
|
||||||
predictor.results[i].masks = predictor.results[i].masks[idx]
|
predictor.results[i] = predictor.results[i][idx]
|
||||||
|
predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1]))
|
||||||
|
|
||||||
|
|
||||||
def register_tracker(model):
|
def register_tracker(model):
|
||||||
|
@ -18,13 +18,13 @@ TASKS = 'detect', 'segment', 'classify', 'pose'
|
|||||||
TASK2DATA = {
|
TASK2DATA = {
|
||||||
'detect': 'coco128.yaml',
|
'detect': 'coco128.yaml',
|
||||||
'segment': 'coco128-seg.yaml',
|
'segment': 'coco128-seg.yaml',
|
||||||
'pose': 'coco128-pose.yaml',
|
'classify': 'imagenet100',
|
||||||
'classify': 'imagenet100'}
|
'pose': 'coco128-pose.yaml'}
|
||||||
TASK2MODEL = {
|
TASK2MODEL = {
|
||||||
'detect': 'yolov8n.pt',
|
'detect': 'yolov8n.pt',
|
||||||
'segment': 'yolov8n-seg.pt',
|
'segment': 'yolov8n-seg.pt',
|
||||||
'pose': 'yolov8n-pose.yaml',
|
'classify': 'yolov8n-cls.pt',
|
||||||
'classify': 'yolov8n-cls.pt'} # temp
|
'pose': 'yolov8n-pose.yaml'}
|
||||||
|
|
||||||
CLI_HELP_MSG = \
|
CLI_HELP_MSG = \
|
||||||
f"""
|
f"""
|
||||||
|
@ -88,6 +88,8 @@ warmup_bias_lr: 0.1 # warmup initial bias lr
|
|||||||
box: 7.5 # box loss gain
|
box: 7.5 # box loss gain
|
||||||
cls: 0.5 # cls loss gain (scale with pixels)
|
cls: 0.5 # cls loss gain (scale with pixels)
|
||||||
dfl: 1.5 # dfl loss gain
|
dfl: 1.5 # dfl loss gain
|
||||||
|
pose: 12.0 # pose loss gain
|
||||||
|
kobj: 1.0 # keypoint obj loss gain
|
||||||
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
|
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
|
||||||
label_smoothing: 0.0 # label smoothing (fraction)
|
label_smoothing: 0.0 # label smoothing (fraction)
|
||||||
nbs: 64 # nominal batch size
|
nbs: 64 # nominal batch size
|
||||||
|
@ -16,6 +16,8 @@ from ..utils.metrics import bbox_ioa
|
|||||||
from ..utils.ops import segment2box
|
from ..utils.ops import segment2box
|
||||||
from .utils import polygons2masks, polygons2masks_overlap
|
from .utils import polygons2masks, polygons2masks_overlap
|
||||||
|
|
||||||
|
POSE_FLIPLR_INDEX = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
|
||||||
|
|
||||||
|
|
||||||
# TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
|
# TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
|
||||||
class BaseTransform:
|
class BaseTransform:
|
||||||
@ -309,27 +311,22 @@ class RandomPerspective:
|
|||||||
"""apply affine to keypoints.
|
"""apply affine to keypoints.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
keypoints(ndarray): keypoints, [N, 17, 2].
|
keypoints(ndarray): keypoints, [N, 17, 3].
|
||||||
M(ndarray): affine matrix.
|
M(ndarray): affine matrix.
|
||||||
Return:
|
Return:
|
||||||
new_keypoints(ndarray): keypoints after affine, [N, 17, 2].
|
new_keypoints(ndarray): keypoints after affine, [N, 17, 3].
|
||||||
"""
|
"""
|
||||||
n = len(keypoints)
|
n, nkpt = keypoints.shape[:2]
|
||||||
if n == 0:
|
if n == 0:
|
||||||
return keypoints
|
return keypoints
|
||||||
new_keypoints = np.ones((n * 17, 3))
|
xy = np.ones((n * nkpt, 3))
|
||||||
new_keypoints[:, :2] = keypoints.reshape(n * 17, 2) # num_kpt is hardcoded to 17
|
visible = keypoints[..., 2].reshape(n * nkpt, 1)
|
||||||
new_keypoints = new_keypoints @ M.T # transform
|
xy[:, :2] = keypoints[..., :2].reshape(n * nkpt, 2)
|
||||||
new_keypoints = (new_keypoints[:, :2] / new_keypoints[:, 2:3]).reshape(n, 34) # perspective rescale or affine
|
xy = xy @ M.T # transform
|
||||||
new_keypoints[keypoints.reshape(-1, 34) == 0] = 0
|
xy = xy[:, :2] / xy[:, 2:3] # perspective rescale or affine
|
||||||
x_kpts = new_keypoints[:, list(range(0, 34, 2))]
|
out_mask = (xy[:, 0] < 0) | (xy[:, 1] < 0) | (xy[:, 0] > self.size[0]) | (xy[:, 1] > self.size[1])
|
||||||
y_kpts = new_keypoints[:, list(range(1, 34, 2))]
|
visible[out_mask] = 0
|
||||||
|
return np.concatenate([xy, visible], axis=-1).reshape(n, nkpt, 3)
|
||||||
x_kpts[np.logical_or.reduce((x_kpts < 0, x_kpts > self.size[0], y_kpts < 0, y_kpts > self.size[1]))] = 0
|
|
||||||
y_kpts[np.logical_or.reduce((x_kpts < 0, x_kpts > self.size[0], y_kpts < 0, y_kpts > self.size[1]))] = 0
|
|
||||||
new_keypoints[:, list(range(0, 34, 2))] = x_kpts
|
|
||||||
new_keypoints[:, list(range(1, 34, 2))] = y_kpts
|
|
||||||
return new_keypoints.reshape(n, 17, 2)
|
|
||||||
|
|
||||||
def __call__(self, labels):
|
def __call__(self, labels):
|
||||||
"""
|
"""
|
||||||
@ -415,12 +412,13 @@ class RandomHSV:
|
|||||||
|
|
||||||
class RandomFlip:
|
class RandomFlip:
|
||||||
|
|
||||||
def __init__(self, p=0.5, direction='horizontal') -> None:
|
def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
|
||||||
assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
|
assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
|
||||||
assert 0 <= p <= 1.0
|
assert 0 <= p <= 1.0
|
||||||
|
|
||||||
self.p = p
|
self.p = p
|
||||||
self.direction = direction
|
self.direction = direction
|
||||||
|
self.flip_idx = flip_idx
|
||||||
|
|
||||||
def __call__(self, labels):
|
def __call__(self, labels):
|
||||||
img = labels['img']
|
img = labels['img']
|
||||||
@ -437,6 +435,9 @@ class RandomFlip:
|
|||||||
if self.direction == 'horizontal' and random.random() < self.p:
|
if self.direction == 'horizontal' and random.random() < self.p:
|
||||||
img = np.fliplr(img)
|
img = np.fliplr(img)
|
||||||
instances.fliplr(w)
|
instances.fliplr(w)
|
||||||
|
# for keypoints
|
||||||
|
if self.flip_idx is not None and instances.keypoints is not None:
|
||||||
|
instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
|
||||||
labels['img'] = np.ascontiguousarray(img)
|
labels['img'] = np.ascontiguousarray(img)
|
||||||
labels['instances'] = instances
|
labels['instances'] = instances
|
||||||
return labels
|
return labels
|
||||||
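A small numpy sketch of what the flip_idx reindexing in RandomFlip does for COCO keypoints: left/right pairs swap slots, while the x coordinates themselves are mirrored separately by instances.fliplr:

import numpy as np

flip_idx = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
keypoints = np.arange(17 * 3, dtype=np.float32).reshape(1, 17, 3)  # (num_instances, 17, 3) dummy values
flipped = np.ascontiguousarray(keypoints[:, flip_idx, :])
print(flipped[0, :3, 0])  # [0. 6. 3.]: keypoints 1 and 2 (the eye pair) have swapped places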
@ -633,7 +634,7 @@ class Format:
|
|||||||
labels['cls'] = torch.from_numpy(cls) if nl else torch.zeros(nl)
|
labels['cls'] = torch.from_numpy(cls) if nl else torch.zeros(nl)
|
||||||
labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
|
labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
|
||||||
if self.return_keypoint:
|
if self.return_keypoint:
|
||||||
labels['keypoints'] = torch.from_numpy(instances.keypoints) if nl else torch.zeros((nl, 17, 2))
|
labels['keypoints'] = torch.from_numpy(instances.keypoints)
|
||||||
# then we can use collate_fn
|
# then we can use collate_fn
|
||||||
if self.batch_idx:
|
if self.batch_idx:
|
||||||
labels['batch_idx'] = torch.zeros(nl)
|
labels['batch_idx'] = torch.zeros(nl)
|
||||||
@ -672,13 +673,17 @@ def v8_transforms(dataset, imgsz, hyp):
|
|||||||
perspective=hyp.perspective,
|
perspective=hyp.perspective,
|
||||||
pre_transform=LetterBox(new_shape=(imgsz, imgsz)),
|
pre_transform=LetterBox(new_shape=(imgsz, imgsz)),
|
||||||
)])
|
)])
|
||||||
|
flip_idx = dataset.data.get('flip_idx', None) # for keypoints augmentation
|
||||||
|
if dataset.use_keypoints and flip_idx is None and hyp.fliplr > 0.0:
|
||||||
|
hyp.fliplr = 0.0
|
||||||
|
LOGGER.warning("WARNING ⚠️ No `flip_idx` provided while training keypoints, setting augmentation 'fliplr=0.0'")
|
||||||
return Compose([
|
return Compose([
|
||||||
pre_transform,
|
pre_transform,
|
||||||
MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup),
|
MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup),
|
||||||
Albumentations(p=1.0),
|
Albumentations(p=1.0),
|
||||||
RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
|
RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
|
||||||
RandomFlip(direction='vertical', p=hyp.flipud),
|
RandomFlip(direction='vertical', p=hyp.flipud),
|
||||||
RandomFlip(direction='horizontal', p=hyp.fliplr)]) # transforms
|
RandomFlip(direction='horizontal', p=hyp.fliplr, flip_idx=flip_idx)]) # transforms
|
||||||
|
|
||||||
|
|
||||||
# Classification augmentations -----------------------------------------------------------------------------------------
|
# Classification augmentations -----------------------------------------------------------------------------------------
|
||||||
|
@ -61,7 +61,7 @@ def seed_worker(worker_id): # noqa
|
|||||||
random.seed(worker_seed)
|
random.seed(worker_seed)
|
||||||
|
|
||||||
|
|
||||||
def build_dataloader(cfg, batch, img_path, stride=32, rect=False, names=None, rank=-1, mode='train'):
|
def build_dataloader(cfg, batch, img_path, data_info, stride=32, rect=False, rank=-1, mode='train'):
|
||||||
assert mode in ['train', 'val']
|
assert mode in ['train', 'val']
|
||||||
shuffle = mode == 'train'
|
shuffle = mode == 'train'
|
||||||
if cfg.rect and shuffle:
|
if cfg.rect and shuffle:
|
||||||
@ -81,9 +81,9 @@ def build_dataloader(cfg, batch, img_path, stride=32, rect=False, names=None, ra
|
|||||||
pad=0.0 if mode == 'train' else 0.5,
|
pad=0.0 if mode == 'train' else 0.5,
|
||||||
prefix=colorstr(f'{mode}: '),
|
prefix=colorstr(f'{mode}: '),
|
||||||
use_segments=cfg.task == 'segment',
|
use_segments=cfg.task == 'segment',
|
||||||
use_keypoints=cfg.task == 'keypoint',
|
use_keypoints=cfg.task == 'pose',
|
||||||
names=names,
|
classes=cfg.classes,
|
||||||
classes=cfg.classes)
|
data=data_info)
|
||||||
|
|
||||||
batch = min(batch, len(dataset))
|
batch = min(batch, len(dataset))
|
||||||
nd = torch.cuda.device_count() # number of CUDA devices
|
nd = torch.cuda.device_count() # number of CUDA devices
|
||||||
|
@ -57,11 +57,11 @@ class YOLODataset(BaseDataset):
|
|||||||
single_cls=False,
|
single_cls=False,
|
||||||
use_segments=False,
|
use_segments=False,
|
||||||
use_keypoints=False,
|
use_keypoints=False,
|
||||||
names=None,
|
data=None,
|
||||||
classes=None):
|
classes=None):
|
||||||
self.use_segments = use_segments
|
self.use_segments = use_segments
|
||||||
self.use_keypoints = use_keypoints
|
self.use_keypoints = use_keypoints
|
||||||
self.names = names
|
self.data = data
|
||||||
assert not (self.use_segments and self.use_keypoints), 'Can not use both segments and keypoints.'
|
assert not (self.use_segments and self.use_keypoints), 'Can not use both segments and keypoints.'
|
||||||
super().__init__(img_path, imgsz, cache, augment, hyp, prefix, rect, batch_size, stride, pad, single_cls,
|
super().__init__(img_path, imgsz, cache, augment, hyp, prefix, rect, batch_size, stride, pad, single_cls,
|
||||||
classes)
|
classes)
|
||||||
@ -77,10 +77,16 @@ class YOLODataset(BaseDataset):
|
|||||||
nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages
|
nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages
|
||||||
desc = f'{self.prefix}Scanning {path.parent / path.stem}...'
|
desc = f'{self.prefix}Scanning {path.parent / path.stem}...'
|
||||||
total = len(self.im_files)
|
total = len(self.im_files)
|
||||||
|
nc = len(self.data['names'])
|
||||||
|
nkpt, ndim = self.data.get('kpt_shape', (0, 0))
|
||||||
|
if self.use_keypoints and (nkpt <= 0 or ndim not in (2, 3)):
|
||||||
|
raise ValueError("'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of "
|
||||||
|
"keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'")
|
||||||
with ThreadPool(NUM_THREADS) as pool:
|
with ThreadPool(NUM_THREADS) as pool:
|
||||||
results = pool.imap(func=verify_image_label,
|
results = pool.imap(func=verify_image_label,
|
||||||
iterable=zip(self.im_files, self.label_files, repeat(self.prefix),
|
iterable=zip(self.im_files, self.label_files, repeat(self.prefix),
|
||||||
repeat(self.use_keypoints), repeat(len(self.names))))
|
repeat(self.use_keypoints), repeat(len(self.data['names'])), repeat(nkpt),
|
||||||
|
repeat(ndim)))
|
||||||
pbar = tqdm(results, desc=desc, total=total, bar_format=TQDM_BAR_FORMAT)
|
pbar = tqdm(results, desc=desc, total=total, bar_format=TQDM_BAR_FORMAT)
|
||||||
for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar:
|
for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar:
|
||||||
nm += nm_f
|
nm += nm_f
|
||||||
|
@ -6,10 +6,10 @@ import json
|
|||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
|
import zipfile
|
||||||
from multiprocessing.pool import ThreadPool
|
from multiprocessing.pool import ThreadPool
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from tarfile import is_tarfile
|
from tarfile import is_tarfile
|
||||||
from zipfile import is_zipfile
|
|
||||||
|
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -61,7 +61,7 @@ def exif_size(img):
|
|||||||
|
|
||||||
def verify_image_label(args):
|
def verify_image_label(args):
|
||||||
# Verify one image-label pair
|
# Verify one image-label pair
|
||||||
im_file, lb_file, prefix, keypoint, num_cls = args
|
im_file, lb_file, prefix, keypoint, num_cls, nkpt, ndim = args
|
||||||
# number (missing, found, empty, corrupt), message, segments, keypoints
|
# number (missing, found, empty, corrupt), message, segments, keypoints
|
||||||
nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, '', [], None
|
nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, '', [], None
|
||||||
try:
|
try:
|
||||||
@ -92,25 +92,19 @@ def verify_image_label(args):
|
|||||||
nl = len(lb)
|
nl = len(lb)
|
||||||
if nl:
|
if nl:
|
||||||
if keypoint:
|
if keypoint:
|
||||||
assert lb.shape[1] == 56, 'labels require 56 columns each'
|
assert lb.shape[1] == (5 + nkpt * ndim), f'labels require {(5 + nkpt * ndim)} columns each'
|
||||||
assert (lb[:, 5::3] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
|
assert (lb[:, 5::ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
|
||||||
assert (lb[:, 6::3] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
|
assert (lb[:, 6::ndim] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
|
||||||
kpts = np.zeros((lb.shape[0], 39))
|
|
||||||
for i in range(len(lb)):
|
|
||||||
kpt = np.delete(lb[i, 5:], np.arange(2, lb.shape[1] - 5, 3)) # remove occlusion param from GT
|
|
||||||
kpts[i] = np.hstack((lb[i, :5], kpt))
|
|
||||||
lb = kpts
|
|
||||||
assert lb.shape[1] == 39, 'labels require 39 columns each after removing occlusion parameter'
|
|
||||||
else:
|
else:
|
||||||
assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected'
|
assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected'
|
||||||
assert (lb[:, 1:] <= 1).all(), \
|
assert (lb[:, 1:] <= 1).all(), \
|
||||||
f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}'
|
f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}'
|
||||||
|
assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}'
|
||||||
# All labels
|
# All labels
|
||||||
max_cls = int(lb[:, 0].max()) # max label count
|
max_cls = int(lb[:, 0].max()) # max label count
|
||||||
assert max_cls <= num_cls, \
|
assert max_cls <= num_cls, \
|
||||||
f'Label class {max_cls} exceeds dataset class count {num_cls}. ' \
|
f'Label class {max_cls} exceeds dataset class count {num_cls}. ' \
|
||||||
f'Possible class labels are 0-{num_cls - 1}'
|
f'Possible class labels are 0-{num_cls - 1}'
|
||||||
assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}'
|
|
||||||
_, i = np.unique(lb, axis=0, return_index=True)
|
_, i = np.unique(lb, axis=0, return_index=True)
|
||||||
if len(i) < nl: # duplicate row check
|
if len(i) < nl: # duplicate row check
|
||||||
lb = lb[i] # remove duplicates
|
lb = lb[i] # remove duplicates
|
||||||
@ -119,12 +113,18 @@ def verify_image_label(args):
|
|||||||
msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed'
|
msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed'
|
||||||
else:
|
else:
|
||||||
ne = 1 # label empty
|
ne = 1 # label empty
|
||||||
lb = np.zeros((0, 39), dtype=np.float32) if keypoint else np.zeros((0, 5), dtype=np.float32)
|
lb = np.zeros((0, (5 + nkpt * ndim)), dtype=np.float32) if keypoint else np.zeros(
|
||||||
|
(0, 5), dtype=np.float32)
|
||||||
else:
|
else:
|
||||||
nm = 1 # label missing
|
nm = 1 # label missing
|
||||||
lb = np.zeros((0, 39), dtype=np.float32) if keypoint else np.zeros((0, 5), dtype=np.float32)
|
lb = np.zeros((0, (5 + nkpt * ndim)), dtype=np.float32) if keypoint else np.zeros((0, 5), dtype=np.float32)
|
||||||
if keypoint:
|
if keypoint:
|
||||||
keypoints = lb[:, 5:].reshape(-1, 17, 2)
|
keypoints = lb[:, 5:].reshape(-1, nkpt, ndim)
|
||||||
|
if ndim == 2:
|
||||||
|
kpt_mask = np.ones(keypoints.shape[:2], dtype=np.float32)
|
||||||
|
kpt_mask = np.where(keypoints[..., 0] < 0, 0.0, kpt_mask)
|
||||||
|
kpt_mask = np.where(keypoints[..., 1] < 0, 0.0, kpt_mask)
|
||||||
|
keypoints = np.concatenate([keypoints, kpt_mask[..., None]], axis=-1) # (nl, nkpt, 3)
|
||||||
lb = lb[:, :5]
|
lb = lb[:, :5]
|
||||||
return im_file, lb, shape, segments, keypoints, nm, nf, ne, nc, msg
|
return im_file, lb, shape, segments, keypoints, nm, nf, ne, nc, msg
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
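For reference, the column count asserted in verify_image_label() above follows directly from kpt_shape: each pose label row is class, x, y, w, h plus nkpt * ndim keypoint values:

nkpt, ndim = 17, 3          # 'kpt_shape' from the dataset YAML
columns = 5 + nkpt * ndim
print(columns)              # 56 columns per label row for COCO pose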
@ -195,7 +195,7 @@ def check_det_dataset(dataset, autodownload=True):
|
|||||||
|
|
||||||
# Download (optional)
|
# Download (optional)
|
||||||
extract_dir = ''
|
extract_dir = ''
|
||||||
if isinstance(data, (str, Path)) and (is_zipfile(data) or is_tarfile(data)):
|
if isinstance(data, (str, Path)) and (zipfile.is_zipfile(data) or is_tarfile(data)):
|
||||||
new_dir = safe_download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False)
|
new_dir = safe_download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False)
|
||||||
data = next((DATASETS_DIR / new_dir).rglob('*.yaml'))
|
data = next((DATASETS_DIR / new_dir).rglob('*.yaml'))
|
||||||
extract_dir, autodownload = data.parent, False
|
extract_dir, autodownload = data.parent, False
|
||||||
@ -356,23 +356,8 @@ class HUBDatasetStats():
|
|||||||
assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/'
|
assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/'
|
||||||
return True, str(dir), self._find_yaml(dir) # zipped, data_dir, yaml_path
|
return True, str(dir), self._find_yaml(dir) # zipped, data_dir, yaml_path
|
||||||
|
|
||||||
def _hub_ops(self, f, max_dim=1920):
|
def _hub_ops(self, f):
|
||||||
# HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing
|
compress_one_image(f, self.im_dir / Path(f).name) # save to dataset-hub
|
||||||
f_new = self.im_dir / Path(f).name # dataset-hub image filename
|
|
||||||
try: # use PIL
|
|
||||||
im = Image.open(f)
|
|
||||||
r = max_dim / max(im.height, im.width) # ratio
|
|
||||||
if r < 1.0: # image too large
|
|
||||||
im = im.resize((int(im.width * r), int(im.height * r)))
|
|
||||||
im.save(f_new, 'JPEG', quality=50, optimize=True) # save
|
|
||||||
except Exception as e: # use OpenCV
|
|
||||||
LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}')
|
|
||||||
im = cv2.imread(f)
|
|
||||||
im_height, im_width = im.shape[:2]
|
|
||||||
r = max_dim / max(im_height, im_width) # ratio
|
|
||||||
if r < 1.0: # image too large
|
|
||||||
im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA)
|
|
||||||
cv2.imwrite(str(f_new), im)
|
|
||||||
|
|
||||||
def get_json(self, save=False, verbose=False):
|
def get_json(self, save=False, verbose=False):
|
||||||
# Return dataset JSON for Ultralytics HUB
|
# Return dataset JSON for Ultralytics HUB
|
||||||
@ -426,3 +411,93 @@ class HUBDatasetStats():
|
|||||||
pass
|
pass
|
||||||
LOGGER.info(f'Done. All images saved to {self.im_dir}')
|
LOGGER.info(f'Done. All images saved to {self.im_dir}')
|
||||||
return self.im_dir
|
return self.im_dir
|
||||||
|
|
||||||
|
|
||||||
|
def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
|
||||||
|
"""
|
||||||
|
Compresses a single image file to a reduced size while preserving its aspect ratio and quality using either the
|
||||||
|
Python Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will
|
||||||
|
not be resized.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
f (str): The path to the input image file.
|
||||||
|
f_new (str, optional): The path to the output image file. If not specified, the input file will be overwritten.
|
||||||
|
max_dim (int, optional): The maximum dimension (width or height) of the output image. Default is 1920 pixels.
|
||||||
|
quality (int, optional): The image compression quality as a percentage. Default is 50%.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
None
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from pathlib import Path
|
||||||
|
from ultralytics.yolo.data.utils import compress_one_image
|
||||||
|
for f in Path('/Users/glennjocher/Downloads/dataset').rglob('*.jpg'):
|
||||||
|
compress_one_image(f)
|
||||||
|
"""
|
||||||
|
try: # use PIL
|
||||||
|
im = Image.open(f)
|
||||||
|
r = max_dim / max(im.height, im.width) # ratio
|
||||||
|
if r < 1.0: # image too large
|
||||||
|
im = im.resize((int(im.width * r), int(im.height * r)))
|
||||||
|
im.save(f_new or f, 'JPEG', quality=quality, optimize=True) # save
|
||||||
|
except Exception as e: # use OpenCV
|
||||||
|
LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}')
|
||||||
|
im = cv2.imread(f)
|
||||||
|
im_height, im_width = im.shape[:2]
|
||||||
|
r = max_dim / max(im_height, im_width) # ratio
|
||||||
|
if r < 1.0: # image too large
|
||||||
|
im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA)
|
||||||
|
cv2.imwrite(str(f_new or f), im)
|
||||||
|
|
||||||
|
|
||||||
|
def delete_dsstore(path):
|
||||||
|
"""
|
||||||
|
Deletes all ".DS_store" files under a specified directory.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path (str, optional): The directory path where the ".DS_store" files should be deleted.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
None
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from ultralytics.yolo.data.utils import delete_dsstore
|
||||||
|
delete_dsstore('/Users/glennjocher/Downloads/dataset')
|
||||||
|
|
||||||
|
Note:
|
||||||
|
".DS_store" files are created by the Apple operating system and contain metadata about folders and files. They
|
||||||
|
are hidden system files and can cause issues when transferring files between different operating systems.
|
||||||
|
"""
|
||||||
|
# Delete Apple .DS_store files
|
||||||
|
files = list(Path(path).rglob('.DS_store'))
|
||||||
|
LOGGER.info(f'Deleting *.DS_store files: {files}')
|
||||||
|
for f in files:
|
||||||
|
f.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
def zip_directory(dir, use_zipfile_library=True):
|
||||||
|
"""Zips a directory and saves the archive to the specified output path.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
dir (str): The path to the directory to be zipped.
|
||||||
|
use_zipfile_library (bool): Whether to use zipfile library or shutil for zipping.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
None
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from ultralytics.yolo.data.utils import zip_directory
|
||||||
|
zip_directory('/Users/glennjocher/Downloads/playground')
|
||||||
|
|
||||||
|
zip -r coco8-pose.zip coco8-pose
|
||||||
|
"""
|
||||||
|
delete_dsstore(dir)
|
||||||
|
if use_zipfile_library:
|
||||||
|
dir = Path(dir)
|
||||||
|
with zipfile.ZipFile(dir.with_suffix('.zip'), 'w', zipfile.ZIP_DEFLATED) as zip_file:
|
||||||
|
for file_path in dir.glob('**/*'):
|
||||||
|
if file_path.is_file():
|
||||||
|
zip_file.write(file_path, file_path.relative_to(dir))
|
||||||
|
else:
|
||||||
|
import shutil
|
||||||
|
shutil.make_archive(dir, 'zip', dir)
|
||||||
|
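
For context, a minimal sketch of how the helpers added above might be combined when preparing a dataset for Ultralytics HUB. The dataset path is a placeholder, and the import location follows the module named in the docstrings (ultralytics.yolo.data.utils); this is an illustrative usage, not part of the commit.

# Illustrative sketch only: compress every JPEG in a dataset, remove .DS_store files, then zip it for upload.
from pathlib import Path

from ultralytics.yolo.data.utils import compress_one_image, delete_dsstore, zip_directory

dataset = Path('datasets/coco8-pose')      # hypothetical dataset location
for f in dataset.rglob('*.jpg'):
    compress_one_image(f)                  # resize/re-encode in place at quality=50
delete_dsstore(dataset)                    # drop Apple metadata files before zipping
zip_directory(dataset)                     # writes datasets/coco8-pose.zip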
@@ -209,8 +209,8 @@ class Exporter:
         self.file = file
         self.output_shape = tuple(y.shape) if isinstance(y, torch.Tensor) else tuple(tuple(x.shape) for x in y)
         self.pretty_name = Path(self.model.yaml.get('yaml_file', self.file)).stem.replace('yolo', 'YOLO')
-        description = f'Ultralytics {self.pretty_name} model ' + f'trained on {Path(self.args.data).name}' \
-            if self.args.data else '(untrained)'
+        trained_on = f'trained on {Path(self.args.data).name}' if self.args.data else '(untrained)'
+        description = f'Ultralytics {self.pretty_name} model {trained_on}'
         self.metadata = {
             'description': description,
             'author': 'Ultralytics',
@@ -221,6 +221,8 @@ class Exporter:
             'batch': self.args.batch,
             'imgsz': self.imgsz,
             'names': model.names}  # model metadata
+        if model.task == 'pose':
+            self.metadata['kpt_shape'] = model.kpt_shape

         LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {file} with input shape {tuple(im.shape)} BCHW and "
                     f'output shape(s) {self.output_shape} ({file_size(file):.1f} MB)')
@@ -295,7 +297,8 @@ class Exporter:
         check_requirements(requirements)
         import onnx  # noqa

-        LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
+        opset_version = self.args.opset or get_latest_opset()
+        LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__} opset {opset_version}...')
         f = str(self.file.with_suffix('.onnx'))

         output_names = ['output0', 'output1'] if isinstance(self.model, SegmentationModel) else ['output0']
@@ -313,7 +316,7 @@ class Exporter:
             self.im.cpu() if dynamic else self.im,
             f,
             verbose=False,
-            opset_version=self.args.opset or get_latest_opset(),
+            opset_version=opset_version,
             do_constant_folding=True,  # WARNING: DNN inference with torch>=1.12 may require do_constant_folding=False
             input_names=['images'],
             output_names=output_names,
@@ -377,7 +380,6 @@ class Exporter:
         yaml_save(Path(f) / 'metadata.yaml', self.metadata)  # add metadata.yaml
         return f, None

-    @try_export
     def _export_coreml(self, prefix=colorstr('CoreML:')):
         # YOLOv8 CoreML export
         check_requirements('coremltools>=6.0')
@@ -410,8 +412,8 @@ class Exporter:
             model = self.model
         elif self.model.task == 'detect':
             model = iOSDetectModel(self.model, self.im) if self.args.nms else self.model
-        elif self.model.task == 'segment':
-            # TODO CoreML Segmentation model pipelining
+        else:
+            # TODO CoreML Segment and Pose model pipelining
             model = self.model

         ts = torch.jit.trace(model.eval(), self.im, strict=False)  # TorchScript model
@@ -5,8 +5,8 @@ from pathlib import Path
 from typing import Union

 from ultralytics import yolo  # noqa
-from ultralytics.nn.tasks import (ClassificationModel, DetectionModel, SegmentationModel, attempt_load_one_weight,
-                                  guess_model_task, nn, yaml_model_load)
+from ultralytics.nn.tasks import (ClassificationModel, DetectionModel, PoseModel, SegmentationModel,
+                                  attempt_load_one_weight, guess_model_task, nn, yaml_model_load)
 from ultralytics.yolo.cfg import get_cfg
 from ultralytics.yolo.engine.exporter import Exporter
 from ultralytics.yolo.utils import (DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, RANK, ROOT, callbacks,
@@ -25,7 +25,8 @@ TASK_MAP = {
         yolo.v8.detect.DetectionPredictor],
     'segment': [
         SegmentationModel, yolo.v8.segment.SegmentationTrainer, yolo.v8.segment.SegmentationValidator,
-        yolo.v8.segment.SegmentationPredictor]}
+        yolo.v8.segment.SegmentationPredictor],
+    'pose': [PoseModel, yolo.v8.pose.PoseTrainer, yolo.v8.pose.PoseValidator, yolo.v8.pose.PosePredictor]}


 class YOLO:
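
A short sketch of how the extended TASK_MAP is typically consumed: the task string selects the model, trainer, validator and predictor classes in a single lookup. The import path and the unpacking below are assumptions about calling code, not part of this diff.

# Illustrative: resolve the new 'pose' task to its component classes.
from ultralytics.yolo.engine.model import TASK_MAP  # assumed module location

model_cls, trainer_cls, validator_cls, predictor_cls = TASK_MAP['pose']
print(model_cls.__name__, trainer_cls.__name__)  # e.g. PoseModel PoseTrainer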
@@ -195,7 +196,7 @@ class YOLO:
         self.model.load(weights)
         return self

-    def info(self, verbose=False):
+    def info(self, verbose=True):
         """
         Logs model info.

@@ -246,6 +246,7 @@ class BasePredictor:
                                          dnn=self.args.dnn,
                                          data=self.args.data,
                                          fp16=self.args.half,
+                                         fuse=True,
                                          verbose=verbose)
         self.device = device
         self.model.eval()
@@ -17,6 +17,53 @@ from ultralytics.yolo.utils.plotting import Annotator, colors
 from ultralytics.yolo.utils.torch_utils import TORCHVISION_0_10


+class BaseTensor(SimpleClass):
+    """
+
+    Attributes:
+        tensor (torch.Tensor): A tensor.
+        orig_shape (tuple): Original image size, in the format (height, width).
+
+    Methods:
+        cpu(): Returns a copy of the tensor on CPU memory.
+        numpy(): Returns a copy of the tensor as a numpy array.
+        cuda(): Returns a copy of the tensor on GPU memory.
+        to(): Returns a copy of the tensor with the specified device and dtype.
+    """
+
+    def __init__(self, tensor, orig_shape) -> None:
+        super().__init__()
+        assert isinstance(tensor, torch.Tensor)
+        self.tensor = tensor
+        self.orig_shape = orig_shape
+
+    @property
+    def shape(self):
+        return self.data.shape
+
+    @property
+    def data(self):
+        return self.tensor
+
+    def cpu(self):
+        return self.__class__(self.data.cpu(), self.orig_shape)
+
+    def numpy(self):
+        return self.__class__(self.data.numpy(), self.orig_shape)
+
+    def cuda(self):
+        return self.__class__(self.data.cuda(), self.orig_shape)
+
+    def to(self, *args, **kwargs):
+        return self.__class__(self.data.to(*args, **kwargs), self.orig_shape)
+
+    def __len__(self):  # override len(results)
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        return self.__class__(self.data[idx], self.orig_shape)
+
+
 class Results(SimpleClass):
     """
     A class for storing and manipulating inference results.
|
|||||||
_keys (tuple): A tuple of attribute names for non-empty attributes.
|
_keys (tuple): A tuple of attribute names for non-empty attributes.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, orig_img, path, names, boxes=None, masks=None, probs=None) -> None:
|
def __init__(self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None) -> None:
|
||||||
self.orig_img = orig_img
|
self.orig_img = orig_img
|
||||||
self.orig_shape = orig_img.shape[:2]
|
self.orig_shape = orig_img.shape[:2]
|
||||||
self.boxes = Boxes(boxes, self.orig_shape) if boxes is not None else None # native size boxes
|
self.boxes = Boxes(boxes, self.orig_shape) if boxes is not None else None # native size boxes
|
||||||
self.masks = Masks(masks, self.orig_shape) if masks is not None else None # native size or imgsz masks
|
self.masks = Masks(masks, self.orig_shape) if masks is not None else None # native size or imgsz masks
|
||||||
self.probs = probs if probs is not None else None
|
self.probs = probs if probs is not None else None
|
||||||
|
self.keypoints = keypoints if keypoints is not None else None
|
||||||
self.names = names
|
self.names = names
|
||||||
self.path = path
|
self.path = path
|
||||||
self._keys = ('boxes', 'masks', 'probs')
|
self._keys = ('boxes', 'masks', 'probs', 'keypoints')
|
||||||
|
|
||||||
def pandas(self):
|
def pandas(self):
|
||||||
pass
|
pass
|
||||||
# TODO masks.pandas + boxes.pandas + cls.pandas
|
# TODO masks.pandas + boxes.pandas + cls.pandas
|
||||||
|
|
||||||
def __getitem__(self, idx):
|
def __getitem__(self, idx):
|
||||||
r = Results(orig_img=self.orig_img, path=self.path, names=self.names)
|
r = self.new()
|
||||||
for k in self.keys:
|
for k in self.keys:
|
||||||
setattr(r, k, getattr(self, k)[idx])
|
setattr(r, k, getattr(self, k)[idx])
|
||||||
return r
|
return r
|
||||||
@ -69,25 +117,25 @@ class Results(SimpleClass):
|
|||||||
self.probs = probs
|
self.probs = probs
|
||||||
|
|
||||||
def cpu(self):
|
def cpu(self):
|
||||||
r = Results(orig_img=self.orig_img, path=self.path, names=self.names)
|
r = self.new()
|
||||||
for k in self.keys:
|
for k in self.keys:
|
||||||
setattr(r, k, getattr(self, k).cpu())
|
setattr(r, k, getattr(self, k).cpu())
|
||||||
return r
|
return r
|
||||||
|
|
||||||
def numpy(self):
|
def numpy(self):
|
||||||
r = Results(orig_img=self.orig_img, path=self.path, names=self.names)
|
r = self.new()
|
||||||
for k in self.keys:
|
for k in self.keys:
|
||||||
setattr(r, k, getattr(self, k).numpy())
|
setattr(r, k, getattr(self, k).numpy())
|
||||||
return r
|
return r
|
||||||
|
|
||||||
def cuda(self):
|
def cuda(self):
|
||||||
r = Results(orig_img=self.orig_img, path=self.path, names=self.names)
|
r = self.new()
|
||||||
for k in self.keys:
|
for k in self.keys:
|
||||||
setattr(r, k, getattr(self, k).cuda())
|
setattr(r, k, getattr(self, k).cuda())
|
||||||
return r
|
return r
|
||||||
|
|
||||||
def to(self, *args, **kwargs):
|
def to(self, *args, **kwargs):
|
||||||
r = Results(orig_img=self.orig_img, path=self.path, names=self.names)
|
r = self.new()
|
||||||
for k in self.keys:
|
for k in self.keys:
|
||||||
setattr(r, k, getattr(self, k).to(*args, **kwargs))
|
setattr(r, k, getattr(self, k).to(*args, **kwargs))
|
||||||
return r
|
return r
|
||||||
@ -96,6 +144,9 @@ class Results(SimpleClass):
|
|||||||
for k in self.keys:
|
for k in self.keys:
|
||||||
return len(getattr(self, k))
|
return len(getattr(self, k))
|
||||||
|
|
||||||
|
def new(self):
|
||||||
|
return Results(orig_img=self.orig_img, path=self.path, names=self.names)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def keys(self):
|
def keys(self):
|
||||||
return [k for k in self._keys if getattr(self, k) is not None]
|
return [k for k in self._keys if getattr(self, k) is not None]
|
||||||
@@ -109,6 +160,7 @@ class Results(SimpleClass):
             pil=False,
             example='abc',
             img=None,
+            kpt_line=True,
             labels=True,
             boxes=True,
             masks=True,
@@ -126,6 +178,7 @@ class Results(SimpleClass):
             pil (bool): Whether to return the image as a PIL Image.
             example (str): An example string to display. Useful for indicating the expected format of the output.
             img (numpy.ndarray): Plot to another image. if not, plot to original image.
+            kpt_line (bool): Whether to draw lines connecting keypoints.
             labels (bool): Whether to plot the label of bounding boxes.
             boxes (bool): Whether to plot the bounding boxes.
             masks (bool): Whether to plot the masks.
@@ -146,11 +199,12 @@ class Results(SimpleClass):
         pred_masks, show_masks = self.masks, masks
         pred_probs, show_probs = self.probs, probs
         names = self.names
+        keypoints = self.keypoints
         if pred_boxes and show_boxes:
             for d in reversed(pred_boxes):
                 c, conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item())
                 name = ('' if id is None else f'id:{id} ') + names[c]
-                label = (name if not conf else f'{name} {conf:.2f}') if labels else None
+                label = (f'{name} {conf:.2f}' if conf else name) if labels else None
                 annotator.box_label(d.xyxy.squeeze(), label, color=colors(c, True))

         if pred_masks and show_masks:
@@ -168,10 +222,14 @@ class Results(SimpleClass):
             text = f"{', '.join(f'{names[j] if names else j} {pred_probs[j]:.2f}' for j in top5i)}, "
             annotator.text((32, 32), text, txt_color=(255, 255, 255))  # TODO: allow setting colors

+        if keypoints is not None:
+            for k in reversed(keypoints):
+                annotator.kpts(k, self.orig_shape, kpt_line=kpt_line)
+
         return np.asarray(annotator.im) if annotator.pil else annotator.im


-class Boxes(SimpleClass):
+class Boxes(BaseTensor):
     """
     A class for storing and manipulating detection boxes.
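
With keypoints now stored on Results and forwarded to Annotator.kpts, plotting pose predictions follows the usual Ultralytics pattern. A hedged usage sketch (the weights name is the pose asset added in this release and the image URL is the standard Ultralytics sample; both are placeholders here):

# Illustrative: run a pose model and render boxes plus keypoint skeletons.
import cv2

from ultralytics import YOLO

model = YOLO('yolov8n-pose.pt')                          # pose weights
results = model('https://ultralytics.com/images/bus.jpg')
annotated = results[0].plot(kpt_line=True)               # numpy image with boxes and keypoints drawn
cv2.imwrite('pose_result.jpg', annotated)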
@@ -246,37 +304,15 @@ class Boxes(SimpleClass):
     def xywhn(self):
         return self.xywh / self.orig_shape[[1, 0, 1, 0]]

-    def cpu(self):
-        return Boxes(self.boxes.cpu(), self.orig_shape)
-
-    def numpy(self):
-        return Boxes(self.boxes.numpy(), self.orig_shape)
-
-    def cuda(self):
-        return Boxes(self.boxes.cuda(), self.orig_shape)
-
-    def to(self, *args, **kwargs):
-        return Boxes(self.boxes.to(*args, **kwargs), self.orig_shape)
-
     def pandas(self):
         LOGGER.info('results.pandas() method not yet implemented')

-    @property
-    def shape(self):
-        return self.boxes.shape
-
     @property
     def data(self):
         return self.boxes

-    def __len__(self):  # override len(results)
-        return len(self.boxes)
-
-    def __getitem__(self, idx):
-        return Boxes(self.boxes[idx], self.orig_shape)
-

-class Masks(SimpleClass):
+class Masks(BaseTensor):
     """
     A class for storing and manipulating detection masks.
@@ -316,7 +352,7 @@ class Masks(SimpleClass):
     def xyn(self):
         # Segments (normalized)
         return [
-            ops.scale_segments(self.masks.shape[1:], x, self.orig_shape, normalize=True)
+            ops.scale_coords(self.masks.shape[1:], x, self.orig_shape, normalize=True)
             for x in ops.masks2segments(self.masks)]

     @property
@@ -324,31 +360,9 @@ class Masks(SimpleClass):
     def xy(self):
         # Segments (pixels)
         return [
-            ops.scale_segments(self.masks.shape[1:], x, self.orig_shape, normalize=False)
+            ops.scale_coords(self.masks.shape[1:], x, self.orig_shape, normalize=False)
             for x in ops.masks2segments(self.masks)]

-    @property
-    def shape(self):
-        return self.masks.shape
-
     @property
     def data(self):
         return self.masks
-
-    def cpu(self):
-        return Masks(self.masks.cpu(), self.orig_shape)
-
-    def numpy(self):
-        return Masks(self.masks.numpy(), self.orig_shape)
-
-    def cuda(self):
-        return Masks(self.masks.cuda(), self.orig_shape)
-
-    def to(self, *args, **kwargs):
-        return Masks(self.masks.to(*args, **kwargs), self.orig_shape)
-
-    def __len__(self):  # override len(results)
-        return len(self.masks)
-
-    def __getitem__(self, idx):
-        return Masks(self.masks[idx], self.orig_shape)
@@ -75,11 +75,13 @@ def benchmark(model=Path(SETTINGS['weights_dir']) / 'yolov8n.pt', imgsz=160, hal

         # Validate
         if model.task == 'detect':
-            data, key = 'coco128.yaml', 'metrics/mAP50-95(B)'
+            data, key = 'coco8.yaml', 'metrics/mAP50-95(B)'
         elif model.task == 'segment':
-            data, key = 'coco128-seg.yaml', 'metrics/mAP50-95(M)'
+            data, key = 'coco8-seg.yaml', 'metrics/mAP50-95(M)'
         elif model.task == 'classify':
             data, key = 'imagenet100', 'metrics/accuracy_top5'
+        elif model.task == 'pose':
+            data, key = 'coco8-pose.yaml', 'metrics/mAP50-95(P)'

         results = export.val(data=data, batch=1, imgsz=imgsz, plots=False, device=device, half=half, verbose=False)
         metric, speed = results.results_dict[key], results.speed['inference']
@@ -14,9 +14,9 @@ from tqdm import tqdm

 from ultralytics.yolo.utils import LOGGER, checks, emojis, is_online

-GITHUB_ASSET_NAMES = [f'yolov8{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] + \
-                     [f'yolov5{size}u.pt' for size in 'nsmlx'] + \
-                     [f'yolov3{size}u.pt' for size in ('', '-spp', '-tiny')]
+GITHUB_ASSET_NAMES = [f'yolov8{k}{suffix}.pt' for k in 'nsmlx' for suffix in ('', '6', '-cls', '-seg', '-pose')] + \
+                     [f'yolov5{k}u.pt' for k in 'nsmlx'] + \
+                     [f'yolov3{k}u.pt' for k in ('', '-spp', '-tiny')]
 GITHUB_ASSET_STEMS = [Path(k).stem for k in GITHUB_ASSET_NAMES]
@@ -168,7 +168,7 @@ class Instances:
         Args:
             bboxes (ndarray): bboxes with shape [N, 4].
             segments (list | ndarray): segments.
-            keypoints (ndarray): keypoints with shape [N, 17, 2].
+            keypoints (ndarray): keypoints(x, y, visible) with shape [N, 17, 3].
         """
         if segments is None:
             segments = []
@@ -54,3 +54,17 @@ class BboxLoss(nn.Module):
         wr = 1 - wl  # weight right
         return (F.cross_entropy(pred_dist, tl.view(-1), reduction='none').view(tl.shape) * wl +
                 F.cross_entropy(pred_dist, tr.view(-1), reduction='none').view(tl.shape) * wr).mean(-1, keepdim=True)
+
+
+class KeypointLoss(nn.Module):
+
+    def __init__(self, sigmas) -> None:
+        super().__init__()
+        self.sigmas = sigmas
+
+    def forward(self, pred_kpts, gt_kpts, kpt_mask, area):
+        d = (pred_kpts[..., 0] - gt_kpts[..., 0]) ** 2 + (pred_kpts[..., 1] - gt_kpts[..., 1]) ** 2
+        kpt_loss_factor = (torch.sum(kpt_mask != 0) + torch.sum(kpt_mask == 0)) / (torch.sum(kpt_mask != 0) + 1e-9)
+        # e = d / (2 * (area * self.sigmas) ** 2 + 1e-9)  # from formula
+        e = d / (2 * self.sigmas) ** 2 / (area + 1e-9) / 2  # from cocoeval
+        return kpt_loss_factor * ((1 - torch.exp(-e)) * kpt_mask).mean()
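
A small sketch exercising the new KeypointLoss with random tensors; it uses the OKS_SIGMA constant introduced in the metrics module further below. The module paths and tensor shapes (8 instances, 17 keypoints) are assumptions for illustration only.

# Illustrative: OKS-style keypoint loss on dummy predictions.
import torch

from ultralytics.yolo.utils.loss import KeypointLoss        # assumed module location
from ultralytics.yolo.utils.metrics import OKS_SIGMA        # assumed module location

criterion = KeypointLoss(sigmas=torch.from_numpy(OKS_SIGMA))
pred_kpts = torch.rand(8, 17, 2) * 640                       # predicted x, y per keypoint
gt_kpts = torch.rand(8, 17, 2) * 640                         # ground-truth x, y
kpt_mask = torch.randint(0, 2, (8, 17)).float()              # 1 where the keypoint is labelled
area = torch.rand(8, 1) * 640 * 640                          # ground-truth box areas
loss = criterion(pred_kpts, gt_kpts, kpt_mask, area)
print(loss)                                                  # scalar tensor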
@@ -13,6 +13,8 @@ import torch.nn as nn

 from ultralytics.yolo.utils import LOGGER, SimpleClass, TryExcept

+OKS_SIGMA = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0
+

 # boxes
 def box_area(box):
@@ -108,8 +110,8 @@ def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7

 def mask_iou(mask1, mask2, eps=1e-7):
     """
-    mask1: [N, n] m1 means number of predicted objects
-    mask2: [M, n] m2 means number of gt objects
+    mask1: [N, n] m1 means number of gt objects
+    mask2: [M, n] m2 means number of predicted objects
     Note: n means image_w x image_h
     Returns: masks iou, [N, M]
     """
|
|||||||
return intersection / (union + eps)
|
return intersection / (union + eps)
|
||||||
|
|
||||||
|
|
||||||
def masks_iou(mask1, mask2, eps=1e-7):
|
def kpt_iou(kpt1, kpt2, area, sigma, eps=1e-7):
|
||||||
|
"""OKS
|
||||||
|
kpt1: [N, 17, 3], gt
|
||||||
|
kpt2: [M, 17, 3], pred
|
||||||
|
area: [N], areas from gt
|
||||||
"""
|
"""
|
||||||
mask1: [N, n] m1 means number of predicted objects
|
d = (kpt1[:, None, :, 0] - kpt2[..., 0]) ** 2 + (kpt1[:, None, :, 1] - kpt2[..., 1]) ** 2 # (N, M, 17)
|
||||||
mask2: [N, n] m2 means number of gt objects
|
sigma = torch.tensor(sigma, device=kpt1.device, dtype=kpt1.dtype) # (17, )
|
||||||
Note: n means image_w x image_h
|
kpt_mask = kpt1[..., 2] != 0 # (N, 17)
|
||||||
Returns: masks iou, (N, )
|
e = d / (2 * sigma) ** 2 / (area[:, None, None] + eps) / 2 # from cocoeval
|
||||||
"""
|
# e = d / ((area[None, :, None] + eps) * sigma) ** 2 / 2 # from formula
|
||||||
intersection = (mask1 * mask2).sum(1).clamp(0) # (N, )
|
return (torch.exp(-e) * kpt_mask[:, None]).sum(-1) / (kpt_mask.sum(-1)[:, None] + eps)
|
||||||
union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection
|
|
||||||
return intersection / (union + eps)
|
|
||||||
|
|
||||||
|
|
||||||
def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
|
def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
|
||||||
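
The kpt_iou helper implements object keypoint similarity (OKS): for each ground-truth/prediction pair it averages exp(-d^2 / (2 * sigma^2 * area * 2)) over the labelled keypoints. A dummy-data sketch of calling it (shapes are the only meaningful part; values are random, and the module path is an assumption):

# Illustrative: OKS matrix between 3 ground-truth and 5 predicted poses.
import torch

from ultralytics.yolo.utils.metrics import OKS_SIGMA, kpt_iou  # assumed module location

gt = torch.rand(3, 17, 3)      # x, y, visibility per keypoint
pred = torch.rand(5, 17, 3)
area = torch.rand(3) * 100     # ground-truth box areas
oks = kpt_iou(gt, pred, area, sigma=OKS_SIGMA)
print(oks.shape)               # torch.Size([3, 5])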
@@ -649,13 +653,13 @@ class SegmentMetrics(SimpleClass):
         self.seg = Metric()
         self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}

-    def process(self, tp_m, tp_b, conf, pred_cls, target_cls):
+    def process(self, tp_b, tp_m, conf, pred_cls, target_cls):
         """
         Processes the detection and segmentation metrics over the given set of predictions.

         Args:
-            tp_m (list): List of True Positive masks.
             tp_b (list): List of True Positive boxes.
+            tp_m (list): List of True Positive masks.
             conf (list): List of confidence scores.
             pred_cls (list): List of predicted classes.
             target_cls (list): List of target classes.
@@ -712,6 +716,100 @@ class SegmentMetrics(SimpleClass):
         return dict(zip(self.keys + ['fitness'], self.mean_results() + [self.fitness]))


+class PoseMetrics(SegmentMetrics):
+    """
+    Calculates and aggregates detection and pose metrics over a given set of classes.
+
+    Args:
+        save_dir (Path): Path to the directory where the output plots should be saved. Default is the current directory.
+        plot (bool): Whether to save the detection and segmentation plots. Default is False.
+        names (list): List of class names. Default is an empty list.
+
+    Attributes:
+        save_dir (Path): Path to the directory where the output plots should be saved.
+        plot (bool): Whether to save the detection and segmentation plots.
+        names (list): List of class names.
+        box (Metric): An instance of the Metric class to calculate box detection metrics.
+        pose (Metric): An instance of the Metric class to calculate mask segmentation metrics.
+        speed (dict): Dictionary to store the time taken in different phases of inference.
+
+    Methods:
+        process(tp_m, tp_b, conf, pred_cls, target_cls): Processes metrics over the given set of predictions.
+        mean_results(): Returns the mean of the detection and segmentation metrics over all the classes.
+        class_result(i): Returns the detection and segmentation metrics of class `i`.
+        maps: Returns the mean Average Precision (mAP) scores for IoU thresholds ranging from 0.50 to 0.95.
+        fitness: Returns the fitness scores, which are a single weighted combination of metrics.
+        ap_class_index: Returns the list of indices of classes used to compute Average Precision (AP).
+        results_dict: Returns the dictionary containing all the detection and segmentation metrics and fitness score.
+    """
+
+    def __init__(self, save_dir=Path('.'), plot=False, names=()) -> None:
+        super().__init__(save_dir, plot, names)
+        self.save_dir = save_dir
+        self.plot = plot
+        self.names = names
+        self.box = Metric()
+        self.pose = Metric()
+        self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
+
+    def __getattr__(self, attr):
+        name = self.__class__.__name__
+        raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}")
+
+    def process(self, tp_b, tp_p, conf, pred_cls, target_cls):
+        """
+        Processes the detection and pose metrics over the given set of predictions.
+
+        Args:
+            tp_b (list): List of True Positive boxes.
+            tp_p (list): List of True Positive keypoints.
+            conf (list): List of confidence scores.
+            pred_cls (list): List of predicted classes.
+            target_cls (list): List of target classes.
+        """
+
+        results_pose = ap_per_class(tp_p,
+                                    conf,
+                                    pred_cls,
+                                    target_cls,
+                                    plot=self.plot,
+                                    save_dir=self.save_dir,
+                                    names=self.names,
+                                    prefix='Pose')[2:]
+        self.pose.nc = len(self.names)
+        self.pose.update(results_pose)
+        results_box = ap_per_class(tp_b,
+                                   conf,
+                                   pred_cls,
+                                   target_cls,
+                                   plot=self.plot,
+                                   save_dir=self.save_dir,
+                                   names=self.names,
+                                   prefix='Box')[2:]
+        self.box.nc = len(self.names)
+        self.box.update(results_box)
+
+    @property
+    def keys(self):
+        return [
+            'metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)',
+            'metrics/precision(P)', 'metrics/recall(P)', 'metrics/mAP50(P)', 'metrics/mAP50-95(P)']
+
+    def mean_results(self):
+        return self.box.mean_results() + self.pose.mean_results()
+
+    def class_result(self, i):
+        return self.box.class_result(i) + self.pose.class_result(i)
+
+    @property
+    def maps(self):
+        return self.box.maps + self.pose.maps
+
+    @property
+    def fitness(self):
+        return self.pose.fitness() + self.box.fitness()
+
+
 class ClassifyMetrics(SimpleClass):
     """
     Class for computing classification metrics including top-1 and top-5 accuracy.
@@ -281,28 +281,23 @@ def clip_boxes(boxes, shape):
     boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2


-def clip_coords(boxes, shape):
+def clip_coords(coords, shape):
     """
-    Clip bounding xyxy bounding boxes to image shape (height, width).
+    Clip line coordinates to the image boundaries.

     Args:
-        boxes (torch.Tensor or numpy.ndarray): Bounding boxes to be clipped.
-        shape (tuple): The shape of the image. (height, width)
+        coords (torch.Tensor) or (numpy.ndarray): A list of line coordinates.
+        shape (tuple): A tuple of integers representing the size of the image in the format (height, width).

     Returns:
-        None
-
-    Note:
-        The input `boxes` is modified in-place, there is no return value.
+        (None): The function modifies the input `coordinates` in place, by clipping each coordinate to the image boundaries.
     """
-    if isinstance(boxes, torch.Tensor):  # faster individually
-        boxes[:, 0].clamp_(0, shape[1])  # x1
-        boxes[:, 1].clamp_(0, shape[0])  # y1
-        boxes[:, 2].clamp_(0, shape[1])  # x2
-        boxes[:, 3].clamp_(0, shape[0])  # y2
+    if isinstance(coords, torch.Tensor):  # faster individually
+        coords[..., 0].clamp_(0, shape[1])  # x
+        coords[..., 1].clamp_(0, shape[0])  # y
     else:  # np.array (faster grouped)
-        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1])  # x1, x2
-        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0])  # y1, y2
+        coords[..., 0] = coords[..., 0].clip(0, shape[1])  # x
+        coords[..., 1] = coords[..., 1].clip(0, shape[0])  # y


 def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
|
|||||||
|
|
||||||
def process_mask(protos, masks_in, bboxes, shape, upsample=False):
|
def process_mask(protos, masks_in, bboxes, shape, upsample=False):
|
||||||
"""
|
"""
|
||||||
It takes the output of the mask head, and applies the mask to the bounding boxes. This is faster but produces
|
Apply masks to bounding boxes using the output of the mask head.
|
||||||
downsampled quality of mask
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
protos (torch.Tensor): [mask_dim, mask_h, mask_w]
|
protos (torch.Tensor): A tensor of shape [mask_dim, mask_h, mask_w].
|
||||||
masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms
|
masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
|
||||||
bboxes (torch.Tensor): [n, 4], n is number of masks after nms
|
bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
|
||||||
shape (tuple): the size of the input image (h,w)
|
shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
|
||||||
|
upsample (bool): A flag to indicate whether to upsample the mask to the original image size. Default is False.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
(torch.Tensor): The processed masks.
|
(torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
|
||||||
|
are the height and width of the input image. The mask is applied to the bounding boxes.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
c, mh, mw = protos.shape # CHW
|
c, mh, mw = protos.shape # CHW
|
||||||
@ -632,19 +628,19 @@ def process_mask_native(protos, masks_in, bboxes, shape):
|
|||||||
return masks.gt_(0.5)
|
return masks.gt_(0.5)
|
||||||
|
|
||||||
|
|
||||||
def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False):
|
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False):
|
||||||
"""
|
"""
|
||||||
Rescale segment coordinates (xyxy) from img1_shape to img0_shape
|
Rescale segment coordinates (xyxy) from img1_shape to img0_shape
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
img1_shape (tuple): The shape of the image that the segments are from.
|
img1_shape (tuple): The shape of the image that the coords are from.
|
||||||
segments (torch.Tensor): the segments to be scaled
|
coords (torch.Tensor): the coords to be scaled
|
||||||
img0_shape (tuple): the shape of the image that the segmentation is being applied to
|
img0_shape (tuple): the shape of the image that the segmentation is being applied to
|
||||||
ratio_pad (tuple): the ratio of the image size to the padded image size.
|
ratio_pad (tuple): the ratio of the image size to the padded image size.
|
||||||
normalize (bool): If True, the coordinates will be normalized to the range [0, 1]. Defaults to False
|
normalize (bool): If True, the coordinates will be normalized to the range [0, 1]. Defaults to False
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
segments (torch.Tensor): the segmented image.
|
coords (torch.Tensor): the segmented image.
|
||||||
"""
|
"""
|
||||||
if ratio_pad is None: # calculate from img0_shape
|
if ratio_pad is None: # calculate from img0_shape
|
||||||
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
|
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
|
||||||
@ -653,14 +649,15 @@ def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=F
|
|||||||
gain = ratio_pad[0][0]
|
gain = ratio_pad[0][0]
|
||||||
pad = ratio_pad[1]
|
pad = ratio_pad[1]
|
||||||
|
|
||||||
segments[:, 0] -= pad[0] # x padding
|
coords[..., 0] -= pad[0] # x padding
|
||||||
segments[:, 1] -= pad[1] # y padding
|
coords[..., 1] -= pad[1] # y padding
|
||||||
segments /= gain
|
coords[..., 0] /= gain
|
||||||
clip_segments(segments, img0_shape)
|
coords[..., 1] /= gain
|
||||||
|
clip_coords(coords, img0_shape)
|
||||||
if normalize:
|
if normalize:
|
||||||
segments[:, 0] /= img0_shape[1] # width
|
coords[..., 0] /= img0_shape[1] # width
|
||||||
segments[:, 1] /= img0_shape[0] # height
|
coords[..., 1] /= img0_shape[0] # height
|
||||||
return segments
|
return coords
|
||||||
|
|
||||||
|
|
||||||
def masks2segments(masks, strategy='largest'):
|
def masks2segments(masks, strategy='largest'):
|
||||||
@ -688,23 +685,6 @@ def masks2segments(masks, strategy='largest'):
|
|||||||
return segments
|
return segments
|
||||||
|
|
||||||
|
|
||||||
def clip_segments(segments, shape):
|
|
||||||
"""
|
|
||||||
It takes a list of line segments (x1,y1,x2,y2) and clips them to the image shape (height, width)
|
|
||||||
|
|
||||||
Args:
|
|
||||||
segments (list): a list of segments, each segment is a list of points, each point is a list of x,y
|
|
||||||
coordinates
|
|
||||||
shape (tuple): the shape of the image
|
|
||||||
"""
|
|
||||||
if isinstance(segments, torch.Tensor): # faster individually
|
|
||||||
segments[:, 0].clamp_(0, shape[1]) # x
|
|
||||||
segments[:, 1].clamp_(0, shape[0]) # y
|
|
||||||
else: # np.array (faster grouped)
|
|
||||||
segments[:, 0] = segments[:, 0].clip(0, shape[1]) # x
|
|
||||||
segments[:, 1] = segments[:, 1].clip(0, shape[0]) # y
|
|
||||||
|
|
||||||
|
|
||||||
def clean_str(s):
|
def clean_str(s):
|
||||||
"""
|
"""
|
||||||
Cleans a string by replacing special characters with underscore _
|
Cleans a string by replacing special characters with underscore _
|
||||||
|
@@ -16,7 +16,7 @@ from ultralytics.yolo.utils import LOGGER, TryExcept, threaded

 from .checks import check_font, check_version, is_ascii
 from .files import increment_path
-from .ops import clip_coords, scale_image, xywh2xyxy, xyxy2xywh
+from .ops import clip_boxes, scale_image, xywh2xyxy, xyxy2xywh

 matplotlib.rc('font', **{'size': 11})
 matplotlib.use('Agg')  # for writing to files only
@@ -30,6 +30,11 @@ class Colors:
                 '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
         self.palette = [self.hex2rgb(f'#{c}') for c in hexs]
         self.n = len(self.palette)
+        self.pose_palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102], [230, 230, 0], [255, 153, 255],
+                                      [153, 204, 255], [255, 102, 255], [255, 51, 255], [102, 178, 255], [51, 153, 255],
+                                      [255, 153, 153], [255, 102, 102], [255, 51, 51], [153, 255, 153], [102, 255, 102],
+                                      [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], [255, 255, 255]],
+                                     dtype=np.uint8)

     def __call__(self, i, bgr=False):
         c = self.palette[int(i) % self.n]
@@ -62,6 +67,12 @@ class Annotator:
         else:  # use cv2
             self.im = im
             self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2)  # line width
+        # pose
+        self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9],
+                         [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]
+
+        self.limb_color = colors.pose_palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]]
+        self.kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]

     def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
         # Add one xyxy box to image with label
@@ -132,6 +143,49 @@ class Annotator:
             # convert im back to PIL and update draw
             self.fromarray(self.im)

+    def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True):
+        """Plot keypoints.
+        Args:
+            kpts (tensor): predicted kpts, shape: [17, 3]
+            shape (tuple): image shape, (h, w)
+            steps (int): keypoints step
+            radius (int): size of drawing points
+        """
+        if self.pil:
+            # convert to numpy first
+            self.im = np.asarray(self.im).copy()
+        nkpt, ndim = kpts.shape
+        is_pose = nkpt == 17 and ndim == 3
+        kpt_line &= is_pose  # `kpt_line=True` for now only supports human pose plotting
+        for i, k in enumerate(kpts):
+            color_k = [int(x) for x in self.kpt_color[i]] if is_pose else colors(i)
+            x_coord, y_coord = k[0], k[1]
+            if x_coord % shape[1] != 0 and y_coord % shape[0] != 0:
+                if len(k) == 3:
+                    conf = k[2]
+                    if conf < 0.5:
+                        continue
+                cv2.circle(self.im, (int(x_coord), int(y_coord)), radius, color_k, -1)
+
+        if kpt_line:
+            ndim = kpts.shape[-1]
+            for sk_id, sk in enumerate(self.skeleton):
+                pos1 = (int(kpts[(sk[0] - 1), 0]), int(kpts[(sk[0] - 1), 1]))
+                pos2 = (int(kpts[(sk[1] - 1), 0]), int(kpts[(sk[1] - 1), 1]))
+                if ndim == 3:
+                    conf1 = kpts[(sk[0] - 1), 2]
+                    conf2 = kpts[(sk[1] - 1), 2]
+                    if conf1 < 0.5 or conf2 < 0.5:
+                        continue
+                if pos1[0] % shape[1] == 0 or pos1[1] % shape[0] == 0 or pos1[0] < 0 or pos1[1] < 0:
+                    continue
+                if pos2[0] % shape[1] == 0 or pos2[1] % shape[0] == 0 or pos2[0] < 0 or pos2[1] < 0:
+                    continue
+                cv2.line(self.im, pos1, pos2, [int(x) for x in self.limb_color[sk_id]], thickness=2)
+        if self.pil:
+            # convert im back to PIL and update draw
+            self.fromarray(self.im)
+
     def rectangle(self, xy, fill=None, outline=None, width=1):
         # Add rectangle to image (PIL-only)
         self.draw.rectangle(xy, fill, outline, width)
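
A small sketch of driving the new Annotator.kpts directly on a blank image; the keypoint values are random and only illustrate the expected [17, 3] layout of (x, y, confidence). Drawing happens in place on the numpy image when the cv2 backend is used.

# Illustrative: draw a random 17-keypoint skeleton on a blank 640x640 image.
import numpy as np
import torch

from ultralytics.yolo.utils.plotting import Annotator

im = np.zeros((640, 640, 3), dtype=np.uint8)
annotator = Annotator(im)
kpts = torch.rand(17, 3)
kpts[:, :2] *= 640          # x, y in pixels
kpts[:, 2] = 1.0            # confidence above the 0.5 drawing threshold
annotator.kpts(kpts, shape=(640, 640), kpt_line=True)
out = annotator.im          # annotated image (same array, modified in place)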
@@ -213,7 +267,7 @@ def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False,
         b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
     b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
     xyxy = xywh2xyxy(b).long()
-    clip_coords(xyxy, im.shape)
+    clip_boxes(xyxy, im.shape)
     crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
     if save:
         file.parent.mkdir(parents=True, exist_ok=True)  # make directory
@@ -229,6 +283,7 @@ def plot_images(images,
                 cls,
                 bboxes,
                 masks=np.zeros(0, dtype=np.uint8),
+                kpts=np.zeros((0, 51), dtype=np.float32),
                 paths=None,
                 fname='images.jpg',
                 names=None):
@@ -241,6 +296,8 @@ def plot_images(images,
         bboxes = bboxes.cpu().numpy()
     if isinstance(masks, torch.Tensor):
         masks = masks.cpu().numpy().astype(int)
+    if isinstance(kpts, torch.Tensor):
+        kpts = kpts.cpu().numpy()
     if isinstance(batch_idx, torch.Tensor):
         batch_idx = batch_idx.cpu().numpy()

@@ -300,6 +357,21 @@ def plot_images(images,
                     label = f'{c}' if labels else f'{c} {conf[j]:.1f}'
                     annotator.box_label(box, label, color=color)

+            # Plot keypoints
+            if len(kpts):
+                kpts_ = kpts[idx].copy()
+                if len(kpts_):
+                    if kpts_[..., 0].max() <= 1.01 or kpts_[..., 1].max() <= 1.01:  # if normalized with tolerance .01
+                        kpts_[..., 0] *= w  # scale to pixels
+                        kpts_[..., 1] *= h
+                    elif scale < 1:  # absolute coords need scale if image scales
+                        kpts_ *= scale
+                kpts_[..., 0] += x
+                kpts_[..., 1] += y
+                for j in range(len(kpts_)):
+                    if labels or conf[j] > 0.25:  # 0.25 conf thresh
+                        annotator.kpts(kpts_[j])
+
             # Plot masks
             if len(masks):
                 if idx.shape[0] == masks.shape[0]:  # overlap_masks=False
@@ -307,7 +379,7 @@ def plot_images(images,
                 else:  # overlap_masks=True
                     image_masks = masks[[i]]  # (1, 640, 640)
                     nl = idx.sum()
-                    index = np.arange(nl).reshape(nl, 1, 1) + 1
+                    index = np.arange(nl).reshape((nl, 1, 1)) + 1
                     image_masks = np.repeat(image_masks, nl, axis=0)
                     image_masks = np.where(image_masks == index, 1.0, 0.0)

@@ -328,13 +400,16 @@ def plot_images(images,
     annotator.im.save(fname)  # save


-def plot_results(file='path/to/results.csv', dir='', segment=False):
+def plot_results(file='path/to/results.csv', dir='', segment=False, pose=False):
     # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')
     import pandas as pd
     save_dir = Path(file).parent if file else Path(dir)
     if segment:
         fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True)
         index = [1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]
+    elif pose:
+        fig, ax = plt.subplots(2, 9, figsize=(21, 6), tight_layout=True)
+        index = [1, 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 16, 17, 18, 8, 9, 12, 13]
     else:
         fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True)
         index = [1, 2, 3, 4, 5, 8, 9, 10, 6, 7]
@@ -240,8 +240,8 @@ def copy_attr(a, b, include=(), exclude=()):


 def get_latest_opset():
-    # Return max supported ONNX opset by this version of torch
-    return max(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k)  # opset
+    # Return second-most (for maturity) recently supported ONNX opset by this version of torch
+    return max(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k) - 1  # opset


 def intersect_dicts(da, db, exclude=()):
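
The tweak above means the exporter now targets one opset below the newest symbolic opset registered in torch.onnx, trading the very latest operators for broader runtime compatibility. A minimal sketch of the selection logic, reproduced outside the exporter:

# Illustrative: reproduce the opset choice from get_latest_opset().
import torch

opsets = [int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k]
print(max(opsets))      # newest opset this torch build knows about
print(max(opsets) - 1)  # what get_latest_opset() now returns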
@@ -318,18 +318,18 @@ def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None:
     """
     Strip optimizer from 'f' to finalize training, optionally save as 's'.

-    Usage:
-        from ultralytics.yolo.utils.torch_utils import strip_optimizer
-        from pathlib import Path
-        for f in Path('/Users/glennjocher/Downloads/weights').glob('*.pt'):
-            strip_optimizer(f)
-
     Args:
         f (str): file path to model to strip the optimizer from. Default is 'best.pt'.
         s (str): file path to save the model with stripped optimizer to. If not provided, 'f' will be overwritten.

     Returns:
         None
+
+    Usage:
+        from pathlib import Path
+        from ultralytics.yolo.utils.torch_utils import strip_optimizer
+        for f in Path('/Users/glennjocher/Downloads/weights').rglob('*.pt'):
+            strip_optimizer(f)
     """
     x = torch.load(f, map_location=torch.device('cpu'))
     args = {**DEFAULT_CFG_DICT, **x['train_args']}  # combine model args with default args, preferring model args
@@ -349,7 +349,9 @@ def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None:


 def profile(input, ops, n=10, device=None):
-    """ YOLOv8 speed/memory/FLOPs profiler
+    """
+    YOLOv8 speed/memory/FLOPs profiler
+
     Usage:
         input = torch.randn(16, 3, 640, 640)
         m1 = lambda x: x * torch.sigmoid(x)
@@ -1,5 +1,5 @@
 # Ultralytics YOLO 🚀, GPL-3.0 license

-from ultralytics.yolo.v8 import classify, detect, segment
+from ultralytics.yolo.v8 import classify, detect, pose, segment

-__all__ = 'classify', 'segment', 'detect'
+__all__ = 'classify', 'segment', 'detect', 'pose'
@@ -41,7 +41,7 @@ class DetectionTrainer(BaseTrainer):
             shuffle=mode == 'train',
             seed=self.args.seed)[0] if self.args.v5loader else \
             build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, rank=rank, mode=mode,
-                             rect=mode == 'val', names=self.data['names'])[0]
+                             rect=mode == 'val', data_info=self.data)[0]

     def preprocess_batch(self, batch):
         batch['img'] = batch['img'].to(self.device, non_blocking=True).float() / 255
@@ -41,7 +41,7 @@ class DetectionValidator(BaseValidator):

     def init_metrics(self, model):
         val = self.data.get(self.args.split, '')  # validation path
-        self.is_coco = isinstance(val, str) and val.endswith(f'coco{os.sep}val2017.txt')  # is COCO dataset
+        self.is_coco = isinstance(val, str) and 'coco' in val and val.endswith(f'{os.sep}val2017.txt')  # is COCO
         self.class_map = ops.coco80_to_coco91_class() if self.is_coco else list(range(1000))
         self.args.save_json |= self.is_coco and not self.training  # run on final val if training COCO
         self.names = model.names
@@ -179,7 +179,7 @@ class DetectionValidator(BaseValidator):
                                prefix=colorstr(f'{self.args.mode}: '),
                                shuffle=False,
                                seed=self.args.seed)[0] if self.args.v5loader else \
-            build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, names=self.data['names'],
+            build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, data_info=self.data,
                              mode='val')[0]

     def plot_val_samples(self, batch, ni):
7
ultralytics/yolo/v8/pose/__init__.py
Normal file
7
ultralytics/yolo/v8/pose/__init__.py
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
# Ultralytics YOLO 🚀, GPL-3.0 license
|
||||||
|
|
||||||
|
from .predict import PosePredictor, predict
|
||||||
|
from .train import PoseTrainer, train
|
||||||
|
from .val import PoseValidator, val
|
||||||
|
|
||||||
|
__all__ = 'PoseTrainer', 'train', 'PoseValidator', 'val', 'PosePredictor', 'predict'
|
103 ultralytics/yolo/v8/pose/predict.py Normal file
@ -0,0 +1,103 @@
# Ultralytics YOLO 🚀, GPL-3.0 license

from ultralytics.yolo.engine.results import Results
from ultralytics.yolo.utils import DEFAULT_CFG, ROOT, ops
from ultralytics.yolo.utils.plotting import colors, save_one_box
from ultralytics.yolo.v8.detect.predict import DetectionPredictor


class PosePredictor(DetectionPredictor):

    def postprocess(self, preds, img, orig_img):
        preds = ops.non_max_suppression(preds,
                                        self.args.conf,
                                        self.args.iou,
                                        agnostic=self.args.agnostic_nms,
                                        max_det=self.args.max_det,
                                        classes=self.args.classes,
                                        nc=len(self.model.names))

        results = []
        for i, pred in enumerate(preds):
            orig_img = orig_img[i] if isinstance(orig_img, list) else orig_img
            shape = orig_img.shape
            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round()
            pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:]
            pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, shape)
            path, _, _, _, _ = self.batch
            img_path = path[i] if isinstance(path, list) else path
            results.append(
                Results(orig_img=orig_img,
                        path=img_path,
                        names=self.model.names,
                        boxes=pred[:, :6],
                        keypoints=pred_kpts))
        return results

    def write_results(self, idx, results, batch):
        p, im, im0 = batch
        log_string = ''
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        self.seen += 1
        imc = im0.copy() if self.args.save_crop else im0
        if self.source_type.webcam or self.source_type.from_img:  # batch_size >= 1
            log_string += f'{idx}: '
            frame = self.dataset.count
        else:
            frame = getattr(self.dataset, 'frame', 0)
        self.data_path = p
        self.txt_path = str(self.save_dir / 'labels' / p.stem) + ('' if self.dataset.mode == 'image' else f'_{frame}')
        log_string += '%gx%g ' % im.shape[2:]  # print string
        self.annotator = self.get_annotator(im0)

        det = results[idx].boxes  # TODO: make boxes inherit from tensors
        if len(det) == 0:
            return f'{log_string}(no detections), '
        for c in det.cls.unique():
            n = (det.cls == c).sum()  # detections per class
            log_string += f"{n} {self.model.names[int(c)]}{'s' * (n > 1)}, "

        kpts = reversed(results[idx].keypoints)
        for k in kpts:
            self.annotator.kpts(k, shape=results[idx].orig_shape)

        # write
        for j, d in enumerate(reversed(det)):
            c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item())
            if self.args.save_txt:  # Write to file
                kpt = (kpts[j][:, :2] / d.orig_shape[[1, 0]]).reshape(-1).tolist()
                box = d.xywhn.view(-1).tolist()
                line = (c, *box, *kpt) + (conf, ) * self.args.save_conf + (() if id is None else (id, ))
                with open(f'{self.txt_path}.txt', 'a') as f:
                    f.write(('%g ' * len(line)).rstrip() % line + '\n')
            if self.args.save or self.args.show:  # Add bbox to image
                name = ('' if id is None else f'id:{id} ') + self.model.names[c]
                label = (f'{name} {conf:.2f}' if self.args.show_conf else name) if self.args.show_labels else None
                if self.args.boxes:
                    self.annotator.box_label(d.xyxy.squeeze(), label, color=colors(c, True))
            if self.args.save_crop:
                save_one_box(d.xyxy,
                             imc,
                             file=self.save_dir / 'crops' / self.model.model.names[c] / f'{self.data_path.stem}.jpg',
                             BGR=True)

        return log_string


def predict(cfg=DEFAULT_CFG, use_python=False):
    model = cfg.model or 'yolov8n-pose.pt'
    source = cfg.source if cfg.source is not None else ROOT / 'assets' if (ROOT / 'assets').exists() \
        else 'https://ultralytics.com/images/bus.jpg'

    args = dict(model=model, source=source)
    if use_python:
        from ultralytics import YOLO
        YOLO(model)(**args)
    else:
        predictor = PosePredictor(overrides=args)
        predictor.predict_cli()


if __name__ == '__main__':
    predict()
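For intuition, PosePredictor.postprocess above splits each NMS output row into a box part (first 6 columns: xyxy, conf, cls) and a keypoint part reshaped via kpt_shape. A minimal sketch of that reshape with a dummy tensor standing in for a real prediction (the COCO layout [17, 3] comes from the pose files in this commit; the tensor values are placeholders):

import torch

kpt_shape = (17, 3)  # 17 keypoints, each (x, y, visibility)
n = 4                # pretend NMS kept 4 detections
pred = torch.randn(n, 6 + kpt_shape[0] * kpt_shape[1])  # [xyxy, conf, cls, 51 keypoint values]

boxes = pred[:, :6]                             # passed to Results(boxes=...)
kpts = pred[:, 6:].view(len(pred), *kpt_shape)  # passed to Results(keypoints=...)
print(boxes.shape, kpts.shape)                  # torch.Size([4, 6]) torch.Size([4, 17, 3])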
170 ultralytics/yolo/v8/pose/train.py Normal file
@ -0,0 +1,170 @@
# Ultralytics YOLO 🚀, GPL-3.0 license

from copy import copy

import torch
import torch.nn as nn

from ultralytics.nn.tasks import PoseModel
from ultralytics.yolo import v8
from ultralytics.yolo.utils import DEFAULT_CFG
from ultralytics.yolo.utils.loss import KeypointLoss
from ultralytics.yolo.utils.metrics import OKS_SIGMA
from ultralytics.yolo.utils.ops import xyxy2xywh
from ultralytics.yolo.utils.plotting import plot_images, plot_results
from ultralytics.yolo.utils.tal import make_anchors
from ultralytics.yolo.utils.torch_utils import de_parallel
from ultralytics.yolo.v8.detect.train import Loss


# BaseTrainer python usage
class PoseTrainer(v8.detect.DetectionTrainer):

    def __init__(self, cfg=DEFAULT_CFG, overrides=None):
        if overrides is None:
            overrides = {}
        overrides['task'] = 'pose'
        super().__init__(cfg, overrides)

    def get_model(self, cfg=None, weights=None, verbose=True):
        model = PoseModel(cfg, ch=3, nc=self.data['nc'], data_kpt_shape=self.data['kpt_shape'], verbose=verbose)
        if weights:
            model.load(weights)

        return model

    def set_model_attributes(self):
        super().set_model_attributes()
        self.model.kpt_shape = self.data['kpt_shape']

    def get_validator(self):
        self.loss_names = 'box_loss', 'pose_loss', 'kobj_loss', 'cls_loss', 'dfl_loss'
        return v8.pose.PoseValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args))

    def criterion(self, preds, batch):
        if not hasattr(self, 'compute_loss'):
            self.compute_loss = PoseLoss(de_parallel(self.model))
        return self.compute_loss(preds, batch)

    def plot_training_samples(self, batch, ni):
        images = batch['img']
        kpts = batch['keypoints']
        cls = batch['cls'].squeeze(-1)
        bboxes = batch['bboxes']
        paths = batch['im_file']
        batch_idx = batch['batch_idx']
        plot_images(images,
                    batch_idx,
                    cls,
                    bboxes,
                    kpts=kpts,
                    paths=paths,
                    fname=self.save_dir / f'train_batch{ni}.jpg')

    def plot_metrics(self):
        plot_results(file=self.csv, pose=True)  # save results.png


# Criterion class for computing training losses
class PoseLoss(Loss):

    def __init__(self, model):  # model must be de-paralleled
        super().__init__(model)
        self.kpt_shape = model.model[-1].kpt_shape
        self.bce_pose = nn.BCEWithLogitsLoss()
        is_pose = self.kpt_shape == [17, 3]
        nkpt = self.kpt_shape[0]  # number of keypoints
        sigmas = torch.from_numpy(OKS_SIGMA).to(self.device) if is_pose else torch.ones(nkpt, device=self.device) / nkpt
        self.keypoint_loss = KeypointLoss(sigmas=sigmas)

    def __call__(self, preds, batch):
        loss = torch.zeros(5, device=self.device)  # box, cls, dfl, kpt_location, kpt_visibility
        feats, pred_kpts = preds if isinstance(preds[0], list) else preds[1]
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1)

        # b, grids, ..
        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
        pred_distri = pred_distri.permute(0, 2, 1).contiguous()
        pred_kpts = pred_kpts.permute(0, 2, 1).contiguous()

        dtype = pred_scores.dtype
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

        # targets
        batch_size = pred_scores.shape[0]
        batch_idx = batch['batch_idx'].view(-1, 1)
        targets = torch.cat((batch_idx, batch['cls'].view(-1, 1), batch['bboxes']), 1)
        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)

        # pboxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)
        pred_kpts = self.kpts_decode(anchor_points, pred_kpts.view(batch_size, -1, *self.kpt_shape))  # (b, h*w, 17, 3)

        _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
            pred_scores.detach().sigmoid(), (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
            anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt)

        target_scores_sum = max(target_scores.sum(), 1)

        # cls loss
        # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
        loss[3] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

        # bbox loss
        if fg_mask.sum():
            target_bboxes /= stride_tensor
            loss[0], loss[4] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores,
                                              target_scores_sum, fg_mask)
            keypoints = batch['keypoints'].to(self.device).float().clone()
            keypoints[..., 0] *= imgsz[1]
            keypoints[..., 1] *= imgsz[0]
            for i in range(batch_size):
                if fg_mask[i].sum():
                    idx = target_gt_idx[i][fg_mask[i]]
                    gt_kpt = keypoints[batch_idx.view(-1) == i][idx]  # (n, 51)
                    gt_kpt[..., 0] /= stride_tensor[fg_mask[i]]
                    gt_kpt[..., 1] /= stride_tensor[fg_mask[i]]
                    area = xyxy2xywh(target_bboxes[i][fg_mask[i]])[:, 2:].prod(1, keepdim=True)
                    pred_kpt = pred_kpts[i][fg_mask[i]]
                    kpt_mask = gt_kpt[..., 2] != 0
                    loss[1] += self.keypoint_loss(pred_kpt, gt_kpt, kpt_mask, area)  # pose loss
                    # kpt_score loss
                    if pred_kpt.shape[-1] == 3:
                        loss[2] += self.bce_pose(pred_kpt[..., 2], kpt_mask.float())  # keypoint obj loss

        loss[0] *= self.hyp.box  # box gain
        loss[1] *= self.hyp.pose / batch_size  # pose gain
        loss[2] *= self.hyp.kobj / batch_size  # kobj gain
        loss[3] *= self.hyp.cls  # cls gain
        loss[4] *= self.hyp.dfl  # dfl gain

        return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)

    def kpts_decode(self, anchor_points, pred_kpts):
        y = pred_kpts.clone()
        y[..., :2] *= 2.0
        y[..., 0] += anchor_points[:, [0]] - 0.5
        y[..., 1] += anchor_points[:, [1]] - 0.5
        return y


def train(cfg=DEFAULT_CFG, use_python=False):
    model = cfg.model or 'yolov8n-pose.yaml'
    data = cfg.data or 'coco8-pose.yaml'
    device = cfg.device if cfg.device is not None else ''

    args = dict(model=model, data=data, device=device)
    if use_python:
        from ultralytics import YOLO
        YOLO(model).train(**args)
    else:
        trainer = PoseTrainer(overrides=args)
        trainer.train()


if __name__ == '__main__':
    train()
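kpts_decode above maps raw keypoint regressions into grid coordinates by doubling the xy values and re-centering them on each anchor cell. A small numeric sketch with a single anchor and a single keypoint (all values are illustrative placeholders, not real model output):

import torch

anchor_points = torch.tensor([[4.5, 2.5]])   # one anchor cell centre in grid units
raw = torch.tensor([[[[0.3, -0.1, 0.9]]]])   # (b=1, anchors=1, kpts=1, (x, y, visibility))

y = raw.clone()
y[..., :2] *= 2.0                        # widen the regression range, as in kpts_decode
y[..., 0] += anchor_points[:, [0]] - 0.5  # shift x onto the anchor
y[..., 1] += anchor_points[:, [1]] - 0.5  # shift y onto the anchor
print(y)  # keypoint xy now expressed in grid units relative to the anchor; visibility is untouched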
213 ultralytics/yolo/v8/pose/val.py Normal file
@ -0,0 +1,213 @@
# Ultralytics YOLO 🚀, GPL-3.0 license

from pathlib import Path

import numpy as np
import torch

from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, ops
from ultralytics.yolo.utils.checks import check_requirements
from ultralytics.yolo.utils.metrics import OKS_SIGMA, PoseMetrics, box_iou, kpt_iou
from ultralytics.yolo.utils.plotting import output_to_target, plot_images
from ultralytics.yolo.v8.detect import DetectionValidator


class PoseValidator(DetectionValidator):

    def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None):
        super().__init__(dataloader, save_dir, pbar, args)
        self.args.task = 'pose'
        self.metrics = PoseMetrics(save_dir=self.save_dir)

    def preprocess(self, batch):
        batch = super().preprocess(batch)
        batch['keypoints'] = batch['keypoints'].to(self.device).float()
        return batch

    def get_desc(self):
        return ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)', 'Pose(P',
                                         'R', 'mAP50', 'mAP50-95)')

    def postprocess(self, preds):
        preds = ops.non_max_suppression(preds,
                                        self.args.conf,
                                        self.args.iou,
                                        labels=self.lb,
                                        multi_label=True,
                                        agnostic=self.args.single_cls,
                                        max_det=self.args.max_det,
                                        nc=self.nc)
        return preds

    def init_metrics(self, model):
        super().init_metrics(model)
        self.kpt_shape = self.data['kpt_shape']
        is_pose = self.kpt_shape == [17, 3]
        nkpt = self.kpt_shape[0]
        self.sigma = OKS_SIGMA if is_pose else np.ones(nkpt) / nkpt

    def update_metrics(self, preds, batch):
        # Metrics
        for si, pred in enumerate(preds):
            idx = batch['batch_idx'] == si
            cls = batch['cls'][idx]
            bbox = batch['bboxes'][idx]
            kpts = batch['keypoints'][idx]
            nl, npr = cls.shape[0], pred.shape[0]  # number of labels, predictions
            nk = kpts.shape[1]  # number of keypoints
            shape = batch['ori_shape'][si]
            correct_kpts = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            self.seen += 1

            if npr == 0:
                if nl:
                    self.stats.append((correct_bboxes, correct_kpts, *torch.zeros(
                        (2, 0), device=self.device), cls.squeeze(-1)))
                    if self.args.plots:
                        self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
                continue

            # Predictions
            if self.args.single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            ops.scale_boxes(batch['img'][si].shape[1:], predn[:, :4], shape,
                            ratio_pad=batch['ratio_pad'][si])  # native-space pred
            pred_kpts = predn[:, 6:].view(npr, nk, -1)
            ops.scale_coords(batch['img'][si].shape[1:], pred_kpts, shape, ratio_pad=batch['ratio_pad'][si])

            # Evaluate
            if nl:
                height, width = batch['img'].shape[2:]
                tbox = ops.xywh2xyxy(bbox) * torch.tensor(
                    (width, height, width, height), device=self.device)  # target boxes
                ops.scale_boxes(batch['img'][si].shape[1:], tbox, shape,
                                ratio_pad=batch['ratio_pad'][si])  # native-space labels
                tkpts = kpts.clone()
                tkpts[..., 0] *= width
                tkpts[..., 1] *= height
                tkpts = ops.scale_coords(batch['img'][si].shape[1:], tkpts, shape, ratio_pad=batch['ratio_pad'][si])
                labelsn = torch.cat((cls, tbox), 1)  # native-space labels
                correct_bboxes = self._process_batch(predn[:, :6], labelsn)
                correct_kpts = self._process_batch(predn[:, :6], labelsn, pred_kpts, tkpts)
                if self.args.plots:
                    self.confusion_matrix.process_batch(predn, labelsn)

            # Append correct_masks, correct_boxes, pconf, pcls, tcls
            self.stats.append((correct_bboxes, correct_kpts, pred[:, 4], pred[:, 5], cls.squeeze(-1)))

            # Save
            if self.args.save_json:
                self.pred_to_json(predn, batch['im_file'][si])
            # if self.args.save_txt:
            #    save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')

    def _process_batch(self, detections, labels, pred_kpts=None, gt_kpts=None):
        """
        Return correct prediction matrix
        Arguments:
            detections (array[N, 6]), x1, y1, x2, y2, conf, class
            labels (array[M, 5]), class, x1, y1, x2, y2
            pred_kpts (array[N, 51]), 51 = 17 * 3
            gt_kpts (array[N, 51])
        Returns:
            correct (array[N, 10]), for 10 IoU levels
        """
        if pred_kpts is not None and gt_kpts is not None:
            # `0.53` is from https://github.com/jin-s13/xtcocoapi/blob/master/xtcocotools/cocoeval.py#L384
            area = ops.xyxy2xywh(labels[:, 1:])[:, 2:].prod(1) * 0.53
            iou = kpt_iou(gt_kpts, pred_kpts, sigma=self.sigma, area=area)
        else:  # boxes
            iou = box_iou(labels[:, 1:], detections[:, :4])

        correct = np.zeros((detections.shape[0], self.iouv.shape[0])).astype(bool)
        correct_class = labels[:, 0:1] == detections[:, 5]
        for i in range(len(self.iouv)):
            x = torch.where((iou >= self.iouv[i]) & correct_class)  # IoU > threshold and classes match
            if x[0].shape[0]:
                matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]),
                                    1).cpu().numpy()  # [label, detect, iou]
                if x[0].shape[0] > 1:
                    matches = matches[matches[:, 2].argsort()[::-1]]
                    matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                    # matches = matches[matches[:, 2].argsort()[::-1]]
                    matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
                correct[matches[:, 1].astype(int), i] = True
        return torch.tensor(correct, dtype=torch.bool, device=detections.device)

    def plot_val_samples(self, batch, ni):
        plot_images(batch['img'],
                    batch['batch_idx'],
                    batch['cls'].squeeze(-1),
                    batch['bboxes'],
                    kpts=batch['keypoints'],
                    paths=batch['im_file'],
                    fname=self.save_dir / f'val_batch{ni}_labels.jpg',
                    names=self.names)

    def plot_predictions(self, batch, preds, ni):
        pred_kpts = torch.cat([p[:, 6:].view(-1, *self.kpt_shape)[:15] for p in preds], 0)
        plot_images(batch['img'],
                    *output_to_target(preds, max_det=15),
                    kpts=pred_kpts,
                    paths=batch['im_file'],
                    fname=self.save_dir / f'val_batch{ni}_pred.jpg',
                    names=self.names)  # pred

    def pred_to_json(self, predn, filename):
        stem = Path(filename).stem
        image_id = int(stem) if stem.isnumeric() else stem
        box = ops.xyxy2xywh(predn[:, :4])  # xywh
        box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
        for p, b in zip(predn.tolist(), box.tolist()):
            self.jdict.append({
                'image_id': image_id,
                'category_id': self.class_map[int(p[5])],
                'bbox': [round(x, 3) for x in b],
                'keypoints': p[6:],
                'score': round(p[4], 5)})

    def eval_json(self, stats):
        if self.args.save_json and self.is_coco and len(self.jdict):
            anno_json = self.data['path'] / 'annotations/person_keypoints_val2017.json'  # annotations
            pred_json = self.save_dir / 'predictions.json'  # predictions
            LOGGER.info(f'\nEvaluating pycocotools mAP using {pred_json} and {anno_json}...')
            try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
                check_requirements('pycocotools>=2.0.6')
                from pycocotools.coco import COCO  # noqa
                from pycocotools.cocoeval import COCOeval  # noqa

                for x in anno_json, pred_json:
                    assert x.is_file(), f'{x} file not found'
                anno = COCO(str(anno_json))  # init annotations api
                pred = anno.loadRes(str(pred_json))  # init predictions api (must pass string, not Path)
                for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'keypoints')]):
                    if self.is_coco:
                        eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files]  # im to eval
                    eval.evaluate()
                    eval.accumulate()
                    eval.summarize()
                    idx = i * 4 + 2
                    stats[self.metrics.keys[idx + 1]], stats[
                        self.metrics.keys[idx]] = eval.stats[:2]  # update mAP50-95 and mAP50
            except Exception as e:
                LOGGER.warning(f'pycocotools unable to run: {e}')
        return stats


def val(cfg=DEFAULT_CFG, use_python=False):
    model = cfg.model or 'yolov8n-pose.pt'
    data = cfg.data or 'coco128-pose.yaml'

    args = dict(model=model, data=data)
    if use_python:
        from ultralytics import YOLO
        YOLO(model).val(**args)
    else:
        validator = PoseValidator(args=args)
        validator(model=args['model'])


if __name__ == '__main__':
    val()
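PoseValidator.init_metrics above falls back to uniform per-keypoint sigmas whenever the dataset's kpt_shape is not the 17-point COCO layout; only the [17, 3] case uses the library's OKS_SIGMA constants for keypoint IoU. A short sketch of that selection logic (the placeholder array below merely stands in for OKS_SIGMA):

import numpy as np

def select_sigma(kpt_shape, oks_sigma):
    # mirrors PoseValidator.init_metrics: COCO OKS sigmas only for the 17x3 layout,
    # otherwise weight every keypoint equally
    is_pose = kpt_shape == [17, 3]
    nkpt = kpt_shape[0]
    return oks_sigma if is_pose else np.ones(nkpt) / nkpt

coco_sigma = np.full(17, 0.05)            # placeholder standing in for OKS_SIGMA
print(select_sigma([17, 3], coco_sigma))  # COCO pose -> the supplied OKS sigmas
print(select_sigma([5, 3], coco_sigma))   # custom 5-keypoint dataset -> uniform 0.2 each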
@ -65,7 +65,7 @@ class SegmentationValidator(DetectionValidator):

             if npr == 0:
                 if nl:
-                    self.stats.append((correct_masks, correct_bboxes, *torch.zeros(
+                    self.stats.append((correct_bboxes, correct_masks, *torch.zeros(
                         (2, 0), device=self.device), cls.squeeze(-1)))
                     if self.args.plots:
                         self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
@ -103,7 +103,7 @@ class SegmentationValidator(DetectionValidator):
                     self.confusion_matrix.process_batch(predn, labelsn)

             # Append correct_masks, correct_boxes, pconf, pcls, tcls
-            self.stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], cls.squeeze(-1)))
+            self.stats.append((correct_bboxes, correct_masks, pred[:, 4], pred[:, 5], cls.squeeze(-1)))

             pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
             if self.args.plots and self.batch_i < 3:
@ -220,8 +220,7 @@ class SegmentationValidator(DetectionValidator):
                 pred = anno.loadRes(str(pred_json))  # init predictions api (must pass string, not Path)
                 for i, eval in enumerate([COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'segm')]):
                     if self.is_coco:
-                        eval.params.imgIds = [int(Path(x).stem)
-                                              for x in self.dataloader.dataset.im_files]  # images to eval
+                        eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files]  # im to eval
                     eval.evaluate()
                     eval.accumulate()
                     eval.summarize()