mirror of https://github.com/THU-MIG/yolov10.git
synced 2025-05-24 06:14:55 +08:00

commit 1197abeb1c — "update"
parent 5be2ffbd13
ultralytics/__init__.py
@@ -3,7 +3,7 @@
 __version__ = "8.1.34"

 from ultralytics.data.explorer.explorer import Explorer
-from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld
+from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld, YOLOv10
 from ultralytics.models.fastsam import FastSAM
 from ultralytics.models.nas import NAS
 from ultralytics.utils import ASSETS, SETTINGS as settings
@@ -23,4 +23,5 @@ __all__ = (
     "download",
     "settings",
     "Explorer",
+    "YOLOv10"
 )
ultralytics/cfg/models/v10/yolov10b.yaml (new file)
@@ -0,0 +1,40 @@
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  b: [0.67, 1.00, 512]
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2fCIB, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2fCIB, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v10/yolov10l.yaml (new file)
@@ -0,0 +1,40 @@
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2fCIB, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2fCIB, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v10/yolov10m.yaml (new file)
@@ -0,0 +1,43 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2fCIB, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v10/yolov10n.yaml (new file)
@@ -0,0 +1,40 @@
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2f, [512]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v10/yolov10s.yaml (new file)
@@ -0,0 +1,39 @@
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  s: [0.33, 0.50, 1024]
+
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2fCIB, [1024, True, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2f, [512]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v10/yolov10x.yaml (new file)
@@ -0,0 +1,40 @@
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  x: [1.00, 1.25, 512]
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2fCIB, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2fCIB, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2fCIB, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
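The six configs above differ only in their scale constants and in where C2fCIB/SCDown/PSA replace the plain C2f/Conv blocks of the YOLOv8 layout. A minimal sketch of building a model from one of these YAMLs, assuming the YOLOv10 class exported later in this commit (the paths are illustrative):

    from ultralytics import YOLOv10

    # Build from the config above; the scale ('n') is inferred from the filename.
    model = YOLOv10("ultralytics/cfg/models/v10/yolov10n.yaml")
    model.info()  # print layer/parameter summary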
ultralytics/engine/trainer.py
@@ -425,7 +425,8 @@ class BaseTrainer:
                 self.ema.update_attr(self.model, include=["yaml", "nc", "args", "names", "stride", "class_weights"])

                 # Validation
-                if self.args.val or final_epoch or self.stopper.possible_stop or self.stop:
+                if (self.args.val and (((epoch+1) % 10 == 0) or (self.epochs - epoch) <= 10)) \
+                        or final_epoch or self.stopper.possible_stop or self.stop:
                     self.metrics, self.fitness = self.validate()
                     self.save_metrics(metrics={**self.label_loss_items(self.tloss), **self.metrics, **self.lr})
                     self.stop |= self.stopper(epoch + 1, self.fitness) or final_epoch
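With the new condition, validation runs only every tenth epoch until the final ten epochs, where it runs every epoch (plus whenever early stopping may trigger). A quick check of the predicate, assuming 0-indexed epochs as in the trainer loop:

    # Epochs on which validate() fires for a 100-epoch run (early-stop cases aside).
    epochs = 100
    val_epochs = [e for e in range(epochs) if ((e + 1) % 10 == 0) or (epochs - e) <= 10]
    print(val_epochs)  # [9, 19, 29, 39, 49, 59, 69, 79, 89, 90, 91, ..., 99]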
ultralytics/engine/validator.py
@@ -196,10 +196,16 @@ class BaseValidator:
         self.check_stats(stats)
         self.speed = dict(zip(self.speed.keys(), (x.t / len(self.dataloader.dataset) * 1e3 for x in dt)))
         self.finalize_metrics()
-        self.print_results()
+        # self.print_results()
         self.run_callbacks("on_val_end")
         if self.training:
             model.float()
+            assert(self.args.save_json and self.jdict)
+            with open(str(self.save_dir / "predictions.json"), "w") as f:
+                LOGGER.info(f"Saving {f.name}...")
+                json.dump(self.jdict, f)  # flatten and save
+            stats = self.eval_json(stats)  # update stats
+            stats['fitness'] = stats['metrics/mAP50-95(B)']
             results = {**stats, **trainer.label_loss_items(self.loss.cpu() / len(self.dataloader), prefix="val")}
             return {k: round(float(v), 5) for k, v in results.items()}  # return results as 5 decimal place floats
         else:
ultralytics/models/__init__.py
@@ -3,5 +3,6 @@
 from .rtdetr import RTDETR
 from .sam import SAM
 from .yolo import YOLO, YOLOWorld
+from .yolov10 import YOLOv10

-__all__ = "YOLO", "RTDETR", "SAM", "YOLOWorld"  # allow simpler import
+__all__ = "YOLO", "RTDETR", "SAM", "YOLOWorld", "YOLOv10"  # allow simpler import
ultralytics/models/yolov10/__init__.py (new file)
@@ -0,0 +1,5 @@
+from .model import YOLOv10
+from .predict import YOLOv10DetectionPredictor
+from .val import YOLOv10DetectionValidator
+
+__all__ = "YOLOv10DetectionPredictor", "YOLOv10DetectionValidator", "YOLOv10"
ultralytics/models/yolov10/model.py (new file)
@@ -0,0 +1,18 @@
+from ..yolo import YOLO
+from ultralytics.nn.tasks import YOLOv10DetectionModel
+from .val import YOLOv10DetectionValidator
+from .predict import YOLOv10DetectionPredictor
+from .train import YOLOv10DetectionTrainer
+
+class YOLOv10(YOLO):
+    @property
+    def task_map(self):
+        """Map head to model, trainer, validator, and predictor classes."""
+        return {
+            "detect": {
+                "model": YOLOv10DetectionModel,
+                "trainer": YOLOv10DetectionTrainer,
+                "validator": YOLOv10DetectionValidator,
+                "predictor": YOLOv10DetectionPredictor,
+            },
+        }
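A minimal usage sketch for the class above; the checkpoint and image names are placeholders, and prediction routes through the YOLOv10DetectionPredictor registered in task_map:

    from ultralytics import YOLOv10

    model = YOLOv10("yolov10n.pt")       # placeholder checkpoint name
    results = model.predict("bus.jpg")   # NMS-free postprocessing (see predict.py below)
    print(results[0].boxes)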
ultralytics/models/yolov10/predict.py (new file)
@@ -0,0 +1,37 @@
+from ultralytics.models.yolo.detect import DetectionPredictor
+import torch
+from ultralytics.utils import ops
+from ultralytics.engine.results import Results
+
+
+class YOLOv10DetectionPredictor(DetectionPredictor):
+    def postprocess(self, preds, img, orig_imgs):
+        if not isinstance(preds, (list, tuple)):
+            preds = [preds, None]
+
+        prediction = preds[0].transpose(-1, -2)
+        _, _, nd = prediction.shape
+        nc = nd - 4
+        bboxes, scores = prediction.split((4, nd - 4), dim=-1)
+        bboxes = ops.xywh2xyxy(bboxes)
+
+        scores, index = torch.topk(scores.flatten(1), self.args.max_det, axis=-1)
+        labels = index % nc
+        index = torch.div(index, nc, rounding_mode='floor')
+        bboxes = bboxes.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, bboxes.shape[-1]))
+
+        preds = torch.cat([bboxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
+        assert(preds.shape[0] == 1)
+        mask = preds[..., 4] > self.args.conf
+        preds = preds[mask].unsqueeze(0)
+
+        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
+            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
+
+        results = []
+        for i, pred in enumerate(preds):
+            orig_img = orig_imgs[i]
+            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
+            img_path = self.batch[0][i]
+            results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred))
+        return results
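This postprocess replaces NMS entirely: predictions arrive as (batch, 4 + nc, anchors), and the top max_det entries of the flattened anchors-by-classes score matrix are taken directly, since the one-to-one head emits at most one box per object. A standalone sketch of the selection step with dummy tensors (shapes and thresholds illustrative):

    import torch

    B, N, nc, max_det, conf = 1, 8400, 80, 300, 0.25
    scores = torch.rand(B, N, nc)                           # per-anchor class scores
    top, index = torch.topk(scores.flatten(1), max_det, dim=-1)
    labels = index % nc                                     # class id of each pick
    anchors = torch.div(index, nc, rounding_mode='floor')   # anchor id of each pick
    keep = top > conf                                       # final confidence filter
    print(anchors.shape, labels.shape, int(keep.sum()))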
ultralytics/models/yolov10/train.py (new file)
@@ -0,0 +1,11 @@
+from ultralytics.models.yolo.detect import DetectionTrainer
+from .val import YOLOv10DetectionValidator
+from copy import copy
+
+class YOLOv10DetectionTrainer(DetectionTrainer):
+    def get_validator(self):
+        """Returns a DetectionValidator for YOLO model validation."""
+        self.loss_names = "box_om", "cls_om", "dfl_om", "box_oo", "cls_oo", "dfl_oo"
+        return YOLOv10DetectionValidator(
+            self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
+        )
ultralytics/models/yolov10/val.py (new file)
@@ -0,0 +1,29 @@
+from ultralytics.models.yolo.detect import DetectionValidator
+from ultralytics.utils import ops
+import torch
+
+class YOLOv10DetectionValidator(DetectionValidator):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.args.save_json |= self.is_coco
+
+    def postprocess(self, preds):
+        if self.training:
+            preds = preds["one2one"]
+
+        if not isinstance(preds, (list, tuple)):
+            preds = [preds, None]
+
+        prediction = preds[0].transpose(-1, -2)
+        _, _, nd = prediction.shape
+        nc = nd - 4
+        assert(self.nc == nc)
+        bboxes, scores = prediction.split((4, nd - 4), dim=-1)
+        bboxes = ops.xywh2xyxy(bboxes)
+
+        scores, index = torch.topk(scores.flatten(1), self.args.max_det, axis=-1)
+        labels = index % self.nc
+        index = torch.div(index, self.nc, rounding_mode='floor')
+        bboxes = bboxes.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, bboxes.shape[-1]))
+
+        return torch.cat([bboxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
ultralytics/nn/modules/__init__.py
@@ -46,6 +46,10 @@ from .block import (
     CBFuse,
     CBLinear,
     Silence,
+    PSA,
+    C2fCIB,
+    SCDown,
+    RepVGGDW
 )
 from .conv import (
     CBAM,
@@ -62,7 +66,7 @@ from .conv import (
     RepConv,
     SpatialAttention,
 )
-from .head import OBB, Classify, Detect, Pose, RTDETRDecoder, Segment, WorldDetect
+from .head import OBB, Classify, Detect, Pose, RTDETRDecoder, Segment, WorldDetect, v10Detect
 from .transformer import (
     AIFI,
     MLP,
@@ -135,4 +139,9 @@ __all__ = (
     "CBFuse",
     "CBLinear",
     "Silence",
+    "PSA",
+    "C2fCIB",
+    "SCDown",
+    "RepVGGDW",
+    "v10Detect"
 )
ultralytics/nn/modules/block.py
@@ -7,6 +7,7 @@ import torch.nn.functional as F

 from .conv import Conv, DWConv, GhostConv, LightConv, RepConv, autopad
 from .transformer import TransformerBlock
+from ultralytics.utils.torch_utils import fuse_conv_and_bn

 __all__ = (
     "DFL",
@@ -696,3 +697,131 @@ class CBFuse(nn.Module):
         res = [F.interpolate(x[self.idx[i]], size=target_size, mode="nearest") for i, x in enumerate(xs[:-1])]
         out = torch.sum(torch.stack(res + xs[-1:]), dim=0)
         return out
+
+
+class RepVGGDW(torch.nn.Module):
+    def __init__(self, ed) -> None:
+        super().__init__()
+        self.conv = Conv(ed, ed, 7, 1, 3, g=ed, act=False)
+        self.conv1 = Conv(ed, ed, 3, 1, 1, g=ed, act=False)
+        self.dim = ed
+        self.act = nn.SiLU()
+
+    def forward(self, x):
+        return self.act(self.conv(x) + self.conv1(x))
+
+    def forward_fuse(self, x):
+        return self.act(self.conv(x))
+
+    @torch.no_grad()
+    def fuse(self):
+        conv = fuse_conv_and_bn(self.conv.conv, self.conv.bn)
+        conv1 = fuse_conv_and_bn(self.conv1.conv, self.conv1.bn)
+
+        conv_w = conv.weight
+        conv_b = conv.bias
+        conv1_w = conv1.weight
+        conv1_b = conv1.bias
+
+        conv1_w = torch.nn.functional.pad(conv1_w, [2, 2, 2, 2])
+
+        final_conv_w = conv_w + conv1_w
+        final_conv_b = conv_b + conv1_b
+
+        conv.weight.data.copy_(final_conv_w)
+        conv.bias.data.copy_(final_conv_b)
+
+        self.conv = conv
+        del self.conv1
+
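fuse() above folds the 3x3 depthwise branch into the 7x7 one: because convolution is linear in its weights, zero-padding the 3x3 kernel to 7x7 and summing kernels and biases reproduces the two-branch sum exactly. A quick numerical check of that identity in plain torch (BatchNorm omitted for brevity):

    import torch
    import torch.nn.functional as F

    ed = 8
    c7 = torch.nn.Conv2d(ed, ed, 7, 1, 3, groups=ed)
    c3 = torch.nn.Conv2d(ed, ed, 3, 1, 1, groups=ed)
    x = torch.randn(2, ed, 16, 16)

    w = c7.weight + F.pad(c3.weight, [2, 2, 2, 2])  # pad the 3x3 kernel out to 7x7
    b = c7.bias + c3.bias
    fused = F.conv2d(x, w, b, stride=1, padding=3, groups=ed)
    assert torch.allclose(fused, c7(x) + c3(x), atol=1e-5)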
+class CIB(nn.Module):
+    """Standard bottleneck."""
+
+    def __init__(self, c1, c2, shortcut=True, e=0.5, lk=False):
+        """Initializes a bottleneck module with given input/output channels, shortcut option, group, kernels, and
+        expansion.
+        """
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = nn.Sequential(
+            Conv(c1, c1, 3, g=c1),
+            Conv(c1, 2 * c_, 1),
+            Conv(2 * c_, 2 * c_, 3, g=2 * c_) if not lk else RepVGGDW(2 * c_),
+            Conv(2 * c_, c2, 1),
+            Conv(c2, c2, 3, g=c2),
+        )
+
+        self.add = shortcut and c1 == c2
+
+    def forward(self, x):
+        """'forward()' applies the YOLO FPN to input data."""
+        return x + self.cv1(x) if self.add else self.cv1(x)
+
+class C2fCIB(C2f):
+    """Faster Implementation of CSP Bottleneck with 2 convolutions."""
+
+    def __init__(self, c1, c2, n=1, shortcut=False, lk=False, g=1, e=0.5):
+        """Initialize CSP bottleneck layer with two convolutions with arguments ch_in, ch_out, number, shortcut, groups,
+        expansion.
+        """
+        super().__init__(c1, c2, n, shortcut, g, e)
+        self.m = nn.ModuleList(CIB(self.c, self.c, shortcut, e=1.0, lk=lk) for _ in range(n))
+
+
+class Attention(nn.Module):
+    def __init__(self, dim, num_heads=8, attn_ratio=0.5):
+        super().__init__()
+        self.num_heads = num_heads
+        self.head_dim = dim // num_heads
+        self.key_dim = int(self.head_dim * attn_ratio)
+        self.scale = self.key_dim ** -0.5
+        self.nh_kd = nh_kd = self.key_dim * num_heads
+        h = dim + nh_kd * 2
+        self.qkv = Conv(dim, h, 1, act=False)
+        self.proj = Conv(dim, dim, 1, act=False)
+        self.pe = Conv(dim, dim, 3, 1, g=dim, act=False)
+
+    def forward(self, x):
+        B, _, H, W = x.shape
+        N = H * W
+        qkv = self.qkv(x)
+        q, k, v = qkv.view(B, self.num_heads, -1, N).split([self.key_dim, self.key_dim, self.head_dim], dim=2)
+
+        attn = (q.transpose(-2, -1) @ k) * self.scale
+        attn = attn.softmax(dim=-1)
+        x = (v @ attn.transpose(-2, -1)).view(B, -1, H, W) + self.pe(v.reshape(B, -1, H, W))
+        x = self.proj(x)
+        return x
+
+class PSA(nn.Module):
+
+    def __init__(self, c1, c2, e=0.5):
+        super().__init__()
+        assert(c1 == c2)
+        self.c = int(c1 * e)
+        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
+        self.cv2 = Conv(2 * self.c, c1, 1)
+
+        self.attn = Attention(self.c, attn_ratio=0.5, num_heads=self.c // 64)
+        self.ffn = nn.Sequential(
+            Conv(self.c, self.c * 2, 1),
+            Conv(self.c * 2, self.c, 1, act=False)
+        )
+
+    def forward(self, x):
+        a, b = self.cv1(x).split((self.c, self.c), dim=1)
+        b = b + self.attn(b)
+        b = b + self.ffn(b)
+        return self.cv2(torch.cat((a, b), 1))
+
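PSA applies attention to only part of the tensor: cv1 splits the input into two halves of self.c channels, one half passes through untouched while the other goes through attention plus a pointwise FFN (each with a residual connection), and cv2 fuses the concatenation, capping the quadratic attention cost. A short check of the channel bookkeeping under the default e=0.5 (values illustrative):

    c1, e = 512, 0.5
    c = int(c1 * e)       # 256 channels enter the attention branch
    num_heads = c // 64   # 4 heads, as wired in the constructor above
    print(c, num_heads)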
+class SCDown(nn.Module):
+    def __init__(self, c1, c2, k, s):
+        super().__init__()
+        self.cv1 = Conv(c1, c2, 1, 1)
+        self.cv2 = Conv(c2, c2, k=k, s=s, g=c2, act=False)
+
+    def forward(self, x):
+        return self.cv2(self.cv1(x))
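SCDown decouples the two jobs of a strided convolution: the 1x1 pointwise conv changes channels, and the kxk depthwise conv with stride s reduces resolution. A rough parameter-count comparison against a dense strided 3x3 conv (bias terms ignored, sizes from the P5 stage above):

    c1, c2, k = 512, 1024, 3
    dense = c1 * c2 * k * k        # Conv(c1, c2, 3, 2): 4,718,592 weights
    scdown = c1 * c2 + c2 * k * k  # 1x1 pointwise + depthwise 3x3: 533,504
    print(dense, scdown)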
ultralytics/nn/modules/head.py
@@ -12,6 +12,7 @@ from .block import DFL, Proto, ContrastiveHead, BNContrastiveHead
 from .conv import Conv
 from .transformer import MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer
 from .utils import bias_init_with_prob, linear_init
+import copy

 __all__ = "Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder"

@@ -40,17 +41,17 @@ class Detect(nn.Module):
         self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
         self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()

-    def forward(self, x):
-        """Concatenates and returns predicted bounding boxes and class probabilities."""
-        for i in range(self.nl):
-            x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
-        if self.training:  # Training path
-            return x
+    def generate_static_anchors(self, x):
+        shape = x[0].shape
+        self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
+        self.shape = shape

-        # Inference path
+    def inference(self, x):
+        # Inference path
         shape = x[0].shape  # BCHW
         x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
         if self.dynamic or self.shape != shape:
+            assert(not self.export)
             self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
             self.shape = shape

@@ -74,6 +75,21 @@ class Detect(nn.Module):
         y = torch.cat((dbox, cls.sigmoid()), 1)
         return y if self.export else (y, x)

+    def forward_feat(self, x, cv2, cv3):
+        y = []
+        for i in range(self.nl):
+            y.append(torch.cat((cv2[i](x[i]), cv3[i](x[i])), 1))
+        return y
+
+    def forward(self, x):
+        """Concatenates and returns predicted bounding boxes and class probabilities."""
+        y = self.forward_feat(x, self.cv2, self.cv3)
+
+        if self.training:
+            return y
+
+        return self.inference(y)
+
     def bias_init(self):
         """Initialize Detect() biases, WARNING: requires stride availability."""
         m = self  # self.model[-1]  # Detect() module

@@ -480,3 +496,34 @@ class RTDETRDecoder(nn.Module):
         xavier_uniform_(self.query_pos_head.layers[1].weight)
         for layer in self.input_proj:
             xavier_uniform_(layer[0].weight)
+
+
+class v10Detect(Detect):
+
+    def __init__(self, nc=80, ch=()):
+        super().__init__(nc, ch)
+        c3 = max(ch[0], min(self.nc, 100))  # channels
+        self.cv3 = nn.ModuleList(nn.Sequential(nn.Sequential(Conv(x, x, 3, g=x), Conv(x, c3, 1)),
+                                               nn.Sequential(Conv(c3, c3, 3, g=c3), Conv(c3, c3, 1)),
+                                               nn.Conv2d(c3, self.nc, 1)) for i, x in enumerate(ch))
+
+        self.one2one_cv2 = copy.deepcopy(self.cv2)
+        self.one2one_cv3 = copy.deepcopy(self.cv3)
+
+    def forward(self, x):
+        one2one = self.forward_feat([xi.detach() for xi in x], self.one2one_cv2, self.one2one_cv3)
+        if not self.training:
+            one2one = self.inference(one2one)
+            return one2one
+        else:
+            one2many = super().forward(x)
+            return {"one2many": one2many, "one2one": one2one}
+
+    def bias_init(self):
+        """Initialize Detect() biases, WARNING: requires stride availability."""
+        super().bias_init()
+        m = self  # self.model[-1]  # Detect() module
+        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
+        # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequency
+        for a, b, s in zip(m.one2one_cv2, m.one2one_cv3, m.stride):  # from
+            a[-1].bias.data[:] = 1.0  # box
+            b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
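v10Detect is the core of the NMS-free design: the one2one branch runs on detached features, so its top-1 assignment never shapes the backbone, while the one2many branch (the standard Detect forward) supplies dense supervision. At inference only the one2one branch executes, and its output decodes by plain top-k selection (see predict.py above) instead of NMS. A sketch of the two output modes, with shapes illustrative:

    # training:  {"one2many": [P3, P4, P5 maps], "one2one": [P3, P4, P5 maps]}
    # inference: one (batch, 4 + nc, num_anchors) tensor from the one2one branch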
ultralytics/nn/tasks.py
@@ -49,10 +49,15 @@ from ultralytics.nn.modules import (
     CBFuse,
     CBLinear,
     Silence,
+    C2fCIB,
+    PSA,
+    SCDown,
+    RepVGGDW,
+    v10Detect
 )
 from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
 from ultralytics.utils.checks import check_requirements, check_suffix, check_yaml
-from ultralytics.utils.loss import v8ClassificationLoss, v8DetectionLoss, v8OBBLoss, v8PoseLoss, v8SegmentationLoss
+from ultralytics.utils.loss import v8ClassificationLoss, v8DetectionLoss, v8OBBLoss, v8PoseLoss, v8SegmentationLoss, v10DetectLoss
 from ultralytics.utils.plotting import feature_visualization
 from ultralytics.utils.torch_utils import (
     fuse_conv_and_bn,

@@ -191,6 +196,9 @@ class BaseModel(nn.Module):
                 if isinstance(m, RepConv):
                     m.fuse_convs()
                     m.forward = m.forward_fuse  # update forward
+                if isinstance(m, RepVGGDW):
+                    m.fuse()
+                    m.forward = m.forward_fuse
             self.info(verbose=verbose)

         return self

@@ -294,6 +302,8 @@ class DetectionModel(BaseModel):
             s = 256  # 2x min stride
             m.inplace = self.inplace
             forward = lambda x: self.forward(x)[0] if isinstance(m, (Segment, Pose, OBB)) else self.forward(x)
+            if isinstance(m, v10Detect):
+                forward = lambda x: self.forward(x)["one2many"]
             m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
             self.stride = m.stride
             m.bias_init()  # only run once

@@ -627,6 +637,9 @@ class WorldModel(DetectionModel):
             return torch.unbind(torch.cat(embeddings, 1), dim=0)
         return x

+class YOLOv10DetectionModel(DetectionModel):
+    def init_criterion(self):
+        return v10DetectLoss(self)
+
 class Ensemble(nn.ModuleList):
     """Ensemble of models."""

@@ -869,6 +882,9 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
            DWConvTranspose2d,
            C3x,
            RepC3,
+           PSA,
+           SCDown,
+           C2fCIB
        }:
            c1, c2 = ch[f], args[0]
            if c2 != nc:  # if c2 not equal to number of classes (i.e. for Classify() output)

@@ -880,7 +896,7 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
            )  # num heads

            args = [c1, c2, *args[1:]]
-           if m in (BottleneckCSP, C1, C2, C2f, C2fAttn, C3, C3TR, C3Ghost, C3x, RepC3):
+           if m in (BottleneckCSP, C1, C2, C2f, C2fAttn, C3, C3TR, C3Ghost, C3x, RepC3, C2fCIB):
               args.insert(2, n)  # number of repeats
               n = 1
        elif m is AIFI:

@@ -897,7 +913,7 @@ def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
-       elif m in {Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn}:
+       elif m in {Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn, v10Detect}:
            args.append([ch[x] for x in f])
            if m is Segment:
                args[2] = make_divisible(min(args[2], max_channels) * width, 8)

@@ -936,7 +952,10 @@ def yaml_model_load(path):
        LOGGER.warning(f"WARNING ⚠️ Ultralytics YOLO P6 models now use -p6 suffix. Renaming {path.stem} to {new_stem}.")
        path = path.with_name(new_stem + path.suffix)

-   unified_path = re.sub(r"(\d+)([nslmx])(.+)?$", r"\1\3", str(path))  # i.e. yolov8x.yaml -> yolov8.yaml
+   if "v10" not in str(path):
+       unified_path = re.sub(r"(\d+)([nsblmx])(.+)?$", r"\1\3", str(path))  # i.e. yolov8x.yaml -> yolov8.yaml
+   else:
+       unified_path = path
    yaml_file = check_yaml(unified_path, hard=False) or check_yaml(path)
    d = yaml_load(yaml_file)  # model dict
    d["scale"] = guess_model_scale(path)
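The v10 special case exists because the configs added above are one file per scale, so there is no unified yolov10.yaml to fall back to. For reference, the behaviour of the scale-stripping regex on both naming schemes:

    import re

    for name in ("yolov8x.yaml", "yolov10b.yaml"):
        print(name, "->", re.sub(r"(\d+)([nsblmx])(.+)?$", r"\1\3", name))
    # yolov8x.yaml  -> yolov8.yaml
    # yolov10b.yaml -> yolov10.yaml  (a file that does not exist, hence the branch)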
@@ -959,7 +978,7 @@ def guess_model_scale(model_path):
    with contextlib.suppress(AttributeError):
        import re

-       return re.search(r"yolov\d+([nslmx])", Path(model_path).stem).group(1)  # n, s, m, l, or x
+       return re.search(r"yolov\d+([nsblmx])", Path(model_path).stem).group(1)  # n, s, b, l, m, or x
    return ""

@@ -982,7 +1001,7 @@ def guess_model_task(model):
        m = cfg["head"][-1][-2].lower()  # output module name
        if m in {"classify", "classifier", "cls", "fc"}:
            return "classify"
-       if m == "detect":
+       if m == "detect" or m == "v10detect":
            return "detect"
        if m == "segment":
            return "segment"

@@ -1014,7 +1033,7 @@ def guess_model_task(model):
                return "pose"
            elif isinstance(m, OBB):
                return "obb"
-           elif isinstance(m, (Detect, WorldDetect)):
+           elif isinstance(m, (Detect, WorldDetect, v10Detect)):
                return "detect"

    # Guess from model filename
ultralytics/utils/loss.py
@@ -147,7 +147,7 @@ class KeypointLoss(nn.Module):
 class v8DetectionLoss:
     """Criterion class for computing training losses."""

-    def __init__(self, model):  # model must be de-paralleled
+    def __init__(self, model, tal_topk=10):  # model must be de-paralleled
         """Initializes v8DetectionLoss with the model, defining model-related properties and BCE loss function."""
         device = next(model.parameters()).device  # get model device
         h = model.args  # hyperparameters

@@ -163,7 +163,7 @@ class v8DetectionLoss:

         self.use_dfl = m.reg_max > 1

-        self.assigner = TaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0)
+        self.assigner = TaskAlignedAssigner(topk=tal_topk, num_classes=self.nc, alpha=0.5, beta=6.0)
         self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=self.use_dfl).to(device)
         self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)

@@ -713,3 +713,15 @@ class v8OBBLoss(v8DetectionLoss):
         b, a, c = pred_dist.shape  # batch, anchors, channels
         pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
         return torch.cat((dist2rbox(pred_dist, pred_angle, anchor_points), pred_angle), dim=-1)
+
+class v10DetectLoss:
+    def __init__(self, model):
+        self.one2many = v8DetectionLoss(model, tal_topk=10)
+        self.one2one = v8DetectionLoss(model, tal_topk=1)
+
+    def __call__(self, preds, batch):
+        one2many = preds["one2many"]
+        loss_one2many = self.one2many(one2many, batch)
+        one2one = preds["one2one"]
+        loss_one2one = self.one2one(one2one, batch)
+        return loss_one2many[0] + loss_one2one[0], torch.cat((loss_one2many[1], loss_one2one[1]))
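v10DetectLoss reuses v8DetectionLoss for both branches, varying only the assigner's top-k: top-10 for the dense one-to-many branch and top-1 for the one-to-one branch. The return value sums the two scalars for backprop and concatenates the per-component items, which is what lines the logged values up with the loss_names tuple set in train.py above:

    # Assuming each branch returns (total_loss, loss_items) like v8DetectionLoss:
    # total = loss_om + loss_oo         -> single scalar for backward()
    # items = cat(items_om, items_oo)   -> (box_om, cls_om, dfl_om, box_oo, cls_oo, dfl_oo)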
ultralytics/utils/tal.py
@@ -308,7 +308,8 @@ def make_anchors(feats, strides, grid_cell_offset=0.5):

 def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
     """Transform distance(ltrb) to box(xywh or xyxy)."""
-    lt, rb = distance.chunk(2, dim)
+    assert(distance.shape[dim] == 4)
+    lt, rb = distance.split([2, 2], dim)
     x1y1 = anchor_points - lt
     x2y2 = anchor_points + rb
     if xywh:
ultralytics/utils/torch_utils.py
@@ -310,10 +310,11 @@ def get_flops(model, imgsz=640):
         imgsz = [imgsz, imgsz]  # expand if int/float
     try:
         # Use stride size for input tensor
-        stride = max(int(model.stride.max()), 32) if hasattr(model, "stride") else 32  # max stride
-        im = torch.empty((1, p.shape[1], stride, stride), device=p.device)  # input image in BCHW format
-        flops = thop.profile(deepcopy(model), inputs=[im], verbose=False)[0] / 1e9 * 2  # stride GFLOPs
-        return flops * imgsz[0] / stride * imgsz[1] / stride  # imgsz GFLOPs
+        # stride = max(int(model.stride.max()), 32) if hasattr(model, "stride") else 32  # max stride
+        # im = torch.empty((1, p.shape[1], stride, stride), device=p.device)  # input image in BCHW format
+        # flops = thop.profile(deepcopy(model), inputs=[im], verbose=False)[0] / 1e9 * 2  # stride GFLOPs
+        # return flops * imgsz[0] / stride * imgsz[1] / stride  # imgsz GFLOPs
+        raise Exception
     except Exception:
         # Use actual image size for input tensor (i.e. required for RTDETR models)
         im = torch.empty((1, p.shape[1], *imgsz), device=p.device)  # input image in BCHW format