mirror of
https://github.com/THU-MIG/yolov10.git
synced 2025-05-23 13:34:23 +08:00
default.yaml
type comments (#3237)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
21df296425
commit
e78fb683f4
@ -2,8 +2,8 @@
|
||||
# YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/models/yolov6
|
||||
|
||||
# Parameters
|
||||
act: nn.ReLU()
|
||||
nc: 80 # number of classes
|
||||
activation: nn.ReLU() # (optional) model default activation function
|
||||
scales: # model compound scaling constants, i.e. 'model=yolov6n.yaml' will call yolov6.yaml with scale 'n'
|
||||
# [depth, width, max_channels]
|
||||
n: [0.33, 0.25, 1024]
|
||||
|
@ -422,9 +422,7 @@ class RTDETRDetectionModel(DetectionModel):
|
||||
# NOTE: preprocess gt_bbox and gt_labels to list.
|
||||
bs = len(img)
|
||||
batch_idx = batch['batch_idx']
|
||||
gt_groups = []
|
||||
for i in range(bs):
|
||||
gt_groups.append((batch_idx == i).sum().item())
|
||||
gt_groups = [(batch_idx == i).sum().item() for i in range(bs)]
|
||||
targets = {
|
||||
'cls': batch['cls'].to(img.device, dtype=torch.long).view(-1),
|
||||
'bboxes': batch['bboxes'].to(device=img.device),
|
||||
@ -606,7 +604,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
|
||||
|
||||
# Args
|
||||
max_channels = float('inf')
|
||||
nc, act, scales = (d.get(x) for x in ('nc', 'act', 'scales'))
|
||||
nc, act, scales = (d.get(x) for x in ('nc', 'activation', 'scales'))
|
||||
depth, width, kpt_shape = (d.get(x, 1.0) for x in ('depth_multiple', 'width_multiple', 'kpt_shape'))
|
||||
if scales:
|
||||
scale = d.get('scale')
|
||||
|
@ -22,20 +22,15 @@ class MaskDecoder(nn.Module):
|
||||
iou_head_hidden_dim: int = 256,
|
||||
) -> None:
|
||||
"""
|
||||
Predicts masks given an image and prompt embeddings, using a
|
||||
transformer architecture.
|
||||
Predicts masks given an image and prompt embeddings, using a transformer architecture.
|
||||
|
||||
Arguments:
|
||||
transformer_dim (int): the channel dimension of the transformer
|
||||
transformer (nn.Module): the transformer used to predict masks
|
||||
num_multimask_outputs (int): the number of masks to predict
|
||||
when disambiguating masks
|
||||
activation (nn.Module): the type of activation to use when
|
||||
upscaling masks
|
||||
iou_head_depth (int): the depth of the MLP used to predict
|
||||
mask quality
|
||||
iou_head_hidden_dim (int): the hidden dimension of the MLP
|
||||
used to predict mask quality
|
||||
transformer_dim (int): the channel dimension of the transformer module
|
||||
transformer (nn.Module): the transformer used to predict masks
|
||||
num_multimask_outputs (int): the number of masks to predict when disambiguating masks
|
||||
activation (nn.Module): the type of activation to use when upscaling masks
|
||||
iou_head_depth (int): the depth of the MLP used to predict mask quality
|
||||
iou_head_hidden_dim (int): the hidden dimension of the MLP used to predict mask quality
|
||||
"""
|
||||
super().__init__()
|
||||
self.transformer_dim = transformer_dim
|
||||
@ -71,16 +66,15 @@ class MaskDecoder(nn.Module):
|
||||
Predict masks given image and prompt embeddings.
|
||||
|
||||
Arguments:
|
||||
image_embeddings (torch.Tensor): the embeddings from the image encoder
|
||||
image_pe (torch.Tensor): positional encoding with the shape of image_embeddings
|
||||
sparse_prompt_embeddings (torch.Tensor): the embeddings of the points and boxes
|
||||
dense_prompt_embeddings (torch.Tensor): the embeddings of the mask inputs
|
||||
multimask_output (bool): Whether to return multiple masks or a single
|
||||
mask.
|
||||
image_embeddings (torch.Tensor): the embeddings from the image encoder
|
||||
image_pe (torch.Tensor): positional encoding with the shape of image_embeddings
|
||||
sparse_prompt_embeddings (torch.Tensor): the embeddings of the points and boxes
|
||||
dense_prompt_embeddings (torch.Tensor): the embeddings of the mask inputs
|
||||
multimask_output (bool): Whether to return multiple masks or a single mask.
|
||||
|
||||
Returns:
|
||||
torch.Tensor: batched predicted masks
|
||||
torch.Tensor: batched predictions of mask quality
|
||||
torch.Tensor: batched predicted masks
|
||||
torch.Tensor: batched predictions of mask quality
|
||||
"""
|
||||
masks, iou_pred = self.predict_masks(
|
||||
image_embeddings=image_embeddings,
|
||||
@ -136,9 +130,11 @@ class MaskDecoder(nn.Module):
|
||||
return masks, iou_pred
|
||||
|
||||
|
||||
# Lightly adapted from
|
||||
# https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py # noqa
|
||||
class MLP(nn.Module):
|
||||
"""
|
||||
Lightly adapted from
|
||||
https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -249,7 +249,7 @@ def get_cdn_group(batch,
|
||||
attn_mask[max_nums * 2 * i:max_nums * 2 * (i + 1), max_nums * 2 * (i + 1):num_dn] = True
|
||||
attn_mask[max_nums * 2 * i:max_nums * 2 * (i + 1), :max_nums * 2 * i] = True
|
||||
dn_meta = {
|
||||
'dn_pos_idx': [p.reshape(-1) for p in pos_idx.cpu().split([n for n in gt_groups], dim=1)],
|
||||
'dn_pos_idx': [p.reshape(-1) for p in pos_idx.cpu().split(list(gt_groups), dim=1)],
|
||||
'dn_num_group': num_group,
|
||||
'dn_num_split': [num_dn, num_queries]}
|
||||
|
||||
@ -258,5 +258,6 @@ def get_cdn_group(batch,
|
||||
|
||||
|
||||
def inverse_sigmoid(x, eps=1e-6):
|
||||
"""Inverse sigmoid function."""
|
||||
x = x.clip(min=0., max=1.)
|
||||
return torch.log(x / (1 - x + eps) + eps)
|
||||
|
@ -1,117 +1,117 @@
|
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
||||
# Default training settings and hyperparameters for medium-augmentation COCO training
|
||||
|
||||
task: detect # YOLO task, i.e. detect, segment, classify, pose
|
||||
mode: train # YOLO mode, i.e. train, val, predict, export, track, benchmark
|
||||
task: detect # (str) YOLO task, i.e. detect, segment, classify, pose
|
||||
mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
|
||||
|
||||
# Train settings -------------------------------------------------------------------------------------------------------
|
||||
model: # path to model file, i.e. yolov8n.pt, yolov8n.yaml
|
||||
data: # path to data file, i.e. coco128.yaml
|
||||
epochs: 100 # number of epochs to train for
|
||||
patience: 50 # epochs to wait for no observable improvement for early stopping of training
|
||||
batch: 16 # number of images per batch (-1 for AutoBatch)
|
||||
imgsz: 640 # size of input images as integer or w,h
|
||||
save: True # save train checkpoints and predict results
|
||||
save_period: -1 # Save checkpoint every x epochs (disabled if < 1)
|
||||
cache: False # True/ram, disk or False. Use cache for data loading
|
||||
device: # device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
|
||||
workers: 8 # number of worker threads for data loading (per RANK if DDP)
|
||||
project: # project name
|
||||
name: # experiment name, results saved to 'project/name' directory
|
||||
exist_ok: False # whether to overwrite existing experiment
|
||||
pretrained: False # whether to use a pretrained model
|
||||
optimizer: auto # optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
|
||||
verbose: True # whether to print verbose output
|
||||
seed: 0 # random seed for reproducibility
|
||||
deterministic: True # whether to enable deterministic mode
|
||||
single_cls: False # train multi-class data as single-class
|
||||
rect: False # rectangular training if mode='train' or rectangular validation if mode='val'
|
||||
cos_lr: False # use cosine learning rate scheduler
|
||||
model: # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
|
||||
data: # (str, optional) path to data file, i.e. coco128.yaml
|
||||
epochs: 100 # (int) number of epochs to train for
|
||||
patience: 50 # (int) epochs to wait for no observable improvement for early stopping of training
|
||||
batch: 16 # (int) number of images per batch (-1 for AutoBatch)
|
||||
imgsz: 640 # (int) size of input images as integer or w,h
|
||||
save: True # (bool) save train checkpoints and predict results
|
||||
save_period: -1 # (int) Save checkpoint every x epochs (disabled if < 1)
|
||||
cache: False # (bool) True/ram, disk or False. Use cache for data loading
|
||||
device: # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
|
||||
workers: 8 # (int) number of worker threads for data loading (per RANK if DDP)
|
||||
project: # (str, optional) project name
|
||||
name: # (str, optional) experiment name, results saved to 'project/name' directory
|
||||
exist_ok: False # (bool) whether to overwrite existing experiment
|
||||
pretrained: False # (bool) whether to use a pretrained model
|
||||
optimizer: auto # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
|
||||
verbose: True # (bool) whether to print verbose output
|
||||
seed: 0 # (int) random seed for reproducibility
|
||||
deterministic: True # (bool) whether to enable deterministic mode
|
||||
single_cls: False # (bool) train multi-class data as single-class
|
||||
rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
|
||||
cos_lr: False # (bool) use cosine learning rate scheduler
|
||||
close_mosaic: 0 # (int) disable mosaic augmentation for final epochs
|
||||
resume: False # resume training from last checkpoint
|
||||
amp: True # Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
|
||||
fraction: 1.0 # dataset fraction to train on (default is 1.0, all images in train set)
|
||||
profile: False # profile ONNX and TensorRT speeds during training for loggers
|
||||
resume: False # (bool) resume training from last checkpoint
|
||||
amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
|
||||
fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set)
|
||||
profile: False # (bool) profile ONNX and TensorRT speeds during training for loggers
|
||||
# Segmentation
|
||||
overlap_mask: True # masks should overlap during training (segment train only)
|
||||
mask_ratio: 4 # mask downsample ratio (segment train only)
|
||||
overlap_mask: True # (bool) masks should overlap during training (segment train only)
|
||||
mask_ratio: 4 # (int) mask downsample ratio (segment train only)
|
||||
# Classification
|
||||
dropout: 0.0 # use dropout regularization (classify train only)
|
||||
dropout: 0.0 # (float) use dropout regularization (classify train only)
|
||||
|
||||
# Val/Test settings ----------------------------------------------------------------------------------------------------
|
||||
val: True # validate/test during training
|
||||
split: val # dataset split to use for validation, i.e. 'val', 'test' or 'train'
|
||||
save_json: False # save results to JSON file
|
||||
save_hybrid: False # save hybrid version of labels (labels + additional predictions)
|
||||
conf: # object confidence threshold for detection (default 0.25 predict, 0.001 val)
|
||||
iou: 0.7 # intersection over union (IoU) threshold for NMS
|
||||
max_det: 300 # maximum number of detections per image
|
||||
half: False # use half precision (FP16)
|
||||
dnn: False # use OpenCV DNN for ONNX inference
|
||||
plots: True # save plots during train/val
|
||||
val: True # (bool) validate/test during training
|
||||
split: val # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
|
||||
save_json: False # (bool) save results to JSON file
|
||||
save_hybrid: False # (bool) save hybrid version of labels (labels + additional predictions)
|
||||
conf: # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
|
||||
iou: 0.7 # (float) intersection over union (IoU) threshold for NMS
|
||||
max_det: 300 # (int) maximum number of detections per image
|
||||
half: False # (bool) use half precision (FP16)
|
||||
dnn: False # (bool) use OpenCV DNN for ONNX inference
|
||||
plots: True # (bool) save plots during train/val
|
||||
|
||||
# Prediction settings --------------------------------------------------------------------------------------------------
|
||||
source: # source directory for images or videos
|
||||
show: False # show results if possible
|
||||
save_txt: False # save results as .txt file
|
||||
save_conf: False # save results with confidence scores
|
||||
save_crop: False # save cropped images with results
|
||||
show_labels: True # show object labels in plots
|
||||
show_conf: True # show object confidence scores in plots
|
||||
vid_stride: 1 # video frame-rate stride
|
||||
line_width: # line width of the bounding boxes
|
||||
visualize: False # visualize model features
|
||||
augment: False # apply image augmentation to prediction sources
|
||||
agnostic_nms: False # class-agnostic NMS
|
||||
classes: # filter results by class, i.e. class=0, or class=[0,2,3]
|
||||
retina_masks: False # use high-resolution segmentation masks
|
||||
boxes: True # Show boxes in segmentation predictions
|
||||
source: # (str, optional) source directory for images or videos
|
||||
show: False # (bool) show results if possible
|
||||
save_txt: False # (bool) save results as .txt file
|
||||
save_conf: False # (bool) save results with confidence scores
|
||||
save_crop: False # (bool) save cropped images with results
|
||||
show_labels: True # (bool) show object labels in plots
|
||||
show_conf: True # (bool) show object confidence scores in plots
|
||||
vid_stride: 1 # (int) video frame-rate stride
|
||||
line_width: # (int, optional) line width of the bounding boxes, auto if missing
|
||||
visualize: False # (bool) visualize model features
|
||||
augment: False # (bool) apply image augmentation to prediction sources
|
||||
agnostic_nms: False # (bool) class-agnostic NMS
|
||||
classes: # (int | list[int], optional) filter results by class, i.e. class=0, or class=[0,2,3]
|
||||
retina_masks: False # (bool) use high-resolution segmentation masks
|
||||
boxes: True # (bool) Show boxes in segmentation predictions
|
||||
|
||||
# Export settings ------------------------------------------------------------------------------------------------------
|
||||
format: torchscript # format to export to
|
||||
keras: False # use Keras
|
||||
optimize: False # TorchScript: optimize for mobile
|
||||
int8: False # CoreML/TF INT8 quantization
|
||||
dynamic: False # ONNX/TF/TensorRT: dynamic axes
|
||||
simplify: False # ONNX: simplify model
|
||||
opset: # ONNX: opset version (optional)
|
||||
workspace: 4 # TensorRT: workspace size (GB)
|
||||
nms: False # CoreML: add NMS
|
||||
format: torchscript # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
|
||||
keras: False # (bool) use Keras
|
||||
optimize: False # (bool) TorchScript: optimize for mobile
|
||||
int8: False # (bool) CoreML/TF INT8 quantization
|
||||
dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
|
||||
simplify: False # (bool) ONNX: simplify model
|
||||
opset: # (int, optional) ONNX: opset version
|
||||
workspace: 4 # (int) TensorRT: workspace size (GB)
|
||||
nms: False # (bool) CoreML: add NMS
|
||||
|
||||
# Hyperparameters ------------------------------------------------------------------------------------------------------
|
||||
lr0: 0.01 # initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
|
||||
lrf: 0.01 # final learning rate (lr0 * lrf)
|
||||
momentum: 0.937 # SGD momentum/Adam beta1
|
||||
weight_decay: 0.0005 # optimizer weight decay 5e-4
|
||||
warmup_epochs: 3.0 # warmup epochs (fractions ok)
|
||||
warmup_momentum: 0.8 # warmup initial momentum
|
||||
warmup_bias_lr: 0.1 # warmup initial bias lr
|
||||
box: 7.5 # box loss gain
|
||||
cls: 0.5 # cls loss gain (scale with pixels)
|
||||
dfl: 1.5 # dfl loss gain
|
||||
pose: 12.0 # pose loss gain
|
||||
kobj: 1.0 # keypoint obj loss gain
|
||||
label_smoothing: 0.0 # label smoothing (fraction)
|
||||
nbs: 64 # nominal batch size
|
||||
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
|
||||
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
|
||||
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
|
||||
degrees: 0.0 # image rotation (+/- deg)
|
||||
translate: 0.1 # image translation (+/- fraction)
|
||||
scale: 0.5 # image scale (+/- gain)
|
||||
shear: 0.0 # image shear (+/- deg)
|
||||
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
|
||||
flipud: 0.0 # image flip up-down (probability)
|
||||
fliplr: 0.5 # image flip left-right (probability)
|
||||
mosaic: 1.0 # image mosaic (probability)
|
||||
mixup: 0.0 # image mixup (probability)
|
||||
copy_paste: 0.0 # segment copy-paste (probability)
|
||||
lr0: 0.01 # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
|
||||
lrf: 0.01 # (float) final learning rate (lr0 * lrf)
|
||||
momentum: 0.937 # (float) SGD momentum/Adam beta1
|
||||
weight_decay: 0.0005 # (float) optimizer weight decay 5e-4
|
||||
warmup_epochs: 3.0 # (float) warmup epochs (fractions ok)
|
||||
warmup_momentum: 0.8 # (float) warmup initial momentum
|
||||
warmup_bias_lr: 0.1 # (float) warmup initial bias lr
|
||||
box: 7.5 # (float) box loss gain
|
||||
cls: 0.5 # (float) cls loss gain (scale with pixels)
|
||||
dfl: 1.5 # (float) dfl loss gain
|
||||
pose: 12.0 # (float) pose loss gain
|
||||
kobj: 1.0 # (float) keypoint obj loss gain
|
||||
label_smoothing: 0.0 # (float) label smoothing (fraction)
|
||||
nbs: 64 # (int) nominal batch size
|
||||
hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
|
||||
hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
|
||||
hsv_v: 0.4 # (float) image HSV-Value augmentation (fraction)
|
||||
degrees: 0.0 # (float) image rotation (+/- deg)
|
||||
translate: 0.1 # (float) image translation (+/- fraction)
|
||||
scale: 0.5 # (float) image scale (+/- gain)
|
||||
shear: 0.0 # (float) image shear (+/- deg)
|
||||
perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001
|
||||
flipud: 0.0 # (float) image flip up-down (probability)
|
||||
fliplr: 0.5 # (float) image flip left-right (probability)
|
||||
mosaic: 1.0 # (float) image mosaic (probability)
|
||||
mixup: 0.0 # (float) image mixup (probability)
|
||||
copy_paste: 0.0 # (float) segment copy-paste (probability)
|
||||
|
||||
# Custom config.yaml ---------------------------------------------------------------------------------------------------
|
||||
cfg: # for overriding defaults.yaml
|
||||
cfg: # (str, optional) for overriding default.yaml
|
||||
|
||||
# Debug, do not modify -------------------------------------------------------------------------------------------------
|
||||
v5loader: False # use legacy YOLOv5 dataloader
|
||||
v5loader: False # (bool) use legacy YOLOv5 dataloader (deprecated)
|
||||
|
||||
# Tracker settings ------------------------------------------------------------------------------------------------------
|
||||
tracker: botsort.yaml # tracker type, ['botsort.yaml', 'bytetrack.yaml']
|
||||
tracker: botsort.yaml # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
|
||||
|
Loading…
x
Reference in New Issue
Block a user