mirror of
https://github.com/THU-MIG/yolov10.git
synced 2025-10-24 17:55:39 +08:00
ultralytics 8.0.80
single-line docstring fixes (#2060)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
31db8ed163
commit
5bce1c3021
@ -1,6 +1,6 @@
|
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
||||
|
||||
__version__ = '8.0.79'
|
||||
__version__ = '8.0.80'
|
||||
|
||||
from ultralytics.hub import start
|
||||
from ultralytics.yolo.engine.model import YOLO
|
||||
|
@ -54,7 +54,7 @@ model.train()""")
|
||||
|
||||
|
||||
def reset_model(model_id=''):
|
||||
# Reset a trained model to an untrained state
|
||||
"""Reset a trained model to an untrained state."""
|
||||
r = requests.post('https://api.ultralytics.com/model-reset', json={'apiKey': Auth().api_key, 'modelId': model_id})
|
||||
if r.status_code == 200:
|
||||
LOGGER.info(f'{PREFIX}Model reset successfully')
|
||||
@ -63,13 +63,13 @@ def reset_model(model_id=''):
|
||||
|
||||
|
||||
def export_fmts_hub():
|
||||
# Returns a list of HUB-supported export formats
|
||||
"""Returns a list of HUB-supported export formats."""
|
||||
from ultralytics.yolo.engine.exporter import export_formats
|
||||
return list(export_formats()['Argument'][1:]) + ['ultralytics_tflite', 'ultralytics_coreml']
|
||||
|
||||
|
||||
def export_model(model_id='', format='torchscript'):
|
||||
# Export a model to all formats
|
||||
"""Export a model to the requested format (must be one of export_fmts_hub())."""
|
||||
assert format in export_fmts_hub(), f"Unsupported export format '{format}', valid formats are {export_fmts_hub()}"
|
||||
r = requests.post('https://api.ultralytics.com/export',
|
||||
json={
|
||||
@ -81,7 +81,7 @@ def export_model(model_id='', format='torchscript'):
|
||||
|
||||
|
||||
def get_export(model_id='', format='torchscript'):
|
||||
# Get an exported model dictionary with download URL
|
||||
"""Get an exported model dictionary with download URL."""
|
||||
assert format in export_fmts_hub(), f"Unsupported export format '{format}', valid formats are {export_fmts_hub()}"
|
||||
r = requests.post('https://api.ultralytics.com/get-export',
|
||||
json={
|
||||
|
@ -124,7 +124,7 @@ class HUBTrainingSession:
|
||||
'device': data['device'],
|
||||
'cache': data['cache'],
|
||||
'data': data['data']}
|
||||
self.model_file = data.get('cfg', data['weights'])
|
||||
self.model_file = data.get('cfg') or data.get('weights') # cfg for pretrained=False
|
||||
self.model_file = checks.check_yolov5u_filename(self.model_file, verbose=False) # YOLOv5->YOLOv5u
|
||||
elif data['status'] == 'training': # existing model to resume training
|
||||
self.train_args = {'data': data['data'], 'resume': True}
|
||||
|
@ -21,11 +21,11 @@ from ultralytics.yolo.utils.ops import xywh2xyxy
|
||||
|
||||
|
||||
def check_class_names(names):
|
||||
# Check class names. Map imagenet class codes to human-readable names if required. Convert lists to dicts.
|
||||
"""Check class names. Map imagenet class codes to human-readable names if required. Convert lists to dicts."""
|
||||
if isinstance(names, list): # names is a list
|
||||
names = dict(enumerate(names)) # convert to dict
|
||||
if isinstance(names, dict):
|
||||
# convert 1) string keys to int, i.e. '0' to 0, and non-string values to strings, i.e. True to 'True'
|
||||
# Convert 1) string keys to int, i.e. '0' to 0, and non-string values to strings, i.e. True to 'True'
|
||||
names = {int(k): str(v) for k, v in names.items()}
|
||||
n = len(names)
|
||||
if max(names.keys()) >= n:
|
||||
@ -229,7 +229,7 @@ class AutoBackend(nn.Module):
|
||||
interpreter.allocate_tensors() # allocate
|
||||
input_details = interpreter.get_input_details() # inputs
|
||||
output_details = interpreter.get_output_details() # outputs
|
||||
# load metadata
|
||||
# Load metadata
|
||||
with contextlib.suppress(zipfile.BadZipFile):
|
||||
with zipfile.ZipFile(w, 'r') as model:
|
||||
meta_file = model.namelist()[0]
|
||||
|
@ -24,7 +24,7 @@ from ultralytics.yolo.utils.torch_utils import copy_attr, smart_inference_mode
|
||||
|
||||
|
||||
class AutoShape(nn.Module):
|
||||
# YOLOv8 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
|
||||
"""YOLOv8 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS."""
|
||||
conf = 0.25 # NMS confidence threshold
|
||||
iou = 0.45 # NMS IoU threshold
|
||||
agnostic = False # NMS class-agnostic
|
||||
@ -47,7 +47,7 @@ class AutoShape(nn.Module):
|
||||
m.export = True # do not output loss values
|
||||
|
||||
def _apply(self, fn):
|
||||
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
|
||||
"""Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers."""
|
||||
self = super()._apply(fn)
|
||||
if self.pt:
|
||||
m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
|
||||
@ -59,7 +59,7 @@ class AutoShape(nn.Module):
|
||||
|
||||
@smart_inference_mode()
|
||||
def forward(self, ims, size=640, augment=False, profile=False):
|
||||
# Inference from various sources. For size(height=640, width=1280), RGB images example inputs are:
|
||||
"""Inference from various sources. For size(height=640, width=1280), RGB images example inputs are:"""
|
||||
# file: ims = 'data/images/zidane.jpg' # str or PosixPath
|
||||
# URI: = 'https://ultralytics.com/images/zidane.jpg'
|
||||
# OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
|
||||
@ -202,7 +202,7 @@ class Detections:
|
||||
return self.ims
|
||||
|
||||
def pandas(self):
|
||||
# return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
|
||||
"""Return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])."""
|
||||
import pandas
|
||||
new = copy(self) # return copy
|
||||
ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
|
||||
@ -213,7 +213,7 @@ class Detections:
|
||||
return new
|
||||
|
||||
def tolist(self):
|
||||
# return a list of Detections objects, i.e. 'for result in results.tolist():'
|
||||
"""Return a list of Detections objects, i.e. 'for result in results.tolist():'."""
|
||||
r = range(self.n) # iterable
|
||||
x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
|
||||
# for d in x:
|
||||
|
@ -12,7 +12,7 @@ from ultralytics.yolo.utils.tal import dist2bbox, make_anchors
|
||||
|
||||
|
||||
def autopad(k, p=None, d=1): # kernel, padding, dilation
|
||||
# Pad to 'same' shape outputs
|
||||
"""Pad to 'same' shape outputs."""
|
||||
if d > 1:
|
||||
k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size
|
||||
if p is None:
|
||||
@ -21,7 +21,7 @@ def autopad(k, p=None, d=1): # kernel, padding, dilation
|
||||
|
||||
|
||||
class Conv(nn.Module):
|
||||
# Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
|
||||
"""Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""
|
||||
default_act = nn.SiLU() # default activation
|
||||
|
||||
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
|
||||
@ -38,19 +38,21 @@ class Conv(nn.Module):
|
||||
|
||||
|
||||
class DWConv(Conv):
|
||||
# Depth-wise convolution
|
||||
"""Depth-wise convolution."""
|
||||
|
||||
def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, stride, dilation, activation
|
||||
super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
|
||||
|
||||
|
||||
class DWConvTranspose2d(nn.ConvTranspose2d):
|
||||
# Depth-wise transpose convolution
|
||||
"""Depth-wise transpose convolution."""
|
||||
|
||||
def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out
|
||||
super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
|
||||
|
||||
|
||||
class ConvTranspose(nn.Module):
|
||||
# Convolution transpose 2d layer
|
||||
"""Convolution transpose 2d layer."""
|
||||
default_act = nn.SiLU() # default activation
|
||||
|
||||
def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
|
||||
@ -67,8 +69,11 @@ class ConvTranspose(nn.Module):
|
||||
|
||||
|
||||
class DFL(nn.Module):
|
||||
# Integral module of Distribution Focal Loss (DFL)
|
||||
# Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
|
||||
"""
|
||||
Integral module of Distribution Focal Loss (DFL).
|
||||
Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
|
||||
"""
|
||||
|
||||
def __init__(self, c1=16):
|
||||
super().__init__()
|
||||
self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
|
||||
@ -83,7 +88,8 @@ class DFL(nn.Module):
|
||||
|
||||
|
||||
class TransformerLayer(nn.Module):
|
||||
# Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
|
||||
"""Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)."""
|
||||
|
||||
def __init__(self, c, num_heads):
|
||||
super().__init__()
|
||||
self.q = nn.Linear(c, c, bias=False)
|
||||
@ -100,7 +106,8 @@ class TransformerLayer(nn.Module):
|
||||
|
||||
|
||||
class TransformerBlock(nn.Module):
|
||||
# Vision Transformer https://arxiv.org/abs/2010.11929
|
||||
"""Vision Transformer https://arxiv.org/abs/2010.11929."""
|
||||
|
||||
def __init__(self, c1, c2, num_heads, num_layers):
|
||||
super().__init__()
|
||||
self.conv = None
|
||||
@ -119,7 +126,8 @@ class TransformerBlock(nn.Module):
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
|
||||
# Standard bottleneck
|
||||
"""Standard bottleneck."""
|
||||
|
||||
def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5): # ch_in, ch_out, shortcut, groups, kernels, expand
|
||||
super().__init__()
|
||||
c_ = int(c2 * e) # hidden channels
|
||||
@ -132,7 +140,8 @@ class Bottleneck(nn.Module):
|
||||
|
||||
|
||||
class BottleneckCSP(nn.Module):
|
||||
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
|
||||
"""CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks."""
|
||||
|
||||
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
||||
super().__init__()
|
||||
c_ = int(c2 * e) # hidden channels
|
||||
@ -151,7 +160,8 @@ class BottleneckCSP(nn.Module):
|
||||
|
||||
|
||||
class C3(nn.Module):
|
||||
# CSP Bottleneck with 3 convolutions
|
||||
"""CSP Bottleneck with 3 convolutions."""
|
||||
|
||||
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
||||
super().__init__()
|
||||
c_ = int(c2 * e) # hidden channels
|
||||
@ -165,7 +175,8 @@ class C3(nn.Module):
|
||||
|
||||
|
||||
class C2(nn.Module):
|
||||
# CSP Bottleneck with 2 convolutions
|
||||
"""CSP Bottleneck with 2 convolutions."""
|
||||
|
||||
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
||||
super().__init__()
|
||||
self.c = int(c2 * e) # hidden channels
|
||||
@ -180,7 +191,8 @@ class C2(nn.Module):
|
||||
|
||||
|
||||
class C2f(nn.Module):
|
||||
# CSP Bottleneck with 2 convolutions
|
||||
"""CSP Bottleneck with 2 convolutions."""
|
||||
|
||||
def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
||||
super().__init__()
|
||||
self.c = int(c2 * e) # hidden channels
|
||||
@ -200,7 +212,8 @@ class C2f(nn.Module):
|
||||
|
||||
|
||||
class ChannelAttention(nn.Module):
|
||||
# Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet
|
||||
"""Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet."""
|
||||
|
||||
def __init__(self, channels: int) -> None:
|
||||
super().__init__()
|
||||
self.pool = nn.AdaptiveAvgPool2d(1)
|
||||
@ -212,7 +225,8 @@ class ChannelAttention(nn.Module):
|
||||
|
||||
|
||||
class SpatialAttention(nn.Module):
|
||||
# Spatial-attention module
|
||||
"""Spatial-attention module."""
|
||||
|
||||
def __init__(self, kernel_size=7):
|
||||
super().__init__()
|
||||
assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
|
||||
@ -225,7 +239,8 @@ class SpatialAttention(nn.Module):
|
||||
|
||||
|
||||
class CBAM(nn.Module):
|
||||
# Convolutional Block Attention Module
|
||||
"""Convolutional Block Attention Module."""
|
||||
|
||||
def __init__(self, c1, kernel_size=7): # ch_in, kernels
|
||||
super().__init__()
|
||||
self.channel_attention = ChannelAttention(c1)
|
||||
@ -236,7 +251,8 @@ class CBAM(nn.Module):
|
||||
|
||||
|
||||
class C1(nn.Module):
|
||||
# CSP Bottleneck with 1 convolution
|
||||
"""CSP Bottleneck with 1 convolution."""
|
||||
|
||||
def __init__(self, c1, c2, n=1): # ch_in, ch_out, number
|
||||
super().__init__()
|
||||
self.cv1 = Conv(c1, c2, 1, 1)
|
||||
@ -248,7 +264,8 @@ class C1(nn.Module):
|
||||
|
||||
|
||||
class C3x(C3):
|
||||
# C3 module with cross-convolutions
|
||||
"""C3 module with cross-convolutions."""
|
||||
|
||||
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
||||
super().__init__(c1, c2, n, shortcut, g, e)
|
||||
self.c_ = int(c2 * e)
|
||||
@ -256,7 +273,8 @@ class C3x(C3):
|
||||
|
||||
|
||||
class C3TR(C3):
|
||||
# C3 module with TransformerBlock()
|
||||
"""C3 module with TransformerBlock()."""
|
||||
|
||||
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
||||
super().__init__(c1, c2, n, shortcut, g, e)
|
||||
c_ = int(c2 * e)
|
||||
@ -264,7 +282,8 @@ class C3TR(C3):
|
||||
|
||||
|
||||
class C3Ghost(C3):
|
||||
# C3 module with GhostBottleneck()
|
||||
"""C3 module with GhostBottleneck()."""
|
||||
|
||||
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
||||
super().__init__(c1, c2, n, shortcut, g, e)
|
||||
c_ = int(c2 * e) # hidden channels
|
||||
@ -272,7 +291,8 @@ class C3Ghost(C3):
|
||||
|
||||
|
||||
class SPP(nn.Module):
|
||||
# Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
|
||||
"""Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729."""
|
||||
|
||||
def __init__(self, c1, c2, k=(5, 9, 13)):
|
||||
super().__init__()
|
||||
c_ = c1 // 2 # hidden channels
|
||||
@ -286,7 +306,8 @@ class SPP(nn.Module):
|
||||
|
||||
|
||||
class SPPF(nn.Module):
|
||||
# Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
|
||||
"""Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
|
||||
|
||||
def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
|
||||
super().__init__()
|
||||
c_ = c1 // 2 # hidden channels
|
||||
@ -302,7 +323,8 @@ class SPPF(nn.Module):
|
||||
|
||||
|
||||
class Focus(nn.Module):
|
||||
# Focus wh information into c-space
|
||||
"""Focus wh information into c-space."""
|
||||
|
||||
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
|
||||
super().__init__()
|
||||
self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
|
||||
@ -314,7 +336,8 @@ class Focus(nn.Module):
|
||||
|
||||
|
||||
class GhostConv(nn.Module):
|
||||
# Ghost Convolution https://github.com/huawei-noah/ghostnet
|
||||
"""Ghost Convolution https://github.com/huawei-noah/ghostnet."""
|
||||
|
||||
def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
|
||||
super().__init__()
|
||||
c_ = c2 // 2 # hidden channels
|
||||
@ -327,7 +350,8 @@ class GhostConv(nn.Module):
|
||||
|
||||
|
||||
class GhostBottleneck(nn.Module):
|
||||
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
|
||||
"""Ghost Bottleneck https://github.com/huawei-noah/ghostnet."""
|
||||
|
||||
def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
|
||||
super().__init__()
|
||||
c_ = c2 // 2
|
||||
@ -343,7 +367,8 @@ class GhostBottleneck(nn.Module):
|
||||
|
||||
|
||||
class Concat(nn.Module):
|
||||
# Concatenate a list of tensors along dimension
|
||||
"""Concatenate a list of tensors along dimension."""
|
||||
|
||||
def __init__(self, dimension=1):
|
||||
super().__init__()
|
||||
self.d = dimension
|
||||
@ -353,7 +378,8 @@ class Concat(nn.Module):
|
||||
|
||||
|
||||
class Proto(nn.Module):
|
||||
# YOLOv8 mask Proto module for segmentation models
|
||||
"""YOLOv8 mask Proto module for segmentation models."""
|
||||
|
||||
def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks
|
||||
super().__init__()
|
||||
self.cv1 = Conv(c1, c_, k=3)
|
||||
@ -366,7 +392,8 @@ class Proto(nn.Module):
|
||||
|
||||
|
||||
class Ensemble(nn.ModuleList):
|
||||
# Ensemble of models
|
||||
"""Ensemble of models."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
@ -382,7 +409,7 @@ class Ensemble(nn.ModuleList):
|
||||
|
||||
|
||||
class Detect(nn.Module):
|
||||
# YOLOv8 Detect head for detection models
|
||||
"""YOLOv8 Detect head for detection models."""
|
||||
dynamic = False # force grid reconstruction
|
||||
export = False # export mode
|
||||
shape = None
|
||||
@ -423,7 +450,7 @@ class Detect(nn.Module):
|
||||
return y if self.export else (y, x)
|
||||
|
||||
def bias_init(self):
|
||||
# Initialize Detect() biases, WARNING: requires stride availability
|
||||
"""Initialize Detect() biases, WARNING: requires stride availability."""
|
||||
m = self # self.model[-1] # Detect() module
|
||||
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
|
||||
# ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency
|
||||
@ -433,7 +460,8 @@ class Detect(nn.Module):
|
||||
|
||||
|
||||
class Segment(Detect):
|
||||
# YOLOv8 Segment head for segmentation models
|
||||
"""YOLOv8 Segment head for segmentation models."""
|
||||
|
||||
def __init__(self, nc=80, nm=32, npr=256, ch=()):
|
||||
super().__init__(nc, ch)
|
||||
self.nm = nm # number of masks
|
||||
@ -456,7 +484,8 @@ class Segment(Detect):
|
||||
|
||||
|
||||
class Pose(Detect):
|
||||
# YOLOv8 Pose head for keypoints models
|
||||
"""YOLOv8 Pose head for keypoints models."""
|
||||
|
||||
def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
|
||||
super().__init__(nc, ch)
|
||||
self.kpt_shape = kpt_shape # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
|
||||
@ -486,7 +515,8 @@ class Pose(Detect):
|
||||
|
||||
|
||||
class Classify(nn.Module):
|
||||
# YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)
|
||||
"""YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)."""
|
||||
|
||||
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
|
||||
super().__init__()
|
||||
c_ = 1280 # efficientnet_b0 size
|
||||
|
@ -167,7 +167,8 @@ class BaseModel(nn.Module):
|
||||
|
||||
|
||||
class DetectionModel(BaseModel):
|
||||
# YOLOv8 detection model
|
||||
"""YOLOv8 detection model."""
|
||||
|
||||
def __init__(self, cfg='yolov8n.yaml', ch=3, nc=None, verbose=True): # model, input channels, number of classes
|
||||
super().__init__()
|
||||
self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg) # cfg dict
|
||||
@ -218,7 +219,7 @@ class DetectionModel(BaseModel):
|
||||
|
||||
@staticmethod
|
||||
def _descale_pred(p, flips, scale, img_size, dim=1):
|
||||
# de-scale predictions following augmented inference (inverse operation)
|
||||
"""De-scale predictions following augmented inference (inverse operation)."""
|
||||
p[:, :4] /= scale # de-scale
|
||||
x, y, wh, cls = p.split((1, 1, 2, p.shape[dim] - 4), dim)
|
||||
if flips == 2:
|
||||
@ -228,7 +229,7 @@ class DetectionModel(BaseModel):
|
||||
return torch.cat((x, y, wh, cls), dim)
|
||||
|
||||
def _clip_augmented(self, y):
|
||||
# Clip YOLOv5 augmented inference tails
|
||||
"""Clip YOLOv5 augmented inference tails."""
|
||||
nl = self.model[-1].nl # number of detection layers (P3-P5)
|
||||
g = sum(4 ** x for x in range(nl)) # grid points
|
||||
e = 1 # exclude layer count
|
||||
@ -240,7 +241,8 @@ class DetectionModel(BaseModel):
|
||||
|
||||
|
||||
class SegmentationModel(DetectionModel):
|
||||
# YOLOv8 segmentation model
|
||||
"""YOLOv8 segmentation model."""
|
||||
|
||||
def __init__(self, cfg='yolov8n-seg.yaml', ch=3, nc=None, verbose=True):
|
||||
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
|
||||
|
||||
@ -249,7 +251,8 @@ class SegmentationModel(DetectionModel):
|
||||
|
||||
|
||||
class PoseModel(DetectionModel):
|
||||
# YOLOv8 pose model
|
||||
"""YOLOv8 pose model."""
|
||||
|
||||
def __init__(self, cfg='yolov8n-pose.yaml', ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
|
||||
if not isinstance(cfg, dict):
|
||||
cfg = yaml_model_load(cfg) # load model YAML
|
||||
@ -260,7 +263,8 @@ class PoseModel(DetectionModel):
|
||||
|
||||
|
||||
class ClassificationModel(BaseModel):
|
||||
# YOLOv8 classification model
|
||||
"""YOLOv8 classification model."""
|
||||
|
||||
def __init__(self,
|
||||
cfg=None,
|
||||
model=None,
|
||||
@ -272,7 +276,7 @@ class ClassificationModel(BaseModel):
|
||||
self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg, ch, nc, verbose)
|
||||
|
||||
def _from_detection_model(self, model, nc=1000, cutoff=10):
|
||||
# Create a YOLOv5 classification model from a YOLOv5 detection model
|
||||
"""Create a YOLOv5 classification model from a YOLOv5 detection model."""
|
||||
from ultralytics.nn.autobackend import AutoBackend
|
||||
if isinstance(model, AutoBackend):
|
||||
model = model.model # unwrap DetectMultiBackend
|
||||
@ -304,7 +308,7 @@ class ClassificationModel(BaseModel):
|
||||
|
||||
@staticmethod
|
||||
def reshape_outputs(model, nc):
|
||||
# Update a TorchVision classification model to class count 'n' if required
|
||||
"""Update a TorchVision classification model to class count 'nc' if required."""
|
||||
name, m = list((model.model if hasattr(model, 'model') else model).named_children())[-1] # last module
|
||||
if isinstance(m, Classify): # YOLO Classify() head
|
||||
if m.linear.out_features != nc:
|
||||
@ -363,7 +367,7 @@ def torch_safe_load(weight):
|
||||
|
||||
|
||||
def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
|
||||
# Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
|
||||
"""Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a."""
|
||||
|
||||
ensemble = Ensemble()
|
||||
for w in weights if isinstance(weights, list) else [weights]:
|
||||
@ -403,7 +407,7 @@ def attempt_load_weights(weights, device=None, inplace=True, fuse=False):
|
||||
|
||||
|
||||
def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
|
||||
# Loads a single model weights
|
||||
"""Loads the weights of a single model."""
|
||||
ckpt, weight = torch_safe_load(weight) # load ckpt
|
||||
args = {**DEFAULT_CFG_DICT, **ckpt['train_args']} # combine model and default args, preferring model args
|
||||
model = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model
|
||||
@ -546,7 +550,7 @@ def guess_model_task(model):
|
||||
"""
|
||||
|
||||
def cfg2task(cfg):
|
||||
# Guess from YAML dictionary
|
||||
"""Guess from YAML dictionary."""
|
||||
m = cfg['head'][-1][-2].lower() # output module name
|
||||
if m in ('classify', 'classifier', 'cls', 'fc'):
|
||||
return 'classify'
|
||||
|
@ -27,7 +27,7 @@ class BaseTrack:
|
||||
frame_id = 0
|
||||
time_since_update = 0
|
||||
|
||||
# multi-camera
|
||||
# Multi-camera
|
||||
location = (np.inf, np.inf)
|
||||
|
||||
@property
|
||||
|
@ -100,7 +100,7 @@ class BOTSORT(BYTETracker):
|
||||
self.appearance_thresh = args.appearance_thresh
|
||||
|
||||
if args.with_reid:
|
||||
# haven't supported BoT-SORT(reid) yet
|
||||
# BoT-SORT(reid) is not supported yet
|
||||
self.encoder = None
|
||||
# self.gmc = GMC(method=args.cmc_method, verbose=[args.name, args.ablation])
|
||||
self.gmc = GMC(method=args.cmc_method)
|
||||
|
@ -11,8 +11,7 @@ class STrack(BaseTrack):
|
||||
shared_kalman = KalmanFilterXYAH()
|
||||
|
||||
def __init__(self, tlwh, score, cls):
|
||||
|
||||
# wait activate
|
||||
"""Initialize a new track that is waiting to be activated."""
|
||||
self._tlwh = np.asarray(self.tlbr_to_tlwh(tlwh[:-1]), dtype=np.float32)
|
||||
self.kalman_filter = None
|
||||
self.mean, self.covariance = None, None
|
||||
@ -62,7 +61,7 @@ class STrack(BaseTrack):
|
||||
stracks[i].covariance = cov
|
||||
|
||||
def activate(self, kalman_filter, frame_id):
|
||||
"""Start a new tracklet"""
|
||||
"""Start a new tracklet."""
|
||||
self.kalman_filter = kalman_filter
|
||||
self.track_id = self.next_id()
|
||||
self.mean, self.covariance = self.kalman_filter.initiate(self.convert_coords(self._tlwh))
|
||||
@ -179,7 +178,7 @@ class BYTETracker:
|
||||
|
||||
scores = results.conf
|
||||
bboxes = results.xyxy
|
||||
# add index
|
||||
# Add index
|
||||
bboxes = np.concatenate([bboxes, np.arange(len(bboxes)).reshape(-1, 1)], axis=-1)
|
||||
cls = results.cls
|
||||
|
||||
@ -196,7 +195,7 @@ class BYTETracker:
|
||||
cls_second = cls[inds_second]
|
||||
|
||||
detections = self.init_track(dets, scores_keep, cls_keep, img)
|
||||
""" Add newly detected tracklets to tracked_stracks"""
|
||||
# Add newly detected tracklets to tracked_stracks
|
||||
unconfirmed = []
|
||||
tracked_stracks = [] # type: list[STrack]
|
||||
for track in self.tracked_stracks:
|
||||
@ -204,7 +203,7 @@ class BYTETracker:
|
||||
unconfirmed.append(track)
|
||||
else:
|
||||
tracked_stracks.append(track)
|
||||
""" Step 2: First association, with high score detection boxes"""
|
||||
# Step 2: First association, with high score detection boxes
|
||||
strack_pool = self.joint_stracks(tracked_stracks, self.lost_stracks)
|
||||
# Predict the current location with KF
|
||||
self.multi_predict(strack_pool)
|
||||
@ -225,7 +224,7 @@ class BYTETracker:
|
||||
else:
|
||||
track.re_activate(det, self.frame_id, new_id=False)
|
||||
refind_stracks.append(track)
|
||||
""" Step 3: Second association, with low score detection boxes"""
|
||||
# Step 3: Second association, with low score detection boxes
|
||||
# Associate the remaining unmatched tracks with the low score detections
|
||||
detections_second = self.init_track(dets_second, scores_second, cls_second, img)
|
||||
r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
|
||||
@ -247,7 +246,7 @@ class BYTETracker:
|
||||
if track.state != TrackState.Lost:
|
||||
track.mark_lost()
|
||||
lost_stracks.append(track)
|
||||
"""Deal with unconfirmed tracks, usually tracks with only one beginning frame"""
|
||||
# Deal with unconfirmed tracks, usually tracks with only one beginning frame
|
||||
detections = [detections[i] for i in u_detection]
|
||||
dists = self.get_dists(unconfirmed, detections)
|
||||
matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
|
||||
@ -258,14 +257,14 @@ class BYTETracker:
|
||||
track = unconfirmed[it]
|
||||
track.mark_removed()
|
||||
removed_stracks.append(track)
|
||||
""" Step 4: Init new stracks"""
|
||||
# Step 4: Init new stracks
|
||||
for inew in u_detection:
|
||||
track = detections[inew]
|
||||
if track.score < self.args.new_track_thresh:
|
||||
continue
|
||||
track.activate(self.kalman_filter, self.frame_id)
|
||||
activated_starcks.append(track)
|
||||
""" Step 5: Update state"""
|
||||
# Step 5: Update state
|
||||
for track in self.lost_stracks:
|
||||
if self.frame_id - track.end_frame > self.max_time_lost:
|
||||
track.mark_removed()
|
||||
|
@ -83,8 +83,7 @@ class GMC:
|
||||
return np.eye(2, 3)
|
||||
|
||||
def applyEcc(self, raw_frame, detections=None):
|
||||
|
||||
# Initialize
|
||||
"""Initialize."""
|
||||
height, width, _ = raw_frame.shape
|
||||
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
|
||||
H = np.eye(2, 3, dtype=np.float32)
|
||||
@ -116,8 +115,7 @@ class GMC:
|
||||
return H
|
||||
|
||||
def applyFeatures(self, raw_frame, detections=None):
|
||||
|
||||
# Initialize
|
||||
"""Initialize."""
|
||||
height, width, _ = raw_frame.shape
|
||||
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
|
||||
H = np.eye(2, 3)
|
||||
@ -129,7 +127,7 @@ class GMC:
|
||||
width = width // self.downscale
|
||||
height = height // self.downscale
|
||||
|
||||
# find the keypoints
|
||||
# Find the keypoints
|
||||
mask = np.zeros_like(frame)
|
||||
# mask[int(0.05 * height): int(0.95 * height), int(0.05 * width): int(0.95 * width)] = 255
|
||||
mask[int(0.02 * height):int(0.98 * height), int(0.02 * width):int(0.98 * width)] = 255
|
||||
@ -140,7 +138,7 @@ class GMC:
|
||||
|
||||
keypoints = self.detector.detect(frame, mask)
|
||||
|
||||
# compute the descriptors
|
||||
# Compute the descriptors
|
||||
keypoints, descriptors = self.extractor.compute(frame, keypoints)
|
||||
|
||||
# Handle first frame
|
||||
@ -243,7 +241,7 @@ class GMC:
|
||||
return H
|
||||
|
||||
def applySparseOptFlow(self, raw_frame, detections=None):
|
||||
# Initialize
|
||||
"""Initialize."""
|
||||
# t0 = time.time()
|
||||
height, width, _ = raw_frame.shape
|
||||
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
|
||||
@ -254,7 +252,7 @@ class GMC:
|
||||
# frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
|
||||
frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
|
||||
|
||||
# find the keypoints
|
||||
# Find the keypoints
|
||||
keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)
|
||||
|
||||
# Handle first frame
|
||||
@ -268,10 +266,10 @@ class GMC:
|
||||
|
||||
return H
|
||||
|
||||
# find correspondences
|
||||
# Find correspondences
|
||||
matchedKeypoints, status, err = cv2.calcOpticalFlowPyrLK(self.prevFrame, frame, self.prevKeyPoints, None)
|
||||
|
||||
# leave good correspondences only
|
||||
# Leave good correspondences only
|
||||
prevPoints = []
|
||||
currPoints = []
|
||||
|
||||
|
@ -8,6 +8,7 @@ from .kalman_filter import chi2inv95
|
||||
|
||||
try:
|
||||
import lap # for linear_assignment
|
||||
|
||||
assert lap.__version__ # verify package is not directory
|
||||
except (ImportError, AssertionError, AttributeError):
|
||||
from ultralytics.yolo.utils.checks import check_requirements
|
||||
@ -45,7 +46,7 @@ def _indices_to_matches(cost_matrix, indices, thresh):
|
||||
|
||||
|
||||
def linear_assignment(cost_matrix, thresh, use_lap=True):
|
||||
# Linear assignment implementations with scipy and lap.lapjv
|
||||
"""Linear assignment implementations with scipy and lap.lapjv."""
|
||||
if cost_matrix.size == 0:
|
||||
return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
|
||||
|
||||
|
@ -400,5 +400,5 @@ def copy_default_cfg():
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# entrypoint(debug='yolo predict model=yolov8n.pt')
|
||||
# Example Usage: entrypoint(debug='yolo predict model=yolov8n.pt')
|
||||
entrypoint(debug='')
|
||||
|
@ -66,7 +66,7 @@ class Compose:
|
||||
|
||||
|
||||
class BaseMixTransform:
|
||||
"""This implementation is from mmyolo"""
|
||||
"""This implementation is from mmyolo."""
|
||||
|
||||
def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
|
||||
self.dataset = dataset
|
||||
@ -77,12 +77,12 @@ class BaseMixTransform:
|
||||
if random.uniform(0, 1) > self.p:
|
||||
return labels
|
||||
|
||||
# get index of one or three other images
|
||||
# Get index of one or three other images
|
||||
indexes = self.get_indexes()
|
||||
if isinstance(indexes, int):
|
||||
indexes = [indexes]
|
||||
|
||||
# get images information will be used for Mosaic or MixUp
|
||||
# Get the image information that will be used for Mosaic or MixUp
|
||||
mix_labels = [self.dataset.get_label_info(i) for i in indexes]
|
||||
|
||||
if self.pre_transform is not None:
|
||||
@ -132,7 +132,7 @@ class Mosaic(BaseMixTransform):
|
||||
img = labels_patch['img']
|
||||
h, w = labels_patch.pop('resized_shape')
|
||||
|
||||
# place img in img4
|
||||
# Place img in img4
|
||||
if i == 0: # top left
|
||||
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
|
||||
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
|
||||
@ -158,7 +158,7 @@ class Mosaic(BaseMixTransform):
|
||||
return final_labels
|
||||
|
||||
def _update_labels(self, labels, padw, padh):
|
||||
"""Update labels"""
|
||||
"""Update labels."""
|
||||
nh, nw = labels['img'].shape[:2]
|
||||
labels['instances'].convert_bbox(format='xyxy')
|
||||
labels['instances'].denormalize(nw, nh)
|
||||
@ -193,7 +193,7 @@ class MixUp(BaseMixTransform):
|
||||
return random.randint(0, len(self.dataset) - 1)
|
||||
|
||||
def _mix_transform(self, labels):
|
||||
# Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
|
||||
"""Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf."""
|
||||
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
|
||||
labels2 = labels['mix_labels'][0]
|
||||
labels['img'] = (labels['img'] * r + labels2['img'] * (1 - r)).astype(np.uint8)
|
||||
@ -217,12 +217,12 @@ class RandomPerspective:
|
||||
self.scale = scale
|
||||
self.shear = shear
|
||||
self.perspective = perspective
|
||||
# mosaic border
|
||||
# Mosaic border
|
||||
self.border = border
|
||||
self.pre_transform = pre_transform
|
||||
|
||||
def affine_transform(self, img, border):
|
||||
# Center
|
||||
"""Apply an affine transform to img, starting from the centering matrix C."""
|
||||
C = np.eye(3, dtype=np.float32)
|
||||
|
||||
C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
|
||||
@ -253,7 +253,7 @@ class RandomPerspective:
|
||||
|
||||
# Combined rotation matrix
|
||||
M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
|
||||
# affine image
|
||||
# Affine image
|
||||
if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
|
||||
if self.perspective:
|
||||
img = cv2.warpPerspective(img, M, dsize=self.size, borderValue=(114, 114, 114))
|
||||
@ -281,7 +281,7 @@ class RandomPerspective:
|
||||
xy = xy @ M.T # transform
|
||||
xy = (xy[:, :2] / xy[:, 2:3] if self.perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine
|
||||
|
||||
# create new boxes
|
||||
# Create new boxes
|
||||
x = xy[:, [0, 2, 4, 6]]
|
||||
y = xy[:, [1, 3, 5, 7]]
|
||||
return np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1)), dtype=bboxes.dtype).reshape(4, n).T
|
||||
@ -348,7 +348,7 @@ class RandomPerspective:
|
||||
img = labels['img']
|
||||
cls = labels['cls']
|
||||
instances = labels.pop('instances')
|
||||
# make sure the coord formats are right
|
||||
# Make sure the coord formats are right
|
||||
instances.convert_bbox(format='xyxy')
|
||||
instances.denormalize(*img.shape[:2][::-1])
|
||||
|
||||
@ -362,19 +362,19 @@ class RandomPerspective:
|
||||
|
||||
segments = instances.segments
|
||||
keypoints = instances.keypoints
|
||||
# update bboxes if there are segments.
|
||||
# Update bboxes if there are segments.
|
||||
if len(segments):
|
||||
bboxes, segments = self.apply_segments(segments, M)
|
||||
|
||||
if keypoints is not None:
|
||||
keypoints = self.apply_keypoints(keypoints, M)
|
||||
new_instances = Instances(bboxes, segments, keypoints, bbox_format='xyxy', normalized=False)
|
||||
# clip
|
||||
# Clip
|
||||
new_instances.clip(*self.size)
|
||||
|
||||
# filter instances
|
||||
# Filter instances
|
||||
instances.scale(scale_w=scale, scale_h=scale, bbox_only=True)
|
||||
# make the bboxes have the same scale with new_bboxes
|
||||
# Make the bboxes have the same scale as new_bboxes
|
||||
i = self.box_candidates(box1=instances.bboxes.T,
|
||||
box2=new_instances.bboxes.T,
|
||||
area_thr=0.01 if len(segments) else 0.10)
|
||||
@ -441,7 +441,7 @@ class RandomFlip:
|
||||
if self.direction == 'horizontal' and random.random() < self.p:
|
||||
img = np.fliplr(img)
|
||||
instances.fliplr(w)
|
||||
# for keypoints
|
||||
# For keypoints
|
||||
if self.flip_idx is not None and instances.keypoints is not None:
|
||||
instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
|
||||
labels['img'] = np.ascontiguousarray(img)
|
||||
@ -450,7 +450,7 @@ class RandomFlip:
|
||||
|
||||
|
||||
class LetterBox:
|
||||
"""Resize image and padding for detection, instance segmentation, pose"""
|
||||
"""Resize image and padding for detection, instance segmentation, pose."""
|
||||
|
||||
def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, stride=32):
|
||||
self.new_shape = new_shape
|
||||
@ -505,7 +505,7 @@ class LetterBox:
|
||||
return img
|
||||
|
||||
def _update_labels(self, labels, ratio, padw, padh):
|
||||
"""Update labels"""
|
||||
"""Update labels."""
|
||||
labels['instances'].convert_bbox(format='xyxy')
|
||||
labels['instances'].denormalize(*labels['img'].shape[:2][::-1])
|
||||
labels['instances'].scale(*ratio)
|
||||
@ -519,7 +519,7 @@ class CopyPaste:
|
||||
self.p = p
|
||||
|
||||
def __call__(self, labels):
|
||||
# Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
|
||||
"""Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)."""
|
||||
im = labels['img']
|
||||
cls = labels['cls']
|
||||
h, w = im.shape[:2]
|
||||
@ -531,7 +531,7 @@ class CopyPaste:
|
||||
_, w, _ = im.shape # height, width, channels
|
||||
im_new = np.zeros(im.shape, np.uint8)
|
||||
|
||||
# calculate ioa first then select indexes randomly
|
||||
# Calculate ioa first then select indexes randomly
|
||||
ins_flip = deepcopy(instances)
|
||||
ins_flip.fliplr(w)
|
||||
|
||||
@ -641,7 +641,7 @@ class Format:
|
||||
labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
|
||||
if self.return_keypoint:
|
||||
labels['keypoints'] = torch.from_numpy(instances.keypoints)
|
||||
# then we can use collate_fn
|
||||
# Then we can use collate_fn
|
||||
if self.batch_idx:
|
||||
labels['batch_idx'] = torch.zeros(nl)
|
||||
return labels
|
||||
@ -654,7 +654,7 @@ class Format:
|
||||
return img
|
||||
|
||||
def _format_segments(self, instances, cls, w, h):
|
||||
"""convert polygon points to bitmap"""
|
||||
"""Convert polygon points to bitmap."""
|
||||
segments = instances.segments
|
||||
if self.mask_overlap:
|
||||
masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=self.mask_ratio)
|
||||
|
@ -70,7 +70,7 @@ class BaseDataset(Dataset):
|
||||
|
||||
self.ni = len(self.labels)
|
||||
|
||||
# rect stuff
|
||||
# Rect stuff
|
||||
self.rect = rect
|
||||
self.batch_size = batch_size
|
||||
self.stride = stride
|
||||
@ -79,13 +79,13 @@ class BaseDataset(Dataset):
|
||||
assert self.batch_size is not None
|
||||
self.set_rectangle()
|
||||
|
||||
# cache stuff
|
||||
# Cache stuff
|
||||
self.ims = [None] * self.ni
|
||||
self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files]
|
||||
if cache:
|
||||
self.cache_images(cache)
|
||||
|
||||
# transforms
|
||||
# Transforms
|
||||
self.transforms = self.build_transforms(hyp=hyp)
|
||||
|
||||
def get_img_files(self, img_path):
|
||||
@ -96,13 +96,13 @@ class BaseDataset(Dataset):
|
||||
p = Path(p) # os-agnostic
|
||||
if p.is_dir(): # dir
|
||||
f += glob.glob(str(p / '**' / '*.*'), recursive=True)
|
||||
# f = list(p.rglob('*.*')) # pathlib
|
||||
# f = list(p.rglob('*.*')) # pathlib
|
||||
elif p.is_file(): # file
|
||||
with open(p) as t:
|
||||
t = t.read().strip().splitlines()
|
||||
parent = str(p.parent) + os.sep
|
||||
f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
|
||||
# f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib)
|
||||
# f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib)
|
||||
else:
|
||||
raise FileNotFoundError(f'{self.prefix}{p} does not exist')
|
||||
im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
|
||||
@ -113,7 +113,7 @@ class BaseDataset(Dataset):
|
||||
return im_files
|
||||
|
||||
def update_labels(self, include_class: Optional[list]):
|
||||
"""include_class, filter labels to include only these classes (optional)"""
|
||||
"""Filter labels to include only the classes listed in include_class (optional)."""
|
||||
include_class_array = np.array(include_class).reshape(1, -1)
|
||||
for i in range(len(self.labels)):
|
||||
if include_class is not None:
|
||||
@ -129,7 +129,7 @@ class BaseDataset(Dataset):
|
||||
self.labels[i]['cls'][:, 0] = 0
|
||||
|
||||
def load_image(self, i):
|
||||
# Loads 1 image from dataset index 'i', returns (im, resized hw)
|
||||
"""Loads 1 image from dataset index 'i', returns (im, resized hw)."""
|
||||
im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
|
||||
if im is None: # not cached in RAM
|
||||
if fn.exists(): # load npy
|
||||
@ -147,7 +147,7 @@ class BaseDataset(Dataset):
|
||||
return self.ims[i], self.im_hw0[i], self.im_hw[i] # im, hw_original, hw_resized
|
||||
|
||||
def cache_images(self, cache):
|
||||
# cache images to memory or disk
|
||||
"""Cache images to memory or disk."""
|
||||
gb = 0 # Gigabytes of cached images
|
||||
self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni
|
||||
fcn = self.cache_images_to_disk if cache == 'disk' else self.load_image
|
||||
@ -164,7 +164,7 @@ class BaseDataset(Dataset):
|
||||
pbar.close()
|
||||
|
||||
def cache_images_to_disk(self, i):
|
||||
# Saves an image as an *.npy file for faster loading
|
||||
"""Saves an image as an *.npy file for faster loading."""
|
||||
f = self.npy_files[i]
|
||||
if not f.exists():
|
||||
np.save(f.as_posix(), cv2.imread(self.im_files[i]))
|
||||
@ -211,17 +211,17 @@ class BaseDataset(Dataset):
|
||||
return len(self.labels)
|
||||
|
||||
def update_labels_info(self, label):
|
||||
"""custom your label format here"""
|
||||
"""Customize your label format here."""
|
||||
return label
|
||||
|
||||
def build_transforms(self, hyp=None):
|
||||
"""Users can custom augmentations here
|
||||
like:
|
||||
if self.augment:
|
||||
# training transforms
|
||||
# Training transforms
|
||||
return Compose([])
|
||||
else:
|
||||
# val transforms
|
||||
# Val transforms
|
||||
return Compose([])
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
@ -104,7 +104,7 @@ def build_dataloader(cfg, batch, img_path, data_info, stride=32, rect=False, ran
|
||||
generator=generator), dataset
|
||||
|
||||
|
||||
# build classification
|
||||
# Build classification
|
||||
# TODO: using cfg like `build_dataloader`
|
||||
def build_classification_dataloader(path,
|
||||
imgsz=224,
|
||||
@ -114,7 +114,7 @@ def build_classification_dataloader(path,
|
||||
rank=-1,
|
||||
workers=8,
|
||||
shuffle=True):
|
||||
# Returns Dataloader object to be used with YOLOv5 Classifier
|
||||
"""Returns Dataloader object to be used with YOLOv5 Classifier."""
|
||||
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
|
||||
dataset = ClassificationDataset(root=path, imgsz=imgsz, augment=augment, cache=cache)
|
||||
batch_size = min(batch_size, len(dataset))
|
||||
|
@ -70,7 +70,7 @@ class LoadStreams:
|
||||
self.threads[i].start()
|
||||
LOGGER.info('') # newline
|
||||
|
||||
# check for common shapes
|
||||
# Check for common shapes
|
||||
s = np.stack([LetterBox(imgsz, auto, stride=stride)(image=x).shape for x in self.imgs])
|
||||
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
|
||||
self.auto = auto and self.rect
|
||||
@ -81,7 +81,7 @@ class LoadStreams:
|
||||
LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.')
|
||||
|
||||
def update(self, i, cap, stream):
|
||||
# Read stream `i` frames in daemon thread
|
||||
"""Read stream `i` frames in daemon thread."""
|
||||
n, f = 0, self.frames[i] # frame number, frame array
|
||||
while cap.isOpened() and n < f:
|
||||
n += 1
|
||||
@ -123,7 +123,7 @@ class LoadStreams:
|
||||
class LoadScreenshots:
|
||||
# YOLOv8 screenshot dataloader, i.e. `yolo predict source=screen`
|
||||
def __init__(self, source, imgsz=640, stride=32, auto=True, transforms=None):
|
||||
# source = [screen_number left top width height] (pixels)
|
||||
"""source = [screen_number left top width height] (pixels)."""
|
||||
check_requirements('mss')
|
||||
import mss # noqa
|
||||
|
||||
@ -156,7 +156,7 @@ class LoadScreenshots:
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
# mss screen capture: get raw pixels from the screen as np array
|
||||
"""mss screen capture: get raw pixels from the screen as np array."""
|
||||
im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR
|
||||
s = f'screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: '
|
||||
|
||||
@ -256,7 +256,7 @@ class LoadImages:
|
||||
return path, im, im0, self.cap, s
|
||||
|
||||
def _new_video(self, path):
|
||||
# Create a new video capture object
|
||||
"""Create a new video capture object."""
|
||||
self.frame = 0
|
||||
self.cap = cv2.VideoCapture(path)
|
||||
self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
|
||||
@ -266,7 +266,7 @@ class LoadImages:
|
||||
# self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0)
|
||||
|
||||
def _cv2_rotate(self, im):
|
||||
# Rotate a cv2 video manually
|
||||
"""Rotate a cv2 video manually."""
|
||||
if self.orientation == 0:
|
||||
return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE)
|
||||
elif self.orientation == 180:
|
||||
@ -291,7 +291,7 @@ class LoadPilAndNumpy:
|
||||
self.auto = auto
|
||||
self.transforms = transforms
|
||||
self.mode = 'image'
|
||||
# generate fake paths
|
||||
# Generate fake paths
|
||||
self.bs = len(self.im0)
|
||||
|
||||
@staticmethod
|
||||
|
@ -55,19 +55,19 @@ class Albumentations:
|
||||
|
||||
|
||||
def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False):
|
||||
# Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = (x - mean) / std
|
||||
"""Normalize RGB images x per ImageNet stats in BCHW format, i.e. = (x - mean) / std."""
|
||||
return TF.normalize(x, mean, std, inplace=inplace)
|
||||
|
||||
|
||||
def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD):
|
||||
# Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = x * std + mean
|
||||
"""Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = x * std + mean."""
|
||||
for i in range(3):
|
||||
x[:, i] = x[:, i] * std[i] + mean[i]
|
||||
return x
|
||||
|
||||
|
||||
def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
|
||||
# HSV color-space augmentation
|
||||
"""HSV color-space augmentation."""
|
||||
if hgain or sgain or vgain:
|
||||
r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
|
||||
hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
|
||||
@ -83,7 +83,7 @@ def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
|
||||
|
||||
|
||||
def hist_equalize(im, clahe=True, bgr=False):
|
||||
# Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255
|
||||
"""Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255."""
|
||||
yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
|
||||
if clahe:
|
||||
c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
||||
@ -94,7 +94,7 @@ def hist_equalize(im, clahe=True, bgr=False):
|
||||
|
||||
|
||||
def replicate(im, labels):
|
||||
# Replicate labels
|
||||
"""Replicate labels."""
|
||||
h, w = im.shape[:2]
|
||||
boxes = labels[:, 1:].astype(int)
|
||||
x1, y1, x2, y2 = boxes.T
|
||||
@ -213,7 +213,7 @@ def random_perspective(im,
|
||||
xy = xy @ M.T # transform
|
||||
xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine
|
||||
|
||||
# clip
|
||||
# Clip
|
||||
new[i] = segment2box(xy, width, height)
|
||||
|
||||
else: # warp boxes
|
||||
@ -222,16 +222,16 @@ def random_perspective(im,
|
||||
xy = xy @ M.T # transform
|
||||
xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine
|
||||
|
||||
# create new boxes
|
||||
# Create new boxes
|
||||
x = xy[:, [0, 2, 4, 6]]
|
||||
y = xy[:, [1, 3, 5, 7]]
|
||||
new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
|
||||
|
||||
# clip
|
||||
# Clip
|
||||
new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
|
||||
new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
|
||||
|
||||
# filter candidates
|
||||
# Filter candidates
|
||||
i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
|
||||
targets = targets[i]
|
||||
targets[:, 1:5] = new[i]
|
||||
@ -240,13 +240,13 @@ def random_perspective(im,
|
||||
|
||||
|
||||
def copy_paste(im, labels, segments, p=0.5):
|
||||
# Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
|
||||
"""Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)."""
|
||||
n = len(segments)
|
||||
if p and n:
|
||||
h, w, c = im.shape # height, width, channels
|
||||
im_new = np.zeros(im.shape, np.uint8)
|
||||
|
||||
# calculate ioa first then select indexes randomly
|
||||
# Calculate ioa first then select indexes randomly
|
||||
boxes = np.stack([w - labels[:, 3], labels[:, 2], w - labels[:, 1], labels[:, 4]], axis=-1) # (n, 4)
|
||||
ioa = bbox_ioa(boxes, labels[:, 1:5]) # intersection over area
|
||||
indexes = np.nonzero((ioa < 0.30).all(1))[0] # (N, )
|
||||
@ -265,7 +265,7 @@ def copy_paste(im, labels, segments, p=0.5):
|
||||
|
||||
|
||||
def cutout(im, labels, p=0.5):
|
||||
# Applies image cutout augmentation https://arxiv.org/abs/1708.04552
|
||||
"""Applies image cutout augmentation https://arxiv.org/abs/1708.04552."""
|
||||
if random.random() < p:
|
||||
h, w = im.shape[:2]
|
||||
scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
|
||||
@ -273,16 +273,16 @@ def cutout(im, labels, p=0.5):
|
||||
mask_h = random.randint(1, int(h * s)) # create random masks
|
||||
mask_w = random.randint(1, int(w * s))
|
||||
|
||||
# box
|
||||
# Box
|
||||
xmin = max(0, random.randint(0, w) - mask_w // 2)
|
||||
ymin = max(0, random.randint(0, h) - mask_h // 2)
|
||||
xmax = min(w, xmin + mask_w)
|
||||
ymax = min(h, ymin + mask_h)
|
||||
|
||||
# apply random color mask
|
||||
# Apply random color mask
|
||||
im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
|
||||
|
||||
# return unobscured labels
|
||||
# Return unobscured labels
|
||||
if len(labels) and s > 0.03:
|
||||
box = np.array([[xmin, ymin, xmax, ymax]], dtype=np.float32)
|
||||
ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h))[0] # intersection over area
|
||||
@ -292,7 +292,7 @@ def cutout(im, labels, p=0.5):
|
||||
|
||||
|
||||
def mixup(im, labels, im2, labels2):
|
||||
# Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
|
||||
"""Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf."""
|
||||
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
|
||||
im = (im * r + im2 * (1 - r)).astype(np.uint8)
|
||||
labels = np.concatenate((labels, labels2), 0)
|
||||
@ -350,7 +350,7 @@ def classify_albumentations(
|
||||
|
||||
|
||||
def classify_transforms(size=224):
|
||||
# Transforms to apply if albumentations not installed
|
||||
"""Transforms to apply if albumentations not installed."""
|
||||
assert isinstance(size, int), f'ERROR: classify_transforms size {size} must be integer, not (list, tuple)'
|
||||
# T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
|
||||
return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
|
||||
|
@ -50,7 +50,7 @@ for orientation in ExifTags.TAGS.keys():
|
||||
|
||||
|
||||
def get_hash(paths):
|
||||
# Returns a single hash value of a list of paths (files or dirs)
|
||||
"""Returns a single hash value of a list of paths (files or dirs)."""
|
||||
size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes
|
||||
h = hashlib.sha256(str(size).encode()) # hash sizes
|
||||
h.update(''.join(paths).encode()) # hash paths
|
||||
@ -58,7 +58,7 @@ def get_hash(paths):
|
||||
|
||||
|
||||
def exif_size(img):
|
||||
# Returns exif-corrected PIL size
|
||||
"""Returns exif-corrected PIL size."""
|
||||
s = img.size # (width, height)
|
||||
with contextlib.suppress(Exception):
|
||||
rotation = dict(img._getexif().items())[orientation]
|
||||
@ -94,7 +94,7 @@ def exif_transpose(image):
|
||||
|
||||
|
||||
def seed_worker(worker_id):
|
||||
# Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader
|
||||
"""Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader."""
|
||||
worker_seed = torch.initial_seed() % 2 ** 32
|
||||
np.random.seed(worker_seed)
|
||||
random.seed(worker_seed)
|
||||
@ -192,7 +192,7 @@ class _RepeatSampler:
|
||||
class LoadScreenshots:
|
||||
# YOLOv5 screenshot dataloader, i.e. `python detect.py --source "screen 0 100 100 512 256"`
|
||||
def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None):
|
||||
# source = [screen_number left top width height] (pixels)
|
||||
"""source = [screen_number left top width height] (pixels)."""
|
||||
check_requirements('mss')
|
||||
import mss
|
||||
|
||||
@ -224,7 +224,7 @@ class LoadScreenshots:
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
# mss screen capture: get raw pixels from the screen as np array
|
||||
"""mss screen capture: get raw pixels from the screen as np array."""
|
||||
im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR
|
||||
s = f'screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: '
|
||||
|
||||
@ -320,7 +320,7 @@ class LoadImages:
|
||||
return path, im, im0, self.cap, s
|
||||
|
||||
def _new_video(self, path):
|
||||
# Create a new video capture object
|
||||
"""Create a new video capture object."""
|
||||
self.frame = 0
|
||||
self.cap = cv2.VideoCapture(path)
|
||||
self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
|
||||
@ -328,7 +328,7 @@ class LoadImages:
|
||||
# self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493
|
||||
|
||||
def _cv2_rotate(self, im):
|
||||
# Rotate a cv2 video manually
|
||||
"""Rotate a cv2 video manually."""
|
||||
if self.orientation == 0:
|
||||
return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE)
|
||||
elif self.orientation == 180:
|
||||
@ -379,7 +379,7 @@ class LoadStreams:
|
||||
self.threads[i].start()
|
||||
LOGGER.info('') # newline
|
||||
|
||||
# check for common shapes
|
||||
# Check for common shapes
|
||||
s = np.stack([letterbox(x, img_size, stride=stride, auto=auto)[0].shape for x in self.imgs])
|
||||
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
|
||||
self.auto = auto and self.rect
|
||||
@ -388,7 +388,7 @@ class LoadStreams:
|
||||
LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.')
|
||||
|
||||
def update(self, i, cap, stream):
|
||||
# Read stream `i` frames in daemon thread
|
||||
"""Read stream `i` frames in daemon thread."""
|
||||
n, f = 0, self.frames[i] # frame number, frame array
|
||||
while cap.isOpened() and n < f:
|
||||
n += 1
|
||||
@ -428,13 +428,13 @@ class LoadStreams:
|
||||
|
||||
|
||||
def img2label_paths(img_paths):
|
||||
# Define label paths as a function of image paths
|
||||
"""Define label paths as a function of image paths."""
|
||||
sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}labels{os.sep}' # /images/, /labels/ substrings
|
||||
return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths]
|
||||
|
||||
|
||||
class LoadImagesAndLabels(Dataset):
|
||||
# YOLOv5 train_loader/val_loader, loads images and labels for training and validation
|
||||
"""YOLOv5 train_loader/val_loader, loads images and labels for training and validation."""
|
||||
cache_version = 0.6 # dataset labels *.cache version
|
||||
rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4]
|
||||
|
||||
@ -590,7 +590,7 @@ class LoadImagesAndLabels(Dataset):
|
||||
pbar.close()
|
||||
|
||||
def check_cache_ram(self, safety_margin=0.1, prefix=''):
|
||||
# Check image caching requirements vs available memory
|
||||
"""Check image caching requirements vs available memory."""
|
||||
b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes
|
||||
n = min(self.n, 30) # extrapolate from 30 random images
|
||||
for _ in range(n):
|
||||
@ -648,12 +648,6 @@ class LoadImagesAndLabels(Dataset):
|
||||
def __len__(self):
|
||||
return len(self.im_files)
|
||||
|
||||
# def __iter__(self):
|
||||
# self.count = -1
|
||||
# print('ran dataset iter')
|
||||
# #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
|
||||
# return self
|
||||
|
||||
def __getitem__(self, index):
|
||||
index = self.indices[index] # linear, shuffled, or image_weights
|
||||
|
||||
@ -729,7 +723,7 @@ class LoadImagesAndLabels(Dataset):
|
||||
return torch.from_numpy(img), labels_out, self.im_files[index], shapes
|
||||
|
||||
def load_image(self, i):
|
||||
# Loads 1 image from dataset index 'i', returns (im, original hw, resized hw)
|
||||
"""Loads 1 image from dataset index 'i', returns (im, original hw, resized hw)."""
|
||||
im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i],
|
||||
if im is None: # not cached in RAM
|
||||
if fn.exists(): # load npy
|
||||
@ -746,13 +740,13 @@ class LoadImagesAndLabels(Dataset):
|
||||
return self.ims[i], self.im_hw0[i], self.im_hw[i] # im, hw_original, hw_resized
|
||||
|
||||
def cache_images_to_disk(self, i):
|
||||
# Saves an image as an *.npy file for faster loading
|
||||
"""Saves an image as an *.npy file for faster loading."""
|
||||
f = self.npy_files[i]
|
||||
if not f.exists():
|
||||
np.save(f.as_posix(), cv2.imread(self.im_files[i]))
|
||||
|
||||
def load_mosaic(self, index):
|
||||
# YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
|
||||
"""YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic."""
|
||||
labels4, segments4 = [], []
|
||||
s = self.img_size
|
||||
yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y
|
||||
@ -762,7 +756,7 @@ class LoadImagesAndLabels(Dataset):
|
||||
# Load image
|
||||
img, _, (h, w) = self.load_image(index)
|
||||
|
||||
# place img in img4
|
||||
# Place img in img4
|
||||
if i == 0: # top left
|
||||
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
|
||||
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
|
||||
@ -810,7 +804,7 @@ class LoadImagesAndLabels(Dataset):
|
||||
return img4, labels4
|
||||
|
||||
def load_mosaic9(self, index):
|
||||
# YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic
|
||||
"""YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic."""
|
||||
labels9, segments9 = [], []
|
||||
s = self.img_size
|
||||
indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices
|
||||
@ -820,7 +814,7 @@ class LoadImagesAndLabels(Dataset):
|
||||
# Load image
|
||||
img, _, (h, w) = self.load_image(index)
|
||||
|
||||
# place img in img9
|
||||
# Place img in img9
|
||||
if i == 0: # center
|
||||
img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
|
||||
h0, w0 = h, w
|
||||
@ -888,7 +882,7 @@ class LoadImagesAndLabels(Dataset):
|
||||
|
||||
@staticmethod
|
||||
def collate_fn(batch):
|
||||
# YOLOv8 collate function, outputs dict
|
||||
"""YOLOv8 collate function, outputs dict."""
|
||||
im, label, path, shapes = zip(*batch) # transposed
|
||||
for i, lb in enumerate(label):
|
||||
lb[:, 0] = i # add target image index for build_targets()
|
||||
@ -904,7 +898,7 @@ class LoadImagesAndLabels(Dataset):
|
||||
|
||||
@staticmethod
|
||||
def collate_fn_old(batch):
|
||||
# YOLOv5 original collate function
|
||||
"""YOLOv5 original collate function."""
|
||||
im, label, path, shapes = zip(*batch) # transposed
|
||||
for i, lb in enumerate(label):
|
||||
lb[:, 0] = i # add target image index for build_targets()
|
||||
@ -913,7 +907,7 @@ class LoadImagesAndLabels(Dataset):
|
||||
|
||||
# Ancillary functions --------------------------------------------------------------------------------------------------
|
||||
def flatten_recursive(path=DATASETS_DIR / 'coco128'):
|
||||
# Flatten a recursive directory by bringing all files to top level
|
||||
"""Flatten a recursive directory by bringing all files to top level."""
|
||||
new_path = Path(f'{str(path)}_flat')
|
||||
if os.path.exists(new_path):
|
||||
shutil.rmtree(new_path) # delete output folder
|
||||
@ -930,11 +924,11 @@ def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.dataloaders impo
|
||||
n = len(files) # number of files
|
||||
for im_file in tqdm(files, total=n):
|
||||
if im_file.suffix[1:] in IMG_FORMATS:
|
||||
# image
|
||||
# Image
|
||||
im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB
|
||||
h, w = im.shape[:2]
|
||||
|
||||
# labels
|
||||
# Labels
|
||||
lb_file = Path(img2label_paths([str(im_file)])[0])
|
||||
if Path(lb_file).exists():
|
||||
with open(lb_file) as f:
|
||||
@ -947,7 +941,7 @@ def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.dataloaders impo
|
||||
f.parent.mkdir(parents=True)
|
||||
|
||||
b = x[1:] * [w, h, w, h] # box
|
||||
# b[2:] = b[2:].max() # rectangle to square
|
||||
# b[2:] = b[2:].max() # rectangle to square
|
||||
b[2:] = b[2:] * 1.2 + 3 # pad
|
||||
b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)
|
||||
|
||||
@ -983,11 +977,11 @@ def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), ann
|
||||
|
||||
|
||||
def verify_image_label(args):
|
||||
# Verify one image-label pair
|
||||
"""Verify one image-label pair."""
|
||||
im_file, lb_file, prefix = args
|
||||
nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', [] # number (missing, found, empty, corrupt), message, segments
|
||||
try:
|
||||
# verify images
|
||||
# Verify images
|
||||
im = Image.open(im_file)
|
||||
im.verify() # PIL verify
|
||||
shape = exif_size(im) # image size
|
||||
@ -1000,7 +994,7 @@ def verify_image_label(args):
|
||||
ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100)
|
||||
msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved'
|
||||
|
||||
# verify labels
|
||||
# Verify labels
|
||||
if os.path.isfile(lb_file):
|
||||
nf = 1 # label found
|
||||
with open(lb_file) as f:
|
||||
@ -1077,7 +1071,7 @@ def create_classification_dataloader(path,
|
||||
rank=-1,
|
||||
workers=8,
|
||||
shuffle=True):
|
||||
# Returns Dataloader object to be used with YOLOv5 Classifier
|
||||
"""Returns Dataloader object to be used with YOLOv5 Classifier."""
|
||||
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
|
||||
dataset = ClassificationDataset(root=path, imgsz=imgsz, augment=augment, cache=cache)
|
||||
batch_size = min(batch_size, len(dataset))
|
||||
|
@ -193,7 +193,7 @@ class YOLODataset(BaseDataset):
|
||||
self.transforms = self.build_transforms(hyp)
|
||||
|
||||
def update_labels_info(self, label):
|
||||
"""custom your label format here"""
|
||||
"""Customize your label format here."""
|
||||
# NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
|
||||
# we can make it also support classification and semantic segmentation by add or remove some dict keys there.
|
||||
bboxes = label.pop('bboxes')
|
||||
|
@ -39,7 +39,7 @@ class MixAndRectDataset:
|
||||
"""
|
||||
labels = deepcopy(self.dataset[index])
|
||||
for transform in self.dataset.transforms.tolist():
|
||||
# mosaic and mixup
|
||||
# Mosaic and mixup
|
||||
if hasattr(transform, 'get_indexes'):
|
||||
indexes = transform.get_indexes(self.dataset)
|
||||
if not isinstance(indexes, collections.abc.Sequence):
|
||||
|
@ -37,13 +37,13 @@ for orientation in ExifTags.TAGS.keys():
|
||||
|
||||
|
||||
def img2label_paths(img_paths):
|
||||
# Define label paths as a function of image paths
|
||||
"""Define label paths as a function of image paths."""
|
||||
sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}labels{os.sep}' # /images/, /labels/ substrings
|
||||
return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths]
|
||||
|
||||
|
||||
def get_hash(paths):
|
||||
# Returns a single hash value of a list of paths (files or dirs)
|
||||
"""Returns a single hash value of a list of paths (files or dirs)."""
|
||||
size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes
|
||||
h = hashlib.sha256(str(size).encode()) # hash sizes
|
||||
h.update(''.join(paths).encode()) # hash paths
|
||||
@ -51,7 +51,7 @@ def get_hash(paths):
|
||||
|
||||
|
||||
def exif_size(img):
|
||||
# Returns exif-corrected PIL size
|
||||
"""Returns exif-corrected PIL size."""
|
||||
s = img.size # (width, height)
|
||||
with contextlib.suppress(Exception):
|
||||
rotation = dict(img._getexif().items())[orientation]
|
||||
@ -61,12 +61,12 @@ def exif_size(img):
|
||||
|
||||
|
||||
def verify_image_label(args):
|
||||
# Verify one image-label pair
|
||||
"""Verify one image-label pair."""
|
||||
im_file, lb_file, prefix, keypoint, num_cls, nkpt, ndim = args
|
||||
# number (missing, found, empty, corrupt), message, segments, keypoints
|
||||
# Number (missing, found, empty, corrupt), message, segments, keypoints
|
||||
nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, '', [], None
|
||||
try:
|
||||
# verify images
|
||||
# Verify images
|
||||
im = Image.open(im_file)
|
||||
im.verify() # PIL verify
|
||||
shape = exif_size(im) # image size
|
||||
@ -80,7 +80,7 @@ def verify_image_label(args):
|
||||
ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100)
|
||||
msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved'
|
||||
|
||||
# verify labels
|
||||
# Verify labels
|
||||
if os.path.isfile(lb_file):
|
||||
nf = 1 # label found
|
||||
with open(lb_file) as f:
|
||||
@ -191,7 +191,7 @@ def polygons2masks_overlap(imgsz, segments, downsample_ratio=1):
|
||||
|
||||
|
||||
def check_det_dataset(dataset, autodownload=True):
|
||||
# Download, check and/or unzip dataset if not found locally
|
||||
"""Download, check and/or unzip dataset if not found locally."""
|
||||
data = check_file(dataset)
|
||||
|
||||
# Download (optional)
|
||||
@ -321,7 +321,7 @@ class HUBDatasetStats():
|
||||
"""
|
||||
|
||||
def __init__(self, path='coco128.yaml', autodownload=False):
|
||||
# Initialize class
|
||||
"""Initialize class."""
|
||||
zipped, data_dir, yaml_path = self._unzip(Path(path))
|
||||
try:
|
||||
# data = yaml_load(check_yaml(yaml_path)) # data dict
|
||||
@ -339,7 +339,7 @@ class HUBDatasetStats():
|
||||
|
||||
@staticmethod
|
||||
def _find_yaml(dir):
|
||||
# Return data.yaml file
|
||||
"""Return data.yaml file."""
|
||||
files = list(dir.glob('*.yaml')) or list(dir.rglob('*.yaml')) # try root level first and then recursive
|
||||
assert files, f'No *.yaml file found in {dir}'
|
||||
if len(files) > 1:
|
||||
@ -349,7 +349,7 @@ class HUBDatasetStats():
|
||||
return files[0]
|
||||
|
||||
def _unzip(self, path):
|
||||
# Unzip data.zip
|
||||
"""Unzip data.zip."""
|
||||
if not str(path).endswith('.zip'): # path is data.yaml
|
||||
return False, None, path
|
||||
assert Path(path).is_file(), f'Error unzipping {path}, file not found'
|
||||
@ -362,12 +362,12 @@ class HUBDatasetStats():
|
||||
compress_one_image(f, self.im_dir / Path(f).name) # save to dataset-hub
|
||||
|
||||
def get_json(self, save=False, verbose=False):
|
||||
# Return dataset JSON for Ultralytics HUB
|
||||
"""Return dataset JSON for Ultralytics HUB."""
|
||||
# from ultralytics.yolo.data import YOLODataset
|
||||
from ultralytics.yolo.data.dataloaders.v5loader import LoadImagesAndLabels
|
||||
|
||||
def _round(labels):
|
||||
# Update labels to integer class and 6 decimal place floats
|
||||
"""Update labels to integer class and 4 decimal place floats."""
|
||||
return [[int(c), *(round(x, 4) for x in points)] for c, *points in labels]
|
||||
|
||||
for split in 'train', 'val', 'test':
|
||||
@ -400,7 +400,7 @@ class HUBDatasetStats():
|
||||
return self.stats
|
||||
|
||||
def process_images(self):
|
||||
# Compress images for Ultralytics HUB
|
||||
"""Compress images for Ultralytics HUB."""
|
||||
# from ultralytics.yolo.data import YOLODataset
|
||||
from ultralytics.yolo.data.dataloaders.v5loader import LoadImagesAndLabels
|
||||
|
||||
|
@ -73,7 +73,7 @@ ARM64 = platform.machine() in ('arm64', 'aarch64')
|
||||
|
||||
|
||||
def export_formats():
|
||||
"""YOLOv8 export formats"""
|
||||
"""YOLOv8 export formats."""
|
||||
import pandas
|
||||
x = [
|
||||
['PyTorch', '-', '.pt', True, True],
|
||||
@ -92,7 +92,7 @@ def export_formats():
|
||||
|
||||
|
||||
def gd_outputs(gd):
|
||||
"""TensorFlow GraphDef model output node names"""
|
||||
"""TensorFlow GraphDef model output node names."""
|
||||
name_list, input_list = [], []
|
||||
for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef
|
||||
name_list.append(node.name)
|
||||
@ -101,7 +101,7 @@ def gd_outputs(gd):
|
||||
|
||||
|
||||
def try_export(inner_func):
|
||||
"""YOLOv8 export decorator, i..e @try_export"""
|
||||
"""YOLOv8 export decorator, i.e. @try_export."""
|
||||
inner_args = get_default_args(inner_func)
|
||||
|
||||
def outer_func(*args, **kwargs):
|
||||
@ -119,7 +119,7 @@ def try_export(inner_func):
|
||||
|
||||
|
||||
class iOSDetectModel(torch.nn.Module):
|
||||
"""Wrap an Ultralytics YOLO model for iOS export"""
|
||||
"""Wrap an Ultralytics YOLO model for iOS export."""
|
||||
|
||||
def __init__(self, model, im):
|
||||
super().__init__()
|
||||
@ -246,28 +246,28 @@ class Exporter:
|
||||
# Exports
|
||||
f = [''] * len(fmts) # exported filenames
|
||||
if jit: # TorchScript
|
||||
f[0], _ = self._export_torchscript()
|
||||
f[0], _ = self.export_torchscript()
|
||||
if engine: # TensorRT required before ONNX
|
||||
f[1], _ = self._export_engine()
|
||||
f[1], _ = self.export_engine()
|
||||
if onnx or xml: # OpenVINO requires ONNX
|
||||
f[2], _ = self._export_onnx()
|
||||
f[2], _ = self.export_onnx()
|
||||
if xml: # OpenVINO
|
||||
f[3], _ = self._export_openvino()
|
||||
f[3], _ = self.export_openvino()
|
||||
if coreml: # CoreML
|
||||
f[4], _ = self._export_coreml()
|
||||
f[4], _ = self.export_coreml()
|
||||
if any((saved_model, pb, tflite, edgetpu, tfjs)): # TensorFlow formats
|
||||
self.args.int8 |= edgetpu
|
||||
f[5], s_model = self._export_saved_model()
|
||||
f[5], s_model = self.export_saved_model()
|
||||
if pb or tfjs: # pb prerequisite to tfjs
|
||||
f[6], _ = self._export_pb(s_model)
|
||||
f[6], _ = self.export_pb(s_model)
|
||||
if tflite:
|
||||
f[7], _ = self._export_tflite(s_model, nms=False, agnostic_nms=self.args.agnostic_nms)
|
||||
f[7], _ = self.export_tflite(s_model, nms=False, agnostic_nms=self.args.agnostic_nms)
|
||||
if edgetpu:
|
||||
f[8], _ = self._export_edgetpu(tflite_model=Path(f[5]) / f'{self.file.stem}_full_integer_quant.tflite')
|
||||
f[8], _ = self.export_edgetpu(tflite_model=Path(f[5]) / f'{self.file.stem}_full_integer_quant.tflite')
|
||||
if tfjs:
|
||||
f[9], _ = self._export_tfjs()
|
||||
f[9], _ = self.export_tfjs()
|
||||
if paddle: # PaddlePaddle
|
||||
f[10], _ = self._export_paddle()
|
||||
f[10], _ = self.export_paddle()
|
||||
|
||||
# Finish
|
||||
f = [str(x) for x in f if x] # filter out '' and None
|
||||
@ -289,8 +289,8 @@ class Exporter:
|
||||
return f # return list of exported files/dirs
|
||||
|
||||
@try_export
|
||||
def _export_torchscript(self, prefix=colorstr('TorchScript:')):
|
||||
# YOLOv8 TorchScript model export
|
||||
def export_torchscript(self, prefix=colorstr('TorchScript:')):
|
||||
"""YOLOv8 TorchScript model export."""
|
||||
LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...')
|
||||
f = self.file.with_suffix('.torchscript')
|
||||
|
||||
@ -305,8 +305,8 @@ class Exporter:
|
||||
return f, None
|
||||
|
||||
@try_export
|
||||
def _export_onnx(self, prefix=colorstr('ONNX:')):
|
||||
# YOLOv8 ONNX export
|
||||
def export_onnx(self, prefix=colorstr('ONNX:')):
|
||||
"""YOLOv8 ONNX export."""
|
||||
requirements = ['onnx>=1.12.0']
|
||||
if self.args.simplify:
|
||||
requirements += ['onnxsim>=0.4.17', 'onnxruntime-gpu' if torch.cuda.is_available() else 'onnxruntime']
|
||||
@ -363,8 +363,8 @@ class Exporter:
|
||||
return f, model_onnx
|
||||
|
||||
@try_export
|
||||
def _export_openvino(self, prefix=colorstr('OpenVINO:')):
|
||||
# YOLOv8 OpenVINO export
|
||||
def export_openvino(self, prefix=colorstr('OpenVINO:')):
|
||||
"""YOLOv8 OpenVINO export."""
|
||||
check_requirements('openvino-dev>=2022.3') # requires openvino-dev: https://pypi.org/project/openvino-dev/
|
||||
import openvino.runtime as ov # noqa
|
||||
from openvino.tools import mo # noqa
|
||||
@ -383,8 +383,8 @@ class Exporter:
|
||||
return f, None
|
||||
|
||||
@try_export
|
||||
def _export_paddle(self, prefix=colorstr('PaddlePaddle:')):
|
||||
# YOLOv8 Paddle export
|
||||
def export_paddle(self, prefix=colorstr('PaddlePaddle:')):
|
||||
"""YOLOv8 Paddle export."""
|
||||
check_requirements(('paddlepaddle', 'x2paddle'))
|
||||
import x2paddle # noqa
|
||||
from x2paddle.convert import pytorch2paddle # noqa
|
||||
@ -397,8 +397,8 @@ class Exporter:
|
||||
return f, None
|
||||
|
||||
@try_export
|
||||
def _export_coreml(self, prefix=colorstr('CoreML:')):
|
||||
# YOLOv8 CoreML export
|
||||
def export_coreml(self, prefix=colorstr('CoreML:')):
|
||||
"""YOLOv8 CoreML export."""
|
||||
check_requirements('coremltools>=6.0')
|
||||
import coremltools as ct # noqa
|
||||
|
||||
@ -439,8 +439,8 @@ class Exporter:
|
||||
return f, ct_model
|
||||
|
||||
@try_export
|
||||
def _export_engine(self, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
|
||||
# YOLOv8 TensorRT export https://developer.nvidia.com/tensorrt
|
||||
def export_engine(self, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
|
||||
"""YOLOv8 TensorRT export https://developer.nvidia.com/tensorrt."""
|
||||
assert self.im.device.type != 'cpu', "export running on CPU but must be on GPU, i.e. use 'device=0'"
|
||||
try:
|
||||
import tensorrt as trt # noqa
|
||||
@ -451,7 +451,7 @@ class Exporter:
|
||||
|
||||
check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
|
||||
self.args.simplify = True
|
||||
f_onnx, _ = self._export_onnx()
|
||||
f_onnx, _ = self.export_onnx()
|
||||
|
||||
LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
|
||||
assert Path(f_onnx).exists(), f'failed to export ONNX file: {f_onnx}'
|
||||
@ -504,9 +504,8 @@ class Exporter:
|
||||
return f, None
|
||||
|
||||
@try_export
|
||||
def _export_saved_model(self, prefix=colorstr('TensorFlow SavedModel:')):
|
||||
|
||||
# YOLOv8 TensorFlow SavedModel export
|
||||
def export_saved_model(self, prefix=colorstr('TensorFlow SavedModel:')):
|
||||
"""YOLOv8 TensorFlow SavedModel export."""
|
||||
try:
|
||||
import tensorflow as tf # noqa
|
||||
except ImportError:
|
||||
@ -525,7 +524,7 @@ class Exporter:
|
||||
|
||||
# Export to ONNX
|
||||
self.args.simplify = True
|
||||
f_onnx, _ = self._export_onnx()
|
||||
f_onnx, _ = self.export_onnx()
|
||||
|
||||
# Export to TF
|
||||
int8 = '-oiqt -qt per-tensor' if self.args.int8 else ''
|
||||
@ -551,8 +550,8 @@ class Exporter:
|
||||
return str(f), keras_model
|
||||
|
||||
@try_export
|
||||
def _export_pb(self, keras_model, prefix=colorstr('TensorFlow GraphDef:')):
|
||||
# YOLOv8 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow
|
||||
def export_pb(self, keras_model, prefix=colorstr('TensorFlow GraphDef:')):
|
||||
"""YOLOv8 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow."""
|
||||
import tensorflow as tf # noqa
|
||||
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 # noqa
|
||||
|
||||
@ -567,8 +566,8 @@ class Exporter:
|
||||
return f, None
|
||||
|
||||
@try_export
|
||||
def _export_tflite(self, keras_model, nms, agnostic_nms, prefix=colorstr('TensorFlow Lite:')):
|
||||
# YOLOv8 TensorFlow Lite export
|
||||
def export_tflite(self, keras_model, nms, agnostic_nms, prefix=colorstr('TensorFlow Lite:')):
|
||||
"""YOLOv8 TensorFlow Lite export."""
|
||||
import tensorflow as tf # noqa
|
||||
|
||||
LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
|
||||
@ -581,44 +580,9 @@ class Exporter:
|
||||
f = saved_model / f'{self.file.stem}_float32.tflite'
|
||||
return str(f), None
|
||||
|
||||
# # OLD TFLITE EXPORT CODE BELOW -------------------------------------------------------------------------------
|
||||
# batch_size, ch, *imgsz = list(self.im.shape) # BCHW
|
||||
# f = str(self.file).replace(self.file.suffix, '-fp16.tflite')
|
||||
#
|
||||
# converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
|
||||
# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
|
||||
# converter.target_spec.supported_types = [tf.float16]
|
||||
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
|
||||
# if self.args.int8:
|
||||
#
|
||||
# def representative_dataset_gen(dataset, n_images=100):
|
||||
# # Dataset generator for use with converter.representative_dataset, returns a generator of np arrays
|
||||
# for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
|
||||
# im = np.transpose(img, [1, 2, 0])
|
||||
# im = np.expand_dims(im, axis=0).astype(np.float32)
|
||||
# im /= 255
|
||||
# yield [im]
|
||||
# if n >= n_images:
|
||||
# break
|
||||
#
|
||||
# dataset = LoadImages(check_det_dataset(self.args.data)['train'], imgsz=imgsz, auto=False)
|
||||
# converter.representative_dataset = lambda: representative_dataset_gen(dataset, n_images=100)
|
||||
# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
|
||||
# converter.target_spec.supported_types = []
|
||||
# converter.inference_input_type = tf.uint8 # or tf.int8
|
||||
# converter.inference_output_type = tf.uint8 # or tf.int8
|
||||
# converter.experimental_new_quantizer = True
|
||||
# f = str(self.file).replace(self.file.suffix, '-int8.tflite')
|
||||
# if nms or agnostic_nms:
|
||||
# converter.target_spec.supported_ops.append(tf.lite.OpsSet.SELECT_TF_OPS)
|
||||
#
|
||||
# tflite_model = converter.convert()
|
||||
# open(f, 'wb').write(tflite_model)
|
||||
# return f, None
|
||||
|
||||
@try_export
|
||||
def _export_edgetpu(self, tflite_model='', prefix=colorstr('Edge TPU:')):
|
||||
# YOLOv8 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/
|
||||
def export_edgetpu(self, tflite_model='', prefix=colorstr('Edge TPU:')):
|
||||
"""YOLOv8 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/."""
|
||||
LOGGER.warning(f'{prefix} WARNING ⚠️ Edge TPU known bug https://github.com/ultralytics/ultralytics/issues/1185')
|
||||
|
||||
cmd = 'edgetpu_compiler --version'
|
||||
@ -644,8 +608,8 @@ class Exporter:
|
||||
return f, None
|
||||
|
||||
@try_export
|
||||
def _export_tfjs(self, prefix=colorstr('TensorFlow.js:')):
|
||||
# YOLOv8 TensorFlow.js export
|
||||
def export_tfjs(self, prefix=colorstr('TensorFlow.js:')):
|
||||
"""YOLOv8 TensorFlow.js export."""
|
||||
check_requirements('tensorflowjs')
|
||||
import tensorflow as tf
|
||||
import tensorflowjs as tfjs # noqa
|
||||
@ -681,7 +645,7 @@ class Exporter:
|
||||
return f, None
|
||||
|
||||
def _add_tflite_metadata(self, file):
|
||||
# Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata
|
||||
"""Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata."""
|
||||
from tflite_support import flatbuffers # noqa
|
||||
from tflite_support import metadata as _metadata # noqa
|
||||
from tflite_support import metadata_schema_py_generated as _metadata_fb # noqa
|
||||
|
@ -35,6 +35,7 @@ class YOLO:
|
||||
|
||||
Args:
|
||||
model (str, Path): Path to the model file to load or create.
|
||||
task (Any, optional): Task type for the YOLO model. Defaults to None.
|
||||
|
||||
Attributes:
|
||||
predictor (Any): The predictor object.
|
||||
@ -76,7 +77,6 @@ class YOLO:
|
||||
Args:
|
||||
model (Union[str, Path], optional): Path or name of the model to load or create. Defaults to 'yolov8n.pt'.
|
||||
task (Any, optional): Task type for the YOLO model. Defaults to None.
|
||||
|
||||
"""
|
||||
self.callbacks = callbacks.get_default_callbacks()
|
||||
self.predictor = None # reuse predictor
|
||||
@ -365,7 +365,7 @@ class YOLO:
|
||||
self.model = self.trainer.model
|
||||
self.trainer.hub_session = self.session # attach optional HUB session
|
||||
self.trainer.train()
|
||||
# update model and cfg after training
|
||||
# Update model and cfg after training
|
||||
if RANK in (-1, 0):
|
||||
self.model, _ = attempt_load_one_weight(str(self.trainer.best))
|
||||
self.overrides = self.model.args
|
||||
|
@ -134,7 +134,7 @@ class BasePredictor:
|
||||
if not self.args.retina_masks:
|
||||
plot_args['im_gpu'] = im[idx]
|
||||
self.plotted_img = result.plot(**plot_args)
|
||||
# write
|
||||
# Write
|
||||
if self.args.save_txt:
|
||||
result.save_txt(f'{self.txt_path}.txt', save_conf=self.args.save_conf)
|
||||
if self.args.save_crop:
|
||||
@ -153,7 +153,7 @@ class BasePredictor:
|
||||
return list(self.stream_inference(source, model)) # merge list of Result into one
|
||||
|
||||
def predict_cli(self, source=None, model=None):
|
||||
# Method used for CLI prediction. It uses always generator as outputs as not required by CLI mode
|
||||
"""Method used for CLI prediction. It always uses a generator for outputs, as accumulated results are not required in CLI mode."""
|
||||
gen = self.stream_inference(source, model)
|
||||
for _ in gen: # running CLI inference without accumulating any outputs (do not modify)
|
||||
pass
|
||||
@ -182,16 +182,16 @@ class BasePredictor:
|
||||
if self.args.verbose:
|
||||
LOGGER.info('')
|
||||
|
||||
# setup model
|
||||
# Setup model
|
||||
if not self.model:
|
||||
self.setup_model(model)
|
||||
# setup source every time predict is called
|
||||
# Setup source every time predict is called
|
||||
self.setup_source(source if source is not None else self.args.source)
|
||||
|
||||
# check if save_dir/ label file exists
|
||||
# Check if save_dir/ label file exists
|
||||
if self.args.save or self.args.save_txt:
|
||||
(self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True)
|
||||
# warmup model
|
||||
# Warmup model
|
||||
if not self.done_warmup:
|
||||
self.model.warmup(imgsz=(1 if self.model.pt or self.model.triton else self.dataset.bs, 3, *self.imgsz))
|
||||
self.done_warmup = True
|
||||
@ -204,22 +204,22 @@ class BasePredictor:
|
||||
path, im, im0s, vid_cap, s = batch
|
||||
visualize = increment_path(self.save_dir / Path(path).stem, mkdir=True) if self.args.visualize else False
|
||||
|
||||
# preprocess
|
||||
# Preprocess
|
||||
with self.dt[0]:
|
||||
im = self.preprocess(im)
|
||||
if len(im.shape) == 3:
|
||||
im = im[None] # expand for batch dim
|
||||
|
||||
# inference
|
||||
# Inference
|
||||
with self.dt[1]:
|
||||
preds = self.model(im, augment=self.args.augment, visualize=visualize)
|
||||
|
||||
# postprocess
|
||||
# Postprocess
|
||||
with self.dt[2]:
|
||||
self.results = self.postprocess(preds, im, im0s)
|
||||
self.run_callbacks('on_predict_postprocess_end')
|
||||
|
||||
# visualize, save, write results
|
||||
# Visualize, save, write results
|
||||
n = len(im)
|
||||
for i in range(n):
|
||||
self.results[i].speed = {
|
||||
@ -288,7 +288,7 @@ class BasePredictor:
|
||||
|
||||
def save_preds(self, vid_cap, idx, save_path):
|
||||
im0 = self.plotted_img
|
||||
# save imgs
|
||||
# Save imgs
|
||||
if self.dataset.mode == 'image':
|
||||
cv2.imwrite(save_path, im0)
|
||||
else: # 'video' or 'stream'
|
||||
|
@ -262,12 +262,12 @@ class Results(SimpleClass):
|
||||
kpts = self.keypoints
|
||||
texts = []
|
||||
if probs is not None:
|
||||
# classify
|
||||
# Classify
|
||||
n5 = min(len(self.names), 5)
|
||||
top5i = probs.argsort(0, descending=True)[:n5].tolist() # top 5 indices
|
||||
[texts.append(f'{probs[j]:.2f} {self.names[j]}') for j in top5i]
|
||||
elif boxes:
|
||||
# detect/segment/pose
|
||||
# Detect/segment/pose
|
||||
for j, d in enumerate(boxes):
|
||||
c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item())
|
||||
line = (c, *d.xywhn.view(-1))
|
||||
@ -418,7 +418,7 @@ class Masks(BaseTensor):
|
||||
@property
|
||||
@lru_cache(maxsize=1)
|
||||
def segments(self):
|
||||
# Segments-deprecated (normalized)
|
||||
"""Segments-deprecated (normalized)."""
|
||||
LOGGER.warning("WARNING ⚠️ 'Masks.segments' is deprecated. Use 'Masks.xyn' for segments (normalized) and "
|
||||
"'Masks.xy' for segments (pixels) instead.")
|
||||
return self.xyn
|
||||
@ -426,7 +426,7 @@ class Masks(BaseTensor):
|
||||
@property
|
||||
@lru_cache(maxsize=1)
|
||||
def xyn(self):
|
||||
# Segments (normalized)
|
||||
"""Segments (normalized)."""
|
||||
return [
|
||||
ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=True)
|
||||
for x in ops.masks2segments(self.data)]
|
||||
@ -434,7 +434,7 @@ class Masks(BaseTensor):
|
||||
@property
|
||||
@lru_cache(maxsize=1)
|
||||
def xy(self):
|
||||
# Segments (pixels)
|
||||
"""Segments (pixels)."""
|
||||
return [
|
||||
ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=False)
|
||||
for x in ops.masks2segments(self.data)]
|
||||
|
@ -163,7 +163,7 @@ class BaseTrainer:
|
||||
callback(self)
|
||||
|
||||
def train(self):
|
||||
# Allow device='', device=None on Multi-GPU systems to default to device=0
|
||||
"""Allow device='', device=None on Multi-GPU systems to default to device=0."""
|
||||
if isinstance(self.args.device, int) or self.args.device: # i.e. device=0 or device=[0,1,2,3]
|
||||
world_size = torch.cuda.device_count()
|
||||
elif torch.cuda.is_available(): # i.e. device=None or device=''
|
||||
@ -306,7 +306,7 @@ class BaseTrainer:
|
||||
xi = [0, nw] # x interp
|
||||
self.accumulate = max(1, np.interp(ni, xi, [1, self.args.nbs / self.batch_size]).round())
|
||||
for j, x in enumerate(self.optimizer.param_groups):
|
||||
# bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
|
||||
# Bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
|
||||
x['lr'] = np.interp(
|
||||
ni, xi, [self.args.warmup_bias_lr if j == 0 else 0.0, x['initial_lr'] * self.lf(epoch)])
|
||||
if 'momentum' in x:
|
||||
@ -631,7 +631,7 @@ def check_amp(model):
|
||||
return False # AMP only used on CUDA devices
|
||||
|
||||
def amp_allclose(m, im):
|
||||
# All close FP32 vs AMP results
|
||||
"""All close FP32 vs AMP results."""
|
||||
a = m(im, device=device, verbose=False)[0].boxes.data # FP32 inference
|
||||
with torch.cuda.amp.autocast(True):
|
||||
b = m(im, device=device, verbose=False)[0].boxes.data # AMP inference
|
||||
|
@ -149,20 +149,20 @@ class BaseValidator:
|
||||
for batch_i, batch in enumerate(bar):
|
||||
self.run_callbacks('on_val_batch_start')
|
||||
self.batch_i = batch_i
|
||||
# preprocess
|
||||
# Preprocess
|
||||
with dt[0]:
|
||||
batch = self.preprocess(batch)
|
||||
|
||||
# inference
|
||||
# Inference
|
||||
with dt[1]:
|
||||
preds = model(batch['img'])
|
||||
|
||||
# loss
|
||||
# Loss
|
||||
with dt[2]:
|
||||
if self.training:
|
||||
self.loss += trainer.criterion(preds, batch)[1]
|
||||
|
||||
# postprocess
|
||||
# Postprocess
|
||||
with dt[3]:
|
||||
preds = self.postprocess(preds)
|
||||
|
||||
|
@ -199,7 +199,7 @@ def plt_settings(rcparams={'font.size': 11}, backend='Agg'):
|
||||
|
||||
|
||||
def set_logging(name=LOGGING_NAME, verbose=True):
|
||||
# sets up logging for the given name
|
||||
"""Sets up logging for the given name."""
|
||||
rank = int(os.getenv('RANK', -1)) # rank in world for Multi-GPU trainings
|
||||
level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR
|
||||
logging.config.dictConfig({
|
||||
@ -539,12 +539,12 @@ SETTINGS_YAML = USER_CONFIG_DIR / 'settings.yaml'
|
||||
|
||||
|
||||
def emojis(string=''):
|
||||
# Return platform-dependent emoji-safe version of string
|
||||
"""Return platform-dependent emoji-safe version of string."""
|
||||
return string.encode().decode('ascii', 'ignore') if WINDOWS else string
|
||||
|
||||
|
||||
def colorstr(*input):
|
||||
# Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world')
|
||||
"""Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world')."""
|
||||
*args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string
|
||||
colors = {
|
||||
'black': '\033[30m', # basic colors
|
||||
@ -570,7 +570,8 @@ def colorstr(*input):
|
||||
|
||||
|
||||
class TryExcept(contextlib.ContextDecorator):
|
||||
# YOLOv8 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager
|
||||
"""YOLOv8 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager."""
|
||||
|
||||
def __init__(self, msg='', verbose=True):
|
||||
self.msg = msg
|
||||
self.verbose = verbose
|
||||
@ -585,7 +586,8 @@ class TryExcept(contextlib.ContextDecorator):
|
||||
|
||||
|
||||
def threaded(func):
|
||||
# Multi-threads a target function and returns thread. Usage: @threaded decorator
|
||||
"""Multi-threads a target function and returns thread. Usage: @threaded decorator."""
|
||||
|
||||
def wrapper(*args, **kwargs):
|
||||
thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
|
||||
thread.start()
|
||||
@ -703,13 +705,13 @@ def deprecation_warn(arg, new_arg, version=None):
|
||||
|
||||
|
||||
def clean_url(url):
|
||||
# Strip auth from URL, i.e. https://url.com/file.txt?auth -> https://url.com/file.txt
|
||||
"""Strip auth from URL, i.e. https://url.com/file.txt?auth -> https://url.com/file.txt."""
|
||||
url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/
|
||||
return urllib.parse.unquote(url).split('?')[0] # '%2F' to '/', split https://url.com/file.txt?auth
|
||||
|
||||
|
||||
def url2file(url):
|
||||
# Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt
|
||||
"""Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt."""
|
||||
return Path(clean_url(url)).name
|
||||
|
||||
|
||||
|
@ -15,20 +15,20 @@ except (ImportError, AssertionError):
|
||||
|
||||
COMET_MODE = os.getenv('COMET_MODE', 'online')
|
||||
COMET_MODEL_NAME = os.getenv('COMET_MODEL_NAME', 'YOLOv8')
|
||||
# determines how many batches of image predictions to log from the validation set
|
||||
# Determines how many batches of image predictions to log from the validation set
|
||||
COMET_EVAL_BATCH_LOGGING_INTERVAL = int(os.getenv('COMET_EVAL_BATCH_LOGGING_INTERVAL', 1))
|
||||
# determines whether to log confusion matrix every evaluation epoch
|
||||
# Determines whether to log confusion matrix every evaluation epoch
|
||||
COMET_EVAL_LOG_CONFUSION_MATRIX = (os.getenv('COMET_EVAL_LOG_CONFUSION_MATRIX', 'true').lower() == 'true')
|
||||
# determines whether to log image predictions every evaluation epoch
|
||||
# Determines whether to log image predictions every evaluation epoch
|
||||
COMET_EVAL_LOG_IMAGE_PREDICTIONS = (os.getenv('COMET_EVAL_LOG_IMAGE_PREDICTIONS', 'true').lower() == 'true')
|
||||
COMET_MAX_IMAGE_PREDICTIONS = int(os.getenv('COMET_MAX_IMAGE_PREDICTIONS', 100))
|
||||
|
||||
# ensures certain logging functions only run for supported tasks
|
||||
# Ensures certain logging functions only run for supported tasks
|
||||
COMET_SUPPORTED_TASKS = ['detect']
|
||||
# scales reported confidence scores (0.0-1.0) by this value
|
||||
# Scales reported confidence scores (0.0-1.0) by this value
|
||||
COMET_MAX_CONFIDENCE_SCORE = int(os.getenv('COMET_MAX_CONFIDENCE_SCORE', 100))
|
||||
|
||||
# names of plots created by YOLOv8 that are logged to Comet
|
||||
# Names of plots created by YOLOv8 that are logged to Comet
|
||||
EVALUATION_PLOT_NAMES = 'F1_curve', 'P_curve', 'R_curve', 'PR_curve', 'confusion_matrix'
|
||||
LABEL_PLOT_NAMES = 'labels', 'labels_correlogram'
|
||||
|
||||
@ -43,7 +43,7 @@ def _get_experiment_type(mode, project_name):
|
||||
|
||||
|
||||
def _create_experiment(args):
|
||||
# Ensures that the experiment object is only created in a single process during distributed training.
|
||||
"""Ensures that the experiment object is only created in a single process during distributed training."""
|
||||
if RANK not in (-1, 0):
|
||||
return
|
||||
try:
|
||||
@ -83,13 +83,13 @@ def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, origin
|
||||
|
||||
resized_image_height, resized_image_width = resized_image_shape
|
||||
|
||||
# convert normalized xywh format predictions to xyxy in resized scale format
|
||||
# Convert normalized xywh format predictions to xyxy in resized scale format
|
||||
box = ops.xywhn2xyxy(box, h=resized_image_height, w=resized_image_width)
|
||||
# scale box predictions from resized image scale back to original image scale
|
||||
# Scale box predictions from resized image scale back to original image scale
|
||||
box = ops.scale_boxes(resized_image_shape, box, original_image_shape, ratio_pad)
|
||||
# Convert bounding box format from xyxy to xywh for Comet logging
|
||||
box = ops.xyxy2xywh(box)
|
||||
# adjust xy center to correspond top-left corner
|
||||
# Adjust xy center to correspond to top-left corner
|
||||
box[:2] -= box[2:] / 2
|
||||
box = box.tolist()
|
||||
|
||||
|
@ -244,7 +244,7 @@ def check_requirements(requirements=ROOT.parent / 'requirements.txt', exclude=()
|
||||
|
||||
|
||||
def check_suffix(file='yolov8n.pt', suffix='.pt', msg=''):
|
||||
# Check file(s) for acceptable suffix
|
||||
"""Check file(s) for acceptable suffix."""
|
||||
if file and suffix:
|
||||
if isinstance(suffix, str):
|
||||
suffix = (suffix, )
|
||||
@ -255,7 +255,7 @@ def check_suffix(file='yolov8n.pt', suffix='.pt', msg=''):
|
||||
|
||||
|
||||
def check_yolov5u_filename(file: str, verbose: bool = True):
|
||||
# Replace legacy YOLOv5 filenames with updated YOLOv5u filenames
|
||||
"""Replace legacy YOLOv5 filenames with updated YOLOv5u filenames."""
|
||||
if ('yolov3' in file or 'yolov5' in file) and 'u' not in file:
|
||||
original_file = file
|
||||
file = re.sub(r'(.*yolov5([nsmlx]))\.pt', '\\1u.pt', file) # i.e. yolov5n.pt -> yolov5nu.pt
|
||||
@ -269,7 +269,7 @@ def check_yolov5u_filename(file: str, verbose: bool = True):
|
||||
|
||||
|
||||
def check_file(file, suffix='', download=True, hard=True):
|
||||
# Search/download file (if necessary) and return path
|
||||
"""Search/download file (if necessary) and return path."""
|
||||
check_suffix(file, suffix) # optional
|
||||
file = str(file).strip() # convert to string and strip spaces
|
||||
file = check_yolov5u_filename(file) # yolov5n -> yolov5nu
|
||||
@ -300,7 +300,7 @@ def check_yaml(file, suffix=('.yaml', '.yml'), hard=True):
|
||||
|
||||
|
||||
def check_imshow(warn=False):
|
||||
# Check if environment supports image displays
|
||||
"""Check if environment supports image displays."""
|
||||
try:
|
||||
assert not any((is_colab(), is_kaggle(), is_docker()))
|
||||
cv2.imshow('test', np.zeros((1, 1, 3)))
|
||||
@ -346,9 +346,10 @@ def git_describe(path=ROOT): # path must be a directory
|
||||
|
||||
|
||||
def print_args(args: Optional[dict] = None, show_file=True, show_func=False):
|
||||
# Print function arguments (optional args dict)
|
||||
"""Print function arguments (optional args dict)."""
|
||||
|
||||
def strip_auth(v):
|
||||
# Clean longer Ultralytics HUB URLs by stripping potential authentication information
|
||||
"""Clean longer Ultralytics HUB URLs by stripping potential authentication information."""
|
||||
return clean_url(v) if (isinstance(v, str) and v.startswith('http') and len(v) > 100) else v
|
||||
|
||||
x = inspect.currentframe().f_back # previous frame
|
||||
|
@ -59,6 +59,6 @@ def generate_ddp_command(world_size, trainer):
|
||||
|
||||
|
||||
def ddp_cleanup(trainer, file):
|
||||
# delete temp file if created
|
||||
"""Delete temp file if created."""
|
||||
if f'{id(trainer)}.py' in file: # if temp_file suffix in file
|
||||
os.remove(file)
|
||||
|
@ -21,7 +21,7 @@ GITHUB_ASSET_STEMS = [Path(k).stem for k in GITHUB_ASSET_NAMES]
|
||||
|
||||
|
||||
def is_url(url, check=True):
|
||||
# Check if string is URL and check if URL exists
|
||||
"""Check if string is URL and check if URL exists."""
|
||||
with contextlib.suppress(Exception):
|
||||
url = str(url)
|
||||
result = parse.urlparse(url)
|
||||
@ -141,11 +141,11 @@ def safe_download(url,
|
||||
|
||||
|
||||
def attempt_download_asset(file, repo='ultralytics/assets', release='v0.0.0'):
|
||||
# Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v6.2', etc.
|
||||
"""Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v6.2', etc."""
|
||||
from ultralytics.yolo.utils import SETTINGS # scoped for circular import
|
||||
|
||||
def github_assets(repository, version='latest'):
|
||||
# Return GitHub repo tag and assets (i.e. ['yolov8n.pt', 'yolov8s.pt', ...])
|
||||
"""Return GitHub repo tag and assets (i.e. ['yolov8n.pt', 'yolov8s.pt', ...])."""
|
||||
if version != 'latest':
|
||||
version = f'tags/{version}' # i.e. tags/v6.2
|
||||
response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json() # github api
|
||||
|
@ -8,7 +8,8 @@ from pathlib import Path
|
||||
|
||||
|
||||
class WorkingDirectory(contextlib.ContextDecorator):
|
||||
# Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager
|
||||
"""Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager."""
|
||||
|
||||
def __init__(self, new_dir):
|
||||
self.dir = new_dir # new dir
|
||||
self.cwd = Path.cwd().resolve() # current dir
|
||||
@ -56,19 +57,19 @@ def increment_path(path, exist_ok=False, sep='', mkdir=False):
|
||||
|
||||
|
||||
def file_age(path=__file__):
|
||||
# Return days since last file update
|
||||
"""Return days since last file update."""
|
||||
dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta
|
||||
return dt.days # + dt.seconds / 86400 # fractional days
|
||||
|
||||
|
||||
def file_date(path=__file__):
|
||||
# Return human-readable file modification date, i.e. '2021-3-26'
|
||||
"""Return human-readable file modification date, i.e. '2021-3-26'."""
|
||||
t = datetime.fromtimestamp(Path(path).stat().st_mtime)
|
||||
return f'{t.year}-{t.month}-{t.day}'
|
||||
|
||||
|
||||
def file_size(path):
|
||||
# Return file/dir size (MB)
|
||||
"""Return file/dir size (MB)."""
|
||||
if isinstance(path, (str, Path)):
|
||||
mb = 1 << 20 # bytes to MiB (1024 ** 2)
|
||||
path = Path(path)
|
||||
@ -80,6 +81,6 @@ def file_size(path):
|
||||
|
||||
|
||||
def get_latest_run(search_dir='.'):
|
||||
# Return path to most recent 'last.pt' in /runs (i.e. to --resume from)
|
||||
"""Return path to most recent 'last.pt' in /runs (i.e. to --resume from)."""
|
||||
last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
|
||||
return max(last_list, key=os.path.getctime) if last_list else ''
|
||||
|
@ -11,7 +11,8 @@ from .ops import ltwh2xywh, ltwh2xyxy, resample_segments, xywh2ltwh, xywh2xyxy,
|
||||
|
||||
|
||||
def _ntuple(n):
|
||||
# From PyTorch internals
|
||||
"""From PyTorch internals."""
|
||||
|
||||
def parse(x):
|
||||
return x if isinstance(x, abc.Iterable) else tuple(repeat(x, n))
|
||||
|
||||
@ -29,7 +30,7 @@ __all__ = 'Bboxes', # tuple or list
|
||||
|
||||
|
||||
class Bboxes:
|
||||
"""Now only numpy is supported"""
|
||||
"""Now only numpy is supported."""
|
||||
|
||||
def __init__(self, bboxes, format='xyxy') -> None:
|
||||
assert format in _formats
|
||||
@ -207,7 +208,7 @@ class Instances:
|
||||
self._bboxes.areas()
|
||||
|
||||
def scale(self, scale_w, scale_h, bbox_only=False):
|
||||
"""this might be similar with denormalize func but without normalized sign"""
|
||||
"""this might be similar with denormalize func but without normalized sign."""
|
||||
self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h))
|
||||
if bbox_only:
|
||||
return
|
||||
@ -240,7 +241,7 @@ class Instances:
|
||||
self.normalized = True
|
||||
|
||||
def add_padding(self, padw, padh):
|
||||
# handle rect and mosaic situation
|
||||
"""Handle rect and mosaic situation."""
|
||||
assert not self.normalized, 'you should add padding with absolute coordinates.'
|
||||
self._bboxes.add(offset=(padw, padh, padw, padh))
|
||||
self.segments[..., 0] += padw
|
||||
|
@ -9,7 +9,8 @@ from .tal import bbox2dist
|
||||
|
||||
|
||||
class VarifocalLoss(nn.Module):
|
||||
# Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367
|
||||
"""Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
@ -29,7 +30,7 @@ class BboxLoss(nn.Module):
|
||||
self.use_dfl = use_dfl
|
||||
|
||||
def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
|
||||
# IoU loss
|
||||
"""IoU loss."""
|
||||
weight = torch.masked_select(target_scores.sum(-1), fg_mask).unsqueeze(-1)
|
||||
iou = bbox_iou(pred_bboxes[fg_mask], target_bboxes[fg_mask], xywh=False, CIoU=True)
|
||||
loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum
|
||||
@ -46,7 +47,7 @@ class BboxLoss(nn.Module):
|
||||
|
||||
@staticmethod
|
||||
def _df_loss(pred_dist, target):
|
||||
# Return sum of left and right DFL losses
|
||||
"""Return sum of left and right DFL losses."""
|
||||
# Distribution Focal Loss (DFL) proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
|
||||
tl = target.long() # target left
|
||||
tr = tl + 1 # target right
|
||||
|
@ -16,9 +16,9 @@ from ultralytics.yolo.utils import LOGGER, SimpleClass, TryExcept, plt_settings
|
||||
OKS_SIGMA = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0
|
||||
|
||||
|
||||
# boxes
|
||||
# Boxes
|
||||
def box_area(box):
|
||||
# box = xyxy(4,n)
|
||||
"""Return box area, where box shape is xyxy(4,n)."""
|
||||
return (box[2] - box[0]) * (box[3] - box[1])
|
||||
|
||||
|
||||
@ -175,9 +175,10 @@ def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#iss
|
||||
return 1.0 - 0.5 * eps, 0.5 * eps
|
||||
|
||||
|
||||
# losses
|
||||
# Losses
|
||||
class FocalLoss(nn.Module):
|
||||
# Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
|
||||
"""Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""
|
||||
|
||||
def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
|
||||
super().__init__()
|
||||
self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
|
||||
@ -341,7 +342,7 @@ class ConfusionMatrix:
|
||||
|
||||
|
||||
def smooth(y, f=0.05):
|
||||
# Box filter of fraction f
|
||||
"""Box filter of fraction f."""
|
||||
nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd)
|
||||
p = np.ones(nf // 2) # ones padding
|
||||
yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded
|
||||
@ -350,7 +351,7 @@ def smooth(y, f=0.05):
|
||||
|
||||
@plt_settings()
|
||||
def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()):
|
||||
# Precision-recall curve
|
||||
"""Plots a precision-recall curve."""
|
||||
fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
|
||||
py = np.stack(py, axis=1)
|
||||
|
||||
@ -373,7 +374,7 @@ def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()):
|
||||
|
||||
@plt_settings()
|
||||
def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confidence', ylabel='Metric'):
|
||||
# Metric-confidence curve
|
||||
"""Plots a metric-confidence curve."""
|
||||
fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
|
||||
|
||||
if 0 < len(names) < 21: # display per-class legend if < 21 classes
|
||||
@ -614,23 +615,23 @@ class Metric(SimpleClass):
|
||||
return self.all_ap.mean() if len(self.all_ap) else 0.0
|
||||
|
||||
def mean_results(self):
|
||||
"""Mean of results, return mp, mr, map50, map"""
|
||||
"""Mean of results, return mp, mr, map50, map."""
|
||||
return [self.mp, self.mr, self.map50, self.map]
|
||||
|
||||
def class_result(self, i):
|
||||
"""class-aware result, return p[i], r[i], ap50[i], ap[i]"""
|
||||
"""class-aware result, return p[i], r[i], ap50[i], ap[i]."""
|
||||
return self.p[i], self.r[i], self.ap50[i], self.ap[i]
|
||||
|
||||
@property
|
||||
def maps(self):
|
||||
"""mAP of each class"""
|
||||
"""mAP of each class."""
|
||||
maps = np.zeros(self.nc) + self.map
|
||||
for i, c in enumerate(self.ap_class_index):
|
||||
maps[c] = self.ap[i]
|
||||
return maps
|
||||
|
||||
def fitness(self):
|
||||
# Model fitness as a weighted combination of metrics
|
||||
"""Model fitness as a weighted combination of metrics."""
|
||||
w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
|
||||
return (np.array(self.mean_results()) * w).sum()
|
||||
|
||||
@ -800,7 +801,7 @@ class SegmentMetrics(SimpleClass):
|
||||
|
||||
@property
|
||||
def ap_class_index(self):
|
||||
# boxes and masks have the same ap_class_index
|
||||
"""Boxes and masks have the same ap_class_index."""
|
||||
return self.box.ap_class_index
|
||||
|
||||
@property
|
||||
@ -926,7 +927,7 @@ class ClassifyMetrics(SimpleClass):
|
||||
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}
|
||||
|
||||
def process(self, targets, pred):
|
||||
# target classes and predicted classes
|
||||
"""Target classes and predicted classes."""
|
||||
pred, targets = torch.cat(pred), torch.cat(targets)
|
||||
correct = (targets[:, None] == pred).float()
|
||||
acc = torch.stack((correct[:, 0], correct.max(1).values), dim=1) # (top1, top5) accuracy
|
||||
|
@ -246,7 +246,7 @@ def non_max_suppression(
|
||||
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
|
||||
i = i[:max_det] # limit detections
|
||||
if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
|
||||
# update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
|
||||
# Update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
|
||||
iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
|
||||
weights = iou * scores[None] # box weights
|
||||
x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
|
||||
|
@ -21,7 +21,7 @@ from .ops import clip_boxes, scale_image, xywh2xyxy, xyxy2xywh
|
||||
class Colors:
|
||||
# Ultralytics color palette https://ultralytics.com/
|
||||
def __init__(self):
|
||||
# hex = matplotlib.colors.TABLEAU_COLORS.values()
|
||||
"""Initialize colors as hex = matplotlib.colors.TABLEAU_COLORS.values()."""
|
||||
hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
|
||||
'2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
|
||||
self.palette = [self.hex2rgb(f'#{c}') for c in hexs]
|
||||
@ -63,7 +63,7 @@ class Annotator:
|
||||
else: # use cv2
|
||||
self.im = im
|
||||
self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width
|
||||
# pose
|
||||
# Pose
|
||||
self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9],
|
||||
[8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]
|
||||
|
||||
@ -115,7 +115,7 @@ class Annotator:
|
||||
alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque
|
||||
"""
|
||||
if self.pil:
|
||||
# convert to numpy first
|
||||
# Convert to numpy first
|
||||
self.im = np.asarray(self.im).copy()
|
||||
if len(masks) == 0:
|
||||
self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255
|
||||
@ -136,7 +136,7 @@ class Annotator:
|
||||
im_mask_np = im_mask.byte().cpu().numpy()
|
||||
self.im[:] = im_mask_np if retina_masks else scale_image(im_mask_np, self.im.shape)
|
||||
if self.pil:
|
||||
# convert im back to PIL and update draw
|
||||
# Convert im back to PIL and update draw
|
||||
self.fromarray(self.im)
|
||||
|
||||
def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True):
|
||||
@ -152,7 +152,7 @@ class Annotator:
|
||||
Note: `kpt_line=True` currently only supports human pose plotting.
|
||||
"""
|
||||
if self.pil:
|
||||
# convert to numpy first
|
||||
# Convert to numpy first
|
||||
self.im = np.asarray(self.im).copy()
|
||||
nkpt, ndim = kpts.shape
|
||||
is_pose = nkpt == 17 and ndim == 3
|
||||
@ -183,11 +183,11 @@ class Annotator:
|
||||
continue
|
||||
cv2.line(self.im, pos1, pos2, [int(x) for x in self.limb_color[i]], thickness=2, lineType=cv2.LINE_AA)
|
||||
if self.pil:
|
||||
# convert im back to PIL and update draw
|
||||
# Convert im back to PIL and update draw
|
||||
self.fromarray(self.im)
|
||||
|
||||
def rectangle(self, xy, fill=None, outline=None, width=1):
|
||||
# Add rectangle to image (PIL-only)
|
||||
"""Add rectangle to image (PIL-only)."""
|
||||
self.draw.rectangle(xy, fill, outline, width)
|
||||
|
||||
def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'):
|
||||
@ -202,12 +202,12 @@ class Annotator:
|
||||
cv2.putText(self.im, text, xy, 0, self.lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA)
|
||||
|
||||
def fromarray(self, im):
|
||||
# Update self.im from a numpy array
|
||||
"""Update self.im from a numpy array."""
|
||||
self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
|
||||
self.draw = ImageDraw.Draw(self.im)
|
||||
|
||||
def result(self):
|
||||
# Return annotated image as array
|
||||
"""Return annotated image as array."""
|
||||
return np.asarray(self.im)
|
||||
|
||||
|
||||
@ -217,18 +217,18 @@ def plot_labels(boxes, cls, names=(), save_dir=Path('')):
|
||||
import pandas as pd
|
||||
import seaborn as sn
|
||||
|
||||
# plot dataset labels
|
||||
# Plot dataset labels
|
||||
LOGGER.info(f"Plotting labels to {save_dir / 'labels.jpg'}... ")
|
||||
b = boxes.transpose() # classes, boxes
|
||||
nc = int(cls.max() + 1) # number of classes
|
||||
x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height'])
|
||||
|
||||
# seaborn correlogram
|
||||
# Seaborn correlogram
|
||||
sn.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
|
||||
plt.savefig(save_dir / 'labels_correlogram.jpg', dpi=200)
|
||||
plt.close()
|
||||
|
||||
# matplotlib labels
|
||||
# Matplotlib labels
|
||||
ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel()
|
||||
y = ax[0].hist(cls, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8)
|
||||
with contextlib.suppress(Exception): # color histogram bars by class
|
||||
@ -242,7 +242,7 @@ def plot_labels(boxes, cls, names=(), save_dir=Path('')):
|
||||
sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9)
|
||||
sn.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9)
|
||||
|
||||
# rectangles
|
||||
# Rectangles
|
||||
boxes[:, 0:2] = 0.5 # center
|
||||
boxes = xywh2xyxy(boxes) * 1000
|
||||
img = Image.fromarray(np.ones((1000, 1000, 3), dtype=np.uint8) * 255)
|
||||
@ -401,7 +401,7 @@ def plot_images(images,
|
||||
|
||||
@plt_settings()
|
||||
def plot_results(file='path/to/results.csv', dir='', segment=False, pose=False):
|
||||
# Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')
|
||||
"""Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')."""
|
||||
import pandas as pd
|
||||
save_dir = Path(file).parent if file else Path(dir)
|
||||
if segment:
|
||||
@ -436,7 +436,7 @@ def plot_results(file='path/to/results.csv', dir='', segment=False, pose=False):
|
||||
|
||||
|
||||
def output_to_target(output, max_det=300):
|
||||
# Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting
|
||||
"""Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting."""
|
||||
targets = []
|
||||
for i, o in enumerate(output):
|
||||
box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1)
|
||||
|
@ -48,7 +48,7 @@ def select_highest_overlaps(mask_pos, overlaps, n_max_boxes):
|
||||
is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) # (b, n_max_boxes, h*w)
|
||||
mask_pos = torch.where(mask_multi_gts, is_max_overlaps, mask_pos) # (b, n_max_boxes, h*w)
|
||||
fg_mask = mask_pos.sum(-2)
|
||||
# find each grid serve which gt(index)
|
||||
# Find which gt (index) each grid cell serves
|
||||
target_gt_idx = mask_pos.argmax(-2) # (b, h*w)
|
||||
return target_gt_idx, fg_mask, mask_pos
|
||||
|
||||
@ -112,10 +112,10 @@ class TaskAlignedAssigner(nn.Module):
|
||||
|
||||
target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes)
|
||||
|
||||
# assigned target
|
||||
# Assigned target
|
||||
target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask)
|
||||
|
||||
# normalize
|
||||
# Normalize
|
||||
align_metric *= mask_pos
|
||||
pos_align_metrics = align_metric.amax(axis=-1, keepdim=True) # b, max_num_obj
|
||||
pos_overlaps = (overlaps * mask_pos).amax(axis=-1, keepdim=True) # b, max_num_obj
|
||||
@ -125,13 +125,13 @@ class TaskAlignedAssigner(nn.Module):
|
||||
return target_labels, target_bboxes, target_scores, fg_mask.bool(), target_gt_idx
|
||||
|
||||
def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt):
|
||||
# get in_gts mask, (b, max_num_obj, h*w)
|
||||
"""Get in_gts mask, (b, max_num_obj, h*w)."""
|
||||
mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes)
|
||||
# get anchor_align metric, (b, max_num_obj, h*w)
|
||||
# Get anchor_align metric, (b, max_num_obj, h*w)
|
||||
align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes, mask_in_gts * mask_gt)
|
||||
# get topk_metric mask, (b, max_num_obj, h*w)
|
||||
# Get topk_metric mask, (b, max_num_obj, h*w)
|
||||
mask_topk = self.select_topk_candidates(align_metric, topk_mask=mask_gt.repeat([1, 1, self.topk]).bool())
|
||||
# merge all mask to a final mask, (b, max_num_obj, h*w)
|
||||
# Merge all mask to a final mask, (b, max_num_obj, h*w)
|
||||
mask_pos = mask_topk * mask_in_gts * mask_gt
|
||||
|
||||
return mask_pos, align_metric, overlaps
|
||||
@ -145,7 +145,7 @@ class TaskAlignedAssigner(nn.Module):
|
||||
ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) # 2, b, max_num_obj
|
||||
ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) # b, max_num_obj
|
||||
ind[1] = gt_labels.long().squeeze(-1) # b, max_num_obj
|
||||
# get the scores of each grid for each gt cls
|
||||
# Get the scores of each grid for each gt cls
|
||||
bbox_scores[mask_gt] = pd_scores[ind[0], :, ind[1]][mask_gt] # b, max_num_obj, h*w
|
||||
|
||||
# (b, max_num_obj, 1, 4), (b, 1, h*w, 4)
|
||||
|
@ -30,7 +30,7 @@ TORCH_2_X = check_version(torch.__version__, minimum='2.0')
|
||||
|
||||
@contextmanager
|
||||
def torch_distributed_zero_first(local_rank: int):
|
||||
# Decorator to make all processes in distributed training wait for each local_master to do something
|
||||
"""Decorator to make all processes in distributed training wait for each local_master to do something."""
|
||||
initialized = torch.distributed.is_available() and torch.distributed.is_initialized()
|
||||
if initialized and local_rank not in (-1, 0):
|
||||
dist.barrier(device_ids=[local_rank])
|
||||
@ -40,7 +40,8 @@ def torch_distributed_zero_first(local_rank: int):
|
||||
|
||||
|
||||
def smart_inference_mode():
|
||||
# Applies torch.inference_mode() decorator if torch>=1.9.0 else torch.no_grad() decorator
|
||||
"""Applies torch.inference_mode() decorator if torch>=1.9.0 else torch.no_grad() decorator."""
|
||||
|
||||
def decorate(fn):
|
||||
return (torch.inference_mode if TORCH_1_9 else torch.no_grad)()(fn)
|
||||
|
||||
@ -48,7 +49,7 @@ def smart_inference_mode():
|
||||
|
||||
|
||||
def select_device(device='', batch=0, newline=False, verbose=True):
|
||||
# device = None or 'cpu' or 0 or '0' or '0,1,2,3'
|
||||
"""Selects PyTorch Device. Options are device = None or 'cpu' or 0 or '0' or '0,1,2,3'."""
|
||||
s = f'Ultralytics YOLOv{__version__} 🚀 Python-{platform.python_version()} torch-{torch.__version__} '
|
||||
device = str(device).lower()
|
||||
for remove in 'cuda:', 'none', '(', ')', '[', ']', "'", ' ':
|
||||
@ -84,7 +85,7 @@ def select_device(device='', batch=0, newline=False, verbose=True):
|
||||
s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB
|
||||
arg = 'cuda:0'
|
||||
elif mps and getattr(torch, 'has_mps', False) and torch.backends.mps.is_available() and TORCH_2_X:
|
||||
# prefer MPS if available
|
||||
# Prefer MPS if available
|
||||
s += 'MPS\n'
|
||||
arg = 'mps'
|
||||
else: # revert to CPU
|
||||
@ -97,14 +98,14 @@ def select_device(device='', batch=0, newline=False, verbose=True):
|
||||
|
||||
|
||||
def time_sync():
|
||||
# PyTorch-accurate time
|
||||
"""PyTorch-accurate time."""
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.synchronize()
|
||||
return time.time()
|
||||
|
||||
|
||||
def fuse_conv_and_bn(conv, bn):
|
||||
# Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
|
||||
"""Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/."""
|
||||
fusedconv = nn.Conv2d(conv.in_channels,
|
||||
conv.out_channels,
|
||||
kernel_size=conv.kernel_size,
|
||||
@ -128,7 +129,7 @@ def fuse_conv_and_bn(conv, bn):
|
||||
|
||||
|
||||
def fuse_deconv_and_bn(deconv, bn):
|
||||
# Fuse ConvTranspose2d() and BatchNorm2d() layers
|
||||
"""Fuse ConvTranspose2d() and BatchNorm2d() layers."""
|
||||
fuseddconv = nn.ConvTranspose2d(deconv.in_channels,
|
||||
deconv.out_channels,
|
||||
kernel_size=deconv.kernel_size,
|
||||
@ -139,7 +140,7 @@ def fuse_deconv_and_bn(deconv, bn):
|
||||
groups=deconv.groups,
|
||||
bias=True).requires_grad_(False).to(deconv.weight.device)
|
||||
|
||||
# prepare filters
|
||||
# Prepare filters
|
||||
w_deconv = deconv.weight.clone().view(deconv.out_channels, -1)
|
||||
w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
|
||||
fuseddconv.weight.copy_(torch.mm(w_bn, w_deconv).view(fuseddconv.weight.shape))
|
||||
@ -153,7 +154,7 @@ def fuse_deconv_and_bn(deconv, bn):
|
||||
|
||||
|
||||
def model_info(model, detailed=False, verbose=True, imgsz=640):
|
||||
# Model information. imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320]
|
||||
"""Model information. imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320]."""
|
||||
if not verbose:
|
||||
return
|
||||
n_p = get_num_params(model)
|
||||
@ -174,17 +175,17 @@ def model_info(model, detailed=False, verbose=True, imgsz=640):
|
||||
|
||||
|
||||
def get_num_params(model):
|
||||
# Return the total number of parameters in a YOLO model
|
||||
"""Return the total number of parameters in a YOLO model."""
|
||||
return sum(x.numel() for x in model.parameters())
|
||||
|
||||
|
||||
def get_num_gradients(model):
|
||||
# Return the total number of parameters with gradients in a YOLO model
|
||||
"""Return the total number of parameters with gradients in a YOLO model."""
|
||||
return sum(x.numel() for x in model.parameters() if x.requires_grad)
|
||||
|
||||
|
||||
def get_flops(model, imgsz=640):
|
||||
# Return a YOLO model's FLOPs
|
||||
"""Return a YOLO model's FLOPs."""
|
||||
try:
|
||||
model = de_parallel(model)
|
||||
p = next(model.parameters())
|
||||
@ -199,7 +200,7 @@ def get_flops(model, imgsz=640):
|
||||
|
||||
|
||||
def initialize_weights(model):
|
||||
# Initialize model weights to random values
|
||||
"""Initialize model weights to random values."""
|
||||
for m in model.modules():
|
||||
t = type(m)
|
||||
if t is nn.Conv2d:
|
||||
@ -224,7 +225,7 @@ def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416)
|
||||
|
||||
|
||||
def make_divisible(x, divisor):
|
||||
# Returns nearest x divisible by divisor
|
||||
"""Returns nearest x divisible by divisor."""
|
||||
if isinstance(divisor, torch.Tensor):
|
||||
divisor = int(divisor.max()) # to int
|
||||
return math.ceil(x / divisor) * divisor
|
||||
@ -240,7 +241,7 @@ def copy_attr(a, b, include=(), exclude=()):
|
||||
|
||||
|
||||
def get_latest_opset():
|
||||
# Return second-most (for maturity) recently supported ONNX opset by this version of torch
|
||||
"""Return second-most (for maturity) recently supported ONNX opset by this version of torch."""
|
||||
return max(int(k[14:]) for k in vars(torch.onnx) if 'symbolic_opset' in k) - 1 # opset
|
||||
|
||||
|
||||
@ -250,22 +251,22 @@ def intersect_dicts(da, db, exclude=()):
|
||||
|
||||
|
||||
def is_parallel(model):
|
||||
# Returns True if model is of type DP or DDP
|
||||
"""Returns True if model is of type DP or DDP."""
|
||||
return isinstance(model, (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel))
|
||||
|
||||
|
||||
def de_parallel(model):
|
||||
# De-parallelize a model: returns single-GPU model if model is of type DP or DDP
|
||||
"""De-parallelize a model: returns single-GPU model if model is of type DP or DDP."""
|
||||
return model.module if is_parallel(model) else model
|
||||
|
||||
|
||||
def one_cycle(y1=0.0, y2=1.0, steps=100):
|
||||
# lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf
|
||||
"""Returns a lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf."""
|
||||
return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1
|
||||
|
||||
|
||||
def init_seeds(seed=0, deterministic=False):
|
||||
# Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html
|
||||
"""Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html."""
|
||||
random.seed(seed)
|
||||
np.random.seed(seed)
|
||||
torch.manual_seed(seed)
|
||||
@ -287,7 +288,7 @@ class ModelEMA:
|
||||
"""
|
||||
|
||||
def __init__(self, model, decay=0.9999, tau=2000, updates=0):
|
||||
# Create EMA
|
||||
"""Create EMA."""
|
||||
self.ema = deepcopy(de_parallel(model)).eval() # FP32 EMA
|
||||
self.updates = updates # number of EMA updates
|
||||
self.decay = lambda x: decay * (1 - math.exp(-x / tau)) # decay exponential ramp (to help early epochs)
|
||||
@ -296,7 +297,7 @@ class ModelEMA:
|
||||
self.enabled = True
|
||||
|
||||
def update(self, model):
|
||||
# Update EMA parameters
|
||||
"""Update EMA parameters."""
|
||||
if self.enabled:
|
||||
self.updates += 1
|
||||
d = self.decay(self.updates)
|
||||
|
@ -46,7 +46,7 @@ class ClassificationTrainer(BaseTrainer):
|
||||
"""
|
||||
load/create/download model for any task
|
||||
"""
|
||||
# classification models require special handling
|
||||
# Classification models require special handling
|
||||
|
||||
if isinstance(self.model, torch.nn.Module): # if model is loaded beforehand. No setup needed
|
||||
return
|
||||
|
@ -22,8 +22,8 @@ from ultralytics.yolo.utils.torch_utils import de_parallel
|
||||
class DetectionTrainer(BaseTrainer):
|
||||
|
||||
def get_dataloader(self, dataset_path, batch_size, rank=0, mode='train'):
|
||||
# TODO: manage splits differently
|
||||
# calculate stride - check if model is initialized
|
||||
"""TODO: manage splits differently."""
|
||||
# Calculate stride - check if model is initialized
|
||||
gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
|
||||
return create_dataloader(path=dataset_path,
|
||||
imgsz=self.args.imgsz,
|
||||
@ -48,7 +48,7 @@ class DetectionTrainer(BaseTrainer):
|
||||
return batch
|
||||
|
||||
def set_model_attributes(self):
|
||||
# nl = de_parallel(self.model).model[-1].nl # number of detection layers (to scale hyps)
|
||||
"""nl = de_parallel(self.model).model[-1].nl # number of detection layers (to scale hyps)."""
|
||||
# self.args.box *= 3 / nl # scale to layers
|
||||
# self.args.cls *= self.data["nc"] / 80 * 3 / nl # scale to classes and layers
|
||||
# self.args.cls *= (self.args.imgsz / 640) ** 2 * 3 / nl # scale to image size and layers
|
||||
|
@ -67,7 +67,7 @@ class DetectionValidator(BaseValidator):
|
||||
return preds
|
||||
|
||||
def update_metrics(self, preds, batch):
|
||||
# Metrics
|
||||
"""Metrics."""
|
||||
for si, pred in enumerate(preds):
|
||||
idx = batch['batch_idx'] == si
|
||||
cls = batch['cls'][idx]
|
||||
@ -164,8 +164,8 @@ class DetectionValidator(BaseValidator):
|
||||
return torch.tensor(correct, dtype=torch.bool, device=detections.device)
|
||||
|
||||
def get_dataloader(self, dataset_path, batch_size):
|
||||
# TODO: manage splits differently
|
||||
# calculate stride - check if model is initialized
|
||||
"""TODO: manage splits differently."""
|
||||
# Calculate stride - check if model is initialized
|
||||
gs = max(int(de_parallel(self.model).stride if self.model else 0), 32)
|
||||
return create_dataloader(path=dataset_path,
|
||||
imgsz=self.args.imgsz,
|
||||
|
@ -47,7 +47,7 @@ class PoseValidator(DetectionValidator):
|
||||
self.sigma = OKS_SIGMA if is_pose else np.ones(nkpt) / nkpt
|
||||
|
||||
def update_metrics(self, preds, batch):
|
||||
# Metrics
|
||||
"""Metrics."""
|
||||
for si, pred in enumerate(preds):
|
||||
idx = batch['batch_idx'] == si
|
||||
cls = batch['cls'][idx]
|
||||
|
@ -10,7 +10,7 @@ from ultralytics.yolo.v8.detect.predict import DetectionPredictor
|
||||
class SegmentationPredictor(DetectionPredictor):
|
||||
|
||||
def postprocess(self, preds, img, orig_imgs):
|
||||
# TODO: filter by classes
|
||||
"""TODO: filter by classes."""
|
||||
p = ops.non_max_suppression(preds[0],
|
||||
self.args.conf,
|
||||
self.args.iou,
|
||||
|
@ -140,7 +140,7 @@ class SegLoss(Loss):
|
||||
return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl)
|
||||
|
||||
def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
|
||||
# Mask loss for one image
|
||||
"""Mask loss for one image."""
|
||||
pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n, 32) @ (32,80,80) -> (n,80,80)
|
||||
loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction='none')
|
||||
return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean()
|
||||
|
@ -52,7 +52,7 @@ class SegmentationValidator(DetectionValidator):
|
||||
return p, proto
|
||||
|
||||
def update_metrics(self, preds, batch):
|
||||
# Metrics
|
||||
"""Metrics."""
|
||||
for si, (pred, proto) in enumerate(zip(preds[0], preds[1])):
|
||||
idx = batch['batch_idx'] == si
|
||||
cls = batch['cls'][idx]
|
||||
@ -179,7 +179,7 @@ class SegmentationValidator(DetectionValidator):
|
||||
self.plot_masks.clear()
|
||||
|
||||
def pred_to_json(self, predn, filename, pred_masks):
|
||||
# Save one JSON result
|
||||
"""Save one JSON result."""
|
||||
# Example result = {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
|
||||
from pycocotools.mask import encode # noqa
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user