mirror of
https://github.com/THU-MIG/yolov10.git
synced 2025-05-23 21:44:22 +08:00
Add DVC experiments logger with DVCLive (#2792)
This commit is contained in:
parent
2b26572e42
commit
6057b267af
@ -14,6 +14,7 @@ tqdm>=4.64.0
|
|||||||
|
|
||||||
# Logging -------------------------------------
|
# Logging -------------------------------------
|
||||||
# tensorboard>=2.13.0
|
# tensorboard>=2.13.0
|
||||||
|
# dvclive>=2.11.0
|
||||||
# clearml
|
# clearml
|
||||||
# comet
|
# comet
|
||||||
|
|
||||||
|
@ -198,6 +198,7 @@ def add_integration_callbacks(instance):
|
|||||||
"""
|
"""
|
||||||
from .clearml import callbacks as clearml_cb
|
from .clearml import callbacks as clearml_cb
|
||||||
from .comet import callbacks as comet_cb
|
from .comet import callbacks as comet_cb
|
||||||
|
from .dvc import callbacks as dvc_cb
|
||||||
from .hub import callbacks as hub_cb
|
from .hub import callbacks as hub_cb
|
||||||
from .mlflow import callbacks as mlflow_cb
|
from .mlflow import callbacks as mlflow_cb
|
||||||
from .neptune import callbacks as neptune_cb
|
from .neptune import callbacks as neptune_cb
|
||||||
@ -205,7 +206,7 @@ def add_integration_callbacks(instance):
|
|||||||
from .tensorboard import callbacks as tensorboard_cb
|
from .tensorboard import callbacks as tensorboard_cb
|
||||||
from .wb import callbacks as wb_cb
|
from .wb import callbacks as wb_cb
|
||||||
|
|
||||||
for x in clearml_cb, comet_cb, hub_cb, mlflow_cb, neptune_cb, tune_cb, tensorboard_cb, wb_cb:
|
for x in clearml_cb, comet_cb, hub_cb, mlflow_cb, neptune_cb, tune_cb, tensorboard_cb, wb_cb, dvc_cb:
|
||||||
for k, v in x.items():
|
for k, v in x.items():
|
||||||
if v not in instance.callbacks[k]: # prevent duplicate callbacks addition
|
if v not in instance.callbacks[k]: # prevent duplicate callbacks addition
|
||||||
instance.callbacks[k].append(v) # callback[name].append(func)
|
instance.callbacks[k].append(v) # callback[name].append(func)
|
||||||
|
135
ultralytics/yolo/utils/callbacks/dvc.py
Normal file
135
ultralytics/yolo/utils/callbacks/dvc.py
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
# Ultralytics YOLO 🚀, GPL-3.0 license
|
||||||
|
import os
|
||||||
|
|
||||||
|
from ultralytics.yolo.utils import LOGGER, TESTS_RUNNING
|
||||||
|
from ultralytics.yolo.utils.torch_utils import get_flops, get_num_params
|
||||||
|
|
||||||
|
try:
    from importlib.metadata import version

    import dvclive

    # Explicit checks rather than `assert`: assert statements are stripped under `python -O`,
    # which would silently re-enable logging while tests are running.
    if TESTS_RUNNING:  # do not log pytest
        raise ImportError('DVCLive logging is disabled while tests are running')
    # `version` raises PackageNotFoundError (an ImportError subclass) when no distribution
    # metadata exists; an empty version string is treated the same way.
    if not version('dvclive'):
        raise ImportError('dvclive distribution metadata is missing')
except (ImportError, AssertionError):
    # Any failure above turns the integration off: the callbacks table at the bottom of
    # this module is empty when `dvclive` is None.
    dvclive = None

# DVCLive logger instance; None until `on_pretrain_routine_start` creates one
live = None
# Maps each plot path to the timestamp it had when it was last logged, so that
# `_log_plots` only logs a plot again after its timestamp changes
_processed_plots = {}

# `on_fit_epoch_end` is also called for the final validation (this probably needs to be fixed).
# For now this flag is how we distinguish the final evaluation of the best model from the
# validation of the last training epoch: it is set in `on_train_epoch_start` and cleared
# in `on_fit_epoch_end`.
_training_epoch = False
|
||||||
|
|
||||||
|
|
||||||
|
def _logger_disabled():
    """Return True when the `ULTRALYTICS_DVC_DISABLED` environment variable is set to 'true' (case-insensitive).

    An unset variable is treated as 'false', so logging is enabled by default.
    """
    flag = os.environ.get('ULTRALYTICS_DVC_DISABLED', 'false')
    return flag.lower() == 'true'
|
||||||
|
|
||||||
|
|
||||||
|
def _log_images(image_path, prefix=''):
    """Log a single image to DVCLive, doing nothing when no logger is active.

    Args:
        image_path: Object exposing a `.name` attribute (presumably a `pathlib.Path` -- confirm against callers).
        prefix (str): Leading path component joined in front of the image's file name.
    """
    if not live:
        return
    logged_name = os.path.join(prefix, image_path.name)
    live.log_image(logged_name, image_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _log_plots(plots, prefix=''):
    """Log every plot whose timestamp differs from the one recorded the last time it was logged.

    Args:
        plots (dict): Maps a plot path to a dict that holds at least a 'timestamp' entry.
        prefix (str): Prefix forwarded to `_log_images`.
    """
    for plot_path, info in plots.items():
        stamp = info['timestamp']
        if _processed_plots.get(plot_path) == stamp:
            continue  # this version of the plot was already handled
        _log_images(plot_path, prefix)
        _processed_plots[plot_path] = stamp
|
||||||
|
|
||||||
|
|
||||||
|
def _log_confusion_matrix(validator):
    """Expand the validator's confusion matrix into label lists and log them as a DVCLive sklearn plot.

    Each integer cell count is unrolled into that many (target, prediction) label pairs: the row
    index of the transposed matrix selects the target label and the column index the predicted
    label. For the 'detect' task an extra 'background' label is appended to the class names.

    Args:
        validator: Object exposing `confusion_matrix` (with `matrix` and `task`) and a `names` dict.
    """
    cm = validator.confusion_matrix
    labels = list(validator.names.values())
    if cm.task == 'detect':
        labels.append('background')

    actual, predicted = [], []
    for true_idx, counts in enumerate(cm.matrix.T.astype(int)):
        for pred_idx, count in enumerate(counts):
            actual.extend([labels[true_idx]] * count)
            predicted.extend([labels[pred_idx]] * count)

    live.log_sklearn_plot('confusion_matrix', actual, predicted, name='cf.json', normalized=True)
|
||||||
|
|
||||||
|
|
||||||
|
def on_pretrain_routine_start(trainer):
    """Create the module-level DVCLive logger for this run unless logging is disabled via the environment.

    Args:
        trainer: Trainer that triggered the callback (not used here).
    """
    global live
    try:
        if not _logger_disabled():
            live = dvclive.Live(save_dvc_exp=True)
            LOGGER.info(
                'DVCLive is detected and auto logging is enabled (can be disabled with `ULTRALYTICS_DVC_DISABLED=true`).'
            )
        else:
            LOGGER.debug('DVCLive is detected and auto logging is disabled via `ULTRALYTICS_DVC_DISABLED`.')
            live = None
    except Exception as e:
        # Broad catch is deliberate: a logging integration must never abort training.
        # Reset the logger so a `Live` instance left over from an earlier run in the same
        # process is not reused after initialization failed for this run.
        live = None
        LOGGER.warning(f'WARNING ⚠️ DVCLive installed but not initialized correctly, not logging this run. {e}')
|
||||||
|
|
||||||
|
|
||||||
|
def on_pretrain_routine_end(trainer):
    """Log the plots currently registered on the trainer under the 'train' prefix.

    Args:
        trainer: Object exposing a `plots` dict in the form expected by `_log_plots`.
    """
    _log_plots(trainer.plots, prefix='train')
|
||||||
|
|
||||||
|
|
||||||
|
def on_train_start(trainer):
    """Log the trainer's arguments as DVCLive parameters when a logger is active.

    Args:
        trainer: Object exposing the training arguments as `args`.
    """
    if not live:
        return
    live.log_params(trainer.args)
|
||||||
|
|
||||||
|
|
||||||
|
def on_train_epoch_start(trainer):
    """Mark that a training epoch is in progress so `on_fit_epoch_end` knows to log its metrics.

    Args:
        trainer: Trainer that triggered the callback (not used here).
    """
    global _training_epoch
    _training_epoch = True
|
||||||
|
|
||||||
|
|
||||||
|
def on_fit_epoch_end(trainer):
    """Log the epoch's metrics and plots to DVCLive and advance the logger to the next step.

    Nothing happens unless a logger is active and a training epoch was started since the last
    call; this is what keeps the final validation pass from being logged as an extra step.

    Args:
        trainer: Object exposing `label_loss_items`, `tloss`, `metrics`, `lr`, `epoch`, `model`,
            `plots` and `validator`.
    """
    global _training_epoch
    if not (live and _training_epoch):
        return

    # Merge train losses, validation metrics and learning rates; later sources override earlier keys
    epoch_metrics = {}
    epoch_metrics.update(trainer.label_loss_items(trainer.tloss, prefix='train'))
    epoch_metrics.update(trainer.metrics)
    epoch_metrics.update(trainer.lr)
    for key, val in epoch_metrics.items():
        live.log_metric(key, val)

    if trainer.epoch == 0:
        # Model summary values are logged once, on the first epoch, without a plot
        one_off = (
            ('model/parameters', get_num_params(trainer.model)),
            ('model/GFLOPs', round(get_flops(trainer.model), 3)),
            ('model/speed(ms)', round(trainer.validator.speed['inference'], 3)))
        for key, val in one_off:
            live.log_metric(key, val, plot=False)

    _log_plots(trainer.plots, 'train')
    _log_plots(trainer.validator.plots, 'val')

    live.next_step()
    _training_epoch = False
|
||||||
|
|
||||||
|
|
||||||
|
def on_train_end(trainer):
    """Log final metrics, plots, the confusion matrix and the best weights, then close the DVCLive logger.

    Per the original author's note, these are the best metrics: the validator is run on the
    best model internally before this point.

    Args:
        trainer: Object exposing `label_loss_items`, `tloss`, `metrics`, `lr`, `plots`,
            `validator` and `best` (a path-like object with `exists()`).
    """
    if not live:
        return

    final_metrics = {}
    final_metrics.update(trainer.label_loss_items(trainer.tloss, prefix='train'))
    final_metrics.update(trainer.metrics)
    final_metrics.update(trainer.lr)
    for key, val in final_metrics.items():
        live.log_metric(key, val, plot=False)

    _log_plots(trainer.plots, 'eval')
    _log_plots(trainer.validator.plots, 'eval')
    _log_confusion_matrix(trainer.validator)

    best_weights = trainer.best
    if best_weights.exists():
        live.log_artifact(best_weights, copy=True)

    live.end()
|
||||||
|
|
||||||
|
|
||||||
|
# Hook table consumed by the integration loader: each handler is registered under its own
# function name, and the table is empty when dvclive is unavailable.
callbacks = {
    handler.__name__: handler
    for handler in (
        on_pretrain_routine_start,
        on_pretrain_routine_end,
        on_train_start,
        on_train_epoch_start,
        on_fit_epoch_end,
        on_train_end)} if dvclive else {}
|
Loading…
x
Reference in New Issue
Block a user