Update Tuner() to attempt saving metrics on crash (#6711)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
Finlay Morrison 2023-12-01 15:45:31 +00:00 committed by GitHub
parent 16639b60eb
commit 4425abce59
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -180,8 +180,9 @@ class Tuner:
try:
# Train YOLO model with mutated hyperparameters (run in subprocess to avoid dataloader hang)
cmd = ['yolo', 'train', *(f'{k}={v}' for k, v in train_args.items())]
assert subprocess.run(cmd, check=True).returncode == 0, 'training failed'
return_code = subprocess.run(cmd, check=True).returncode
metrics = torch.load(ckpt_file)['train_metrics']
assert return_code == 0, 'training failed'
except Exception as e:
LOGGER.warning(f'WARNING ❌️ training failure for hyperparameter tuning iteration {i + 1}\n{e}')