In the following way, errors are reported without disrupting the training process.
# Main loop: runs iterations 0..conf.max_iters inclusive. Each stage
# (training, testing/visualizing, snapshotting) sits in its own try block so
# that a failure in one stage is reported with a traceback but does not stop
# the run. KeyboardInterrupt is always re-raised so the user can still abort.
for i in range(conf.max_iters + 1):
    # Train a single iteration on the current data instance
    try:
        model.train_one_iter(i, input_images)
    except KeyboardInterrupt:
        raise
    except Exception:
        print('Something went wrong in iteration %d, While training.' % i)
        print_exc()

    # Take care of all testing, saving and presenting of current results and status
    try:
        model.test_and_display(i)
    except KeyboardInterrupt:
        raise
    except Exception:
        print('Something went wrong in iteration %d, While testing or visualizing.' % i)
        print_exc()

    # Save snapshot when needed: every conf.save_snapshot_freq iterations,
    # skipping iteration 0.
    try:
        if i > 0 and not i % conf.save_snapshot_freq:
            # Build the path once so save and reload are guaranteed to agree.
            checkpoint_path = os.path.join(conf.output_dir_path, 'checkpoint_%07d.pth.tar' % i)
            model.save(checkpoint_path)

            # Recreate the model and load the checkpoint that was just saved.
            # NOTE(review): if Model(conf) raises here, `model` stays unbound
            # and every later iteration will fail with a NameError -- confirm
            # whether the run should abort in that case.
            del model
            model = Model(conf)
            # Was `model.resuem(...)`: the resulting AttributeError was caught
            # below, so training silently continued on a freshly initialized
            # model after every snapshot.
            # NOTE(review): assumes Model exposes `resume(path)` -- confirm.
            model.resume(checkpoint_path)
    except KeyboardInterrupt:
        raise
    except Exception:
        print('Something went wrong in iteration %d, While saving snapshot.' % i)
        print_exc()