Print Error Info without Stopping Training

 

In the following way, errors are printed without interrupting the training process.


# Main training loop.
#
# Each stage of an iteration (training, testing/visualizing, snapshotting) is
# wrapped in its own try/except, so a failure in one stage is reported with a
# full traceback but does not abort the run or prevent the later stages from
# being attempted. KeyboardInterrupt is re-raised explicitly so that Ctrl-C
# still stops training.
for i in range(conf.max_iters + 1):
    # Train a single iteration on the current data instance
    try:
        model.train_one_iter(i, input_images)
    except KeyboardInterrupt:
        raise
    except Exception:
        print('Something went wrong in iteration %d, While training.' % i)
        print_exc()

    # Take care of all testing, saving and presenting of current results and status
    try:
        model.test_and_display(i)
    except KeyboardInterrupt:
        raise
    except Exception:
        print('Something went wrong in iteration %d, While testing or visualizing.' % i)
        print_exc()

    # Save snapshot when needed (every conf.save_snapshot_freq iterations, never at i == 0)
    try:
        if i > 0 and not i % conf.save_snapshot_freq:
            # Build the path once so save and reload are guaranteed to use the same file.
            checkpoint_path = os.path.join(conf.output_dir_path, 'checkpoint_%07d.pth.tar' % i)
            model.save(checkpoint_path)

            # Recreate the model and load the checkpoint that was just saved.
            # NOTE(review): if Model(conf) raises after the `del`, `model` stays
            # unbound and every later iteration will report a NameError — confirm
            # that dropping the old model first (presumably to free memory) is
            # worth that risk.
            del model
            model = Model(conf)
            # Was `model.resuem(...)`: the AttributeError was swallowed by the
            # handler below, leaving a freshly initialised model without the
            # saved state loaded. Assumes Model exposes `resume` — TODO confirm.
            model.resume(checkpoint_path)
    except KeyboardInterrupt:
        raise
    except Exception:
        print('Something went wrong in iteration %d, While saving snapshot.' % i)
        print_exc()