ml.rl.workflow package

Submodules

ml.rl.workflow.base_workflow module

class ml.rl.workflow.base_workflow.BaseWorkflow(preprocess_handler: ml.rl.workflow.preprocess_handler.PreprocessHandler, trainer, evaluator, minibatch_size)

Bases: object

static init_multiprocessing(num_processes_per_node, num_nodes, node_index, gpu_index, init_method)
static read_norm_file(path) → Dict[int, ml.rl.thrift.core.ttypes.NormalizationParameters]
report(evaluation_details)
train_network(train_dataset, eval_dataset, epochs: int)

ml.rl.workflow.create_normalization_metadata module

ml.rl.workflow.create_normalization_metadata.check_samples_per_feature(samples_per_feature, num_samples)
ml.rl.workflow.create_normalization_metadata.create_norm_table(params)
ml.rl.workflow.create_normalization_metadata.get_norm_metadata(dataset, norm_params, norm_col)
ml.rl.workflow.create_normalization_metadata.get_norm_params(norm_params)

ml.rl.workflow.ddpg_workflow module

class ml.rl.workflow.ddpg_workflow.ContinuousWorkflow(model_params: ml.rl.thrift.core.ttypes.ContinuousActionModelParameters, preprocess_handler: ml.rl.workflow.preprocess_handler.PreprocessHandler, state_normalization: Dict[int, ml.rl.thrift.core.ttypes.NormalizationParameters], action_normalization: Dict[int, ml.rl.thrift.core.ttypes.NormalizationParameters], use_gpu: bool, use_all_avail_gpus: bool)

Bases: ml.rl.workflow.base_workflow.BaseWorkflow

ml.rl.workflow.ddpg_workflow.main(params)

ml.rl.workflow.dqn_workflow module

class ml.rl.workflow.dqn_workflow.DqnWorkflow(model_params: ml.rl.thrift.core.ttypes.DiscreteActionModelParameters, preprocess_handler: ml.rl.workflow.preprocess_handler.PreprocessHandler, state_normalization: Dict[int, ml.rl.thrift.core.ttypes.NormalizationParameters], use_gpu: bool, use_all_avail_gpus: bool)

Bases: ml.rl.workflow.base_workflow.BaseWorkflow

ml.rl.workflow.dqn_workflow.main(params)
ml.rl.workflow.dqn_workflow.single_process_main(gpu_index, *args)

ml.rl.workflow.helpers module

ml.rl.workflow.helpers.export_trainer_and_predictor(trainer, output_path, exporter=None)

Writes the PyTorch trainer and the Caffe2 predictor to file and returns the predictor.

returns: Predictor object

ml.rl.workflow.helpers.minibatch_size_multiplier(use_gpu, use_all_avail_gpus)

Increases the minibatch size when using PyTorch DataParallel.

ml.rl.workflow.helpers.parse_args(args)
ml.rl.workflow.helpers.save_model_to_file(model, path)

Save network parameters and optimizer parameters to file.

Parameters

model – a DQNTrainer, ParametricDQNTrainer, or DDPGTrainer object.

ml.rl.workflow.helpers.update_model_for_warm_start(model, path=None)

Loads network parameters and optimizer parameters into the trainer object to warm-start it.

Parameters

model – a DQNTrainer, ParametricDQNTrainer, or DDPGTrainer object.

ml.rl.workflow.page_handler module

class ml.rl.workflow.page_handler.EvaluationPageHandler(trainer, evaluator, reporter)

Bases: ml.rl.workflow.page_handler.PageHandler

finish() → None
get_last_cpe_results()
handle(tdp: ml.rl.training.training_data_page.TrainingDataPage) → None
class ml.rl.workflow.page_handler.ImitatorPageHandler(trainer, train=True)

Bases: ml.rl.workflow.page_handler.PageHandler

finish() → None
handle(tdp: ml.rl.training.training_data_page.TrainingDataPage) → None
class ml.rl.workflow.page_handler.PageHandler

Bases: object

finish() → None
handle(tdp: ml.rl.training.training_data_page.TrainingDataPage) → None
set_epoch(epoch) → None
class ml.rl.workflow.page_handler.TrainingPageHandler(trainer)

Bases: ml.rl.workflow.page_handler.PageHandler

finish() → None
handle(tdp: ml.rl.training.training_data_page.TrainingDataPage) → None
class ml.rl.workflow.page_handler.WorldModelEvaluationPageHandler(trainer_or_evaluator)

Bases: ml.rl.workflow.page_handler.WorldModelPageHandler

handle(tdp: ml.rl.types.TrainingBatch) → None
class ml.rl.workflow.page_handler.WorldModelPageHandler(trainer_or_evaluator)

Bases: ml.rl.workflow.page_handler.PageHandler

finish() → None
get_mean_loss(loss_name='loss', axis=None) → float
Parameters
  • loss_name – possible loss names: ‘loss’ (referring to total loss), ‘bce’ (loss for predicting not_terminal), ‘gmm’ (loss for next state prediction), ‘mse’ (loss for predicting reward)

  • axis – axis along which to compute the mean.

refresh_results() → None
class ml.rl.workflow.page_handler.WorldModelRandomTrainingPageHandler(trainer_or_evaluator)

Bases: ml.rl.workflow.page_handler.WorldModelPageHandler

Trains a baseline model on randomly shuffled data.

handle(tdp: ml.rl.types.TrainingBatch) → None
class ml.rl.workflow.page_handler.WorldModelTrainingPageHandler(trainer_or_evaluator)

Bases: ml.rl.workflow.page_handler.WorldModelPageHandler

handle(tdp: ml.rl.types.TrainingBatch) → None
ml.rl.workflow.page_handler.feed_pages(data_streamer, dataset_num_rows, epoch, minibatch_size, use_gpu, page_handler, feature_extractor=None, batch_preprocessor=None)
ml.rl.workflow.page_handler.get_actual_minibatch_size(batch, minibatch_size_preset)

ml.rl.workflow.parametric_dqn_workflow module

class ml.rl.workflow.parametric_dqn_workflow.ParametricDqnWorkflow(model_params: ml.rl.thrift.core.ttypes.ContinuousActionModelParameters, preprocess_handler: ml.rl.workflow.preprocess_handler.PreprocessHandler, state_normalization: Dict[int, ml.rl.thrift.core.ttypes.NormalizationParameters], action_normalization: Dict[int, ml.rl.thrift.core.ttypes.NormalizationParameters], use_gpu: bool, use_all_avail_gpus: bool)

Bases: ml.rl.workflow.base_workflow.BaseWorkflow

ml.rl.workflow.parametric_dqn_workflow.main(params)
ml.rl.workflow.parametric_dqn_workflow.single_process_main(gpu_index, *args)

ml.rl.workflow.preprocess_handler module

class ml.rl.workflow.preprocess_handler.ContinuousPreprocessHandler(state_preprocessor, action_preprocessor, sparse_to_dense_processor: ml.rl.preprocessing.sparse_to_dense.SparseToDenseProcessor)

Bases: ml.rl.workflow.preprocess_handler.PreprocessHandler

preprocess(batch) → ml.rl.training.training_data_page.TrainingDataPage
class ml.rl.workflow.preprocess_handler.DqnPreprocessHandler(state_preprocessor, action_names: List[str], sparse_to_dense_processor: ml.rl.preprocessing.sparse_to_dense.SparseToDenseProcessor)

Bases: ml.rl.workflow.preprocess_handler.PreprocessHandler

preprocess(batch) → ml.rl.training.training_data_page.TrainingDataPage
read_actions(actions)
class ml.rl.workflow.preprocess_handler.ParametricDqnPreprocessHandler(state_preprocessor, action_preprocessor, sparse_to_dense_processor: ml.rl.preprocessing.sparse_to_dense.SparseToDenseProcessor)

Bases: ml.rl.workflow.preprocess_handler.PreprocessHandler

preprocess(batch) → ml.rl.training.training_data_page.TrainingDataPage
class ml.rl.workflow.preprocess_handler.PreprocessHandler(state_preprocessor, sparse_to_dense_processor: ml.rl.preprocessing.sparse_to_dense.SparseToDenseProcessor)

Bases: object

preprocess(batch) → ml.rl.training.training_data_page.TrainingDataPage

ml.rl.workflow.transitional module

ml.rl.workflow.transitional.create_dqn_trainer_from_params(model: ml.rl.thrift.core.ttypes.DiscreteActionModelParameters, normalization_parameters: Dict[int, ml.rl.thrift.core.ttypes.NormalizationParameters], use_gpu: bool = False, use_all_avail_gpus: bool = False, metrics_to_score=None)
ml.rl.workflow.transitional.create_parametric_dqn_trainer_from_params(model: ml.rl.thrift.core.ttypes.ContinuousActionModelParameters, state_normalization_parameters: Dict[int, ml.rl.thrift.core.ttypes.NormalizationParameters], action_normalization_parameters: Dict[int, ml.rl.thrift.core.ttypes.NormalizationParameters], use_gpu: bool = False, use_all_avail_gpus: bool = False)

Module contents