diff --git a/d4rl-generation/.gitignore b/d4rl-generation/.gitignore new file mode 100644 index 00000000..16c6bfd4 --- /dev/null +++ b/d4rl-generation/.gitignore @@ -0,0 +1,165 @@ +experiment_logs/ +datasets/ +logs/ +*.hdf5 + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/
\ No newline at end of file
diff --git a/d4rl-generation/README.md b/d4rl-generation/README.md
new file mode 100644
index 00000000..0f648b82
--- /dev/null
+++ b/d4rl-generation/README.md
@@ -0,0 +1,54 @@
+# Kabuki dataset generation
+
+This repo demonstrates a workflow for creating
+offline datasets compatible with Kabuki.
+
+It uses dm-acme for online RL training. The data
+is logged via EnvLogger, and the logged data is then post-processed
+into an HDF5 file. However, this is only a proof of concept, and
+neither dm-acme nor EnvLogger constitutes part of the specification.
+
+## Spec for the new HDF5 dataset in Kabuki
+We describe the standard used for the HDF5 datasets in D4RL-V2.
+Unlike the datasets shipped with previous versions of D4RL,
+the new datasets follow a formalized standard.
+
+The new datasets will continue to use HDF5 as the storage format. Although HDF5 files
+do not naturally provide a mechanism for storing distinct episodes as separate entries,
+HDF5 is a widely adopted standard format that can easily be used across different
+frameworks and languages.
+
+Previous iterations of the D4RL datasets have some outstanding issues.
+One notable issue is that terminal observations are not captured.
+While the omission of terminal observations is not really problematic
+for offline actor-critic algorithms such as CQL, BCQ, and IQL,
+it poses issues for researchers
+working on offline imitation learning,
+where proper handling of terminal transitions is known to significantly influence
+performance.
+Therefore, terminal observations are recorded in the new version of the datasets.
+More generally, the new datasets should capture as much information from the original environment as possible.
+
+In the new version, the dataset follows the convention introduced by the RLDS project.
+The agent's experience is stored in the dataset as a sequence of episodes, each consisting of a variable number of steps. The steps are stored as a flattened dictionary of arrays
+in the state-action-reward (SAR) alignment. Concretely, each step consists of
+
+* is_first, is_last: whether this step is the first/the last step of an episode.
+* observation: observation for the step.
+* action: action taken after observing the `observation` of the step.
+* reward: reward obtained after applying the action in the step.
+* is_terminal: whether the observation is terminal (on the last step of an episode, is_terminal = False indicates that the episode was truncated).
+* discount: discount factor at this step. This may be unfamiliar to gym.Env users but
+is consistent with the discount used in dm_env. In particular, discount = 0 indicates that
+the *next* step is terminal, and 1.0 otherwise.
+
+Refer to https://github.com/google-research/rlds for a more detailed description.
+
+## Generating datasets
+While HDF5 is used as the final format for storing benchmark datasets in D4RL,
+it is not used during the data collection process. In this repo,
+we demonstrate using EnvLogger to record the interactions made by an
+RL agent during online learning. The logged experience is then post-processed
+(and potentially stitched with other datasets) to produce the final HDF5 files.
+We provide `convert_dataset.py` to show how this can be done by converting
+from EnvLogger's Riegeli file format to a single HDF5 file.
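+
+Once generated, the HDF5 file can be read back with plain `h5py`. The snippet
+below is a minimal sketch of recovering episode boundaries from the flattened
+step arrays; the file name and the slicing logic are illustrative and not part
+of the spec.
+
+```python
+import h5py
+import numpy as np
+
+with h5py.File("dataset.hdf5", "r") as f:
+    steps = {key: f[key][:] for key in f.keys()}
+
+# Episodes start wherever is_first is set; slice the flat arrays accordingly.
+starts = np.flatnonzero(steps["is_first"])
+ends = np.append(starts[1:], len(steps["is_first"]))
+episodes = [{k: v[s:e] for k, v in steps.items()} for s, e in zip(starts, ends)]
+print(f"Loaded {len(episodes)} episodes")
+```
+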
+Alternatively, we can use EnvLogger's RLDS backend to generate an RLDS-compatible TensorFlow dataset and convert that to the HDF5 file.
\ No newline at end of file
diff --git a/d4rl-generation/convert_dataset.py b/d4rl-generation/convert_dataset.py
new file mode 100644
index 00000000..c279f79d
--- /dev/null
+++ b/d4rl-generation/convert_dataset.py
@@ -0,0 +1,77 @@
+"""Convert dataset logged by the EnvLogger Riegeli backend to HDF5."""
+from envlogger import reader
+import numpy as np
+import h5py
+import tree
+from absl import flags
+from absl import app
+
+_DATASET_DIR = flags.DEFINE_string("dataset_dir", None, "Directory containing the EnvLogger data.")
+_OUTPUT_FILE = flags.DEFINE_string("output_file", "dataset.hdf5", "Path of the output HDF5 file.")
+flags.mark_flag_as_required("dataset_dir")
+
+
+def _convert_envlogger_episode_to_rlds_steps(episode):
+    """Convert an episode of envlogger.StepData to RLDS-compatible steps."""
+    observations = np.stack([step.timestep.observation for step in episode])
+    # RLDS uses the SAR alignment while EnvLogger uses ARS.
+    # The following lines convert from the ARS to the SAR alignment.
+    actions = np.stack([step.action for step in episode[1:]])
+    # Add a dummy action to the last step, which contains the terminal observation.
+    actions = np.concatenate(
+        [actions, np.expand_dims(np.zeros_like(actions[0]), axis=0)]
+    )
+    # Add a dummy reward to the last step, which contains the terminal observation.
+    rewards = np.stack([step.timestep.reward for step in episode[1:]])
+    rewards = np.concatenate(
+        [rewards, np.expand_dims(np.zeros_like(rewards[0]), axis=0)]
+    )
+    # Add a dummy discount to the last step, which contains the terminal observation.
+    discounts = np.stack([step.timestep.discount for step in episode[1:]])
+    discounts = np.concatenate(
+        [discounts, np.expand_dims(np.zeros_like(discounts[0]), axis=0)]
+    )
+    # The is_first/is_last/is_terminal flags are per-step, so no realignment is needed.
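+    # Worked example of the shift above (illustrative): an episode logged as
+    # EnvLogger records (timestep, action)
+    #     (reset, unused), (t1, a0), (t2, a1)
+    # becomes the SAR steps
+    #     (reset.observation, a0, t1.reward), (t1.observation, a1, t2.reward),
+    #     (t2.observation, dummy action, dummy reward),
+    # i.e. the final step keeps the terminal observation with padded fields.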
+    is_first = np.array([step.timestep.first() for step in episode])
+    is_last = np.array([step.timestep.last() for step in episode])
+    is_terminal = np.array(
+        [step.timestep.last() and step.timestep.discount == 0.0 for step in episode]
+    )
+    return {
+        "observation": observations,
+        "action": actions,
+        "reward": rewards,
+        "discount": discounts,
+        "is_first": is_first,
+        "is_last": is_last,
+        "is_terminal": is_terminal,
+    }
+
+
+def write_to_hdf5_file(episodes, filename):
+    """Write episodes in EnvLogger format to an HDF5 file."""
+    all_steps = []
+    for episode in episodes:
+        all_steps.append(_convert_envlogger_episode_to_rlds_steps(episode))
+    all_steps = tree.map_structure(lambda *xs: np.concatenate(xs), *all_steps)
+    f = h5py.File(filename, "w")
+    for key in all_steps.keys():
+        f.create_dataset(key, data=all_steps[key], compression="gzip")
+    f.close()
+
+
+def main(_):
+    output_file = _OUTPUT_FILE.value
+    with reader.Reader(data_directory=_DATASET_DIR.value) as r:
+        print(r.observation_spec())
+        print(r.metadata())
+        write_to_hdf5_file(r.episodes, output_file)
+    # Inspect the created HDF5 file.
+    f = h5py.File(output_file, "r")
+    for k in f:
+        print(k, f[k].shape)
+    f.close()
+
+
+if __name__ == "__main__":
+    app.run(main)
diff --git a/d4rl-generation/helpers.py b/d4rl-generation/helpers.py
new file mode 100644
index 00000000..f0ef8afb
--- /dev/null
+++ b/d4rl-generation/helpers.py
@@ -0,0 +1,69 @@
+"""Shared helpers for rl_continuous experiments."""
+from typing import Optional
+from acme import wrappers
+import dm_env
+import gym
+from acme.utils import loggers as acme_loggers
+from absl import logging
+
+_VALID_TASK_SUITES = ("gym", "control")
+
+
+def make_environment(suite: str, task: str, seed=None) -> dm_env.Environment:
+    """Makes the requested continuous control environment.
+
+    Args:
+      suite: One of 'gym' or 'control'.
+      task: Task to load. If `suite` is 'control', the task must be formatted as
+        f'{domain_name}:{task_name}'.
+      seed: Optional seed passed to the environment.
+
+    Returns:
+      An environment satisfying the dm_env interface expected by Acme agents.
+    """
+
+    if suite not in _VALID_TASK_SUITES:
+        raise ValueError(
+            f"Unsupported suite: {suite}. Expected one of {_VALID_TASK_SUITES}"
+        )
+
+    if suite == "gym":
+        env = gym.make(task)
+        env.seed(seed)
+        # Make sure the environment obeys the dm_env.Environment interface.
+        env = wrappers.GymWrapper(env)
+
+    elif suite == "control":
+        # Load dm_control.suite lazily to avoid requiring a MuJoCo license when it is not used.
+        from dm_control import suite as dm_suite  # pylint: disable=g-import-not-at-top
+
+        domain_name, task_name = task.split(":")
+        env = dm_suite.load(domain_name, task_name, task_kwargs={"random": seed})
+        env = wrappers.ConcatObservationWrapper(env)
+
+    # Wrap the environment so the expected continuous action spec is [-1, 1].
+    # Note: this is a no-op on 'control' tasks.
+    env = wrappers.CanonicalSpecWrapper(env, clip=True)
+    env = wrappers.SinglePrecisionWrapper(env)
+    return env
+
+
+def get_default_logger_factory(workdir: str, save_data=True, time_delta: float = 1.0):
+    """Create a custom logger factory for use in the experiment."""
+
+    def logger_factory(label: str, steps_key: Optional[str] = None, task_id: int = 0):
+        del steps_key, task_id
+
+        print_fn = logging.info
+        terminal_logger = acme_loggers.TerminalLogger(label=label, print_fn=print_fn)
+
+        loggers = [terminal_logger]
+
+        if save_data:
+            loggers.append(acme_loggers.CSVLogger(workdir, label=label))
+
+        # Dispatch to all writers, then filter out Nones and throttle by time.
+        logger = acme_loggers.Dispatcher(loggers, acme_loggers.to_numpy)
+        logger = acme_loggers.NoneFilter(logger)
+        logger = acme_loggers.TimeFilter(logger, time_delta)
+
+        return logger
+
+    return logger_factory
diff --git a/d4rl-generation/logged_experiment.py b/d4rl-generation/logged_experiment.py
new file mode 100644
index 00000000..16f1486a
--- /dev/null
+++ b/d4rl-generation/logged_experiment.py
@@ -0,0 +1,292 @@
+"""Runners used for executing local agents.
+
+This is a fork of run_experiment.py from Acme.
+The only additional feature added by the fork is the option to
+wrap the training environment with an envlogger.EnvLogger wrapper.
+Note that the wrapper is not applied to the evaluation environment, despite
+the two loops sharing the same underlying environment instance.
+"""
+
+import sys
+import time
+from typing import Optional, Sequence, Tuple, Callable
+
+import acme
+from acme import core
+from acme import specs
+from acme import types
+from acme.jax import utils
+from acme.jax.experiments import config
+from acme.tf import savers
+from acme.utils import counting
+import dm_env
+import jax
+import reverb
+import envlogger
+
+
+def run_experiment(
+    experiment: config.ExperimentConfig,
+    eval_every: int = 100,
+    num_eval_episodes: int = 1,
+    make_envlogger: Optional[
+        Callable[[dm_env.Environment], envlogger.EnvLogger]
+    ] = None,
+):
+    """Runs a simple, single-threaded training loop using the default evaluators.
+
+    It targets simplicity of the code and so only the basic features of the
+    ExperimentConfig are supported.
+
+    Arguments:
+      experiment: Definition and configuration of the agent to run.
+      eval_every: After how many actor steps to perform evaluation.
+      num_eval_episodes: How many evaluation episodes to execute at each
+        evaluation step.
+      make_envlogger: Optional callable that wraps the training environment in
+        an envlogger.EnvLogger so that all training interactions are recorded.
+    """
+
+    key = jax.random.PRNGKey(experiment.seed)
+
+    # Create the environment and get its spec.
+    environment = experiment.environment_factory(experiment.seed)
+    environment_spec = experiment.environment_spec or specs.make_environment_spec(
+        environment
+    )
+
+    # Create the networks and policy.
+    networks = experiment.network_factory(environment_spec)
+    policy = config.make_policy(
+        experiment=experiment,
+        networks=networks,
+        environment_spec=environment_spec,
+        evaluation=False,
+    )
+
+    # Create the replay server and grab its address.
+    replay_tables = experiment.builder.make_replay_tables(environment_spec, policy)
+
+    # Disable blocking of inserts by the tables' rate limiters, as this function
+    # executes learning (sampling from the table) and data generation
+    # (inserting into the table) sequentially from the same thread,
+    # so a blocked insert could make the algorithm hang.
+    replay_tables, rate_limiters_max_diff = _disable_insert_blocking(replay_tables)
+
+    replay_server = reverb.Server(replay_tables, port=None)
+    replay_client = reverb.Client(f"localhost:{replay_server.port}")
+
+    # The parent counter allows sharing step counts between the train and eval
+    # loops and the learner, so that it is possible to plot, for example, the
+    # evaluator's return as a function of the number of training episodes.
+    parent_counter = counting.Counter(time_delta=0.0)
+
+    dataset = experiment.builder.make_dataset_iterator(replay_client)
+    # We always use prefetch as it provides an iterator with an additional
+    # 'ready' method.
+    dataset = utils.prefetch(dataset, buffer_size=1)
+
+    # Create actor, adder, and learner for generating, storing, and consuming
+    # data respectively.
+ # NOTE: These are created in reverse order as the actor needs to be given the + # adder and the learner (as a source of variables). + learner_key, key = jax.random.split(key) + learner = experiment.builder.make_learner( + random_key=learner_key, + networks=networks, + dataset=dataset, + logger_fn=experiment.logger_factory, + environment_spec=environment_spec, + replay_client=replay_client, + counter=counting.Counter(parent_counter, prefix="learner", time_delta=0.0), + ) + + adder = experiment.builder.make_adder(replay_client, environment_spec, policy) + + actor_key, key = jax.random.split(key) + actor = experiment.builder.make_actor( + actor_key, policy, environment_spec, variable_source=learner, adder=adder + ) + + # Create the environment loop used for training. + train_counter = counting.Counter(parent_counter, prefix="actor", time_delta=0.0) + train_logger = experiment.logger_factory("actor", train_counter.get_steps_key(), 0) + + checkpointer = None + if experiment.checkpointing is not None: + checkpointer = savers.Checkpointer( + objects_to_save={"learner": learner, "counter": parent_counter}, + time_delta_minutes=experiment.checkpointing.time_delta_minutes, + directory=experiment.checkpointing.directory, + add_uid=experiment.checkpointing.add_uid, + max_to_keep=experiment.checkpointing.max_to_keep, + ) + + # Replace the actor with a LearningActor. This makes sure that every time + # that `update` is called on the actor it checks to see whether there is + # any new data to learn from and if so it runs a learner step. The rate + # at which new data is released is controlled by the replay table's + # rate_limiter which is created by the builder.make_replay_tables call above. + actor = _LearningActor( + actor, learner, dataset, replay_tables, rate_limiters_max_diff, checkpointer + ) + if make_envlogger is not None: + logged_environment = make_envlogger(environment) + else: + logged_environment = environment + + train_loop = acme.EnvironmentLoop( + logged_environment, + actor, + counter=train_counter, + logger=train_logger, + observers=experiment.observers, + ) + + max_num_actor_steps = ( + experiment.max_num_actor_steps + - parent_counter.get_counts().get(train_counter.get_steps_key(), 0) + ) + + if num_eval_episodes == 0: + # No evaluation. Just run the training loop. + train_loop.run(num_steps=max_num_actor_steps) + logged_environment.close() + return + + # Create the evaluation actor and loop. + eval_counter = counting.Counter(parent_counter, prefix="evaluator", time_delta=0.0) + eval_logger = experiment.logger_factory( + "evaluator", eval_counter.get_steps_key(), 0 + ) + eval_policy = config.make_policy( + experiment=experiment, + networks=networks, + environment_spec=environment_spec, + evaluation=True, + ) + eval_actor = experiment.builder.make_actor( + random_key=jax.random.PRNGKey(experiment.seed), + policy=eval_policy, + environment_spec=environment_spec, + variable_source=learner, + ) + eval_loop = acme.EnvironmentLoop( + environment, + eval_actor, + counter=eval_counter, + logger=eval_logger, + observers=experiment.observers, + ) + + steps = 0 + while steps < max_num_actor_steps: + eval_loop.run(num_episodes=num_eval_episodes) + steps += train_loop.run(num_steps=eval_every) + eval_loop.run(num_episodes=num_eval_episodes) + logged_environment.close() + + +class _LearningActor(core.Actor): + """Actor which learns (updates its parameters) when `update` is called. + + This combines a base actor and a learner. 
+    Whenever `update` is called
+    on the wrapping actor, the learner will take a step (e.g. one step of gradient
+    descent) as long as there is data available for training
+    (the provided iterator and replay_tables are used to check for that).
+    Selecting actions and making observations are handled by the base actor.
+    Intended to be used by `run_experiment` only.
+    """
+
+    def __init__(
+        self,
+        actor: core.Actor,
+        learner: core.Learner,
+        iterator: core.PrefetchingIterator,
+        replay_tables: Sequence[reverb.Table],
+        sample_sizes: Sequence[int],
+        checkpointer: Optional[savers.Checkpointer],
+    ):
+        """Initializes _LearningActor.
+
+        Args:
+          actor: Actor to be wrapped.
+          learner: Learner on which step() is to be called when there is data.
+          iterator: Iterator used by the Learner to fetch training data.
+          replay_tables: Collection of tables from which the Learner fetches data
+            through the iterator.
+          sample_sizes: For each table from `replay_tables`, how many elements the
+            table should have available for sampling in order to wait for the
+            `iterator` to prefetch a batch of data. Otherwise more experience
+            needs to be collected by the actor.
+          checkpointer: Checkpointer to save the state on update.
+        """
+        self._actor = actor
+        self._learner = learner
+        self._iterator = iterator
+        self._replay_tables = replay_tables
+        self._sample_sizes = sample_sizes
+        self._learner_steps = 0
+        self._checkpointer = checkpointer
+
+    def select_action(self, observation: types.NestedArray) -> types.NestedArray:
+        return self._actor.select_action(observation)
+
+    def observe_first(self, timestep: dm_env.TimeStep):
+        self._actor.observe_first(timestep)
+
+    def observe(self, action: types.NestedArray, next_timestep: dm_env.TimeStep):
+        self._actor.observe(action, next_timestep)
+
+    def _maybe_train(self):
+        trained = False
+        while True:
+            if self._iterator.ready():
+                self._learner.step()
+                batches = self._iterator.retrieved_elements() - self._learner_steps
+                self._learner_steps += 1
+                assert batches == 1, (
+                    "Learner step must retrieve exactly one element from the iterator"
+                    f" (retrieved {batches}). Otherwise agent can deadlock. Example "
+                    "cause is that your chosen agent's Builder has a `make_learner` "
+                    "factory that prefetches the data but it shouldn't."
+                )
+                trained = True
+            else:
+                # Wait for the iterator to fetch more data from the table(s) only
+                # if there is plenty of data to sample from each table.
+                for table, sample_size in zip(self._replay_tables, self._sample_sizes):
+                    if not table.can_sample(sample_size):
+                        return trained
+                # Let the iterator's prefetching thread get data from the table(s).
+                time.sleep(0.001)
+
+    def update(self):
+        if self._maybe_train():
+            # Update the actor weights only when the learner was updated.
+            self._actor.update()
+        if self._checkpointer:
+            self._checkpointer.save()
+
+
+def _disable_insert_blocking(
+    tables: Sequence[reverb.Table],
+) -> Tuple[Sequence[reverb.Table], Sequence[int]]:
+    """Disables blocking of insert operations for a given collection of tables."""
+    modified_tables = []
+    sample_sizes = []
+    for table in tables:
+        rate_limiter_info = table.info.rate_limiter_info
+        rate_limiter = reverb.rate_limiters.RateLimiter(
+            samples_per_insert=rate_limiter_info.samples_per_insert,
+            min_size_to_sample=rate_limiter_info.min_size_to_sample,
+            min_diff=rate_limiter_info.min_diff,
+            max_diff=sys.float_info.max,
+        )
+        modified_tables.append(table.replace(rate_limiter=rate_limiter))
+        # Target the middle of the rate limiter's insert-sample balance window.
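+        # For example, if the original table was configured with min_diff=0 and
+        # max_diff=1000, the threshold computed below is max(1, 500) = 500.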
+        sample_sizes.append(
+            max(1, int((rate_limiter_info.max_diff - rate_limiter_info.min_diff) / 2))
+        )
+    return modified_tables, sample_sizes
diff --git a/d4rl-generation/requirements.txt b/d4rl-generation/requirements.txt
new file mode 100644
index 00000000..a17beb15
--- /dev/null
+++ b/d4rl-generation/requirements.txt
@@ -0,0 +1,7 @@
+-f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+jax[cuda]
+dm-acme[jax,tf] @ git+https://github.com/deepmind/acme.git
+envlogger
+gym[mujoco]==0.25.1
+dm-control
+mujoco
diff --git a/d4rl-generation/run_sac.py b/d4rl-generation/run_sac.py
new file mode 100644
index 00000000..edf2c1b7
--- /dev/null
+++ b/d4rl-generation/run_sac.py
@@ -0,0 +1,80 @@
+"""Example running SAC on continuous control tasks and generating data via EnvLogger."""
+
+from absl import flags
+from acme import specs
+import tensorflow as tf
+from acme.agents.jax import sac
+from acme.agents.jax.sac import builder
+import helpers
+from absl import app
+from acme.jax import experiments
+import logged_experiment
+import envlogger
+from acme.utils import paths
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("env_name", "gym:HalfCheetah-v4", "Which environment to run.")
+flags.DEFINE_integer("seed", 0, "Random seed.")
+flags.DEFINE_integer("num_steps", 10_000, "Number of env steps to run.")
+flags.DEFINE_integer("eval_every", 5_000, "How often to run evaluation.")
+flags.DEFINE_integer("evaluation_episodes", 10, "Evaluation episodes.")
+flags.DEFINE_string("workdir", None, "Directory for experiment logs.")
+flags.DEFINE_string("dataset_dir", None, "Where to save logged interactions.")
+flags.mark_flag_as_required("workdir")
+
+
+def build_experiment_config():
+    """Builds the SAC experiment config which can be executed in different ways."""
+    # Create an environment, grab the spec, and use it to create networks.
+
+    suite, task = FLAGS.env_name.split(":", 1)
+    environment = helpers.make_environment(suite, task)
+    logger_factory = helpers.get_default_logger_factory(FLAGS.workdir)
+
+    environment_spec = specs.make_environment_spec(environment)
+    network_factory = lambda spec: sac.make_networks(
+        spec, hidden_layer_sizes=(256, 256, 256)
+    )
+
+    # Construct the agent.
+    config = sac.SACConfig(
+        learning_rate=3e-4,
+        n_step=1,
+        min_replay_size=1000,
+        target_entropy=sac.target_entropy_from_env_spec(environment_spec),
+    )
+    sac_builder = builder.SACBuilder(config)
+
+    return experiments.ExperimentConfig(
+        builder=sac_builder,
+        environment_factory=lambda seed: helpers.make_environment(suite, task, seed),
+        network_factory=network_factory,
+        seed=FLAGS.seed,
+        max_num_actor_steps=FLAGS.num_steps,
+        logger_factory=logger_factory,
+    )
+
+
+def wrap_with_envlogger(env, dataset_dir):
+    paths.process_path(dataset_dir, add_uid=False)
+    return envlogger.EnvLogger(env, data_directory=dataset_dir)
+
+
+def main(_):
+    # Hide accelerators from TensorFlow (the agent itself runs on JAX).
+    tf.config.set_visible_devices([], "GPU")
+    config = build_experiment_config()
+    if FLAGS.dataset_dir:
+        make_envlogger = lambda env: wrap_with_envlogger(env, FLAGS.dataset_dir)
+    else:
+        make_envlogger = None
+    logged_experiment.run_experiment(
+        experiment=config,
+        eval_every=FLAGS.eval_every,
+        num_eval_episodes=FLAGS.evaluation_episodes,
+        make_envlogger=make_envlogger,
+    )
+
+
+if __name__ == "__main__":
+    app.run(main)
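+# Example usage (paths are illustrative; the flags are defined above):
+#   python run_sac.py --env_name=gym:HalfCheetah-v4 --workdir=experiment_logs/sac \
+#       --dataset_dir=datasets/halfcheetah_sac
+# The logged Riegeli data can then be converted with:
+#   python convert_dataset.py --dataset_dir=datasets/halfcheetah_sac \
+#       --output_file=halfcheetah_sac.hdf5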