diff --git a/d4rl-generation/.gitignore b/d4rl-generation/.gitignore new file mode 100644 index 00000000..16c6bfd4 --- /dev/null +++ b/d4rl-generation/.gitignore @@ -0,0 +1,165 @@ +experiment_logs/ +datasets/ +logs/ +*.hdf5 + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/
\ No newline at end of file
diff --git a/d4rl-generation/README.md b/d4rl-generation/README.md
new file mode 100644
index 00000000..0f648b82
--- /dev/null
+++ b/d4rl-generation/README.md
@@ -0,0 +1,54 @@
+# Kabuki dataset generation
+
+This repo demonstrates a workflow for creating
+offline datasets compatible with Kabuki.
+
+It uses dm-acme for online RL training. The data
+is logged via EnvLogger, and the logged data is then post-processed
+into an HDF5 file. However, this is only a proof of concept, and
+neither dm-acme nor EnvLogger constitutes part of the specification.
+
+## Spec for the new HDF5 dataset in Kabuki
+We describe the standard used for the HDF5 datasets in D4RL-V2.
+Unlike the datasets shipped with previous versions of D4RL,
+the new datasets follow a formalized standard.
+
+The new datasets will continue to use HDF5 as the storage format. Although HDF5 files
+do not naturally provide a mechanism for storing distinct episodes as separate entries,
+HDF5 is a widely adopted standard format that can easily be used across different
+frameworks and languages.
+
+Previous iterations of the D4RL datasets have some outstanding issues.
+One notable issue is that terminal observations are not captured.
+While the omission of terminal observations is not really problematic
+for offline actor-critic algorithms such as CQL, BCQ, and IQL,
+it poses issues for researchers
+working on offline imitation learning,
+where proper handling of terminal transitions is known to significantly influence
+performance.
+Therefore, terminal observations are recorded in the new version of the datasets.
+More generally, the new datasets should capture as much information from the original environment as possible.
+
+In the new version, the dataset follows the convention introduced by the RLDS project.
+The agent's experience is stored in the dataset as a sequence of episodes, each consisting of a variable number of steps. The steps are stored as a flattened dictionary of arrays
+in the state-action-reward (SAR) alignment. Concretely, each step consists of
+
+* is_first, is_last: whether this step is the first/the last step of an episode.
+* observation: observation for the step.
+* action: action taken after observing the `observation` of the step.
+* reward: reward obtained after applying the action in the step.
+* is_terminal: whether the observation is terminal (on the last step of an episode, is_terminal = False indicates that the episode was truncated).
+* discount: discount factor at this step. This may be unfamiliar to gym.Env users but
+is consistent with the discount used in dm_env. In particular, discount = 0 indicates that
+the *next* step is terminal, and 1.0 otherwise.
+
+Refer to https://github.com/google-research/rlds for a more detailed description.
+
+## Generating datasets
+While HDF5 is used as the final format for storing benchmark datasets in D4RL,
+it is not used during the data collection process. In this repo,
+we demonstrate using EnvLogger to record the interactions made by an
+RL agent during online learning. The logged experience is then post-processed
+(and potentially stitched with other datasets) to produce the final HDF5 files.
+We provide `convert_dataset.py` to show how this can be done by converting
+from EnvLogger's Riegeli file format to a single HDF5 file.
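+
+Once generated, the HDF5 file can be read back with plain `h5py`. The snippet
+below is a minimal sketch of recovering episode boundaries from the flattened
+step arrays; the file name and the slicing logic are illustrative and not part
+of the spec.
+
+```python
+import h5py
+import numpy as np
+
+with h5py.File("dataset.hdf5", "r") as f:
+    steps = {key: f[key][:] for key in f.keys()}
+
+# Episodes start wherever is_first is set; slice the flat arrays accordingly.
+starts = np.flatnonzero(steps["is_first"])
+ends = np.append(starts[1:], len(steps["is_first"]))
+episodes = [{k: v[s:e] for k, v in steps.items()} for s, e in zip(starts, ends)]
+print(f"Loaded {len(episodes)} episodes")
+```
+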
+Alternatively, we can use EnvLogger's RLDS backend to generate an RLDS-compatible TensorFlow dataset and convert that to the HDF5 file.
\ No newline at end of file
diff --git a/d4rl-generation/convert_dataset.py b/d4rl-generation/convert_dataset.py
new file mode 100644
index 00000000..c279f79d
--- /dev/null
+++ b/d4rl-generation/convert_dataset.py
@@ -0,0 +1,77 @@
+"""Convert dataset logged by the EnvLogger Riegeli backend to HDF5."""
+from envlogger import reader
+import numpy as np
+import h5py
+import tree
+from absl import flags
+from absl import app
+
+_DATASET_DIR = flags.DEFINE_string("dataset_dir", None, "Directory containing the EnvLogger data.")
+_OUTPUT_FILE = flags.DEFINE_string("output_file", "dataset.hdf5", "Path of the output HDF5 file.")
+flags.mark_flag_as_required("dataset_dir")
+
+
+def _convert_envlogger_episode_to_rlds_steps(episode):
+    """Convert an episode of envlogger.StepData to RLDS-compatible steps."""
+    observations = np.stack([step.timestep.observation for step in episode])
+    # RLDS uses the SAR alignment while EnvLogger uses ARS.
+    # The following lines convert from the ARS to the SAR alignment.
+    actions = np.stack([step.action for step in episode[1:]])
+    # Add a dummy action to the last step, which contains the terminal observation.
+    actions = np.concatenate(
+        [actions, np.expand_dims(np.zeros_like(actions[0]), axis=0)]
+    )
+    # Add a dummy reward to the last step, which contains the terminal observation.
+    rewards = np.stack([step.timestep.reward for step in episode[1:]])
+    rewards = np.concatenate(
+        [rewards, np.expand_dims(np.zeros_like(rewards[0]), axis=0)]
+    )
+    # Add a dummy discount to the last step, which contains the terminal observation.
+    discounts = np.stack([step.timestep.discount for step in episode[1:]])
+    discounts = np.concatenate(
+        [discounts, np.expand_dims(np.zeros_like(discounts[0]), axis=0)]
+    )
+    # The is_first/is_last/is_terminal flags are per-step, so no realignment is needed.
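+    # Worked example of the shift above (illustrative): an episode logged as
+    # EnvLogger records (timestep, action)
+    #     (reset, unused), (t1, a0), (t2, a1)
+    # becomes the SAR steps
+    #     (reset.observation, a0, t1.reward), (t1.observation, a1, t2.reward),
+    #     (t2.observation, dummy action, dummy reward),
+    # i.e. the final step keeps the terminal observation with padded fields.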
+    is_first = np.array([step.timestep.first() for step in episode])
+    is_last = np.array([step.timestep.last() for step in episode])
+    is_terminal = np.array(
+        [step.timestep.last() and step.timestep.discount == 0.0 for step in episode]
+    )
+    return {
+        "observation": observations,
+        "action": actions,
+        "reward": rewards,
+        "discount": discounts,
+        "is_first": is_first,
+        "is_last": is_last,
+        "is_terminal": is_terminal,
+    }
+
+
+def write_to_hdf5_file(episodes, filename):
+    """Write episodes in EnvLogger format to an HDF5 file."""
+    all_steps = []
+    for episode in episodes:
+        all_steps.append(_convert_envlogger_episode_to_rlds_steps(episode))
+    all_steps = tree.map_structure(lambda *xs: np.concatenate(xs), *all_steps)
+    f = h5py.File(filename, "w")
+    for key in all_steps.keys():
+        f.create_dataset(key, data=all_steps[key], compression="gzip")
+    f.close()
+
+
+def main(_):
+    output_file = _OUTPUT_FILE.value
+    with reader.Reader(data_directory=_DATASET_DIR.value) as r:
+        print(r.observation_spec())
+        print(r.metadata())
+        write_to_hdf5_file(r.episodes, output_file)
+    # Inspect the created HDF5 file.
+    f = h5py.File(output_file, "r")
+    for k in f:
+        print(k, f[k].shape)
+    f.close()
+
+
+if __name__ == "__main__":
+    app.run(main)
diff --git a/d4rl-generation/helpers.py b/d4rl-generation/helpers.py
new file mode 100644
index 00000000..f0ef8afb
--- /dev/null
+++ b/d4rl-generation/helpers.py
@@ -0,0 +1,69 @@
+"""Shared helpers for rl_continuous experiments."""
+from typing import Optional
+from acme import wrappers
+import dm_env
+import gym
+from acme.utils import loggers as acme_loggers
+from absl import logging
+
+_VALID_TASK_SUITES = ("gym", "control")
+
+
+def make_environment(suite: str, task: str, seed=None) -> dm_env.Environment:
+    """Makes the requested continuous control environment.
+
+    Args:
+      suite: One of 'gym' or 'control'.
+      task: Task to load. If `suite` is 'control', the task must be formatted as
+        f'{domain_name}:{task_name}'.
+      seed: Optional seed passed to the environment.
+
+    Returns:
+      An environment satisfying the dm_env interface expected by Acme agents.
+    """
+
+    if suite not in _VALID_TASK_SUITES:
+        raise ValueError(
+            f"Unsupported suite: {suite}. Expected one of {_VALID_TASK_SUITES}"
+        )
+
+    if suite == "gym":
+        env = gym.make(task)
+        env.seed(seed)
+        # Make sure the environment obeys the dm_env.Environment interface.
+        env = wrappers.GymWrapper(env)
+
+    elif suite == "control":
+        # Load dm_control.suite lazily to avoid requiring a MuJoCo license when it is not used.
+        from dm_control import suite as dm_suite  # pylint: disable=g-import-not-at-top
+
+        domain_name, task_name = task.split(":")
+        env = dm_suite.load(domain_name, task_name, task_kwargs={"random": seed})
+        env = wrappers.ConcatObservationWrapper(env)
+
+    # Wrap the environment so the expected continuous action spec is [-1, 1].
+    # Note: this is a no-op on 'control' tasks.
+    env = wrappers.CanonicalSpecWrapper(env, clip=True)
+    env = wrappers.SinglePrecisionWrapper(env)
+    return env
+
+
+def get_default_logger_factory(workdir: str, save_data=True, time_delta: float = 1.0):
+    """Create a custom logger factory for use in the experiment."""
+
+    def logger_factory(label: str, steps_key: Optional[str] = None, task_id: int = 0):
+        del steps_key, task_id
+
+        print_fn = logging.info
+        terminal_logger = acme_loggers.TerminalLogger(label=label, print_fn=print_fn)
+
+        loggers = [terminal_logger]
+
+        if save_data:
+            loggers.append(acme_loggers.CSVLogger(workdir, label=label))
+
+        # Dispatch to all writers, then filter out Nones and throttle by time.
+        logger = acme_loggers.Dispatcher(loggers, acme_loggers.to_numpy)
+        logger = acme_loggers.NoneFilter(logger)
+        logger = acme_loggers.TimeFilter(logger, time_delta)
+
+        return logger
+
+    return logger_factory
diff --git a/d4rl-generation/logged_experiment.py b/d4rl-generation/logged_experiment.py
new file mode 100644
index 00000000..16f1486a
--- /dev/null
+++ b/d4rl-generation/logged_experiment.py
@@ -0,0 +1,292 @@
+"""Runners used for executing local agents.
+
+This is a fork of run_experiment.py from Acme.
+The only additional feature added by the fork is the option to
+wrap the training environment with an envlogger.EnvLogger wrapper.
+Note that the wrapper is not applied to the evaluation environment, despite
+the two loops sharing the same underlying environment instance.
+"""
+
+import sys
+import time
+from typing import Optional, Sequence, Tuple, Callable
+
+import acme
+from acme import core
+from acme import specs
+from acme import types
+from acme.jax import utils
+from acme.jax.experiments import config
+from acme.tf import savers
+from acme.utils import counting
+import dm_env
+import jax
+import reverb
+import envlogger
+
+
+def run_experiment(
+    experiment: config.ExperimentConfig,
+    eval_every: int = 100,
+    num_eval_episodes: int = 1,
+    make_envlogger: Optional[
+        Callable[[dm_env.Environment], envlogger.EnvLogger]
+    ] = None,
+):
+    """Runs a simple, single-threaded training loop using the default evaluators.
+
+    It targets simplicity of the code and so only the basic features of the
+    ExperimentConfig are supported.
+
+    Arguments:
+      experiment: Definition and configuration of the agent to run.
+      eval_every: After how many actor steps to perform evaluation.
+      num_eval_episodes: How many evaluation episodes to execute at each
+        evaluation step.
+      make_envlogger: Optional callable that wraps the training environment in
+        an envlogger.EnvLogger so that all training interactions are recorded.
+    """
+
+    key = jax.random.PRNGKey(experiment.seed)
+
+    # Create the environment and get its spec.
+    environment = experiment.environment_factory(experiment.seed)
+    environment_spec = experiment.environment_spec or specs.make_environment_spec(
+        environment
+    )
+
+    # Create the networks and policy.
+    networks = experiment.network_factory(environment_spec)
+    policy = config.make_policy(
+        experiment=experiment,
+        networks=networks,
+        environment_spec=environment_spec,
+        evaluation=False,
+    )
+
+    # Create the replay server and grab its address.
+    replay_tables = experiment.builder.make_replay_tables(environment_spec, policy)
+
+    # Disable blocking of inserts by the tables' rate limiters, as this function
+    # executes learning (sampling from the table) and data generation
+    # (inserting into the table) sequentially from the same thread,
+    # so a blocked insert could make the algorithm hang.
+    replay_tables, rate_limiters_max_diff = _disable_insert_blocking(replay_tables)
+
+    replay_server = reverb.Server(replay_tables, port=None)
+    replay_client = reverb.Client(f"localhost:{replay_server.port}")
+
+    # The parent counter allows sharing step counts between the train and eval
+    # loops and the learner, so that it is possible to plot, for example, the
+    # evaluator's return as a function of the number of training episodes.
+    parent_counter = counting.Counter(time_delta=0.0)
+
+    dataset = experiment.builder.make_dataset_iterator(replay_client)
+    # We always use prefetch as it provides an iterator with an additional
+    # 'ready' method.
+    dataset = utils.prefetch(dataset, buffer_size=1)
+
+    # Create actor, adder, and learner for generating, storing, and consuming
+    # data respectively.
+ # NOTE: These are created in reverse order as the actor needs to be given the + # adder and the learner (as a source of variables). + learner_key, key = jax.random.split(key) + learner = experiment.builder.make_learner( + random_key=learner_key, + networks=networks, + dataset=dataset, + logger_fn=experiment.logger_factory, + environment_spec=environment_spec, + replay_client=replay_client, + counter=counting.Counter(parent_counter, prefix="learner", time_delta=0.0), + ) + + adder = experiment.builder.make_adder(replay_client, environment_spec, policy) + + actor_key, key = jax.random.split(key) + actor = experiment.builder.make_actor( + actor_key, policy, environment_spec, variable_source=learner, adder=adder + ) + + # Create the environment loop used for training. + train_counter = counting.Counter(parent_counter, prefix="actor", time_delta=0.0) + train_logger = experiment.logger_factory("actor", train_counter.get_steps_key(), 0) + + checkpointer = None + if experiment.checkpointing is not None: + checkpointer = savers.Checkpointer( + objects_to_save={"learner": learner, "counter": parent_counter}, + time_delta_minutes=experiment.checkpointing.time_delta_minutes, + directory=experiment.checkpointing.directory, + add_uid=experiment.checkpointing.add_uid, + max_to_keep=experiment.checkpointing.max_to_keep, + ) + + # Replace the actor with a LearningActor. This makes sure that every time + # that `update` is called on the actor it checks to see whether there is + # any new data to learn from and if so it runs a learner step. The rate + # at which new data is released is controlled by the replay table's + # rate_limiter which is created by the builder.make_replay_tables call above. + actor = _LearningActor( + actor, learner, dataset, replay_tables, rate_limiters_max_diff, checkpointer + ) + if make_envlogger is not None: + logged_environment = make_envlogger(environment) + else: + logged_environment = environment + + train_loop = acme.EnvironmentLoop( + logged_environment, + actor, + counter=train_counter, + logger=train_logger, + observers=experiment.observers, + ) + + max_num_actor_steps = ( + experiment.max_num_actor_steps + - parent_counter.get_counts().get(train_counter.get_steps_key(), 0) + ) + + if num_eval_episodes == 0: + # No evaluation. Just run the training loop. + train_loop.run(num_steps=max_num_actor_steps) + logged_environment.close() + return + + # Create the evaluation actor and loop. + eval_counter = counting.Counter(parent_counter, prefix="evaluator", time_delta=0.0) + eval_logger = experiment.logger_factory( + "evaluator", eval_counter.get_steps_key(), 0 + ) + eval_policy = config.make_policy( + experiment=experiment, + networks=networks, + environment_spec=environment_spec, + evaluation=True, + ) + eval_actor = experiment.builder.make_actor( + random_key=jax.random.PRNGKey(experiment.seed), + policy=eval_policy, + environment_spec=environment_spec, + variable_source=learner, + ) + eval_loop = acme.EnvironmentLoop( + environment, + eval_actor, + counter=eval_counter, + logger=eval_logger, + observers=experiment.observers, + ) + + steps = 0 + while steps < max_num_actor_steps: + eval_loop.run(num_episodes=num_eval_episodes) + steps += train_loop.run(num_steps=eval_every) + eval_loop.run(num_episodes=num_eval_episodes) + logged_environment.close() + + +class _LearningActor(core.Actor): + """Actor which learns (updates its parameters) when `update` is called. + + This combines a base actor and a learner. 
+    Whenever `update` is called
+    on the wrapping actor, the learner will take a step (e.g. one step of gradient
+    descent) as long as there is data available for training
+    (the provided iterator and replay_tables are used to check for that).
+    Selecting actions and making observations are handled by the base actor.
+    Intended to be used by `run_experiment` only.
+    """
+
+    def __init__(
+        self,
+        actor: core.Actor,
+        learner: core.Learner,
+        iterator: core.PrefetchingIterator,
+        replay_tables: Sequence[reverb.Table],
+        sample_sizes: Sequence[int],
+        checkpointer: Optional[savers.Checkpointer],
+    ):
+        """Initializes _LearningActor.
+
+        Args:
+          actor: Actor to be wrapped.
+          learner: Learner on which step() is to be called when there is data.
+          iterator: Iterator used by the Learner to fetch training data.
+          replay_tables: Collection of tables from which the Learner fetches data
+            through the iterator.
+          sample_sizes: For each table from `replay_tables`, how many elements the
+            table should have available for sampling in order to wait for the
+            `iterator` to prefetch a batch of data. Otherwise more experience
+            needs to be collected by the actor.
+          checkpointer: Checkpointer to save the state on update.
+        """
+        self._actor = actor
+        self._learner = learner
+        self._iterator = iterator
+        self._replay_tables = replay_tables
+        self._sample_sizes = sample_sizes
+        self._learner_steps = 0
+        self._checkpointer = checkpointer
+
+    def select_action(self, observation: types.NestedArray) -> types.NestedArray:
+        return self._actor.select_action(observation)
+
+    def observe_first(self, timestep: dm_env.TimeStep):
+        self._actor.observe_first(timestep)
+
+    def observe(self, action: types.NestedArray, next_timestep: dm_env.TimeStep):
+        self._actor.observe(action, next_timestep)
+
+    def _maybe_train(self):
+        trained = False
+        while True:
+            if self._iterator.ready():
+                self._learner.step()
+                batches = self._iterator.retrieved_elements() - self._learner_steps
+                self._learner_steps += 1
+                assert batches == 1, (
+                    "Learner step must retrieve exactly one element from the iterator"
+                    f" (retrieved {batches}). Otherwise agent can deadlock. Example "
+                    "cause is that your chosen agent's Builder has a `make_learner` "
+                    "factory that prefetches the data but it shouldn't."
+                )
+                trained = True
+            else:
+                # Wait for the iterator to fetch more data from the table(s) only
+                # if there is plenty of data to sample from each table.
+                for table, sample_size in zip(self._replay_tables, self._sample_sizes):
+                    if not table.can_sample(sample_size):
+                        return trained
+                # Let the iterator's prefetching thread get data from the table(s).
+                time.sleep(0.001)
+
+    def update(self):
+        if self._maybe_train():
+            # Update the actor weights only when the learner was updated.
+            self._actor.update()
+        if self._checkpointer:
+            self._checkpointer.save()
+
+
+def _disable_insert_blocking(
+    tables: Sequence[reverb.Table],
+) -> Tuple[Sequence[reverb.Table], Sequence[int]]:
+    """Disables blocking of insert operations for a given collection of tables."""
+    modified_tables = []
+    sample_sizes = []
+    for table in tables:
+        rate_limiter_info = table.info.rate_limiter_info
+        rate_limiter = reverb.rate_limiters.RateLimiter(
+            samples_per_insert=rate_limiter_info.samples_per_insert,
+            min_size_to_sample=rate_limiter_info.min_size_to_sample,
+            min_diff=rate_limiter_info.min_diff,
+            max_diff=sys.float_info.max,
+        )
+        modified_tables.append(table.replace(rate_limiter=rate_limiter))
+        # Target the middle of the rate limiter's insert-sample balance window.
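+        # For example, if the original table was configured with min_diff=0 and
+        # max_diff=1000, the threshold computed below is max(1, 500) = 500.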
+        sample_sizes.append(
+            max(1, int((rate_limiter_info.max_diff - rate_limiter_info.min_diff) / 2))
+        )
+    return modified_tables, sample_sizes
diff --git a/d4rl-generation/requirements.txt b/d4rl-generation/requirements.txt
new file mode 100644
index 00000000..a17beb15
--- /dev/null
+++ b/d4rl-generation/requirements.txt
@@ -0,0 +1,7 @@
+-f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+jax[cuda]
+dm-acme[jax,tf] @ git+https://github.com/deepmind/acme.git
+envlogger
+gym[mujoco]==0.25.1
+dm-control
+mujoco
diff --git a/d4rl-generation/run_sac.py b/d4rl-generation/run_sac.py
new file mode 100644
index 00000000..edf2c1b7
--- /dev/null
+++ b/d4rl-generation/run_sac.py
@@ -0,0 +1,80 @@
+"""Example running SAC on continuous control tasks and generating data via EnvLogger."""
+
+from absl import flags
+from acme import specs
+import tensorflow as tf
+from acme.agents.jax import sac
+from acme.agents.jax.sac import builder
+import helpers
+from absl import app
+from acme.jax import experiments
+import logged_experiment
+import envlogger
+from acme.utils import paths
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("env_name", "gym:HalfCheetah-v4", "Which environment to run.")
+flags.DEFINE_integer("seed", 0, "Random seed.")
+flags.DEFINE_integer("num_steps", 10_000, "Number of env steps to run.")
+flags.DEFINE_integer("eval_every", 5_000, "How often to run evaluation.")
+flags.DEFINE_integer("evaluation_episodes", 10, "Evaluation episodes.")
+flags.DEFINE_string("workdir", None, "Directory for experiment logs.")
+flags.DEFINE_string("dataset_dir", None, "Where to save logged interactions.")
+flags.mark_flag_as_required("workdir")
+
+
+def build_experiment_config():
+    """Builds the SAC experiment config which can be executed in different ways."""
+    # Create an environment, grab the spec, and use it to create networks.
+
+    suite, task = FLAGS.env_name.split(":", 1)
+    environment = helpers.make_environment(suite, task)
+    logger_factory = helpers.get_default_logger_factory(FLAGS.workdir)
+
+    environment_spec = specs.make_environment_spec(environment)
+    network_factory = lambda spec: sac.make_networks(
+        spec, hidden_layer_sizes=(256, 256, 256)
+    )
+
+    # Construct the agent.
+    config = sac.SACConfig(
+        learning_rate=3e-4,
+        n_step=1,
+        min_replay_size=1000,
+        target_entropy=sac.target_entropy_from_env_spec(environment_spec),
+    )
+    sac_builder = builder.SACBuilder(config)
+
+    return experiments.ExperimentConfig(
+        builder=sac_builder,
+        environment_factory=lambda seed: helpers.make_environment(suite, task, seed),
+        network_factory=network_factory,
+        seed=FLAGS.seed,
+        max_num_actor_steps=FLAGS.num_steps,
+        logger_factory=logger_factory,
+    )
+
+
+def wrap_with_envlogger(env, dataset_dir):
+    paths.process_path(dataset_dir, add_uid=False)
+    return envlogger.EnvLogger(env, data_directory=dataset_dir)
+
+
+def main(_):
+    # Hide accelerators from TensorFlow (the agent itself runs on JAX).
+    tf.config.set_visible_devices([], "GPU")
+    config = build_experiment_config()
+    if FLAGS.dataset_dir:
+        make_envlogger = lambda env: wrap_with_envlogger(env, FLAGS.dataset_dir)
+    else:
+        make_envlogger = None
+    logged_experiment.run_experiment(
+        experiment=config,
+        eval_every=FLAGS.eval_every,
+        num_eval_episodes=FLAGS.evaluation_episodes,
+        make_envlogger=make_envlogger,
+    )
+
+
+if __name__ == "__main__":
+    app.run(main)
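+# Example usage (paths are illustrative; the flags are defined above):
+#   python run_sac.py --env_name=gym:HalfCheetah-v4 --workdir=experiment_logs/sac \
+#       --dataset_dir=datasets/halfcheetah_sac
+# The logged Riegeli data can then be converted with:
+#   python convert_dataset.py --dataset_dir=datasets/halfcheetah_sac \
+#       --output_file=halfcheetah_sac.hdf5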