Pettingzoo wrapper changed to gymnasium interface #6211

Open · wants to merge 11 commits into base: develop

8 changes: 6 additions & 2 deletions .pre-commit-config.yaml
@@ -56,9 +56,13 @@ repos:
(?x)^(
.*cs.meta|
.*.css|
.*.meta
.*.meta|
.*.asset|
.*.prefab|
.*.unity|
.*.json
)$
args: [--fix=lf]
args: [--fix=crlf]

- id: trailing-whitespace
name: trailing-whitespace-markdown
4 changes: 2 additions & 2 deletions colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb
@@ -161,8 +161,8 @@
"from pathlib import Path\n",
"from typing import Callable, Any\n",
"\n",
"import gym\n",
"from gym import Env\n",
"import gymnasium as gym\n",
"from gymnasium import Env\n",
"\n",
"from stable_baselines3 import PPO\n",
"from stable_baselines3.common.vec_env import VecMonitor, VecEnv, SubprocVecEnv\n",
19 changes: 12 additions & 7 deletions docs/Python-Gym-API-Documentation.md
@@ -59,18 +59,22 @@ Environment initialization
#### reset

```python
| reset() -> Union[List[np.ndarray], np.ndarray]
| reset(*, seed: int | None = None, options: dict[str, Any] | None = None) -> Tuple[np.ndarray, Dict]
```

Resets the state of the environment and returns an initial observation.
Returns: observation (object/list): the initial observation of the
space.
Resets the state of the environment and returns an initial observation and info.

**Returns**:

- `observation` _object/list_ - the initial observation of the
space.
- `info` _dict_ - contains auxiliary diagnostic information.

<a name="mlagents_envs.envs.unity_gym_env.UnityToGymWrapper.step"></a>
#### step

```python
| step(action: List[Any]) -> GymStepResult
| step(action: Any) -> GymStepResult
```

Run one timestep of the environment's dynamics. When end of
@@ -86,14 +90,15 @@ Accepts an action and returns a tuple (observation, reward, done, info).

- `observation` _object/list_ - agent's observation of the current environment
reward (float/list) : amount of reward returned after previous action
- `done` _boolean/list_ - whether the episode has ended.
- `terminated` _boolean/list_ - whether the episode has ended by termination.
- `truncated` _boolean/list_ - whether the episode has ended by truncation.
- `info` _dict_ - contains auxiliary diagnostic information.

<a name="mlagents_envs.envs.unity_gym_env.UnityToGymWrapper.render"></a>
#### render

```python
| render(mode="rgb_array")
| render()
```

Return the latest visual observations.
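
A minimal usage sketch of the wrapper under the updated gymnasium-style interface. The `"3DBall"` build path is a placeholder, and the five-element step return is assumed from the `terminated`/`truncated` fields documented above:

```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper

# Placeholder path to an environment build; replace with your own binary.
unity_env = UnityEnvironment("3DBall")
env = UnityToGymWrapper(unity_env)

observation, info = env.reset(seed=42)  # gymnasium-style (obs, info) pair
for _ in range(100):
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        observation, info = env.reset()
env.close()
```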
2 changes: 1 addition & 1 deletion docs/Python-Gym-API.md
@@ -93,7 +93,7 @@ observation, a single discrete action and a single Agent in the scene.
Add the following code to the `train_unity.py` file:

```python
import gym
import gymnasium as gym

from baselines import deepq
from baselines import logger
23 changes: 7 additions & 16 deletions docs/Python-PettingZoo-API-Documentation.md
@@ -21,7 +21,6 @@
* [action\_space](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.action_space)
* [side\_channel](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.side_channel)
* [reset](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.reset)
* [seed](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.seed)
* [render](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.render)
* [close](#mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.close)

@@ -137,7 +136,7 @@ Initializes a Unity Parallel environment wrapper.
#### reset

```python
| reset() -> Dict[str, Any]
| reset(seed: int | None = None, options: dict | None = None) -> Tuple[Dict[str, Any], Dict[str, Dict]]
```

Resets the environment.
@@ -207,32 +206,24 @@ of an environment with `env.side_channel[<name-of-channel>]`.
#### reset

```python
| reset()
| reset(seed: int | None = None, options: dict | None = None) -> Any
```

Resets the environment.

<a name="mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.seed"></a>
#### seed

```python
| seed(seed=None)
```

Reseeds the environment (making the resulting environment deterministic).
`reset()` must be called after `seed()`, and before `step()`.

<a name="mlagents_envs.envs.unity_pettingzoo_base_env.UnityPettingzooBaseEnv.render"></a>
#### render

```python
| render(mode="human")
| render()
```

NOT SUPPORTED.

Displays a rendered frame from the environment, if supported.
Alternate render modes in the default environments are `'rgb_array'`
Renders the environment as specified by self.render_mode, if supported.

Render mode can be `human` to display a window.
Other render modes in the default environments are `'rgb_array'`
which returns a numpy array and is supported by all environments outside of classic,
and `'ansi'` which returns the strings printed (specific to classic environments).

6 changes: 3 additions & 3 deletions docs/Python-PettingZoo-API.md
@@ -25,13 +25,13 @@ Here's an example of interacting with wrapped environment:

```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.envs import UnityToPettingZooWrapper
from mlagents_envs.envs.unity_aec_env import UnityAECEnv

unity_env = UnityEnvironment("StrikersVsGoalie")
env = UnityToPettingZooWrapper(unity_env)
env = UnityAECEnv(unity_env)
env.reset()
for agent in env.agent_iter():
observation, reward, done, info = env.last()
observation, reward, terminated, truncated, info = env.last()
action = policy(observation, agent)
env.step(action)
```
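
For comparison, a minimal sketch of the parallel API. The module path `mlagents_envs.envs.unity_parallel_env`, the shrinking `agents` list, and the terminations/truncations step return follow PettingZoo conventions and are assumptions here; `policy` is a user-supplied function, as in the example above:

```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.envs.unity_parallel_env import UnityParallelEnv

unity_env = UnityEnvironment("StrikersVsGoalie")
env = UnityParallelEnv(unity_env)

observations, infos = env.reset(seed=0)
while env.agents:
    # One action per live agent, chosen by a user-supplied policy.
    actions = {agent: policy(observations[agent], agent) for agent in env.agents}
    observations, rewards, terminations, truncations, infos = env.step(actions)
env.close()
```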
37 changes: 29 additions & 8 deletions ml-agents-envs/README.md
@@ -12,14 +12,6 @@ The LLAPI is used by the trainer implementation in `mlagents`.
`mlagents_envs` can be used independently of `mlagents` for Python
communication.

## Installation

Install the `mlagents_envs` package with:

```sh
python -m pip install mlagents_envs==1.1.0
```

## Usage & More Information

See
@@ -42,3 +34,32 @@ scene with the ML-Agents SDK, check out the main
- Communication between Unity and the Python `UnityEnvironment` is not secure.
- On Linux, ports are not released immediately after the communication closes.
As such, you cannot reuse ports right after closing a `UnityEnvironment`.

## Development and publishing (Wargaming artifactory)

Since this package no longer seems to be maintained by the official developers, we have forked it to the Wargaming GitLab and maintain it there.
Publishing is done via the [Wargaming artifactory](https://ed.artifactory.wgdp.io:443/artifactory/api/pypi/mlopsbi-pypi/simple).

To contribute to the `mlagents_envs` package, please work on a branch and open a merge request to `master` once it is ready.
Once the merge request is approved and merged into the `master` branch, a GitLab pipeline automatically creates a new git tag and publishes the new version to the Wargaming artifactory.

## Installation (Wargaming artifactory)

Since publishing is done via the Wargaming artifactory, you can use this package as a dependency by adding the following to your `pyproject.toml`:

```toml
[tool.poetry.dependencies]
mlagents-envs = { version = "^0.1", source = "artifactory" }

[[tool.poetry.source]]
name = "artifactory"
url = "https://ed.artifactory.wgdp.io:443/artifactory/api/pypi/mlopsbi-pypi/simple"
priority = "explicit"
```


Or you can install the `mlagents_envs` package from the Wargaming artifactory using pip:

```bash
pip install mlagents-envs --extra-index-url https://ed.artifactory.wgdp.io:443/artifactory/api/pypi/mlopsbi-pypi/simple
```
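
After installation, a quick import check confirms the package resolves from the extra index (assuming the installed `mlagents_envs` exposes `__version__`, as released builds do):

```python
# Minimal post-install smoke test.
import mlagents_envs

print(mlagents_envs.__version__)
```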
46 changes: 46 additions & 0 deletions ml-agents-envs/mlagents_envs/base_env.py
@@ -138,6 +138,30 @@ def __getitem__(self, agent_id: AgentId) -> DecisionStep:
def __iter__(self) -> Iterator[Any]:
yield from self.agent_id

def __add__(self, other: "DecisionSteps") -> "DecisionSteps":
assert isinstance(other, DecisionSteps)

combined_terminal_steps = DecisionSteps(
list(np.hstack([self.obs, other.obs])),
np.hstack([self.reward, other.reward]),
np.hstack([self.agent_id, other.agent_id]),
list(np.hstack([self.action_mask, other.action_mask]))
if self.action_mask or other.action_mask
else None,
np.hstack([self.group_id, other.group_id]),
np.hstack([self.group_reward, other.group_reward]),
)
combined_terminal_steps._agent_id_to_index = {
**self.agent_id_to_index,
# shift index of added termination steps because of appending
**{
agent_id: index + len(self)
for agent_id, index in other.agent_id_to_index.items()
},
}

return combined_terminal_steps

@staticmethod
def empty(spec: "BehaviorSpec") -> "DecisionSteps":
"""
@@ -245,6 +269,28 @@ def __getitem__(self, agent_id: AgentId) -> TerminalStep:
def __iter__(self) -> Iterator[Any]:
yield from self.agent_id

def __add__(self, other: "TerminalSteps") -> "TerminalSteps":
assert isinstance(other, TerminalSteps)

combined_terminal_steps = TerminalSteps(
list(np.hstack([self.obs, other.obs])),
np.hstack([self.reward, other.reward]),
np.hstack([self.interrupted, other.interrupted]),
np.hstack([self.agent_id, other.agent_id]),
np.hstack([self.group_id, other.group_id]),
np.hstack([self.group_reward, other.group_reward]),
)
combined_terminal_steps._agent_id_to_index = {
**self.agent_id_to_index,
# shift index of added termination steps because of appending
**{
agent_id: index + len(self)
for agent_id, index in other.agent_id_to_index.items()
},
}

return combined_terminal_steps

@staticmethod
def empty(spec: "BehaviorSpec") -> "TerminalSteps":
"""
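
The `__add__` overloads above let two step batches for the same behavior be concatenated while keeping per-agent lookup consistent. A rough illustration of what this enables; arguments are passed positionally in the order the overload itself uses, and the example is a sketch rather than a specification of the class:

```python
import numpy as np

from mlagents_envs.base_env import DecisionSteps

# Two one-agent batches, each with a single 2-dimensional vector observation.
first = DecisionSteps(
    [np.zeros((1, 2), dtype=np.float32)],  # obs
    np.array([0.1], dtype=np.float32),     # reward
    np.array([0]),                         # agent_id
    None,                                  # action_mask
    np.array([0]),                         # group_id
    np.array([0.0], dtype=np.float32),     # group_reward
)
second = DecisionSteps(
    [np.ones((1, 2), dtype=np.float32)],
    np.array([0.2], dtype=np.float32),
    np.array([1]),
    None,
    np.array([0]),
    np.array([0.0], dtype=np.float32),
)

combined = first + second
assert combined.agent_id.tolist() == [0, 1]
# Agent 1 keeps its own reward; its lookup index was shifted by len(first).
assert combined.reward[1] == np.float32(0.2)
```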
36 changes: 28 additions & 8 deletions ml-agents-envs/mlagents_envs/envs/env_helpers.py
@@ -17,7 +17,11 @@ def _unwrap_batch_steps(batch_steps, behavior_name):
termination_id = [
_behavior_to_agent_id(behavior_name, i) for i in termination_batch.agent_id
]
agents = decision_id + termination_id
agents = decision_id
for id in termination_id:
if id not in agents:
agents.append(id)

obs = {
agent_id: [batch_obs[i] for batch_obs in termination_batch.obs]
for i, agent_id in enumerate(termination_id)
@@ -40,30 +44,46 @@
}
)
obs = {k: v if len(v) > 1 else v[0] for k, v in obs.items()}
dones = {agent_id: True for agent_id in termination_id}
dones.update({agent_id: False for agent_id in decision_id})
rewards = {
agent_id: termination_batch.reward[i]
for i, agent_id in enumerate(termination_id)
agent_id: decision_batch.reward[i] for i, agent_id in enumerate(decision_id)
}
rewards.update(
{agent_id: decision_batch.reward[i] for i, agent_id in enumerate(decision_id)}
{
agent_id: termination_batch.reward[i]
for i, agent_id in enumerate(termination_id)
}
)
cumulative_rewards = {k: v for k, v in rewards.items()}
infos = {}
terminations = {}
truncations = {}
for i, agent_id in enumerate(decision_id):
infos[agent_id] = {}
infos[agent_id]["behavior_name"] = behavior_name
infos[agent_id]["group_id"] = decision_batch.group_id[i]
infos[agent_id]["group_reward"] = decision_batch.group_reward[i]
truncations[agent_id] = False
terminations[agent_id] = False
for i, agent_id in enumerate(termination_id):
infos[agent_id] = {}
infos[agent_id]["behavior_name"] = behavior_name
infos[agent_id]["group_id"] = termination_batch.group_id[i]
infos[agent_id]["group_reward"] = termination_batch.group_reward[i]
infos[agent_id]["interrupted"] = termination_batch.interrupted[i]
truncated = bool(termination_batch.interrupted[i])
infos[agent_id]["interrupted"] = truncated
truncations[agent_id] = truncated
terminations[agent_id] = not truncated
id_map = {agent_id: i for i, agent_id in enumerate(decision_id)}
return agents, obs, dones, rewards, cumulative_rewards, infos, id_map
return (
agents,
obs,
terminations,
truncations,
rewards,
cumulative_rewards,
infos,
id_map,
)


def _parse_behavior(full_behavior):
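
The key behavioral change in this helper is that an interrupted agent is now reported as truncated rather than terminated. A standalone illustration of that mapping, using plain lists in place of the real `TerminalSteps` batch and placeholder agent ids:

```python
# `interrupted` mirrors TerminalSteps.interrupted: True when the episode was cut
# short (for example by reaching max_steps) instead of ending naturally.
termination_ids = ["agent_0", "agent_1"]  # placeholder agent ids
interrupted = [True, False]

terminations = {}
truncations = {}
for agent_id, was_interrupted in zip(termination_ids, interrupted):
    truncated = bool(was_interrupted)
    truncations[agent_id] = truncated
    terminations[agent_id] = not truncated

assert truncations == {"agent_0": True, "agent_1": False}
assert terminations == {"agent_0": False, "agent_1": True}
```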
11 changes: 7 additions & 4 deletions ml-agents-envs/mlagents_envs/envs/unity_aec_env.py
@@ -1,5 +1,5 @@
from typing import Any, Optional
from gym import error
from gymnasium import error
from mlagents_envs.base_env import BaseEnv
from pettingzoo import AECEnv

Expand Down Expand Up @@ -53,16 +53,19 @@ def observe(self, agent_id):
return (
self._observations[agent_id],
self._cumm_rewards[agent_id],
self._dones[agent_id],
self._terminations[agent_id],
self._truncations[agent_id],
self._infos[agent_id],
)

def last(self, observe=True):
"""
returns observation, cumulative reward, done, info for the current agent (specified by self.agent_selection)
"""
obs, reward, done, info = self.observe(self._agents[self._agent_index])
return obs if observe else None, reward, done, info
obs, cumm_rewards, terminated, truncated, info = self.observe(
self._agents[self._agent_index]
)
return obs if observe else None, cumm_rewards, terminated, truncated, info

@property
def agent_selection(self):