-
Notifications
You must be signed in to change notification settings - Fork 13
/
LockGaussian.py
49 lines (39 loc) · 1.71 KB
/
LockGaussian.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import numpy as np
import gym
from gym.spaces import MultiBinary, Discrete, Box
import LockBernoulli
class LockGaussian(LockBernoulli.LockBernoulli):
    """A (stochastic) combination lock environment with Gaussian observations.

    In non-tabular mode the observation is ``x = R s + eps`` where ``s`` is the
    one-hot encoding of the state, ``R`` is ``self.rotation`` and ``eps`` is
    zero-mean Gaussian noise whose standard deviation is ``env_config['noise']``.
    ``init`` sets ``R`` to the identity matrix; assign a different matrix to
    ``self.rotation`` afterwards to actually rotate the features.

    All other settings in ``env_config`` are forwarded unchanged to
    ``LockBernoulli`` (presumably the lock length, dimension and switching
    probability -- see that class for the keys it accepts).
    """

    def __init__(self, env_config=None):
        """Construct the environment.

        Args:
            env_config: optional configuration dict, forwarded to the parent
                constructor. ``None`` is treated as an empty dict.
        """
        # A fresh dict per call avoids sharing one mutable default object
        # between every instance (and with the parent class).
        if env_config is None:
            env_config = {}
        super(LockGaussian, self).__init__(env_config=env_config)
        self.initialized = False

    def init(self, env_config=None):
        """Configure the environment from ``env_config``.

        Args:
            env_config: optional configuration dict. The ``'noise'`` key, when
                present, gives the standard deviation of the Gaussian noise
                added to each observation; every key is also forwarded to the
                parent ``init``. ``None`` is treated as an empty dict.
        """
        if env_config is None:
            env_config = {}
        super(LockGaussian, self).init(env_config=env_config)
        # None means "no noise": make_obs then emits the exact one-hot vector.
        self.noise = env_config.get('noise')
        # Kept as np.matrix so code that relies on the attribute's type (e.g.
        # `self.rotation * v`) keeps working; make_obs accepts either a matrix
        # or a plain 2-D array here.
        self.rotation = np.matrix(np.eye(self.observation_space.n))

    def make_obs(self, s):
        """Return the observation emitted for state index ``s``.

        Args:
            s: index of the current state.

        Returns:
            In tabular mode, the array ``[s, self.h]``. Otherwise a 1-D array:
            the (optionally noisy) one-hot encoding of ``s`` multiplied by
            ``self.rotation``.
        """
        if self.tabular:
            return np.array([s, self.h])

        n_features = self.observation_space.n
        if self.noise is not None:
            # NOTE(review): gym.spaces.np_random appears to exist only in older
            # gym releases -- confirm against the gym version this project pins.
            features = gym.spaces.np_random.normal(0, self.noise, [n_features])
        else:
            features = np.zeros((n_features,))
        # Add the one-hot encoding of the state on top of the noise.
        features[s] += 1
        # Plain-array product R.s; yields the same 1-D vector as the former
        # np.matrix multiply / transpose / reshape sequence.
        return np.dot(np.asarray(self.rotation), features)
if __name__ == '__main__':
    # Demo: run 20 episodes with uniformly random actions on a small noisy
    # lock, rendering the environment and printing each observation.
    demo_config = {
        'horizon': 2,
        'dimension': 2,
        'tabular': False,
        'swap': 0.0,
        'noise': 0.1,
    }
    env = LockGaussian()
    env.init(env_config=demo_config)
    for t in range(20):
        o = env.reset()
        done = False
        while not done:
            env.render()
            print(o)
            # Draw one random action index in [0, number of actions).
            action = gym.spaces.np_random.randint(low=0, high=env.action_space.n, size=1)
            o, r, done, blah = env.step(action)
        # Report the reward returned by the final step of the episode.
        print("End of episode: r=%d" % (r))