Commit edb87c5 (parent d8cdf92), committed by imwithye

add test script
rlcube/rlcube/envs/__init__.py CHANGED
@@ -1,18 +1,3 @@
 from .cube2 import Cube2Env
 
 __all__ = ["Cube2Env"]
-
-if __name__ == "__main__":
-    print("Testing Cube2Env.from_obs")
-    env = Cube2Env()
-    env2 = Cube2Env.from_obs(env.obs())
-    print(env2.state, env2.is_solved())
-    env2.print_js_code()
-    print()
-
-    print("Testing Cube2Env.adjacent_obs")
-    env = Cube2Env()
-    adjacent_obs = env.adjacent_obs()
-    for i in range(12):
-        env = Cube2Env.from_obs(adjacent_obs[i])
-        env.print_js_code()
rlcube/rlcube/envs/cube2.py CHANGED
@@ -242,7 +242,7 @@ class Cube2Env(gym.Env):
         for i in range(6):
             for j in range(4):
                 label = int(self.state[i, j])
-                zeros = np.zeros(6)
+                zeros = np.zeros(6, dtype=np.int8)
                 zeros[label] = 1
                 one_hots.append(zeros)
         return np.array(one_hots)
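Before this change np.zeros(6) defaulted to float64, so every observation carried 24 eight-byte one-hot rows; with dtype=np.int8 the same encoding is eight times smaller. A minimal sketch of the resulting layout, assuming only what the loop above shows (a (6, 4) state with colour labels 0-5); the solved-state setup is illustrative, not the repository's code:

import numpy as np

state = np.zeros((6, 4), dtype=np.int8)       # hypothetical solved 2x2 state
for face in range(6):
    state[face, :] = face                     # every sticker matches its face colour

one_hots = []
for i in range(6):
    for j in range(4):
        zeros = np.zeros(6, dtype=np.int8)    # int8 instead of the float64 default
        zeros[int(state[i, j])] = 1
        one_hots.append(zeros)

obs = np.array(one_hots)
print(obs.shape, obs.dtype, obs.nbytes)       # (24, 6) int8 144  (float64 would be 1152)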
rlcube/rlcube/envs/test.py ADDED
@@ -0,0 +1,17 @@
+from rlcube.envs.cube2 import Cube2Env
+
+
+if __name__ == "__main__":
+    print("Testing Cube2Env.from_obs")
+    env = Cube2Env()
+    env2 = Cube2Env.from_obs(env.obs())
+    print(env2.state, env2.is_solved())
+    env2.print_js_code()
+    print()
+
+    print("Testing Cube2Env.adjacent_obs")
+    env = Cube2Env()
+    adjacent_obs = env.adjacent_obs()
+    for i in range(12):
+        env = Cube2Env.from_obs(adjacent_obs[i])
+        env.print_js_code()
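Assuming rlcube is importable (for example after an editable install), this script can presumably be run with python -m rlcube.envs.test; it round-trips an observation through Cube2Env.from_obs and then prints the JS code for each of the twelve one-move neighbours.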
rlcube/rlcube/models/dataset.py CHANGED
@@ -1,5 +1,5 @@
 from torch.utils.data import Dataset
-from rlcube.envs.cube2 import Cube2
+from rlcube.envs import Cube2Env
 import numpy as np
 import torch
 from tqdm import tqdm
@@ -12,13 +12,13 @@ def create_dataset(
     neighbors = []
     D = []
     for _ in tqdm(range(num_envs)):
-        env = Cube2()
+        env = Cube2Env()
         obs, _ = env.reset()
         for _ in range(num_steps):
             action = env.action_space.sample()
             obs, _, _, _, _ = env.step(action)
             states.append(obs)
-            neighbors.append(env.neighbors())
+            neighbors.append(env.adjacent_obs())
             D.append(env.step_count)
     states = np.array(states)
     neighbors = np.array(neighbors)
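With the import and method name fixed, every scramble step contributes one observation, its twelve one-move neighbour observations, and its scramble depth. A hypothetical sketch of a Dataset wrapper over those arrays (CubeSamples is not the repository's class; the shapes assume the (24, 6) one-hot observation from cube2.py):

import torch
from torch.utils.data import Dataset

class CubeSamples(Dataset):
    """Illustrative wrapper: states (N, 24, 6), neighbors (N, 12, 24, 6), D (N,)."""

    def __init__(self, states, neighbors, D):
        self.states = torch.as_tensor(states, dtype=torch.float32)
        self.neighbors = torch.as_tensor(neighbors, dtype=torch.float32)
        self.D = torch.as_tensor(D, dtype=torch.float32)

    def __len__(self):
        return len(self.states)

    def __getitem__(self, idx):
        # one scrambled state, its 12 one-move neighbours, and its scramble depth
        return self.states[idx], self.neighbors[idx], self.D[idx]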
rlcube/rlcube/models/models.py CHANGED
@@ -2,7 +2,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 import torch
 from tensordict import TensorDict
-from rlcube.envs.cube2 import Cube2
+from rlcube.envs.cube2 import Cube2Env
 import numpy as np
 
 
@@ -20,8 +20,8 @@ class Reward(nn.Module):
         solved = face_solved.all(dim=1)
         return torch.where(
             solved,
-            torch.tensor(1, device=batch_obs.device, dtype=batch_obs.dtype),
-            torch.tensor(-1, device=batch_obs.device, dtype=batch_obs.dtype),
+            torch.tensor(1, device=batch_obs.device, dtype=torch.float32),
+            torch.tensor(-1, device=batch_obs.device, dtype=torch.float32),
         )
 
 
@@ -82,7 +82,7 @@ class DNN(nn.Module):
 
 if __name__ == "__main__":
     print("Testing RewardNet")
-    env = Cube2()
+    env = Cube2Env()
     obs, _ = env.reset()
     obs1, _, _, _, _ = env.step(1)
     obs2, _, _, _, _ = env.step(2)
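Pinning the reward to torch.float32 matters once observations arrive as int8 one-hots: with dtype=batch_obs.dtype the +-1 rewards would silently become integer tensors. A minimal free-function sketch of the same logic; only the torch.where call comes from the diff, while the face_solved computation (argmax plus a per-face uniformity check on a (B, 24, 6) one-hot batch) is an assumption:

import torch

def reward_from_onehot(batch_obs: torch.Tensor) -> torch.Tensor:
    # batch_obs: (B, 24, 6) one-hot stickers, face-major (6 faces x 4 stickers)
    labels = batch_obs.argmax(dim=-1).view(-1, 6, 4)         # (B, 6, 4) colour ids
    face_solved = (labels == labels[:, :, :1]).all(dim=-1)   # (B, 6) is each face uniform?
    solved = face_solved.all(dim=1)                          # (B,) whole cube solved?
    return torch.where(
        solved,
        torch.tensor(1, device=batch_obs.device, dtype=torch.float32),
        torch.tensor(-1, device=batch_obs.device, dtype=torch.float32),
    )

if __name__ == "__main__":
    solved_obs = torch.eye(6).repeat_interleave(4, dim=0).unsqueeze(0)  # (1, 24, 6) solved cube
    print(reward_from_onehot(solved_obs))                               # tensor([1.])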
rlcube/rlcube/models/test.py ADDED
@@ -0,0 +1,17 @@
+from rlcube.envs.cube2 import Cube2Env
+from rlcube.models.models import Reward
+import torch
+
+if __name__ == "__main__":
+    print("Testing Reward")
+    env = Cube2Env()
+    adjacent_obs = torch.tensor(env.adjacent_obs())
+    reward = Reward()(adjacent_obs)
+    print("Reward:", reward)
+    obs, _, _, _, _ = env.step(0)
+    env1 = Cube2Env.from_obs(obs)
+    adjacent_obs = env1.adjacent_obs()
+    for i in range(12):
+        env2 = Cube2Env.from_obs(adjacent_obs[i])
+        env2.print_js_code()
+        print(env2.is_solved())
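As with the envs script, this one can presumably be run with python -m rlcube.models.test; it feeds the tensor of twelve adjacent observations through Reward, then prints whether each one-move neighbour of a single turn is solved (the neighbour that undoes the move should come back solved).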