Spaces:
Sleeping
Sleeping
add test script
Browse files
- rlcube/rlcube/envs/__init__.py +0 -15
- rlcube/rlcube/envs/cube2.py +1 -1
- rlcube/rlcube/envs/test.py +17 -0
- rlcube/rlcube/models/dataset.py +3 -3
- rlcube/rlcube/models/models.py +4 -4
- rlcube/rlcube/models/test.py +17 -0
rlcube/rlcube/envs/__init__.py
CHANGED
|
@@ -1,18 +1,3 @@
|
|
| 1 |
from .cube2 import Cube2Env
|
| 2 |
|
| 3 |
__all__ = ["Cube2Env"]
|
| 4 |
-
|
| 5 |
-
if __name__ == "__main__":
|
| 6 |
-
print("Testing Cube2Env.from_obs")
|
| 7 |
-
env = Cube2Env()
|
| 8 |
-
env2 = Cube2Env.from_obs(env.obs())
|
| 9 |
-
print(env2.state, env2.is_solved())
|
| 10 |
-
env2.print_js_code()
|
| 11 |
-
print()
|
| 12 |
-
|
| 13 |
-
print("Testing Cube2Env.adjacent_obs")
|
| 14 |
-
env = Cube2Env()
|
| 15 |
-
adjacent_obs = env.adjacent_obs()
|
| 16 |
-
for i in range(12):
|
| 17 |
-
env = Cube2Env.from_obs(adjacent_obs[i])
|
| 18 |
-
env.print_js_code()
|
|
|
|
| 1 |
from .cube2 import Cube2Env
|
| 2 |
|
| 3 |
__all__ = ["Cube2Env"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
rlcube/rlcube/envs/cube2.py
CHANGED
|
@@ -242,7 +242,7 @@ class Cube2Env(gym.Env):
|
|
| 242 |
for i in range(6):
|
| 243 |
for j in range(4):
|
| 244 |
label = int(self.state[i, j])
|
| 245 |
-
zeros = np.zeros(6)
|
| 246 |
zeros[label] = 1
|
| 247 |
one_hots.append(zeros)
|
| 248 |
return np.array(one_hots)
|
|
|
|
| 242 |
for i in range(6):
|
| 243 |
for j in range(4):
|
| 244 |
label = int(self.state[i, j])
|
| 245 |
+
zeros = np.zeros(6, dtype=np.int8)
|
| 246 |
zeros[label] = 1
|
| 247 |
one_hots.append(zeros)
|
| 248 |
return np.array(one_hots)
|
rlcube/rlcube/envs/test.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from rlcube.envs.cube2 import Cube2Env
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
if __name__ == "__main__":
|
| 5 |
+
print("Testing Cube2Env.from_obs")
|
| 6 |
+
env = Cube2Env()
|
| 7 |
+
env2 = Cube2Env.from_obs(env.obs())
|
| 8 |
+
print(env2.state, env2.is_solved())
|
| 9 |
+
env2.print_js_code()
|
| 10 |
+
print()
|
| 11 |
+
|
| 12 |
+
print("Testing Cube2Env.adjacent_obs")
|
| 13 |
+
env = Cube2Env()
|
| 14 |
+
adjacent_obs = env.adjacent_obs()
|
| 15 |
+
for i in range(12):
|
| 16 |
+
env = Cube2Env.from_obs(adjacent_obs[i])
|
| 17 |
+
env.print_js_code()
|
rlcube/rlcube/models/dataset.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
from torch.utils.data import Dataset
|
| 2 |
-
from rlcube.envs
|
| 3 |
import numpy as np
|
| 4 |
import torch
|
| 5 |
from tqdm import tqdm
|
|
@@ -12,13 +12,13 @@ def create_dataset(
|
|
| 12 |
neighbors = []
|
| 13 |
D = []
|
| 14 |
for _ in tqdm(range(num_envs)):
|
| 15 |
-
env =
|
| 16 |
obs, _ = env.reset()
|
| 17 |
for _ in range(num_steps):
|
| 18 |
action = env.action_space.sample()
|
| 19 |
obs, _, _, _, _ = env.step(action)
|
| 20 |
states.append(obs)
|
| 21 |
-
neighbors.append(env.
|
| 22 |
D.append(env.step_count)
|
| 23 |
states = np.array(states)
|
| 24 |
neighbors = np.array(neighbors)
|
|
|
|
| 1 |
from torch.utils.data import Dataset
|
| 2 |
+
from rlcube.envs import Cube2Env
|
| 3 |
import numpy as np
|
| 4 |
import torch
|
| 5 |
from tqdm import tqdm
|
|
|
|
| 12 |
neighbors = []
|
| 13 |
D = []
|
| 14 |
for _ in tqdm(range(num_envs)):
|
| 15 |
+
env = Cube2Env()
|
| 16 |
obs, _ = env.reset()
|
| 17 |
for _ in range(num_steps):
|
| 18 |
action = env.action_space.sample()
|
| 19 |
obs, _, _, _, _ = env.step(action)
|
| 20 |
states.append(obs)
|
| 21 |
+
neighbors.append(env.adjacent_obs())
|
| 22 |
D.append(env.step_count)
|
| 23 |
states = np.array(states)
|
| 24 |
neighbors = np.array(neighbors)
|
rlcube/rlcube/models/models.py
CHANGED
|
@@ -2,7 +2,7 @@ import torch.nn as nn
|
|
| 2 |
import torch.nn.functional as F
|
| 3 |
import torch
|
| 4 |
from tensordict import TensorDict
|
| 5 |
-
from rlcube.envs.cube2 import
|
| 6 |
import numpy as np
|
| 7 |
|
| 8 |
|
|
@@ -20,8 +20,8 @@ class Reward(nn.Module):
|
|
| 20 |
solved = face_solved.all(dim=1)
|
| 21 |
return torch.where(
|
| 22 |
solved,
|
| 23 |
-
torch.tensor(1, device=batch_obs.device, dtype=
|
| 24 |
-
torch.tensor(-1, device=batch_obs.device, dtype=
|
| 25 |
)
|
| 26 |
|
| 27 |
|
|
@@ -82,7 +82,7 @@ class DNN(nn.Module):
|
|
| 82 |
|
| 83 |
if __name__ == "__main__":
|
| 84 |
print("Testing RewardNet")
|
| 85 |
-
env =
|
| 86 |
obs, _ = env.reset()
|
| 87 |
obs1, _, _, _, _ = env.step(1)
|
| 88 |
obs2, _, _, _, _ = env.step(2)
|
|
|
|
| 2 |
import torch.nn.functional as F
|
| 3 |
import torch
|
| 4 |
from tensordict import TensorDict
|
| 5 |
+
from rlcube.envs.cube2 import Cube2Env
|
| 6 |
import numpy as np
|
| 7 |
|
| 8 |
|
|
|
|
| 20 |
solved = face_solved.all(dim=1)
|
| 21 |
return torch.where(
|
| 22 |
solved,
|
| 23 |
+
torch.tensor(1, device=batch_obs.device, dtype=torch.float32),
|
| 24 |
+
torch.tensor(-1, device=batch_obs.device, dtype=torch.float32),
|
| 25 |
)
|
| 26 |
|
| 27 |
|
|
|
|
| 82 |
|
| 83 |
if __name__ == "__main__":
|
| 84 |
print("Testing RewardNet")
|
| 85 |
+
env = Cube2Env()
|
| 86 |
obs, _ = env.reset()
|
| 87 |
obs1, _, _, _, _ = env.step(1)
|
| 88 |
obs2, _, _, _, _ = env.step(2)
|
rlcube/rlcube/models/test.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from rlcube.envs.cube2 import Cube2Env
|
| 2 |
+
from rlcube.models.models import Reward
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
if __name__ == "__main__":
|
| 6 |
+
print("Testing Reward")
|
| 7 |
+
env = Cube2Env()
|
| 8 |
+
adjacent_obs = torch.tensor(env.adjacent_obs())
|
| 9 |
+
reward = Reward()(adjacent_obs)
|
| 10 |
+
print("Reward:", reward)
|
| 11 |
+
obs, _, _, _, _ = env.step(0)
|
| 12 |
+
env1 = Cube2Env.from_obs(obs)
|
| 13 |
+
adjacent_obs = env1.adjacent_obs()
|
| 14 |
+
for i in range(12):
|
| 15 |
+
env2 = Cube2Env.from_obs(adjacent_obs[i])
|
| 16 |
+
env2.print_js_code()
|
| 17 |
+
print(env2.is_solved())
|