imwithye committed on
Commit
3072ff1
·
1 Parent(s): 0b3d91e

auto python state

Browse files
rlcube/main.py CHANGED
@@ -1,7 +1,9 @@
1
- from rlcube.cube2 import train
2
 
3
  def main():
4
- train()
 
 
5
 
6
 
7
  if __name__ == "__main__":
 
1
+ from rlcube.envs.cube2 import Cube2
2
 
3
  def main():
4
+ env = Cube2()
5
+ obs, _ = env.reset()
6
+ print(obs)
7
 
8
 
9
  if __name__ == "__main__":
rlcube/rlcube/envs/cube2.py CHANGED
@@ -2,35 +2,35 @@ from random import shuffle
2
  import gymnasium as gym
3
  import numpy as np
4
 
5
- FRONT = 0
6
- RIGHT = 1
7
- BACK = 2
8
- LEFT = 3
9
- UP = 4
10
- DOWN = 5
11
 
12
  class Cube2(gym.Env):
13
  def __init__(self):
14
  super().__init__()
15
  self.action_space = gym.spaces.Discrete(12)
16
  self.observation_space = gym.spaces.Box(low=0,high=1,shape=(24, 6),dtype=np.int8)
17
- self.state = np.zeros((6, 2, 2))
18
  self.step_count = 0
19
 
20
  def reset(self, seed=None, options=None):
21
  super().reset(seed=seed, options=options)
22
- self.state = np.zeros((6, 2, 2))
23
- self.state[0] = np.ones((2, 2)) * FRONT
24
- self.state[1] = np.ones((2, 2)) * RIGHT
25
- self.state[2] = np.ones((2, 2)) * BACK
26
- self.state[3] = np.ones((2, 2)) * LEFT
27
- self.state[4] = np.ones((2, 2)) * UP
28
- self.state[5] = np.ones((2, 2)) * DOWN
29
- shuffle_steps =self.np_random.integers(0, 20)
30
- for i in range(shuffle_steps):
31
- self.step(self.action_space.sample())
32
  self.step_count = 0
33
- return self._get_obs(), {}
34
 
35
  def step(self, action):
36
  self.step_count += 1
@@ -38,127 +38,175 @@ class Cube2(gym.Env):
38
 
39
  # Front Clockwise
40
  if action == 0:
41
- new_state[RIGHT, 0, 0] = self.state[UP, 0, 0]
42
- new_state[RIGHT, 1, 0] = self.state[UP, 1, 0]
43
- new_state[DOWN, 0, 1] = self.state[RIGHT, 1, 0]
44
- new_state[DOWN, 1, 1] = self.state[RIGHT, 0, 0]
45
- new_state[LEFT, 0, 1] = self.state[DOWN, 0, 1]
46
- new_state[LEFT, 1, 1] = self.state[DOWN, 1, 1]
47
- new_state[UP, 0, 0] = self.state[LEFT, 1, 1]
48
- new_state[UP, 1, 0] = self.state[LEFT, 0, 1]
 
 
 
 
49
  # Front Counter-Clockwise
50
  if action == 1:
51
- new_state[LEFT, 0, 1] = self.state[UP, 1, 0]
52
- new_state[LEFT, 1, 1] = self.state[UP, 0, 0]
53
- new_state[DOWN, 0, 1] = self.state[LEFT, 0, 1]
54
- new_state[DOWN, 1, 1] = self.state[LEFT, 1, 1]
55
- new_state[RIGHT, 0, 0] = self.state[DOWN, 1, 1]
56
- new_state[RIGHT, 1, 0] = self.state[DOWN, 0, 1]
57
- new_state[UP, 0, 0] = self.state[RIGHT, 0, 0]
58
- new_state[UP, 1, 0] = self.state[RIGHT, 1, 0]
59
- # Right Clockwise
 
 
 
 
60
  if action == 2:
61
- new_state[BACK, 0, 0] = self.state[UP, 1, 0]
62
- new_state[BACK, 1, 0] = self.state[UP, 1, 1]
63
- new_state[DOWN, 1, 0] = self.state[BACK, 0, 0]
64
- new_state[DOWN, 1, 1] = self.state[BACK, 1, 0]
65
- new_state[FRONT, 0, 1] = self.state[DOWN, 1, 1]
66
- new_state[FRONT, 1, 1] = self.state[DOWN, 1, 0]
67
- new_state[UP, 1, 0] = self.state[FRONT, 1, 1]
68
- new_state[UP, 1, 1] = self.state[FRONT, 0, 1]
69
- # Right Counter-Clockwise
 
 
 
 
70
  if action == 3:
71
- new_state[FRONT, 0, 1] = self.state[UP, 1, 1]
72
- new_state[FRONT, 1, 1] = self.state[UP, 1, 0]
73
- new_state[DOWN, 1, 1] = self.state[FRONT, 0, 1]
74
- new_state[DOWN, 1, 0] = self.state[FRONT, 1, 1]
75
- new_state[BACK, 0, 0] = self.state[DOWN, 1, 0]
76
- new_state[BACK, 1, 0] = self.state[DOWN, 1, 1]
77
- new_state[UP, 1, 0] = self.state[BACK, 0, 0]
78
- new_state[UP, 1, 1] = self.state[BACK, 1, 0]
79
- # Left Clockwise
 
 
 
 
80
  if action == 4:
81
- new_state[FRONT, 0, 0] = self.state[UP, 0, 1]
82
- new_state[FRONT, 1, 0] = self.state[UP, 0, 0]
83
- new_state[DOWN, 0, 0] = self.state[FRONT, 1, 0]
84
- new_state[DOWN, 0, 1] = self.state[FRONT, 0, 0]
85
- new_state[BACK, 0, 1] = self.state[DOWN, 0, 0]
86
- new_state[BACK, 1, 1] = self.state[DOWN, 0, 1]
87
- new_state[UP, 0, 0] = self.state[BACK, 0, 1]
88
- new_state[UP, 0, 1] = self.state[BACK, 1, 1]
89
- # Left Counter-Clockwise
 
 
 
 
90
  if action == 5:
91
- new_state[BACK, 0, 1] = self.state[UP, 0, 0]
92
- new_state[BACK, 1, 1] = self.state[UP, 0, 1]
93
- new_state[DOWN, 0, 0] = self.state[BACK, 0, 1]
94
- new_state[DOWN, 0, 1] = self.state[BACK, 1, 1]
95
- new_state[FRONT, 0, 0] = self.state[DOWN, 0, 1]
96
- new_state[FRONT, 1, 0] = self.state[DOWN, 0, 0]
97
- new_state[UP, 0, 1] = self.state[FRONT, 0, 0]
98
- new_state[UP, 0, 0] = self.state[FRONT, 1, 0]
99
- # Back Clockwise
 
 
 
 
100
  if action == 6:
101
- new_state[RIGHT, 0, 1] = self.state[DOWN, 1, 0]
102
- new_state[RIGHT, 1, 1] = self.state[DOWN, 0, 0]
103
- new_state[UP, 0, 1] = self.state[RIGHT, 0, 1]
104
- new_state[UP, 1, 1] = self.state[RIGHT, 1, 1]
105
- new_state[LEFT, 0, 0] = self.state[UP, 1, 1]
106
- new_state[LEFT, 1, 0] = self.state[UP, 0, 1]
107
- new_state[DOWN, 0, 0] = self.state[LEFT, 0, 0]
108
- new_state[DOWN, 1, 0] = self.state[LEFT, 1, 0]
109
- # Back Counter-Clockwise
 
 
 
 
110
  if action == 7:
111
- new_state[DOWN, 1, 0] = self.state[RIGHT, 0, 1]
112
- new_state[DOWN, 0, 0] = self.state[RIGHT, 1, 1]
113
- new_state[RIGHT, 0, 1] = self.state[UP, 0, 1]
114
- new_state[RIGHT, 1, 1] = self.state[UP, 1, 1]
115
- new_state[UP, 1, 1] = self.state[LEFT, 0, 0]
116
- new_state[UP, 0, 1] = self.state[LEFT, 1, 0]
117
- new_state[LEFT, 0, 0] = self.state[DOWN, 0, 0]
118
- new_state[LEFT, 1, 0] = self.state[DOWN, 1, 0]
119
- # Up Clockwise
 
 
 
 
120
  if action == 8:
121
- new_state[FRONT, 0, 0] = self.state[RIGHT, 0, 0]
122
- new_state[FRONT, 0, 1] = self.state[RIGHT, 0, 1]
123
- new_state[LEFT, 0, 0] = self.state[FRONT, 0, 0]
124
- new_state[LEFT, 0, 1] = self.state[FRONT, 0, 1]
125
- new_state[BACK, 0, 0] = self.state[LEFT, 0, 0]
126
- new_state[BACK, 0, 1] = self.state[LEFT, 0, 1]
127
- new_state[RIGHT, 0, 0] = self.state[BACK, 0, 0]
128
- new_state[RIGHT, 0, 1] = self.state[BACK, 0, 1]
 
 
 
 
129
  # Up Counter-Clockwise
130
  if action == 9:
131
- new_state[RIGHT, 0, 0] = self.state[FRONT, 0, 0]
132
- new_state[RIGHT, 0, 1] = self.state[FRONT, 0, 1]
133
- new_state[FRONT, 0, 0] = self.state[LEFT, 0, 0]
134
- new_state[FRONT, 0, 1] = self.state[LEFT, 0, 1]
135
- new_state[LEFT, 0, 0] = self.state[BACK, 0, 0]
136
- new_state[LEFT, 0, 1] = self.state[BACK, 0, 1]
137
- new_state[BACK, 0, 0] = self.state[RIGHT, 0, 0]
138
- new_state[BACK, 0, 1] = self.state[RIGHT, 0, 1]
139
- # Down Clockwise
 
 
 
 
140
  if action == 10:
141
- new_state[FRONT, 1, 0] = self.state[LEFT, 1, 0]
142
- new_state[FRONT, 1, 1] = self.state[LEFT, 1, 1]
143
- new_state[LEFT, 1, 0] = self.state[BACK, 1, 0]
144
- new_state[LEFT, 1, 1] = self.state[BACK, 1, 1]
145
- new_state[BACK, 1, 0] = self.state[RIGHT, 1, 0]
146
- new_state[BACK, 1, 1] = self.state[RIGHT, 1, 1]
147
- new_state[RIGHT, 1, 0] = self.state[FRONT, 1, 0]
148
- new_state[RIGHT, 1, 1] = self.state[FRONT, 1, 1]
 
 
 
 
149
  # Down Counter-Clockwise
150
  if action == 11:
151
- new_state[LEFT, 1, 0] = self.state[FRONT, 1, 0]
152
- new_state[LEFT, 1, 1] = self.state[FRONT, 1, 1]
153
- new_state[BACK, 1, 0] = self.state[LEFT, 1, 0]
154
- new_state[BACK, 1, 1] = self.state[LEFT, 1, 1]
155
- new_state[RIGHT, 1, 0] = self.state[BACK, 1, 0]
156
- new_state[RIGHT, 1, 1] = self.state[BACK, 1, 1]
157
- new_state[FRONT, 1, 0] = self.state[RIGHT, 1, 0]
158
- new_state[FRONT, 1, 1] = self.state[RIGHT, 1, 1]
 
 
 
 
159
 
160
  self.state = new_state
161
- return self._get_obs(), 1 if self._is_solved() else -1, self._is_solved(), self.step_count >= 100, {}
162
 
163
  def _get_obs(self):
164
  one_hots = []
@@ -173,6 +221,6 @@ class Cube2(gym.Env):
173
 
174
  def _is_solved(self):
175
  for i in range(6):
176
- if np.mean(self.state[i]) != self.state[i][0][0]:
177
  return False
178
  return True
 
2
  import gymnasium as gym
3
  import numpy as np
4
 
5
+ F = 0
6
+ B = 1
7
+ R = 2
8
+ L = 3
9
+ T = 4
10
+ B = 5
11
 
12
  class Cube2(gym.Env):
13
  def __init__(self):
14
  super().__init__()
15
  self.action_space = gym.spaces.Discrete(12)
16
  self.observation_space = gym.spaces.Box(low=0,high=1,shape=(24, 6),dtype=np.int8)
17
+ self.state = np.zeros((6, 4))
18
  self.step_count = 0
19
 
20
  def reset(self, seed=None, options=None):
21
  super().reset(seed=seed, options=options)
22
+ self.state = np.zeros((6, 4))
23
+ self.state[0] = np.ones(4) * F
24
+ self.state[1] = np.ones(4) * B
25
+ self.state[2] = np.ones(4) * R
26
+ self.state[3] = np.ones(4) * L
27
+ self.state[4] = np.ones(4) * T
28
+ self.state[5] = np.ones(4) * B
29
+ # shuffle_steps =self.np_random.integers(0, 20)
30
+ # for i in range(shuffle_steps):
31
+ # self.step(self.action_space.sample())
32
  self.step_count = 0
33
+ return self.state, {}
34
 
35
  def step(self, action):
36
  self.step_count += 1
 
38
 
39
  # Front Clockwise
40
  if action == 0:
41
+ new_state[F, 0] = self.state[F, 2]
42
+ new_state[F, 1] = self.state[F, 0]
43
+ new_state[F, 2] = self.state[F, 3]
44
+ new_state[F, 3] = self.state[F, 1]
45
+ new_state[R, 1] = self.state[T, 3]
46
+ new_state[R, 3] = self.state[T, 1]
47
+ new_state[L, 1] = self.state[B, 3]
48
+ new_state[L, 3] = self.state[B, 1]
49
+ new_state[T, 1] = self.state[L, 1]
50
+ new_state[T, 3] = self.state[L, 3]
51
+ new_state[B, 1] = self.state[R, 1]
52
+ new_state[B, 3] = self.state[R, 3]
53
  # Front Counter-Clockwise
54
  if action == 1:
55
+ new_state[F, 0] = self.state[F, 1]
56
+ new_state[F, 1] = self.state[F, 3]
57
+ new_state[F, 2] = self.state[F, 0]
58
+ new_state[F, 3] = self.state[F, 2]
59
+ new_state[R, 1] = self.state[B, 1]
60
+ new_state[R, 3] = self.state[B, 3]
61
+ new_state[L, 1] = self.state[T, 1]
62
+ new_state[L, 3] = self.state[T, 3]
63
+ new_state[T, 1] = self.state[R, 3]
64
+ new_state[T, 3] = self.state[R, 1]
65
+ new_state[B, 1] = self.state[L, 3]
66
+ new_state[B, 3] = self.state[L, 1]
67
+ # Back Clockwise
68
  if action == 2:
69
+ new_state[B, 0] = self.state[B, 1]
70
+ new_state[B, 1] = self.state[B, 3]
71
+ new_state[B, 2] = self.state[B, 0]
72
+ new_state[B, 3] = self.state[B, 2]
73
+ new_state[R, 0] = self.state[B, 0]
74
+ new_state[R, 2] = self.state[B, 2]
75
+ new_state[L, 0] = self.state[T, 0]
76
+ new_state[L, 2] = self.state[T, 2]
77
+ new_state[T, 0] = self.state[R, 2]
78
+ new_state[T, 2] = self.state[R, 0]
79
+ new_state[B, 0] = self.state[L, 2]
80
+ new_state[B, 2] = self.state[L, 0]
81
+ # Back Counter-Clockwise
82
  if action == 3:
83
+ new_state[B, 0] = self.state[B, 2]
84
+ new_state[B, 1] = self.state[B, 0]
85
+ new_state[B, 2] = self.state[B, 3]
86
+ new_state[B, 3] = self.state[B, 1]
87
+ new_state[R, 0] = self.state[T, 2]
88
+ new_state[R, 2] = self.state[T, 0]
89
+ new_state[L, 0] = self.state[B, 2]
90
+ new_state[L, 2] = self.state[B, 0]
91
+ new_state[T, 0] = self.state[L, 0]
92
+ new_state[T, 2] = self.state[L, 2]
93
+ new_state[B, 0] = self.state[R, 0]
94
+ new_state[B, 2] = self.state[R, 2]
95
+ # Right Clockwise
96
  if action == 4:
97
+ new_state[F, 2] = self.state[B, 2]
98
+ new_state[F, 3] = self.state[B, 3]
99
+ new_state[B, 2] = self.state[T, 2]
100
+ new_state[B, 3] = self.state[T, 3]
101
+ new_state[R, 0] = self.state[R, 2]
102
+ new_state[R, 1] = self.state[R, 0]
103
+ new_state[R, 2] = self.state[R, 3]
104
+ new_state[R, 3] = self.state[R, 1]
105
+ new_state[T, 2] = self.state[F, 3]
106
+ new_state[T, 3] = self.state[F, 2]
107
+ new_state[B, 2] = self.state[B, 3]
108
+ new_state[B, 3] = self.state[B, 2]
109
+ # Right Counter-Clockwise
110
  if action == 5:
111
+ new_state[F, 2] = self.state[T, 3]
112
+ new_state[F, 3] = self.state[T, 2]
113
+ new_state[B, 2] = self.state[B, 3]
114
+ new_state[B, 3] = self.state[B, 2]
115
+ new_state[R, 0] = self.state[R, 1]
116
+ new_state[R, 1] = self.state[R, 3]
117
+ new_state[R, 2] = self.state[R, 0]
118
+ new_state[R, 3] = self.state[R, 2]
119
+ new_state[T, 2] = self.state[B, 2]
120
+ new_state[T, 3] = self.state[B, 3]
121
+ new_state[B, 2] = self.state[F, 2]
122
+ new_state[B, 3] = self.state[F, 3]
123
+ # Left Clockwise
124
  if action == 6:
125
+ new_state[F, 0] = self.state[T, 1]
126
+ new_state[F, 1] = self.state[T, 0]
127
+ new_state[B, 0] = self.state[B, 1]
128
+ new_state[B, 1] = self.state[B, 0]
129
+ new_state[L, 0] = self.state[L, 1]
130
+ new_state[L, 1] = self.state[L, 3]
131
+ new_state[L, 2] = self.state[L, 0]
132
+ new_state[L, 3] = self.state[L, 2]
133
+ new_state[T, 0] = self.state[B, 0]
134
+ new_state[T, 1] = self.state[B, 1]
135
+ new_state[B, 0] = self.state[F, 0]
136
+ new_state[B, 1] = self.state[F, 1]
137
+ # Left Counter-Clockwise
138
  if action == 7:
139
+ new_state[F, 0] = self.state[B, 0]
140
+ new_state[F, 1] = self.state[B, 1]
141
+ new_state[B, 0] = self.state[T, 0]
142
+ new_state[B, 1] = self.state[T, 1]
143
+ new_state[L, 0] = self.state[L, 2]
144
+ new_state[L, 1] = self.state[L, 0]
145
+ new_state[L, 2] = self.state[L, 3]
146
+ new_state[L, 3] = self.state[L, 1]
147
+ new_state[T, 0] = self.state[F, 1]
148
+ new_state[T, 1] = self.state[F, 0]
149
+ new_state[B, 0] = self.state[B, 1]
150
+ new_state[B, 1] = self.state[B, 0]
151
+ # Top Clockwise
152
  if action == 8:
153
+ new_state[F, 1] = self.state[R, 3]
154
+ new_state[F, 3] = self.state[R, 2]
155
+ new_state[B, 1] = self.state[L, 3]
156
+ new_state[B, 3] = self.state[L, 2]
157
+ new_state[R, 2] = self.state[B, 1]
158
+ new_state[R, 3] = self.state[B, 3]
159
+ new_state[L, 2] = self.state[F, 1]
160
+ new_state[L, 3] = self.state[F, 3]
161
+ new_state[T, 0] = self.state[T, 1]
162
+ new_state[T, 1] = self.state[T, 3]
163
+ new_state[T, 2] = self.state[T, 0]
164
+ new_state[T, 3] = self.state[T, 2]
165
  # Up Counter-Clockwise
166
  if action == 9:
167
+ new_state[F, 1] = self.state[L, 2]
168
+ new_state[F, 3] = self.state[L, 3]
169
+ new_state[B, 1] = self.state[R, 2]
170
+ new_state[B, 3] = self.state[R, 3]
171
+ new_state[R, 2] = self.state[F, 3]
172
+ new_state[R, 3] = self.state[F, 1]
173
+ new_state[L, 2] = self.state[B, 3]
174
+ new_state[L, 3] = self.state[B, 1]
175
+ new_state[T, 0] = self.state[T, 2]
176
+ new_state[T, 1] = self.state[T, 0]
177
+ new_state[T, 2] = self.state[T, 3]
178
+ new_state[T, 3] = self.state[T, 1]
179
+ # Bottom Clockwise
180
  if action == 10:
181
+ new_state[F, 0] = self.state[L, 0]
182
+ new_state[F, 2] = self.state[L, 1]
183
+ new_state[B, 0] = self.state[R, 0]
184
+ new_state[B, 2] = self.state[R, 1]
185
+ new_state[R, 0] = self.state[F, 2]
186
+ new_state[R, 1] = self.state[F, 0]
187
+ new_state[L, 0] = self.state[B, 2]
188
+ new_state[L, 1] = self.state[B, 0]
189
+ new_state[B, 0] = self.state[B, 2]
190
+ new_state[B, 1] = self.state[B, 0]
191
+ new_state[B, 2] = self.state[B, 3]
192
+ new_state[B, 3] = self.state[B, 1]
193
  # Down Counter-Clockwise
194
  if action == 11:
195
+ new_state[F, 0] = self.state[R, 1]
196
+ new_state[F, 2] = self.state[R, 0]
197
+ new_state[B, 0] = self.state[L, 1]
198
+ new_state[B, 2] = self.state[L, 0]
199
+ new_state[R, 0] = self.state[B, 0]
200
+ new_state[R, 1] = self.state[B, 2]
201
+ new_state[L, 0] = self.state[F, 0]
202
+ new_state[L, 1] = self.state[F, 2]
203
+ new_state[B, 0] = self.state[B, 1]
204
+ new_state[B, 1] = self.state[B, 3]
205
+ new_state[B, 2] = self.state[B, 0]
206
+ new_state[B, 3] = self.state[B, 2]
207
 
208
  self.state = new_state
209
+ return self.state, 1 if self._is_solved() else -1, self._is_solved(), self.step_count >= 100, {}
210
 
211
  def _get_obs(self):
212
  one_hots = []
 
221
 
222
  def _is_solved(self):
223
  for i in range(6):
224
+ if np.mean(self.state[i]) != self.state[i][0]:
225
  return False
226
  return True
src/components/state-modal.tsx CHANGED
@@ -55,12 +55,12 @@ export const StateModal = forwardRef<StateModalRef, unknown>((_, ref) => {
55
  <div className="font-mono">[{state[3].map((index) => Index2Color[index]).join(', ')}]</div>
56
  </div>
57
  <div className="flex gap-2 items-center">
58
- <div className="text-sm w-24 font-mont">Up</div>
59
  <div className="font-mono">{JSON.stringify(state[4])}</div>
60
  <div className="font-mono">[{state[4].map((index) => Index2Color[index]).join(', ')}]</div>
61
  </div>
62
  <div className="flex gap-2 items-center">
63
- <div className="text-sm w-24 font-mont">Down</div>
64
  <div className="font-mono">{JSON.stringify(state[5])}</div>
65
  <div className="font-mono">[{state[5].map((index) => Index2Color[index]).join(', ')}]</div>
66
  </div>
 
55
  <div className="font-mono">[{state[3].map((index) => Index2Color[index]).join(', ')}]</div>
56
  </div>
57
  <div className="flex gap-2 items-center">
58
+ <div className="text-sm w-24 font-mont">Top</div>
59
  <div className="font-mono">{JSON.stringify(state[4])}</div>
60
  <div className="font-mono">[{state[4].map((index) => Index2Color[index]).join(', ')}]</div>
61
  </div>
62
  <div className="flex gap-2 items-center">
63
+ <div className="text-sm w-24 font-mont">Bottom</div>
64
  <div className="font-mono">{JSON.stringify(state[5])}</div>
65
  <div className="font-mono">[{state[5].map((index) => Index2Color[index]).join(', ')}]</div>
66
  </div>