Spaces:

cross-entropy-ai
/

rlcube

Sleeping

App Files Community

imwithye committed on Sep 20

Commit

8b59b01

1 Parent(s): f02352c

implement search

Browse files

Files changed (2) hide show

rlcube/cube2.ipynb +167 -58
rlcube/rlcube/models/search.py +95 -0

rlcube/cube2.ipynb CHANGED Viewed

@@ -40,7 +40,6 @@
    "source": [
     "from rlcube.models.models import DNN\n",
     "from rlcube.envs.cube2 import Cube2Env\n",
-    "import numpy as np\n",
     "import torch\n",
     "\n",
     "net = DNN()\n",
@@ -50,81 +49,191 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
    "id": "16736f3a",
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
      "output_type": "stream",
      "text": [
-      "rotationController.setState([[0, 0, 4, 4], [1, 1, 5, 5], [2, 2, 2, 2], [3, 3, 3, 3], [4, 4, 1, 1], [5, 5, 0, 0]]);\n",
-      "0.40487873554229736\n",
-      "4\n",
-      "\n",
-      "rotationController.setState([[0, 4, 0, 4], [1, 1, 5, 5], [2, 5, 2, 0], [3, 4, 3, 1], [4, 2, 1, 2], [5, 3, 0, 3]]);\n",
-      "0.0839405208826065\n",
-      "7\n",
-      "\n",
-      "rotationController.setState([[0, 4, 0, 4], [5, 1, 5, 1], [1, 5, 4, 0], [0, 4, 5, 1], [3, 2, 3, 2], [2, 3, 2, 3]]);\n",
-      "-0.23320673406124115\n",
-      "3\n",
-      "\n",
-      "rotationController.setState([[0, 5, 0, 1], [5, 4, 5, 0], [1, 5, 4, 4], [0, 4, 1, 1], [3, 3, 2, 2], [2, 3, 2, 3]]);\n",
-      "0.31869572401046753\n",
-      "0\n",
-      "\n",
-      "rotationController.setState([[5, 5, 1, 1], [4, 4, 0, 0], [5, 5, 4, 4], [0, 0, 1, 1], [3, 3, 2, 2], [3, 3, 2, 2]]);\n",
-      "-0.16905824840068817\n",
-      "7\n",
-      "\n",
-      "rotationController.setState([[5, 4, 1, 4], [4, 1, 0, 1], [5, 5, 4, 0], [0, 0, 5, 1], [3, 2, 3, 2], [3, 3, 2, 2]]);\n",
-      "0.20266102254390717\n",
-      "3\n",
-      "\n",
-      "rotationController.setState([[2, 3, 1, 4], [3, 3, 0, 1], [5, 5, 4, 0], [0, 1, 0, 5], [4, 1, 3, 2], [5, 4, 2, 2]]);\n",
-      "0.6111429333686829\n",
-      "3\n",
-      "\n",
-      "rotationController.setState([[2, 0, 1, 4], [3, 5, 0, 0], [5, 5, 3, 1], [0, 1, 3, 4], [1, 2, 4, 3], [5, 4, 2, 2]]);\n",
-      "1.3550236225128174\n",
-      "2\n",
-      "\n",
-      "rotationController.setState([[0, 0, 1, 4], [5, 5, 5, 0], [1, 2, 3, 1], [0, 3, 3, 4], [1, 2, 4, 3], [2, 5, 2, 4]]);\n",
-      "0.9975889325141907\n",
-      "7\n",
-      "\n",
-      "rotationController.setState([[2, 0, 1, 4], [3, 5, 0, 0], [5, 5, 3, 1], [0, 1, 3, 4], [1, 2, 4, 3], [5, 4, 2, 2]]);\n",
-      "1.3550236225128174\n",
-      "2\n",
-      "\n"
      ]
     }
    ],
    "source": [
-    "batch_obs = []\n",
-    "env = Cube2Env()\n",
-    "for _ in range(10):\n",
-    "    obs, _, _, _, _ = env.step(env.action_space.sample())\n",
-    "    batch_obs.append(torch.tensor(obs, dtype=torch.float32))\n",
-    "batched_obs = torch.stack(batch_obs)\n",
-    "out = net(batched_obs)\n",
     "\n",
-    "for i in range(10):\n",
-    "    env = Cube2Env.from_obs(batch_obs[i])\n",
-    "    env.print_js_code()\n",
-    "    print(out[\"value\"][i].item())\n",
-    "    print(torch.argmax(out[\"policy\"][i]).item())\n",
-    "    print()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "aee2a911",
    "metadata": {},
    "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

    "source": [
     "from rlcube.models.models import DNN\n",
     "from rlcube.envs.cube2 import Cube2Env\n",
     "import torch\n",
     "\n",
     "net = DNN()\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "id": "16736f3a",
    "metadata": {},
    "outputs": [
     {
+     "name": "stderr",
      "output_type": "stream",
      "text": [
+      "100%|██████████| 300/300 [00:02<00:00, 132.06it/s]\n"
      ]
     }
    ],
    "source": [
+    "from rlcube.models.search import MonteCarloTree\n",
     "\n",
+    "env = Cube2Env()\n",
+    "actions = []\n",
+    "for _ in range(3):\n",
+    "    action = env.action_space.sample()\n",
+    "    actions.append(action)\n",
+    "    env.step(action)\n",
+    "tree = MonteCarloTree(env.obs())"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "id": "aee2a911",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "node = tree.root"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "048f58c9",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[np.int64(8), np.int64(1), np.int64(4)]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "actions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "00994021",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([3.4725e+00, 3.3189e+00, 1.2619e-02, 3.1231e-01, 1.1286e-02, 2.5817e-02,\n",
+       "        1.6722e-02, 2.1334e-02, 3.4603e+00, 7.5021e-02, 2.5891e-02, 2.8712e-03])"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "node.u()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "fb9ac54c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "defaultdict(<function rlcube.models.search.Node.__init__.<locals>.<lambda>()>,\n",
+       "            {0: 276,\n",
+       "             1: 7,\n",
+       "             2: 0,\n",
+       "             3: 0,\n",
+       "             4: 0,\n",
+       "             5: 0,\n",
+       "             6: 0,\n",
+       "             7: 0,\n",
+       "             8: 16,\n",
+       "             9: 0,\n",
+       "             10: 0,\n",
+       "             11: 0})"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "node.N"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "2f8a09d1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "defaultdict(<function rlcube.models.search.Node.__init__.<locals>.<lambda>()>,\n",
+       "            {0: tensor([3.4720]),\n",
+       "             1: tensor([1.8959]),\n",
+       "             2: 0,\n",
+       "             3: 0,\n",
+       "             4: 0,\n",
+       "             5: 0,\n",
+       "             6: 0,\n",
+       "             7: 0,\n",
+       "             8: tensor([2.7285]),\n",
+       "             9: 0,\n",
+       "             10: 0,\n",
+       "             11: 0})"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "node.W"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "3e341459",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "defaultdict(<function rlcube.models.search.Node.__init__.<locals>.<lambda>()>,\n",
+       "            {0: 4,\n",
+       "             1: 0,\n",
+       "             2: 0,\n",
+       "             3: 0,\n",
+       "             4: 0,\n",
+       "             5: 2,\n",
+       "             6: 0,\n",
+       "             7: 0,\n",
+       "             8: 269,\n",
+       "             9: 0,\n",
+       "             10: 0,\n",
+       "             11: 0})"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "node.children[0].N"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "51dddf56",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "node.children[8].N"
+   ]
   }
  ],
  "metadata": {

rlcube/rlcube/models/search.py ADDED Viewed

	@@ -0,0 +1,95 @@

+from collections import defaultdict
+import torch
+from rlcube.models.models import DNN
+from rlcube.envs.cube2 import Cube2Env
+from tqdm import tqdm
# Module-level network instance. Node.__init__ calls `net` to score every
# observation it wraps, so importing this module constructs the model and
# runs the load below as an import-time side effect.
+net = DNN()
# NOTE(review): the checkpoint path is relative -- presumably resolved against
# the current working directory; confirm how DNN.load treats it.
+net.load("models/model_best.pth")
# NOTE(review): presumably switches the network to inference behaviour (as
# torch.nn.Module.eval does) -- confirm DNN is an nn.Module.
+net.eval()
class Node:
    """A search-tree node: one observation plus the network's evaluation of it.

    Attributes:
        obs: float32 tensor copy of the observation passed to the constructor.
        parent: the node this one was expanded from, or None for the root.
        is_solved: result of ``Cube2Env.from_obs(obs).is_solved()``.
        value: flattened ``out["value"]`` from the network, or ones of the
            same shape/dtype when the state is solved.
        policy: softmax over dim 1 of ``out["policy"]``, flattened.
        children: action -> child Node (None for actions not expanded yet).
        N: action -> number of backups that went through that action.
        W: action -> best (maximum) value backed up through that action.
    """

    # Weight of the prior/visit-count exploration term in u().
    EXPLORATION_C = 1.414

    def __init__(self, obs, parent=None):
        """Evaluate ``obs`` with the module-level ``net`` and set up statistics.

        Args:
            obs: observation accepted by ``Cube2Env.from_obs``; either a
                tensor or anything ``torch.tensor`` can convert.
            parent: optional parent Node.
        """
        if isinstance(obs, torch.Tensor):
            # torch.tensor(tensor) emits a copy-construct warning; clone instead.
            self.obs = obs.detach().clone().to(torch.float32)
        else:
            self.obs = torch.tensor(obs, dtype=torch.float32)
        self.parent = parent

        # Pure inference: do not build an autograd graph for every node.
        with torch.no_grad():
            out = net(self.obs.unsqueeze(0))
        value = out["value"].detach().view(-1)
        self.policy = torch.softmax(out["policy"].detach(), dim=1).view(-1)

        self.is_solved = Cube2Env.from_obs(obs).is_solved()
        # Keep shape and dtype identical for solved and unsolved nodes so the
        # value can be compared and summed uniformly during backup.
        self.value = torch.ones_like(value) if self.is_solved else value

        self.children = defaultdict(lambda: None)
        self.N = defaultdict(lambda: 0)
        self.W = defaultdict(lambda: 0)

    def is_leaf(self):
        """Return True while no child Node has been attached.

        Reading ``children[a]`` on a defaultdict inserts a ``None`` entry, so
        the check looks at the stored values rather than the dict length;
        merely probing a missing child must not turn a leaf into an inner node.
        """
        return all(child is None for child in self.children.values())

    def u(self):
        """Return the per-action selection score as a 1-D float tensor.

        For each action ``a``:
        ``EXPLORATION_C * policy[a] * sqrt(sum(N)) / (N[a] + 1) + W[a]``.
        The number of actions is taken from the length of ``policy``.
        """
        num_actions = self.policy.numel()
        # float() accepts both plain numbers and one-element tensors, which
        # are the two kinds of entries N and W hold.
        visits = torch.tensor([float(self.N[a]) for a in range(num_actions)])
        best_values = torch.tensor(
            [float(self.W[a]) for a in range(num_actions)]
        )
        exploration = (
            self.EXPLORATION_C
            * self.policy
            * torch.sqrt(visits.sum())
            / (visits + 1)
        )
        return exploration + best_values

    def select_action(self):
        """Return the index (int) of the action with the highest u() score."""
        return torch.argmax(self.u()).item()
class MonteCarloTree:
    """Network-guided tree search started from a single observation.

    The tree is built in the constructor. Each simulation walks from the root
    to a leaf using ``Node.select_action``, expands that leaf with one child
    per adjacent observation, and backs the leaf's value up along the path.
    The search stops early as soon as a solved state is found.

    Attributes:
        obs: the observation the search was started from.
        max_simulations: upper bound on the number of simulations.
        root: Node wrapping ``obs``.
        nodes: every Node created so far, root first.
        is_solved: True once the root or any expanded child is solved.
    """

    def __init__(self, obs, max_simulations=300):
        """Create the root node and run the search immediately.

        Args:
            obs: starting observation, passed to ``Node``.
            max_simulations: maximum number of select/expand/backup rounds.
        """
        self.obs = obs
        self.max_simulations = max_simulations
        self.root = Node(obs)
        self.nodes = [self.root]
        # A start state that is already solved must be reported as solved
        # and needs no expansion at all.
        self.is_solved = self.root.is_solved
        self._build()

    def _build(self):
        """Run simulations until the budget is spent or a solved state appears."""
        for _ in tqdm(range(self.max_simulations)):
            if self.is_solved:
                break

            # Selection: follow the highest-scoring action down to a leaf.
            node = self.root
            path = []
            while not node.is_leaf():
                action = node.select_action()
                path.append((node, action))
                node = node.children[action]

            # Expansion: attach one child per adjacent observation.
            adjacent_obs = Cube2Env.from_obs(node.obs).adjacent_obs()
            for action in range(len(adjacent_obs)):
                child = Node(adjacent_obs[action], node)
                node.children[action] = child
                self.nodes.append(child)
                if child.is_solved:
                    self.is_solved = True

            # Backup: count the visit and keep the best value seen through
            # each (parent, action) edge on the path.
            for parent, action in reversed(path):
                parent.N[action] += 1
                parent.W[action] = max(parent.W[action], node.value)
if __name__ == "__main__":
    # Smoke run: apply three random moves to a fresh cube, search from the
    # resulting observation, and print whether a solved state was reached.
    scrambled = Cube2Env()
    scramble_moves = 3
    for _move in range(scramble_moves):
        random_action = scrambled.action_space.sample()
        scrambled.step(random_action)
    search = MonteCarloTree(scrambled.obs())
    print(search.is_solved)