Spaces:

ttomy
/

proxy-lite-demo-for-setup

Paused

App Files Community

Trisha Tomy committed on Jun 26

Commit

928d95c

1 Parent(s): 7af9344

trying fixes for loading

Browse files

Files changed (1) hide show

src/proxy_lite/browser/browser.py +43 -117

src/proxy_lite/browser/browser.py CHANGED Viewed

@@ -15,7 +15,7 @@ from tenacity import before_sleep_log, retry, stop_after_delay, wait_exponential
 from proxy_lite.browser.bounding_boxes import POI, BoundingBox, Point, annotate_bounding_boxes
 from proxy_lite.logger import logger
-import base64 # Make sure this import is present!
 SELF_CONTAINED_TAGS = [
     # many of these are non-interactive but keeping them anyway
@@ -107,10 +107,8 @@ class BrowserSession:
             viewport={"width": self.viewport_width, "height": self.viewport_height},
         )
         await self.context.new_page()
-        # Set default timeouts for context and page
-        self.context.set_default_timeout(180_000) # Increased based on previous discussions
-        self.current_page.set_default_timeout(180_000) # Increased based on previous discussions
         await stealth_async(self.current_page, StealthConfig(navigator_user_agent=False))
         await self.context.add_init_script(
             path=Path(__file__).with_name("add_custom_select.js"),
@@ -187,31 +185,31 @@ class BrowserSession:
     )
     async def update_poi(self) -> None:
         try:
-            # Step 1: Wait for DOMContentLoaded. This ensures the basic HTML structure is parsed.
-            logger.debug("Attempting wait_for_load_state('domcontentloaded')...")
-            await self.current_page.wait_for_load_state("domcontentloaded", timeout=180000)
-            logger.debug("wait_for_load_state('domcontentloaded') completed.")
-            # Step 2: Wait for the specific text "Account Forecasting" to be visible on the page.
-            # This is a strong indicator that the core content for the task has loaded.
-            target_text = "Account Forecasting"
-            logger.debug(f"Attempting to wait for text: '{target_text}' to be visible...")
-            await self.current_page.wait_for_selector(f"text={target_text}", timeout=180000, state="visible")
-            logger.debug(f"Text '{target_text}' became visible.")
-            # Optional: You can still add a wait for network idle *after* the text is visible
-            # if the page still isn't interactive immediately, but prioritize the text.
-            # try:
-            #     await self.current_page.wait_for_load_state("networkidle", timeout=60000) # Shorter timeout here
-            #     logger.debug("wait_for_load_state('networkidle') completed after text appeared.")
-            # except PlaywrightTimeoutError:
-            #     logger.warning("Network idle state not reached after text appeared, but proceeding.")
         except PlaywrightTimeoutError as e:
             # --- START TEMPORARY DEBUGGING CODE ---
             # This block captures state specifically when a Playwright timeout occurs
             current_url = self.current_page.url if self.current_page else "N/A"
-            logger.error(f"DEBUGGING: Playwright Timeout (180s) during page readiness check for text '{target_text}' at URL: {current_url}")
             html_content = None
             try:
@@ -224,89 +222,17 @@ class BrowserSession:
             screenshot_b64 = "N/A"
             try:
                 if self.current_page:
                     screenshot_bytes = await self.current_page.screenshot(type="jpeg", quality=50)
                     screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
                     logger.error(f"DEBUGGING: Base64 Screenshot (truncated) when timeout occurred:\ndata:image/jpeg;base64,{screenshot_b64[:100]}... (full string is much longer)")
-            except Exception as ss_e:
-                logger.error(f"DEBUGGING: Could not take screenshot for debug: {ss_e}")
-            # It's crucial to re-raise the exception so the upstream code knows the operation failed.
-            raise e
-            # --- END TEMPORARY DEBUGGING CODE ---
-        except Exception as e:
-            # This catches any other unexpected errors during the page readiness checks
-            logger.error(f"An unexpected error occurred during page readiness check: {e}")
-            raise # Re-raise to propagate
-        # --- Code below this point will only execute if the page readiness checks pass ---
-        # Ensure this block is at the SAME INDENTATION LEVEL as the 'try' and 'except' above.
-        # Run the bounding box javascript code to highlight the points of interest on the page
-        # This part assumes the page is now ready for interaction and content extraction.
-        page_info = await self.current_page.evaluate(
-            """() => {
-                overwriteDefaultSelectConvergence();
-                return findPOIsConvergence();
-            }""",
-        )
-        # Get the points of interest on the page
-        self.poi_elements = page_info["element_descriptions"]
-        element_centroids = page_info["element_centroids"]
-        try: # This is a new try block for iframe processing
-            # Select all iframes on the page
-            iframes = await self.current_page.query_selector_all("iframe")
-            max_iframes = 10 # Limit the number of iframes to process for performance
-            # Define an asynchronous function to process and filter each iframe
-            tasks = [asyncio.create_task(self.process_iframe(iframe)) for iframe in iframes[:max_iframes]]
-            # Gather results from iframe processing tasks concurrently
-            results = await asyncio.gather(*tasks)
-            # Filter out any None results from iframe processing errors or non-visible iframes
-            filtered_results = [result for result in results if result is not None]
-            iframes_pois = []
-            iframe_offsets = []
-            for poi, offset in filtered_results:
-                iframes_pois.append(poi)
-                iframe_offsets.append(offset)
-            # Combine the points of interest from the iframes with the main page and adjust the centroids
-            for index, iframe_poi in enumerate(iframes_pois):
-                self.poi_elements.extend(iframe_poi["element_descriptions"])
-                for centroid in iframe_poi["element_centroids"]:
-                    # Adjust iframe POI coordinates based on iframe's position on the main page
-                    centroid["x"] += iframe_offsets[index]["x"]
-                    centroid["y"] += iframe_offsets[index]["y"]
-                    centroid["left"] += iframe_offsets[index]["x"]
-                    centroid["top"] += iframe_offsets[index]["y"]
-                    centroid["right"] += iframe_offsets[index]["x"]
-                    centroid["bottom"] += iframe_offsets[index]["y"]
-                element_centroids.extend(iframe_poi["element_centroids"])
-        except Exception as e:
-            logger.error(f"Error in finding iframes: {e}")
-            # Do not re-raise here unless iframe parsing is critical for the main task
-            # Iframes not found is often not a fatal error for core functionality.
-        # Get the centroids of the points of interest
-        self.poi_centroids = [Point(x=xy["x"], y=xy["y"]) for xy in element_centroids]
-        # Create BoundingBox objects for annotation
-        self.bounding_boxes = [BoundingBox(**xy, label=str(i)) for i, xy in enumerate(element_centroids)]
-        # Create POI objects which combine info, centroid, and bounding box
-        self.pois = [
-            POI(info=info, element_centroid=centroid, bounding_box=bbox)
-            for info, centroid, bbox in zip(
-                self.poi_elements,
-                self.poi_centroids,
-                self.bounding_boxes,
-                strict=False, # Use strict=False if lengths might genuinely differ slightly
-            )
-        ]
     @property
     def poi_text(self) -> str:
@@ -444,17 +370,17 @@ class BrowserSession:
             await self.current_page.keyboard.press("Backspace")
-    if __name__ == "__main__":
-        async def dummy_test():
-            async with BrowserSession(headless=False) as s:
-                page = await s.context.new_page()
-                await page.goto("http://google.co.uk")
-                await asyncio.sleep(5)
-                await page.screenshot(path="example.png")
-                await s.update_poi()
-                _, annotated_image = await s.screenshot()
-                with open("output.png", "wb") as f:
-                    f.write(annotated_image)
-        asyncio.run(dummy_test())

 from proxy_lite.browser.bounding_boxes import POI, BoundingBox, Point, annotate_bounding_boxes
 from proxy_lite.logger import logger
+import base64
 SELF_CONTAINED_TAGS = [
     # many of these are non-interactive but keeping them anyway
             viewport={"width": self.viewport_width, "height": self.viewport_height},
         )
         await self.context.new_page()
+        self.context.set_default_timeout(60_000)
+        self.current_page.set_default_timeout(60_000)
         await stealth_async(self.current_page, StealthConfig(navigator_user_agent=False))
         await self.context.add_init_script(
             path=Path(__file__).with_name("add_custom_select.js"),
     )
     async def update_poi(self) -> None:
         try:
+            # Step 1: Wait for network to be idle. This indicates that initial requests have settled.
+            logger.debug("Attempting wait_for_load_state('networkidle')...")
+            await self.current_page.wait_for_load_state("networkidle", timeout=180000) # Increased timeout
+            logger.debug("wait_for_load_state('networkidle') completed.")
+            # Step 2: Wait for the 'loading' class to disappear from the body.
+            # This is a common and effective way to detect when SPAs like Salesforce are visually ready.
+            logger.debug("Attempting wait_for_selector('body:not(.loading)')...")
+            # Removed state="visible" as it's often too strict for 'body' in SPAs,
+            # and 'not(.loading)' implies it should become visible eventually.
+            await self.current_page.wait_for_selector("body:not(.loading)", timeout=180000)
+            logger.debug("wait_for_selector('body:not(.loading)') completed.")
+            # Optional Step 3 (Highly Recommended): If the above still times out,
+            # uncomment and replace with a reliable selector for an interactive element
+            # that only appears after the Salesforce UI is fully loaded and ready for user input.
+            # Example: await self.current_page.wait_for_selector("#some_salesforce_specific_id", timeout=180000, state="visible")
+            # Example: await self.current_page.wait_for_selector("text=App Launcher", timeout=180000, state="visible")
+            # For now, we'll rely on the 'body:not(.loading)' as the primary indicator.
         except PlaywrightTimeoutError as e:
             # --- START TEMPORARY DEBUGGING CODE ---
             # This block captures state specifically when a Playwright timeout occurs
             current_url = self.current_page.url if self.current_page else "N/A"
+            logger.error(f"DEBUGGING: Playwright Timeout (180s) during page readiness check at URL: {current_url}")
             html_content = None
             try:
             screenshot_b64 = "N/A"
             try:
                 if self.current_page:
+                    # Capture screenshot at lower quality (e.g., 50) to keep log size manageable.
+                    # Higher quality might make logs too large for some platforms.
                     screenshot_bytes = await self.current_page.screenshot(type="jpeg", quality=50)
                     screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
+                    # Log only a very short snippet of base64 string to confirm it's there
                     logger.error(f"DEBUGGING: Base64 Screenshot (truncated) when timeout occurred:\ndata:image/jpeg;base64,{screenshot_b64[:100]}... (full string is much longer)")
+                # If you want to view the full screenshot locally during development, you can save it:
+                # with open("debug_timeout_full_screenshot.jpeg", "wb") as f:
+                #     f.write(screenshot_bytes)
+                # logger.error("DEBUGGING: Full screenshot saved to debug_timeout_full_screenshot.jpeg")
     @property
     def poi_text(self) -> str:
             await self.current_page.keyboard.press("Backspace")
+if __name__ == "__main__":
+    async def dummy_test():
+        async with BrowserSession(headless=False) as s:
+            page = await s.context.new_page()
+            await page.goto("http://google.co.uk")
+            await asyncio.sleep(5)
+            await page.screenshot(path="example.png")
+            await s.update_poi()
+            _, annotated_image = await s.screenshot()
+            with open("output.png", "wb") as f:
+                f.write(annotated_image)
+    asyncio.run(dummy_test())