Spaces:

derek-thomas
/

arabic-RAG

Paused

App Files Files Community

derek-thomas committed on Nov 16, 2023

Commit

7dfa79f

1 Parent(s): 37df3b9

Using gr.Warning, and adding jais generate time.

Browse files

Files changed (1) hide show

backend/query_llm.py +19 -26

backend/query_llm.py CHANGED Viewed

@@ -1,10 +1,16 @@
 import datetime
 import os
 from os import getenv
 import gradio as gr
 import requests
 API_URL = getenv('API_URL')
 BEARER = getenv('BEARER')
@@ -22,56 +28,43 @@ def call_jais(payload):
     except requests.exceptions.HTTPError as http_err:
         # Check if the error is a 5XX server error
         if 500 <= http_err.response.status_code < 600:
-            raise gr.Error("The endpoint is loading, it takes about 4 min from the first call.")
         else:
-            raise gr.Error(f"An error occurred while processing the request. {http_err}")
     except Exception as err:
-        raise gr.Error(f"Check Inference Endpoint Status. An error occurred while processing the request. {err}")
 def generate(prompt: str):
     payload = {'inputs': '', 'prompt': prompt}
     response = call_jais(payload)
-    return response
-# Global variable to store the previous status and the time when it changed
-previous_status = None
-status_change_time = None
 def check_endpoint_status():
-    global previous_status, status_change_time
     api_url = os.getenv("ENDPOINT_URL")
     headers = {
         'accept': 'application/json',
         'Authorization': f'Bearer {os.getenv("BEARER")}'
-        }
     try:
         response = requests.get(api_url, headers=headers)
-        response.raise_for_status()  # will throw an exception for non-200 status
         data = response.json()
         # Extracting the status information
         status = data.get('status', {}).get('state', 'No status found')
         message = data.get('status', {}).get('message', 'No message found')
-        # Check if the status has changed
-        if status != previous_status:
-            previous_status = status
-            status_change_time = datetime.datetime.now()
-        # If the previous status was 'scaled to zero' and the current one isn't,
-        # start the countdown
-        countdown_message = ""
-        if status_change_time and previous_status == "scaled to zero" and status != "scaled to zero":
-            elapsed_time = datetime.datetime.now() - status_change_time
-            if elapsed_time < datetime.timedelta(minutes=4):
-                remaining_time = datetime.timedelta(minutes=4) - elapsed_time
-                countdown_message = f"Countdown: {remaining_time} remaining until fully operational."
-        return f"Status: {status}\nMessage: {message}\n{countdown_message}"
     except requests.exceptions.RequestException as e:
         return f"Failed to get status: {str(e)}"

 import datetime
+import logging
 import os
 from os import getenv
+import time
 import gradio as gr
 import requests
+# Setting up the logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 API_URL = getenv('API_URL')
 BEARER = getenv('BEARER')
     except requests.exceptions.HTTPError as http_err:
         # Check if the error is a 5XX server error
         if 500 <= http_err.response.status_code < 600:
+            raise gr.Warning("The endpoint is loading, it takes about 4 min from the first call.")
         else:
+            raise gr.Warning(f"An error occurred while processing the request. {http_err}")
     except Exception as err:
+        raise gr.Warning(f"Check Inference Endpoint Status. An error occurred while processing the request. {err}")
 def generate(prompt: str):
+    start_time = time.perf_counter()
     payload = {'inputs': '', 'prompt': prompt}
     response = call_jais(payload)
+    end_time = time.perf_counter()
+    elapsed_time = end_time - start_time
+    logger.warning(f"Function took {elapsed_time:.1f} seconds to execute")
+    return response
 def check_endpoint_status():
+    # Replace with the actual API URL and headers
     api_url = os.getenv("ENDPOINT_URL")
     headers = {
         'accept': 'application/json',
         'Authorization': f'Bearer {os.getenv("BEARER")}'
+    }
     try:
         response = requests.get(api_url, headers=headers)
+        response.raise_for_status()
         data = response.json()
         # Extracting the status information
         status = data.get('status', {}).get('state', 'No status found')
         message = data.get('status', {}).get('message', 'No message found')
+        return f"Status: {status}\nMessage: {message}"
     except requests.exceptions.RequestException as e:
         return f"Failed to get status: {str(e)}"