Spaces:
Sleeping
Sleeping
File size: 3,294 Bytes
740a870 de1c670 740a870 04cb4b7 de1c670 04cb4b7 de1c670 740a870 de1c670 740a870 04cb4b7 740a870 de1c670 740a870 04cb4b7 de1c670 04cb4b7 740a870 04cb4b7 de1c670 740a870 de1c670 04cb4b7 de1c670 740a870 04cb4b7 de1c670 04cb4b7 740a870 de1c670 04cb4b7 740a870 de1c670 04cb4b7 740a870 04cb4b7 740a870 04cb4b7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import gradio as gr
import pandas as pd
import duckdb
import requests
import re
import os
from io import BytesIO
# π Read Together API key from Hugging Face Secrets
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
# π§ Generate SQL from Prompt
def generate_sql_from_prompt(prompt, df):
schema = ", ".join([f"{col} ({str(dtype)})" for col, dtype in df.dtypes.items()])
full_prompt = f"""
You are a SQL expert. Here is a table called 'df' with the following schema:
{schema}
User question: "{prompt}"
Write a valid SQL query using the 'df' table. Return only the SQL code.
"""
url = "https://api.together.xyz/v1/chat/completions"
headers = {
"Authorization": f"Bearer {TOGETHER_API_KEY}",
"Content-Type": "application/json"
}
payload = {
"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"messages": [{"role": "user", "content": full_prompt}],
"temperature": 0.2,
"max_tokens": 200
}
try:
response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
result = response.json()
return result['choices'][0]['message']['content'].strip("```sql").strip("```").strip()
except Exception as e:
return f"Error in API call: {str(e)}"
# π§½ Clean SQL for DuckDB
def clean_sql_for_duckdb(sql, df_columns):
sql = sql.replace("`", '"')
for col in df_columns:
if " " in col and f'"{col}"' not in sql:
pattern = r'\b' + re.escape(col) + r'\b'
sql = re.sub(pattern, f'"{col}"', sql)
return sql
# π Global dataframe holder
df_global = {"df": None}
# π Upload handler
def upload_excel(file):
if file is None:
return "β No file uploaded."
try:
df = pd.read_excel(BytesIO(file.read()))
df_global["df"] = df
return f"β
Uploaded file with shape: {df.shape}"
except Exception as e:
return f"β Failed to load file: {e}"
# π¬ Handle prompt
def handle_query(prompt):
df = df_global.get("df")
if df is None:
return "β Please upload an Excel file first.", pd.DataFrame()
try:
sql = generate_sql_from_prompt(prompt, df)
if sql.startswith("Error"):
return sql, pd.DataFrame()
cleaned_sql = clean_sql_for_duckdb(sql, df.columns)
result_df = duckdb.query(cleaned_sql).to_df()
return f"π Generated SQL:\n{sql}", result_df
except Exception as e:
return f"β Error: {e}", pd.DataFrame()
# π¨ UI
with gr.Blocks() as demo:
gr.Markdown("# π€ SQL Chatbot with Together API + DuckDB")
file_input = gr.File(label="π Upload Excel File (.xlsx only)", file_types=[".xlsx"])
upload_status = gr.Textbox(label="Status", interactive=False)
with gr.Row():
prompt_box = gr.Textbox(label="π¬ Your Question", placeholder="e.g., Show me total sales by region")
run_button = gr.Button("π Generate SQL + Run")
sql_output = gr.Textbox(label="π SQL Query")
result_table = gr.Dataframe(label="π Query Results")
file_input.change(upload_excel, inputs=file_input, outputs=upload_status)
run_button.click(handle_query, inputs=prompt_box, outputs=[sql_output, result_table])
if __name__ == "__main__":
demo.launch()
|