File size: 3,294 Bytes
740a870
de1c670
 
 
 
 
 
740a870
04cb4b7
 
de1c670
04cb4b7
de1c670
 
 
 
 
 
 
 
 
740a870
 
de1c670
 
 
 
 
 
 
 
 
 
 
740a870
04cb4b7
 
 
 
 
 
 
740a870
de1c670
 
 
 
 
 
 
 
740a870
04cb4b7
 
 
 
de1c670
04cb4b7
 
 
 
 
 
 
 
740a870
04cb4b7
 
 
de1c670
 
740a870
de1c670
 
04cb4b7
 
 
de1c670
 
 
 
 
740a870
04cb4b7
de1c670
 
04cb4b7
 
740a870
de1c670
04cb4b7
 
740a870
de1c670
04cb4b7
740a870
04cb4b7
 
740a870
 
04cb4b7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import gradio as gr
import pandas as pd
import duckdb
import requests
import re
import os
from io import BytesIO

# πŸ” Read Together API key from Hugging Face Secrets
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")

# 🧠 Generate SQL from Prompt
def generate_sql_from_prompt(prompt, df):
    schema = ", ".join([f"{col} ({str(dtype)})" for col, dtype in df.dtypes.items()])
    full_prompt = f"""
You are a SQL expert. Here is a table called 'df' with the following schema:
{schema}

User question: "{prompt}"

Write a valid SQL query using the 'df' table. Return only the SQL code.
"""

    url = "https://api.together.xyz/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {TOGETHER_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "messages": [{"role": "user", "content": full_prompt}],
        "temperature": 0.2,
        "max_tokens": 200
    }

    try:
        response = requests.post(url, headers=headers, json=payload)
        response.raise_for_status()
        result = response.json()
        return result['choices'][0]['message']['content'].strip("```sql").strip("```").strip()
    except Exception as e:
        return f"Error in API call: {str(e)}"

# 🧽 Clean SQL for DuckDB
def clean_sql_for_duckdb(sql, df_columns):
    sql = sql.replace("`", '"')
    for col in df_columns:
        if " " in col and f'"{col}"' not in sql:
            pattern = r'\b' + re.escape(col) + r'\b'
            sql = re.sub(pattern, f'"{col}"', sql)
    return sql

# πŸ”„ Global dataframe holder
df_global = {"df": None}

# πŸ“‚ Upload handler
def upload_excel(file):
    if file is None:
        return "❌ No file uploaded."
    try:
        df = pd.read_excel(BytesIO(file.read()))
        df_global["df"] = df
        return f"βœ… Uploaded file with shape: {df.shape}"
    except Exception as e:
        return f"❌ Failed to load file: {e}"

# πŸ’¬ Handle prompt
def handle_query(prompt):
    df = df_global.get("df")
    if df is None:
        return "❌ Please upload an Excel file first.", pd.DataFrame()

    try:
        sql = generate_sql_from_prompt(prompt, df)
        if sql.startswith("Error"):
            return sql, pd.DataFrame()

        cleaned_sql = clean_sql_for_duckdb(sql, df.columns)
        result_df = duckdb.query(cleaned_sql).to_df()
        return f"πŸ“œ Generated SQL:\n{sql}", result_df
    except Exception as e:
        return f"❌ Error: {e}", pd.DataFrame()

# 🎨 UI
with gr.Blocks() as demo:
    gr.Markdown("# πŸ€– SQL Chatbot with Together API + DuckDB")
    file_input = gr.File(label="πŸ“‚ Upload Excel File (.xlsx only)", file_types=[".xlsx"])
    upload_status = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        prompt_box = gr.Textbox(label="πŸ’¬ Your Question", placeholder="e.g., Show me total sales by region")
        run_button = gr.Button("πŸš€ Generate SQL + Run")

    sql_output = gr.Textbox(label="πŸ“œ SQL Query")
    result_table = gr.Dataframe(label="πŸ“Š Query Results")

    file_input.change(upload_excel, inputs=file_input, outputs=upload_status)
    run_button.click(handle_query, inputs=prompt_box, outputs=[sql_output, result_table])

if __name__ == "__main__":
    demo.launch()