Spaces:

AliInamdar
/

SQL-ChatBot

Sleeping

File size: 2,754 Bytes

17225cb
 
 
2fba167
17225cb
 
 
8a08c4f
 
 
 
 
0cd2878
 
 
64b7268
0cd2878
64b7268
17225cb
0cd2878
 
 
 
 
 
 
 
17225cb
 
0cd2878
 
 
 
 
807b1cf
0cd2878
807b1cf
 
 
0cd2878
 
 
 
807b1cf
454008d
0cd2878
 
 
 
adca139
0cd2878
 
17225cb
0cd2878
 
 
 
17225cb
 
0cd2878
 
cfc4c6b
17225cb
0cd2878
 
 
 
17225cb
0cd2878
2c600a0
0cd2878
17225cb
0cd2878
 
 
 
454008d
 
 
 
0cd2878
adca139
0cd2878
17225cb
0cd2878

import gradio as gr
import pandas as pd
import duckdb
import requests
import re
import os

print("🔍 ENV DEBUG:")
print("GROQ_API_KEY found:", "GROQ_API_KEY" in os.environ)
print("GROQ_API_KEY value starts with:", os.environ.get("GROQ_API_KEY", "")[:8])


# 🔐 Load Groq API Key securely
def get_groq_api_key():
    key = os.environ.get("GROQ_API_KEY")
    if not key:
        raise RuntimeError("❌ GROQ_API_KEY not found in environment. Add it in Hugging Face Secrets.")
    return key

GROQ_API_KEY = get_groq_api_key()

# 🧠 Generate SQL using Groq LLaMA3 model
def generate_sql_from_prompt(prompt, df):
    schema = ", ".join([f"{col} ({str(dtype)})" for col, dtype in df.dtypes.items()])
    full_prompt = f"""
You are a SQL expert. The table is called 'df' and has the following columns:
{schema}

User question: "{prompt}"

Write a valid SQL query using the 'df' table. Return only the SQL code.
"""

    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "llama3-70b-8192",
        "messages": [{"role": "user", "content": full_prompt}],
        "temperature": 0.3,
        "max_tokens": 300
    }

    response = requests.post(url, headers=headers, json=payload)
    response.raise_for_status()
    result = response.json()
    return result['choices'][0]['message']['content'].strip("```sql").strip("```").strip()

# 🧽 Clean SQL for DuckDB
def clean_sql_for_duckdb(sql, df_columns):
    sql = sql.replace("`", '"')
    for col in df_columns:
        if " " in col and f'"{col}"' not in sql:
            pattern = r'\b' + re.escape(col) + r'\b'
            sql = re.sub(pattern, f'"{col}"', sql)
    return sql

# 💬 Main chatbot logic
def chatbot_interface(file, question):
    try:
        df = pd.read_excel(file)
        sql = generate_sql_from_prompt(question, df)
        cleaned_sql = clean_sql_for_duckdb(sql, df.columns)
        result = duckdb.query(cleaned_sql).to_df()
        return f"📜 SQL Query:\n```sql\n{sql}\n```", result
    except Exception as e:
        return f"❌ Error: {str(e)}", pd.DataFrame()

# 🎛️ Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Excel SQL Chatbot powered by Groq + LLaMA3")
    with gr.Row():
        file_input = gr.File(label="📂 Upload Excel File (.xlsx)")
        question = gr.Textbox(label="🧠 Ask your SQL question")
    submit = gr.Button("🚀 Generate & Run")
    sql_output = gr.Markdown()
    result_table = gr.Dataframe()

    submit.click(fn=chatbot_interface, inputs=[file_input, question], outputs=[sql_output, result_table])

# 🚀 Run the app
if __name__ == "__main__":
    demo.launch()