|
|
import os |
|
|
import base64 |
|
|
import io |
|
|
import pandas as pd |
|
|
import plotly.express as px |
|
|
import plotly.graph_objects as go |
|
|
from dash import Dash, html, dcc, Input, Output, State, callback_context |
|
|
import dash_bootstrap_components as dbc |
|
|
from typing import Optional |
|
|
from dotenv import load_dotenv |
|
|
from pydantic import Field, SecretStr |
|
|
import numpy as np |
|
|
|
|
|
|
|
|
from langchain_community.vectorstores import FAISS |
|
|
from langchain_text_splitters import RecursiveCharacterTextSplitter |
|
|
from langchain.schema import Document |
|
|
from langchain_core.prompts import PromptTemplate |
|
|
|
|
|
|
|
|
# Read variables from a .env file (if any) before anything else runs.
load_dotenv()

# Module-level AI state: the flag starts off and no model is configured.
AI_AVAILABLE, openrouter_model = False, None

# No vector store exists until one is created.
vector_store = None

# The Dash application, styled with the Bootstrap theme, and its server
# object re-exported at module level.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
server = app.server
|
|
|
|
|
|
|
|
def create_builtin_datasets():
    """Create the built-in sample datasets.

    All synthetic values are drawn from NumPy's global random generator,
    which is seeded with 42 at the start of the call.

    Returns:
        dict: DataFrames keyed, in order, by ``'Gapminder'``, ``'Iris'``,
        ``'Tips'``, ``'Stock Data'`` and ``'Wind Data'``.
    """
    datasets = {}

    # --- Gapminder-style data: one row per country per year -------------
    np.random.seed(42)
    countries = ['USA', 'China', 'India', 'Germany', 'UK', 'France', 'Japan', 'Brazil', 'Canada', 'Australia']
    years = list(range(2000, 2021))
    # Countries missing from this table are labelled 'Oceania'.
    continent_of = {
        'China': 'Asia', 'India': 'Asia', 'Japan': 'Asia',
        'Germany': 'Europe', 'UK': 'Europe', 'France': 'Europe',
        'USA': 'Americas', 'Brazil': 'Americas', 'Canada': 'Americas',
    }
    gapminder_data = []
    for country in countries:
        base_gdp = np.random.uniform(20000, 80000)
        base_life_exp = np.random.uniform(70, 85)
        base_pop = np.random.uniform(10000000, 100000000)
        for year in years:
            gapminder_data.append({
                'country': country,
                'year': year,
                'gdpPercap': base_gdp * (1 + np.random.uniform(-0.1, 0.15)) * ((year-2000)*0.02 + 1),
                'lifeExp': base_life_exp + np.random.uniform(-2, 3) + (year-2000)*0.1,
                'pop': base_pop * (1.01 + np.random.uniform(-0.005, 0.015))**(year-2000),
                'continent': continent_of.get(country, 'Oceania'),
            })
    datasets['Gapminder'] = pd.DataFrame(gapminder_data)

    # --- Iris: real data from scikit-learn, synthetic fallback otherwise -
    try:
        # The import must live inside the try block; otherwise a missing
        # scikit-learn raises before the fallback below can take over.
        from sklearn.datasets import load_iris

        iris = load_iris()
        iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
        iris_df['species'] = [iris.target_names[i] for i in iris.target]
    except ImportError:
        iris_df = pd.DataFrame({
            'sepal_length': np.random.normal(5.8, 0.8, 150),
            'sepal_width': np.random.normal(3.0, 0.4, 150),
            'petal_length': np.random.normal(3.8, 1.8, 150),
            'petal_width': np.random.normal(1.2, 0.8, 150),
            'species': ['setosa']*50 + ['versicolor']*50 + ['virginica']*50,
        })
    datasets['Iris'] = iris_df

    # --- Tips: 200 independent random rows -------------------------------
    tips_data = {
        'total_bill': np.random.uniform(10, 50, 200),
        'tip': np.random.uniform(1, 10, 200),
        'sex': np.random.choice(['Male', 'Female'], 200),
        'smoker': np.random.choice(['Yes', 'No'], 200),
        'day': np.random.choice(['Thur', 'Fri', 'Sat', 'Sun'], 200),
        'time': np.random.choice(['Lunch', 'Dinner'], 200),
        'size': np.random.choice([1, 2, 3, 4, 5, 6], 200),
    }
    datasets['Tips'] = pd.DataFrame(tips_data)

    # --- Stock data: daily price compounded from random daily returns ----
    dates = pd.date_range('2020-01-01', '2023-12-31', freq='D')
    stock_price = 100
    stock_data = []
    for date in dates:
        daily_return = np.random.normal(0.001, 0.02)
        stock_price *= (1 + daily_return)
        stock_data.append({
            'date': date,
            'price': stock_price,
            'volume': np.random.randint(1000000, 5000000),
            'high': stock_price * (1 + abs(np.random.normal(0, 0.01))),
            'low': stock_price * (1 - abs(np.random.normal(0, 0.01))),
            'open': stock_price * (1 + np.random.normal(0, 0.005)),
        })
    datasets['Stock Data'] = pd.DataFrame(stock_data)

    # --- Wind data: hourly rows for days 1-28 of each of 12 months -------
    hours = list(range(24))
    wind_data = []
    for month in range(1, 13):
        for day in range(1, 29):
            for hour in hours:
                wind_data.append({
                    'month': month,
                    'day': day,
                    'hour': hour,
                    'wind_speed': abs(np.random.normal(15, 8)) + 5*np.sin(hour/24*2*np.pi),
                    'temperature': np.random.normal(20, 15) + 10*np.cos(month/12*2*np.pi),
                    'humidity': np.random.uniform(30, 90),
                    'pressure': np.random.normal(1013, 20),
                })
    datasets['Wind Data'] = pd.DataFrame(wind_data)

    return datasets
|
|
|
|
|
|
|
|
# Generate the sample datasets once, when the module is loaded.
builtin_datasets = create_builtin_datasets()

# ---------------------------------------------------------------------
# Page layout. Each tab is assembled in its own variable and the pieces
# are combined into ``app.layout`` at the end.
# ---------------------------------------------------------------------

_page_header = dbc.Row([
    dbc.Col([
        html.H1("🤖 AI-Powered Data Analytics", className="text-center mb-4"),
        html.P("Upload data, ask questions, and get AI-powered insights!",
               className="text-center text-muted"),
        html.Hr(),
    ], width=12)
])

# Inline style for the drag-and-drop upload area.
_upload_box_style = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px',
}

_builtin_names = ["Gapminder", "Iris", "Tips", "Stock Data", "Wind Data"]

# Left column of the first tab: built-in loader, uploader, dataset switcher
# and status area.
_dataset_controls = dbc.Col([
    dbc.Card([
        dbc.CardBody([
            html.H4("Load Built-in Dataset", className="card-title"),
            dcc.Dropdown(
                id="builtin-choice",
                options=[{"label": name, "value": name} for name in _builtin_names],
                value="Gapminder",
                className="mb-2",
            ),
            dbc.Button("Load Dataset", id="load-builtin-btn", color="primary", className="mb-3"),

            html.Hr(),
            html.H4("Upload Custom Dataset", className="card-title"),
            dcc.Upload(
                id='file-upload',
                children=html.Div([
                    'Drag and Drop or ',
                    html.A('Select CSV/Excel Files'),
                ]),
                style=_upload_box_style,
                multiple=False,
                accept='.csv,.xlsx,.xls',
            ),

            dbc.Input(
                id="custom-name",
                placeholder="Dataset Name (optional)",
                type="text",
                className="mb-2",
            ),
            dbc.Button("Upload", id="upload-btn", color="primary", className="mb-3"),

            html.Hr(),
            html.H4("Active Datasets", className="card-title"),
            dcc.Dropdown(
                id="dataset-selector",
                options=[{"label": "Gapminder", "value": "Gapminder"}],
                value="Gapminder",
                className="mb-2",
            ),

            html.Hr(),
            html.Div(id="status-msg", children=[
                dbc.Alert("Ready to load data", color="info"),
            ]),
            html.Div(id="data-info"),
        ])
    ])
], width=4)

# Right column of the first tab: preview table and automatic analytics.
_dataset_preview = dbc.Col([
    dbc.Card([
        dbc.CardBody([
            html.H4("Data Preview (First 10 rows)", className="card-title"),
            html.Div(id="data-preview", className="mb-4"),
            html.H4("Quick Analytics", className="card-title"),
            html.Div(id="auto-analytics"),
        ])
    ])
], width=8)

_dataset_tab = dbc.Tab(label="📁 Dataset Management", tab_id="dataset-management", children=[
    dbc.Row([_dataset_controls, _dataset_preview], className="mt-4")
])

_ai_tab = dbc.Tab(label="🤖 AI Assistant", tab_id="ai-assistant", children=[
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H4("🤖 AI Assistant", className="card-title"),
                    html.Div(id="ai-dataset-info", className="mb-3", children=[
                        dbc.Alert("No dataset loaded. Please load a dataset in the Dataset Management tab first.",
                                  color="warning", className="mb-3"),
                    ]),
                    dbc.InputGroup([
                        dbc.Input(
                            id="ai-question",
                            placeholder="Ask questions about your data...",
                            type="text",
                            style={"fontSize": "14px"},
                        ),
                        dbc.Button(
                            "Ask AI",
                            id="ask-button",
                            color="primary",
                            n_clicks=0,
                        ),
                    ]),

                    html.Div(id="ai-response", className="mt-3"),
                ])
            ])
        ], width=12)
    ], className="mt-4")
])

# (label, value) pairs offered by the chart-type dropdown.
_chart_choices = [
    ('Scatter Plot', 'scatter'),
    ('Line Chart', 'line'),
    ('Bar Chart', 'bar'),
    ('Histogram', 'histogram'),
    ('Box Plot', 'box'),
    ('Heatmap', 'heatmap'),
    ('Pie Chart', 'pie'),
]

_chart_controls_top = dbc.Row([
    dbc.Col([
        html.Label("Chart Type:", className="form-label"),
        dcc.Dropdown(
            id='chart-type',
            options=[{'label': label, 'value': value} for label, value in _chart_choices],
            value='scatter',
            className="mb-2",
        ),
    ], width=6),
    dbc.Col([
        html.Label("Color By:", className="form-label"),
        dcc.Dropdown(
            id='color-column',
            placeholder="Select column (optional)",
            className="mb-2",
        ),
    ], width=6),
])

_chart_controls_axes = dbc.Row([
    dbc.Col([
        html.Label("X-Axis:", className="form-label"),
        dcc.Dropdown(
            id='x-column',
            placeholder="Select X column",
        ),
    ], width=6),
    dbc.Col([
        html.Label("Y-Axis:", className="form-label"),
        dcc.Dropdown(
            id='y-column',
            placeholder="Select Y column",
        ),
    ], width=6),
], className="mb-3")

_visualization_tab = dbc.Tab(label="📈 Visualizations", tab_id="visualizations", children=[
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H4("📈 Visualizations", className="card-title"),
                    _chart_controls_top,
                    _chart_controls_axes,
                    dcc.Graph(id='main-graph', style={'height': '500px'}),
                ])
            ])
        ], width=12)
    ], className="mt-4")
])

_explorer_tab = dbc.Tab(label="🔍 Data Explorer", tab_id="data-explorer", children=[
    dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H4("🔍 Data Explorer", className="card-title"),
                    html.Div(id='data-table'),
                ])
            ])
        ], width=12)
    ], className="mt-4")
])

app.layout = dbc.Container([
    _page_header,

    dbc.Tabs(
        [_dataset_tab, _ai_tab, _visualization_tab, _explorer_tab],
        id="main-tabs",
        active_tab="dataset-management",
    ),

    # Client-side stores shared between callbacks.
    dcc.Store(id='stored-data'),
    dcc.Store(id='data-context'),
    dcc.Store(id='dataset-registry', data={"Gapminder": "builtin"}),
    dcc.Store(id='current-dataset-name', data="Gapminder"),
], fluid=True)
|
|
|
|
|
def create_vector_store(df):
    """Placeholder for vector-store construction.

    Nothing is built yet: ``df`` is ignored and the call always reports
    success by returning ``True``.
    """
    succeeded = True
    return succeeded
|
|
|
|
|
|
|
|
from ai_assistant import get_ai_response |
|
|
|
|
|
def create_auto_analytics(df):
    """Build the automatic analytics components for a DataFrame.

    The result contains, in order: a summary-statistics table (only when
    numeric columns exist), a missing-data report, a per-dtype column
    count, and (only when at least one pair qualifies) up to five numeric
    column pairs whose absolute correlation exceeds 0.5, strongest first.

    Args:
        df: the pandas DataFrame to summarise.

    Returns:
        list: Dash components ready to be used as ``children``.
    """
    from itertools import combinations

    analytics_components = []
    numeric_cols = df.select_dtypes(include=['number']).columns

    # Summary statistics for the numeric columns.
    if len(numeric_cols) > 0:
        stats = df[numeric_cols].describe()
        analytics_components.extend([
            html.H6("📊 Summary Statistics", className="mt-2"),
            dbc.Table.from_dataframe(
                stats.reset_index().round(2),
                size='sm',
                striped=True,
                hover=True
            )
        ])

    # Missing values: list only the columns that actually have some.
    missing_data = df.isnull().sum()
    missing_data = missing_data[missing_data > 0]
    if not missing_data.empty:
        analytics_components.extend([
            html.H6("⚠️ Missing Data", className="mt-3"),
            dbc.Alert([
                html.Pre(missing_data.to_string())
            ], color="warning")
        ])
    else:
        analytics_components.extend([
            html.H6("✅ Data Quality", className="mt-3"),
            dbc.Alert("No missing values found!", color="success")
        ])

    # Column counts per broad dtype family.
    analytics_components.extend([
        html.H6("🔍 Data Types", className="mt-3"),
        dbc.Alert([
            html.P(f"📈 Numeric columns: {len(numeric_cols)}"),
            html.P(f"📝 Text columns: {len(df.select_dtypes(include=['object']).columns)}"),
            html.P(f"📅 DateTime columns: {len(df.select_dtypes(include=['datetime64']).columns)}"),
            html.P(f"🔢 Boolean columns: {len(df.select_dtypes(include=['bool']).columns)}")
        ], color="light")
    ])

    # Strongly correlated numeric pairs (each unordered pair visited once).
    if len(numeric_cols) > 1:
        corr_matrix = df[numeric_cols].corr()
        # Positional lookup keeps this correct even with duplicate labels.
        corr_pairs = [
            (left, right, corr_matrix.iloc[i, j])
            for (i, left), (j, right) in combinations(enumerate(corr_matrix.columns), 2)
            if abs(corr_matrix.iloc[i, j]) > 0.5
        ]
        # Only five pairs are displayed, so put the strongest ones first.
        corr_pairs.sort(key=lambda pair: abs(pair[2]), reverse=True)

        if corr_pairs:
            analytics_components.extend([
                html.H6("🔗 Strong Correlations (>0.5)", className="mt-3"),
                dbc.Alert([
                    html.P(f"{pair[0]} ↔ {pair[1]}: {pair[2]:.3f}") for pair in corr_pairs[:5]
                ], color="info")
            ])

    return analytics_components
|
|
|
|
|
def parse_contents(contents, filename):
    """Parse an uploaded file into a DataFrame.

    Args:
        contents: upload payload of the form ``"<header>,<base64 data>"``.
        filename: name of the uploaded file; its extension (compared
            case-insensitively) selects the parser. ``.csv`` is read as
            UTF-8 CSV, any extension starting with ``.xls`` is read with
            ``pandas.read_excel``.

    Returns:
        tuple: ``(df, None)`` on success, or ``(None, message)`` when the
        file type is unsupported or the payload cannot be processed.
    """
    # Decide by the real extension, not by a substring of the name, so a
    # file such as "my_csv_export.xlsx" is not mistaken for a CSV.
    extension = os.path.splitext(filename or "")[1].lower()

    try:
        # Split on the first comma only; a payload without any comma raises
        # here and is reported as a processing error.
        _, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)

        if extension == '.csv':
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif extension.startswith('.xls'):
            df = pd.read_excel(io.BytesIO(decoded))
        else:
            return None, "Unsupported file type"
    except Exception as e:
        # Upload boundary: turn any failure into a message for the user.
        return None, f"Error processing file: {str(e)}"

    return df, None
|
|
|
|
|
|
|
|
def _build_dataset_outputs(df, dataset_name, status_text, info_title, registry):
    """Assemble the 13 callback outputs for a successfully loaded dataset.

    Args:
        df: the DataFrame to expose.
        dataset_name: value stored as the current dataset name.
        status_text: text shown in the success alert (AI status is appended).
        info_title: heading of the dataset-information panel.
        registry: mapping of known dataset names, echoed back to its store.

    Returns:
        tuple: values in the order of the ``manage_datasets`` outputs.
    """
    vector_success = create_vector_store(df)

    table = dbc.Table.from_dataframe(
        df.head(10),
        striped=True,
        bordered=True,
        hover=True,
        size='sm'
    )

    ai_status = "🤖 AI Ready" if vector_success else "⚠️ AI Limited"
    status_msg = dbc.Alert(f"✅ {status_text} {ai_status}", color="success")

    numeric_names = list(df.select_dtypes(include=['number']).columns)
    categorical_count = len(df.select_dtypes(include=['object']).columns)
    data_info = dbc.Alert([
        html.H6(info_title),
        html.P(f"Shape: {df.shape[0]} rows × {df.shape[1]} columns"),
        # str() each label so non-string column names cannot break join().
        html.P(f"Columns: {', '.join(map(str, df.columns))}"),
        html.P(f"Data types: {len(numeric_names)} numeric, {categorical_count} categorical")
    ], color="light")

    auto_analytics = create_auto_analytics(df)

    all_names = list(df.columns)
    all_columns = [{'label': col, 'value': col} for col in all_names]

    # Default axes: prefer the first two numeric columns, then fall back to
    # whatever columns exist.
    if numeric_names:
        default_x = numeric_names[0]
    elif all_names:
        default_x = all_names[0]
    else:
        default_x = None

    if len(numeric_names) > 1:
        default_y = numeric_names[1]
    elif numeric_names:
        default_y = numeric_names[0]
    elif len(all_names) > 1:
        default_y = all_names[1]
    else:
        default_y = None

    selector_options = [{"label": name, "value": name} for name in registry]

    return (df.to_dict('records'), status_msg, table, data_info, auto_analytics,
            all_columns, all_columns, all_columns, default_x, default_y,
            registry, selector_options, dataset_name)


@app.callback(
    [Output('stored-data', 'data'),
     Output('status-msg', 'children'),
     Output('data-preview', 'children'),
     Output('data-info', 'children'),
     Output('auto-analytics', 'children'),
     Output('x-column', 'options'),
     Output('y-column', 'options'),
     Output('color-column', 'options'),
     Output('x-column', 'value'),
     Output('y-column', 'value'),
     Output('dataset-registry', 'data'),
     Output('dataset-selector', 'options'),
     Output('current-dataset-name', 'data')],
    [Input('load-builtin-btn', 'n_clicks'),
     Input('file-upload', 'contents'),
     Input('dataset-selector', 'value')],
    [State('builtin-choice', 'value'),
     State('file-upload', 'filename'),
     State('custom-name', 'value'),
     State('dataset-registry', 'data')]
)
def manage_datasets(builtin_clicks, file_contents, selected_dataset, builtin_choice, filename, custom_name, registry):
    """Handle dataset loading and switching.

    Dispatches on the input that triggered the callback:

    * nothing triggered: load the built-in Gapminder dataset;
    * ``load-builtin-btn``: load the chosen built-in dataset and register it;
    * ``file-upload``: parse the uploaded file and register it as "custom";
    * ``dataset-selector``: switch to the selected registered dataset.

    Any other situation returns an "empty" result that clears the outputs
    while keeping the registry and selector options.

    Returns:
        tuple: 13 values matching the declared outputs, in order.
    """
    ctx = callback_context

    registry = registry or {"Gapminder": "builtin"}

    if not ctx.triggered:
        return _build_dataset_outputs(
            builtin_datasets["Gapminder"],
            "Gapminder",
            "Gapminder dataset loaded!",
            "Dataset Information:",
            registry,
        )

    trigger_id = ctx.triggered[0]['prop_id'].split('.')[0]

    if trigger_id == 'load-builtin-btn' and builtin_clicks:
        # An unknown choice falls through to the empty result below.
        if builtin_choice in builtin_datasets:
            registry[builtin_choice] = "builtin"
            return _build_dataset_outputs(
                builtin_datasets[builtin_choice],
                builtin_choice,
                f"{builtin_choice} dataset loaded!",
                f"{builtin_choice} Dataset Information:",
                registry,
            )

    elif trigger_id == 'file-upload' and file_contents:
        df, error = parse_contents(file_contents, filename)

        if error:
            status_msg = dbc.Alert(error, color="danger")
            selector_options = [{"label": name, "value": name} for name in registry]
            return None, status_msg, "", "", "", [], [], [], None, None, registry, selector_options, None

        dataset_name = custom_name if custom_name else filename.split('.')[0]
        registry[dataset_name] = "custom"
        return _build_dataset_outputs(
            df,
            dataset_name,
            f"{dataset_name} uploaded successfully!",
            f"{dataset_name} Dataset Information:",
            registry,
        )

    elif trigger_id == 'dataset-selector' and selected_dataset:
        if selected_dataset in registry:
            # Only built-in frames are kept on the server; any other
            # registered name falls back to Gapminder.
            if selected_dataset not in builtin_datasets:
                selected_dataset = "Gapminder"
            return _build_dataset_outputs(
                builtin_datasets[selected_dataset],
                selected_dataset,
                f"Switched to {selected_dataset} dataset!",
                f"{selected_dataset} Dataset Information:",
                registry,
            )

    # Nothing actionable: clear the data-dependent outputs.
    selector_options = [{"label": name, "value": name} for name in registry]
    return None, "", "", "", "", [], [], [], None, None, registry, selector_options, None
|
|
|
|
|
|
|
|
@app.callback(
    Output('data-table', 'children'),
    [Input('stored-data', 'data')]
)
def update_data_table(data):
    """Render the Data Explorer table from the stored records.

    Shows the first 20 rows of the stored data, or a muted "No data
    loaded" paragraph when the store is empty.
    """
    if data:
        first_rows = pd.DataFrame(data).head(20)
        table_options = dict(
            striped=True,
            bordered=True,
            hover=True,
            size='sm',
            responsive=True,
        )
        return dbc.Table.from_dataframe(first_rows, **table_options)

    return html.P("No data loaded", className="text-muted")
|
|
|
|
|
|
|
|
@app.callback(
    Output('ai-dataset-info', 'children'),
    [Input('stored-data', 'data'),
     Input('current-dataset-name', 'data')]
)
def update_ai_dataset_info(data, dataset_name):
    """Describe the active dataset on the AI Assistant tab.

    Returns a warning alert when either the stored data or the dataset
    name is missing; otherwise a success alert with the dataset's name,
    shape, first five column names and numeric/categorical column counts.
    """
    if not (data and dataset_name):
        return dbc.Alert("No dataset loaded. Please load a dataset in the Dataset Management tab first.",
                         color="warning", className="mb-3")

    df = pd.DataFrame(data)
    row_count, column_count = df.shape
    shown_columns = ', '.join(df.columns.tolist()[:5])
    # Signal that the column list was cut off after five names.
    truncation_mark = '...' if len(df.columns) > 5 else ''
    numeric_count = len(df.select_dtypes(include=['number']).columns)
    categorical_count = len(df.select_dtypes(include=['object']).columns)

    summary_lines = [
        html.H6(f"📊 Current Dataset: {dataset_name}"),
        html.P(f"Shape: {row_count:,} rows × {column_count} columns"),
        html.P(f"Columns: {shown_columns}{truncation_mark}"),
        html.P(f"Data types: {numeric_count} numeric, {categorical_count} categorical"),
        html.Small("✨ AI is ready to answer questions about this data!", className="text-muted"),
    ]
    return dbc.Alert(summary_lines, color="success", className="mb-3")
|
|
|
|
|
@app.callback(
    Output('ai-response', 'children'),
    [Input('ask-button', 'n_clicks')],
    [State('ai-question', 'value'),
     State('stored-data', 'data'),
     State('current-dataset-name', 'data')]
)
def handle_ai_question(n_clicks, question, data, dataset_name):
    """Answer a question about the stored dataset.

    Returns an empty string until the button has been clicked with both a
    question and stored data present, a warning alert when no dataset name
    is set, and otherwise the response from ``get_ai_response`` rendered
    as Markdown inside an info alert.
    """
    ready_to_ask = n_clicks and question and data
    if not ready_to_ask:
        return ""

    if not dataset_name:
        return dbc.Alert("Please load a dataset first in the Dataset Management tab.", color="warning")

    answer = get_ai_response(question, pd.DataFrame(data))
    return dbc.Alert(dcc.Markdown(answer), color="info")
|
|
|
|
|
|
|
|
def _message_figure(text, **annotation_style):
    """Return an otherwise empty figure with one text annotation at (0.5, 0.5)."""
    fig = go.Figure()
    fig.add_annotation(text=text, x=0.5, y=0.5, showarrow=False, **annotation_style)
    return fig


def _build_chart(df, chart_type, x_col, y_col, color_col):
    """Create the figure for ``chart_type``, or a hint figure when a required column is missing."""
    if chart_type == 'scatter':
        if x_col and y_col:
            return px.scatter(df, x=x_col, y=y_col, color=color_col,
                              title=f"Scatter Plot: {y_col} vs {x_col}")
        return _message_figure("Select both X and Y columns for scatter plot")

    if chart_type == 'line':
        if x_col and y_col:
            return px.line(df, x=x_col, y=y_col, color=color_col,
                           title=f"Line Chart: {y_col} vs {x_col}")
        return _message_figure("Select both X and Y columns for line chart")

    if chart_type == 'bar':
        if x_col and y_col:
            return px.bar(df, x=x_col, y=y_col, color=color_col,
                          title=f"Bar Chart: {y_col} by {x_col}")
        if x_col:
            # Pass the counts as plain arrays: the column names produced by
            # value_counts().reset_index() differ between pandas versions.
            counts = df[x_col].value_counts()
            return px.bar(x=counts.index, y=counts.values,
                          labels={'x': x_col, 'y': 'count'},
                          title=f"Value Counts: {x_col}")
        return _message_figure("Select at least X column for bar chart")

    if chart_type == 'histogram':
        if x_col:
            return px.histogram(df, x=x_col, color=color_col,
                                title=f"Histogram: {x_col}")
        return _message_figure("Select X column for histogram")

    if chart_type == 'box':
        if y_col:
            # The colour column, when chosen, becomes the grouping axis.
            return px.box(df, x=color_col, y=y_col,
                          title=f"Box Plot: {y_col}" + (f" by {color_col}" if color_col else ""))
        if x_col:
            return px.box(df, y=x_col,
                          title=f"Box Plot: {x_col}")
        return _message_figure("Select a column for box plot")

    if chart_type == 'heatmap':
        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 1:
            corr_matrix = df[numeric_cols].corr()
            return px.imshow(corr_matrix,
                             text_auto=True,
                             aspect="auto",
                             title="Correlation Heatmap",
                             color_continuous_scale='RdBu_r')
        return _message_figure("Need at least 2 numeric columns for heatmap")

    if chart_type == 'pie':
        if x_col:
            value_counts = df[x_col].value_counts()
            return px.pie(values=value_counts.values,
                          names=value_counts.index,
                          title=f"Pie Chart: {x_col}")
        return _message_figure("Select X column for pie chart")

    return _message_figure("Select a chart type")


@app.callback(
    Output('main-graph', 'figure'),
    [Input('stored-data', 'data'),
     Input('chart-type', 'value'),
     Input('x-column', 'value'),
     Input('y-column', 'value'),
     Input('color-column', 'value')]
)
def update_main_graph(data, chart_type, x_col, y_col, color_col):
    """Update the main visualization based on the user's selections.

    Args:
        data: stored dataset records (falsy when nothing is loaded).
        chart_type: one of ``scatter``, ``line``, ``bar``, ``histogram``,
            ``box``, ``heatmap`` or ``pie``.
        x_col, y_col: selected axis columns (may be empty).
        color_col: optional column used for colouring / grouping.

    Returns:
        A Plotly figure: the requested chart, a hint asking for missing
        selections, or a red error message if chart creation raised.
    """
    if not data:
        fig = _message_figure("Upload data to see visualizations",
                              font=dict(size=16, color="gray"))
        fig.update_layout(template="plotly_white")
        return fig

    df = pd.DataFrame(data)

    # The heatmap is computed from all numeric columns and needs no axis
    # selection, so it must not be stopped by this guard.
    if chart_type != 'heatmap' and not x_col and not y_col:
        fig = _message_figure("Select columns to create visualization",
                              font=dict(size=16, color="gray"))
        fig.update_layout(template="plotly_white")
        return fig

    try:
        fig = _build_chart(df, chart_type, x_col, y_col, color_col)
        fig.update_layout(template="plotly_white", height=500)
        return fig
    except Exception as e:
        # Callback boundary: show the failure in the graph area instead of
        # letting the callback error out.
        fig = _message_figure(f"Error creating chart: {str(e)}",
                              font=dict(color="red"))
        fig.update_layout(template="plotly_white")
        return fig
|
|
|
|
|
if __name__ == "__main__":
    # Start the server only when this file is executed directly:
    # all interfaces (0.0.0.0), port 7860, debug mode off.
    run_options = {"host": '0.0.0.0', "port": 7860, "debug": False}
    app.run(**run_options)