# dash-mcp / app1.py
# mic3333's picture
# update new version of app
# 20706fe
import os
import base64
import io
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, html, dcc, Input, Output, State, callback_context
import dash_bootstrap_components as dbc
from typing import Optional
from dotenv import load_dotenv
from pydantic import Field, SecretStr
import numpy as np
# Langchain imports - simplified without embeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_core.prompts import PromptTemplate
# Load environment variables (python-dotenv; presumably from a local .env file - confirm)
load_dotenv()
# Simplified - no OpenRouter for now: the AI flag is off and no model object is configured
AI_AVAILABLE = False
openrouter_model = None
# Initialize Dash app with the Bootstrap theme stylesheet
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
# Expose the app's underlying server object at module level
# (presumably so a WSGI host can import it - confirm against the deployment)
server = app.server
# Global variables
# Placeholder only: nothing in the visible code assigns a real vector store to it
vector_store = None
# Built-in datasets
def create_builtin_datasets():
    """Create the built-in sample datasets.

    Everything except Iris is synthetic and drawn from NumPy's global random
    state, which is seeded with 42 at the start of the call. Iris is loaded
    from scikit-learn when that package is importable; otherwise a synthetic
    stand-in with 150 rows is generated.

    Returns:
        dict mapping dataset name to DataFrame, with the keys 'Gapminder',
        'Iris', 'Tips', 'Stock Data' and 'Wind Data'.
    """
    datasets = {}

    # Gapminder dataset
    np.random.seed(42)
    continent_by_country = {
        'China': 'Asia', 'India': 'Asia', 'Japan': 'Asia',
        'Germany': 'Europe', 'UK': 'Europe', 'France': 'Europe',
        'USA': 'Americas', 'Brazil': 'Americas', 'Canada': 'Americas',
    }
    countries = ['USA', 'China', 'India', 'Germany', 'UK', 'France', 'Japan', 'Brazil', 'Canada', 'Australia']
    years = list(range(2000, 2021))
    gapminder_data = []
    for country in countries:
        # One baseline per country; the per-year values jitter around it.
        base_gdp = np.random.uniform(20000, 80000)
        base_life_exp = np.random.uniform(70, 85)
        base_pop = np.random.uniform(10000000, 100000000)
        for year in years:
            elapsed = year - 2000
            # The three random draws stay in this order (gdp, lifeExp, pop)
            # so the seeded output is unchanged.
            gapminder_data.append({
                'country': country,
                'year': year,
                'gdpPercap': base_gdp * (1 + np.random.uniform(-0.1, 0.15)) * (elapsed * 0.02 + 1),
                'lifeExp': base_life_exp + np.random.uniform(-2, 3) + elapsed * 0.1,
                'pop': base_pop * (1.01 + np.random.uniform(-0.005, 0.015)) ** elapsed,
                # Any country not listed in the mapping is treated as Oceania.
                'continent': continent_by_country.get(country, 'Oceania'),
            })
    datasets['Gapminder'] = pd.DataFrame(gapminder_data)

    # Iris dataset
    try:
        # The import must live inside the try block: with it outside, a missing
        # scikit-learn raised before the fallback below could ever run.
        from sklearn.datasets import load_iris
        iris = load_iris()
        datasets['Iris'] = pd.DataFrame(iris.data, columns=iris.feature_names)
        datasets['Iris']['species'] = [iris.target_names[i] for i in iris.target]
    except ImportError:
        # Fallback if sklearn not available
        iris_data = {
            'sepal_length': np.random.normal(5.8, 0.8, 150),
            'sepal_width': np.random.normal(3.0, 0.4, 150),
            'petal_length': np.random.normal(3.8, 1.8, 150),
            'petal_width': np.random.normal(1.2, 0.8, 150),
            'species': ['setosa'] * 50 + ['versicolor'] * 50 + ['virginica'] * 50,
        }
        datasets['Iris'] = pd.DataFrame(iris_data)

    # Tips dataset
    tips_data = {
        'total_bill': np.random.uniform(10, 50, 200),
        'tip': np.random.uniform(1, 10, 200),
        'sex': np.random.choice(['Male', 'Female'], 200),
        'smoker': np.random.choice(['Yes', 'No'], 200),
        'day': np.random.choice(['Thur', 'Fri', 'Sat', 'Sun'], 200),
        'time': np.random.choice(['Lunch', 'Dinner'], 200),
        'size': np.random.choice([1, 2, 3, 4, 5, 6], 200),
    }
    datasets['Tips'] = pd.DataFrame(tips_data)

    # Stock Data: a multiplicative random walk starting at 100
    dates = pd.date_range('2020-01-01', '2023-12-31', freq='D')
    stock_price = 100
    stock_data = []
    for date in dates:
        daily_return = np.random.normal(0.001, 0.02)
        stock_price *= (1 + daily_return)
        stock_data.append({
            'date': date,
            'price': stock_price,
            'volume': np.random.randint(1000000, 5000000),
            'high': stock_price * (1 + abs(np.random.normal(0, 0.01))),
            'low': stock_price * (1 - abs(np.random.normal(0, 0.01))),
            'open': stock_price * (1 + np.random.normal(0, 0.005)),
        })
    datasets['Stock Data'] = pd.DataFrame(stock_data)

    # Wind Data: hourly readings for 28 days in each of 12 months
    hours = list(range(24))
    wind_data = []
    for month in range(1, 13):
        for day in range(1, 29):
            for hour in hours:
                wind_data.append({
                    'month': month,
                    'day': day,
                    'hour': hour,
                    'wind_speed': abs(np.random.normal(15, 8)) + 5 * np.sin(hour / 24 * 2 * np.pi),
                    'temperature': np.random.normal(20, 15) + 10 * np.cos(month / 12 * 2 * np.pi),
                    'humidity': np.random.uniform(30, 90),
                    'pressure': np.random.normal(1013, 20),
                })
    datasets['Wind Data'] = pd.DataFrame(wind_data)

    return datasets
# Initialize built-in datasets
# Built once at import time; the dataset callback reads from this dict by name.
builtin_datasets = create_builtin_datasets()
# App layout
# Structure: page header, four tabs (dataset management, AI assistant,
# visualizations, data explorer) and the dcc.Store components that carry
# state between callbacks.
app.layout = dbc.Container([
    dbc.Row([
        dbc.Col([
            html.H1("🤖 AI-Powered Data Analytics", className="text-center mb-4"),
            html.P("Upload data, ask questions, and get AI-powered insights!",
                   className="text-center text-muted"),
            html.Hr(),
        ], width=12)
    ]),
    # Tabbed interface
    dbc.Tabs([
        # Tab 1: Dataset Management
        dbc.Tab(label="📁 Dataset Management", tab_id="dataset-management", children=[
            dbc.Row([
                # Left column: load / upload / switch controls and status
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Load Built-in Dataset", className="card-title"),
                            # Option values match the keys returned by create_builtin_datasets()
                            dcc.Dropdown(
                                id="builtin-choice",
                                options=[
                                    {"label": "Gapminder", "value": "Gapminder"},
                                    {"label": "Iris", "value": "Iris"},
                                    {"label": "Tips", "value": "Tips"},
                                    {"label": "Stock Data", "value": "Stock Data"},
                                    {"label": "Wind Data", "value": "Wind Data"}
                                ],
                                value="Gapminder",
                                className="mb-2"
                            ),
                            dbc.Button("Load Dataset", id="load-builtin-btn", color="primary", className="mb-3"),
                            html.Hr(),
                            html.H4("Upload Custom Dataset", className="card-title"),
                            dcc.Upload(
                                id='file-upload',
                                children=html.Div([
                                    'Drag and Drop or ',
                                    html.A('Select CSV/Excel Files')
                                ]),
                                style={
                                    'width': '100%',
                                    'height': '60px',
                                    'lineHeight': '60px',
                                    'borderWidth': '1px',
                                    'borderStyle': 'dashed',
                                    'borderRadius': '5px',
                                    'textAlign': 'center',
                                    'margin': '10px'
                                },
                                multiple=False,
                                accept='.csv,.xlsx,.xls'
                            ),
                            dbc.Input(
                                id="custom-name",
                                placeholder="Dataset Name (optional)",
                                type="text",
                                className="mb-2"
                            ),
                            # NOTE(review): no callback visible in this file uses "upload-btn";
                            # the dataset callback is triggered by 'file-upload' contents instead.
                            dbc.Button("Upload", id="upload-btn", color="primary", className="mb-3"),
                            html.Hr(),
                            html.H4("Active Datasets", className="card-title"),
                            # Starts with Gapminder only; the dataset callback rewrites the options
                            dcc.Dropdown(
                                id="dataset-selector",
                                options=[{"label": "Gapminder", "value": "Gapminder"}],
                                value="Gapminder",
                                className="mb-2"
                            ),
                            html.Hr(),
                            html.Div(id="status-msg", children=[
                                dbc.Alert("Ready to load data", color="info")
                            ]),
                            html.Div(id="data-info")
                        ])
                    ])
                ], width=4),
                # Right column: preview table and automatic analytics
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Data Preview (First 10 rows)", className="card-title"),
                            html.Div(id="data-preview", className="mb-4"),
                            html.H4("Quick Analytics", className="card-title"),
                            html.Div(id="auto-analytics")
                        ])
                    ])
                ], width=8)
            ], className="mt-4")
        ]),
        # Tab 2: AI Assistant
        dbc.Tab(label="🤖 AI Assistant", tab_id="ai-assistant", children=[
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("🤖 AI Assistant", className="card-title"),
                            # Placeholder shown until a callback replaces it with dataset details
                            html.Div(id="ai-dataset-info", className="mb-3", children=[
                                dbc.Alert("No dataset loaded. Please load a dataset in the Dataset Management tab first.",
                                          color="warning", className="mb-3")
                            ]),
                            dbc.InputGroup([
                                dbc.Input(
                                    id="ai-question",
                                    placeholder="Ask questions about your data...",
                                    type="text",
                                    style={"fontSize": "14px"}
                                ),
                                dbc.Button(
                                    "Ask AI",
                                    id="ask-button",
                                    color="primary",
                                    n_clicks=0
                                )
                            ]),
                            html.Div(id="ai-response", className="mt-3")
                        ])
                    ])
                ], width=12)
            ], className="mt-4")
        ]),
        # Tab 3: Visualizations
        dbc.Tab(label="📈 Visualizations", tab_id="visualizations", children=[
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("📈 Visualizations", className="card-title"),
                            # Chart controls
                            dbc.Row([
                                dbc.Col([
                                    html.Label("Chart Type:", className="form-label"),
                                    # Option values are the chart_type strings the graph callback branches on
                                    dcc.Dropdown(
                                        id='chart-type',
                                        options=[
                                            {'label': 'Scatter Plot', 'value': 'scatter'},
                                            {'label': 'Line Chart', 'value': 'line'},
                                            {'label': 'Bar Chart', 'value': 'bar'},
                                            {'label': 'Histogram', 'value': 'histogram'},
                                            {'label': 'Box Plot', 'value': 'box'},
                                            {'label': 'Heatmap', 'value': 'heatmap'},
                                            {'label': 'Pie Chart', 'value': 'pie'}
                                        ],
                                        value='scatter',
                                        className="mb-2"
                                    )
                                ], width=6),
                                dbc.Col([
                                    html.Label("Color By:", className="form-label"),
                                    dcc.Dropdown(
                                        id='color-column',
                                        placeholder="Select column (optional)",
                                        className="mb-2"
                                    )
                                ], width=6)
                            ]),
                            # Axis selectors; their options are filled in by the dataset callback
                            dbc.Row([
                                dbc.Col([
                                    html.Label("X-Axis:", className="form-label"),
                                    dcc.Dropdown(
                                        id='x-column',
                                        placeholder="Select X column"
                                    )
                                ], width=6),
                                dbc.Col([
                                    html.Label("Y-Axis:", className="form-label"),
                                    dcc.Dropdown(
                                        id='y-column',
                                        placeholder="Select Y column"
                                    )
                                ], width=6)
                            ], className="mb-3"),
                            dcc.Graph(id='main-graph', style={'height': '500px'}),
                        ])
                    ])
                ], width=12)
            ], className="mt-4")
        ]),
        # Tab 4: Data Explorer
        dbc.Tab(label="🔍 Data Explorer", tab_id="data-explorer", children=[
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("🔍 Data Explorer", className="card-title"),
                            html.Div(id='data-table')
                        ])
                    ])
                ], width=12)
            ], className="mt-4")
        ])
    ], id="main-tabs", active_tab="dataset-management"),
    # Store components
    # 'stored-data' holds the active dataset as a list of row dicts (written by the dataset callback)
    dcc.Store(id='stored-data'),
    # NOTE(review): 'data-context' is not read or written by any callback visible in this file
    dcc.Store(id='data-context'),
    # Maps dataset name -> "builtin" or "custom"
    dcc.Store(id='dataset-registry', data={"Gapminder": "builtin"}),
    dcc.Store(id='current-dataset-name', data="Gapminder")
], fluid=True)
def create_vector_store(df):
    """Stub for vector-store creation.

    The DataFrame argument is accepted but not used; the function always
    reports success by returning True.
    """
    return True
# Import AI assistant module
from ai_assistant import get_ai_response
def create_auto_analytics(df):
    """Build the "Quick Analytics" components for a DataFrame.

    The sections are produced in this order:

    1. Summary statistics (``describe()``) for the numeric columns, if any.
    2. A missing-data report, or a success notice when nothing is missing.
    3. Column counts for numeric, object, datetime and boolean dtypes.
    4. Up to five numeric column pairs whose absolute correlation exceeds
       0.5, strongest first (only when at least one such pair exists).

    Args:
        df: pandas DataFrame to summarise.

    Returns:
        list of Dash components, ready to be used as a ``children`` value.
    """
    analytics_components = []

    # Summary Statistics
    numeric_cols = df.select_dtypes(include=['number']).columns
    if len(numeric_cols) > 0:
        stats = df[numeric_cols].describe()
        analytics_components.extend([
            html.H6("📊 Summary Statistics", className="mt-2"),
            dbc.Table.from_dataframe(
                stats.reset_index().round(2),
                size='sm',
                striped=True,
                hover=True
            )
        ])

    # Missing Data Analysis
    missing_data = df.isnull().sum()
    missing_data = missing_data[missing_data > 0]
    if not missing_data.empty:
        analytics_components.extend([
            html.H6("⚠️ Missing Data", className="mt-3"),
            dbc.Alert([
                html.Pre(missing_data.to_string())
            ], color="warning")
        ])
    else:
        analytics_components.extend([
            html.H6("✅ Data Quality", className="mt-3"),
            dbc.Alert("No missing values found!", color="success")
        ])

    # Data Types Analysis
    analytics_components.extend([
        html.H6("🔍 Data Types", className="mt-3"),
        dbc.Alert([
            html.P(f"📈 Numeric columns: {len(numeric_cols)}"),
            html.P(f"📝 Text columns: {len(df.select_dtypes(include=['object']).columns)}"),
            html.P(f"📅 DateTime columns: {len(df.select_dtypes(include=['datetime64']).columns)}"),
            html.P(f"🔢 Boolean columns: {len(df.select_dtypes(include=['bool']).columns)}")
        ], color="light")
    ])

    # Correlation Analysis for numeric columns
    if len(numeric_cols) > 1:
        corr_matrix = df[numeric_cols].corr()
        labels = list(corr_matrix.columns)
        # Walk the upper triangle only so each pair is reported once.
        # Positional access (iloc) keeps this safe with duplicate column names.
        corr_pairs = []
        for i, first in enumerate(labels):
            for j in range(i + 1, len(labels)):
                corr_val = corr_matrix.iloc[i, j]
                # NaN correlations compare False here and are skipped.
                if abs(corr_val) > 0.5:  # Only show strong correlations
                    corr_pairs.append((first, labels[j], corr_val))
        # Strongest first, so the five shown really are the top five.
        corr_pairs.sort(key=lambda pair: abs(pair[2]), reverse=True)
        if corr_pairs:
            analytics_components.extend([
                html.H6("🔗 Strong Correlations (>0.5)", className="mt-3"),
                dbc.Alert([
                    # A separator keeps the two column names from running together.
                    html.P(f"{first} ↔ {second}: {value:.3f}")
                    for first, second, value in corr_pairs[:5]
                ], color="info")
            ])

    return analytics_components
def parse_contents(contents, filename):
    """Parse an uploaded file into a DataFrame.

    Args:
        contents: upload payload as a string of the form
            ``"<header>,<base64 data>"``; everything after the first comma
            is base64-decoded.
        filename: original file name. Its extension (case-insensitive)
            selects the parser: ``.csv`` is read as UTF-8 text (a leading
            BOM is tolerated), any ``.xls*`` extension goes to
            ``pd.read_excel``.

    Returns:
        ``(df, None)`` on success, or ``(None, message)`` when the file type
        is unsupported or the payload cannot be decoded or parsed.
    """
    # Decide by the real extension: a substring test would treat
    # "csv_report.xlsx" as CSV and reject "DATA.CSV".
    extension = os.path.splitext(filename or "")[1].lower()
    is_csv = extension == '.csv'
    is_excel = extension.startswith('.xls')
    if not (is_csv or is_excel):
        return None, "Unsupported file type"

    try:
        # Splitting and decoding happen inside the try so a malformed payload
        # is reported through the error tuple instead of raising.
        _, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)
        if is_csv:
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')))
        else:
            df = pd.read_excel(io.BytesIO(decoded))
    except Exception as e:
        # Deliberately broad: any failure becomes a user-visible message.
        return None, f"Error processing file: {str(e)}"
    return df, None
# Dataset management callbacks
def _selector_options(registry):
    """Return dropdown options listing every dataset name in the registry."""
    return [{"label": name, "value": name} for name in registry]


def _empty_outputs(status, registry):
    """Build the 13 callback outputs that clear the data-dependent widgets."""
    return (None, status, "", "", "", [], [], [], None, None,
            registry, _selector_options(registry), None)


def _loaded_dataset_outputs(df, dataset_name, status_text, info_heading, registry):
    """Build the 13 callback outputs for a successfully loaded dataset.

    Args:
        df: DataFrame to publish to the stores and widgets.
        dataset_name: value written to the 'current-dataset-name' store.
        status_text: text of the success alert, shown before the AI status.
        info_heading: heading of the dataset-information alert.
        registry: dataset registry; returned as-is and used for the selector.
    """
    # Create vector store for AI
    vector_success = create_vector_store(df)
    ai_status = "🤖 AI Ready" if vector_success else "⚠️ AI Limited"
    status_msg = dbc.Alert(f"✅ {status_text} {ai_status}", color="success")

    # Create data table preview
    table = dbc.Table.from_dataframe(
        df.head(10),
        striped=True,
        bordered=True,
        hover=True,
        size='sm'
    )

    all_names = df.columns.tolist()
    numeric_names = df.select_dtypes(include=['number']).columns.tolist()
    text_count = len(df.select_dtypes(include=['object']).columns)
    data_info = dbc.Alert([
        html.H6(info_heading),
        html.P(f"Shape: {df.shape[0]} rows × {df.shape[1]} columns"),
        html.P(f"Columns: {', '.join(all_names)}"),
        html.P(f"Data types: {len(numeric_names)} numeric, {text_count} categorical")
    ], color="light")

    # Create automatic analytics
    auto_analytics = create_auto_analytics(df)

    # Create column options for dropdowns
    column_options = [{'label': col, 'value': col} for col in all_names]

    # Set default values - prefer numeric columns for x and y
    if numeric_names:
        default_x = numeric_names[0]
        default_y = numeric_names[1] if len(numeric_names) > 1 else numeric_names[0]
    else:
        default_x = all_names[0] if all_names else None
        default_y = all_names[1] if len(all_names) > 1 else None

    return (df.to_dict('records'), status_msg, table, data_info, auto_analytics,
            column_options, column_options, column_options, default_x, default_y,
            registry, _selector_options(registry), dataset_name)


@app.callback(
    [Output('stored-data', 'data'),
     Output('status-msg', 'children'),
     Output('data-preview', 'children'),
     Output('data-info', 'children'),
     Output('auto-analytics', 'children'),
     Output('x-column', 'options'),
     Output('y-column', 'options'),
     Output('color-column', 'options'),
     Output('x-column', 'value'),
     Output('y-column', 'value'),
     Output('dataset-registry', 'data'),
     Output('dataset-selector', 'options'),
     Output('current-dataset-name', 'data')],
    [Input('load-builtin-btn', 'n_clicks'),
     Input('file-upload', 'contents'),
     Input('dataset-selector', 'value')],
    [State('builtin-choice', 'value'),
     State('file-upload', 'filename'),
     State('custom-name', 'value'),
     State('dataset-registry', 'data')]
)
def manage_datasets(builtin_clicks, file_contents, selected_dataset, builtin_choice, filename, custom_name, registry):
    """Handle dataset loading and switching.

    The action is chosen from the triggering input:

    * no trigger (initial call): load the built-in Gapminder dataset;
    * 'load-builtin-btn': load the dataset named by ``builtin_choice``;
    * 'file-upload': parse the upload and register it as a custom dataset;
    * 'dataset-selector': switch to the selected registered dataset.

    Args:
        builtin_clicks: click count of the "Load Dataset" button.
        file_contents: contents string from the upload component.
        selected_dataset: name chosen in the active-dataset dropdown.
        builtin_choice: name chosen in the built-in dataset dropdown.
        filename: name of the uploaded file.
        custom_name: optional user-supplied name for the upload.
        registry: dict mapping dataset name to "builtin" or "custom".

    Returns:
        A 13-tuple matching the callback's Output list. When no branch
        applies, or an upload fails to parse, the data-dependent outputs
        are cleared.
    """
    ctx = callback_context
    # Initialize defaults
    registry = registry or {"Gapminder": "builtin"}

    if not ctx.triggered:
        # Initial load - load Gapminder dataset
        return _loaded_dataset_outputs(
            builtin_datasets["Gapminder"], "Gapminder",
            "Gapminder dataset loaded!", "Dataset Information:", registry)

    trigger_id = ctx.triggered[0]['prop_id'].split('.')[0]

    if trigger_id == 'load-builtin-btn' and builtin_clicks:
        # Load built-in dataset
        if builtin_choice in builtin_datasets:
            registry[builtin_choice] = "builtin"
            return _loaded_dataset_outputs(
                builtin_datasets[builtin_choice], builtin_choice,
                f"{builtin_choice} dataset loaded!",
                f"{builtin_choice} Dataset Information:", registry)

    elif trigger_id == 'file-upload' and file_contents:
        # Upload custom dataset
        df, error = parse_contents(file_contents, filename)
        if error:
            return _empty_outputs(dbc.Alert(error, color="danger"), registry)
        # Determine dataset name. splitext drops only the final extension,
        # so "my.data.csv" becomes "my.data" rather than "my".
        dataset_name = custom_name if custom_name else os.path.splitext(filename)[0]
        registry[dataset_name] = "custom"
        return _loaded_dataset_outputs(
            df, dataset_name,
            f"{dataset_name} uploaded successfully!",
            f"{dataset_name} Dataset Information:", registry)

    elif trigger_id == 'dataset-selector' and selected_dataset:
        # Switch between datasets
        if selected_dataset in registry:
            # Uploaded frames are not kept on the server, so only built-in
            # names can be reloaded; anything else falls back to Gapminder.
            if selected_dataset not in builtin_datasets:
                selected_dataset = "Gapminder"
            return _loaded_dataset_outputs(
                builtin_datasets[selected_dataset], selected_dataset,
                f"Switched to {selected_dataset} dataset!",
                f"{selected_dataset} Dataset Information:", registry)

    # Default fallback
    return _empty_outputs("", registry)
# Updated callback for data table (now shared across tabs)
@app.callback(
    Output('data-table', 'children'),
    [Input('stored-data', 'data')]
)
def update_data_table(data):
    """Render the first 20 stored rows as a table for the Data Explorer tab.

    Returns a muted "No data loaded" paragraph when the store is empty.
    """
    if data:
        preview_rows = pd.DataFrame(data).head(20)
        table_style = {
            'striped': True,
            'bordered': True,
            'hover': True,
            'size': 'sm',
            'responsive': True,
        }
        return dbc.Table.from_dataframe(preview_rows, **table_style)
    return html.P("No data loaded", className="text-muted")
# Callback to update AI assistant tab with current dataset info
@app.callback(
    Output('ai-dataset-info', 'children'),
    [Input('stored-data', 'data'),
     Input('current-dataset-name', 'data')]
)
def update_ai_dataset_info(data, dataset_name):
    """Describe the active dataset at the top of the AI Assistant tab.

    Shows a warning alert when either the stored rows or the dataset name is
    missing; otherwise a success alert with the shape, up to five column
    names and the numeric/object column counts.
    """
    if not (data and dataset_name):
        return dbc.Alert("No dataset loaded. Please load a dataset in the Dataset Management tab first.",
                         color="warning", className="mb-3")

    frame = pd.DataFrame(data)
    row_count, column_count = frame.shape
    column_names = frame.columns.tolist()
    listed_columns = ', '.join(column_names[:5])
    # Signal truncation when the dataset has more columns than are listed.
    ellipsis = '...' if len(column_names) > 5 else ''
    numeric_count = len(frame.select_dtypes(include=['number']).columns)
    object_count = len(frame.select_dtypes(include=['object']).columns)

    summary = [
        html.H6(f"📊 Current Dataset: {dataset_name}"),
        html.P(f"Shape: {row_count:,} rows × {column_count} columns"),
        html.P(f"Columns: {listed_columns}{ellipsis}"),
        html.P(f"Data types: {numeric_count} numeric, {object_count} categorical"),
        html.Small("✨ AI is ready to answer questions about this data!", className="text-muted"),
    ]
    return dbc.Alert(summary, color="success", className="mb-3")
@app.callback(
    Output('ai-response', 'children'),
    [Input('ask-button', 'n_clicks')],
    [State('ai-question', 'value'),
     State('stored-data', 'data'),
     State('current-dataset-name', 'data')]
)
def handle_ai_question(n_clicks, question, data, dataset_name):
    """Answer a question about the active dataset.

    Args:
        n_clicks: click count of the "Ask AI" button (0 before any click).
        question: text typed by the user.
        data: stored dataset rows (list of dicts), or None.
        dataset_name: name of the active dataset, or None.

    Returns:
        "" when the button has not been clicked or the question is blank;
        a warning alert when no dataset is loaded; otherwise an info alert
        containing the response from ``get_ai_response`` rendered as Markdown.
    """
    # Nothing to do until the user has actually asked something.
    if not n_clicks or not question or not question.strip():
        return ""

    # The missing-data check belongs here: when it was part of the guard
    # above, asking with no dataset loaded returned "" and this warning
    # could not be reached in practice.
    if not data or not dataset_name:
        return dbc.Alert("Please load a dataset first in the Dataset Management tab.", color="warning")

    df = pd.DataFrame(data)
    response = get_ai_response(question, df)
    return dbc.Alert(
        dcc.Markdown(response),
        color="info"
    )
def _message_figure(text, **font):
    """Return an empty figure with *text* as a centred annotation.

    Keyword arguments (e.g. ``size``, ``color``) are passed as the
    annotation font; with none given the default font is used.
    """
    fig = go.Figure()
    annotation = dict(text=text, x=0.5, y=0.5, showarrow=False)
    if font:
        annotation['font'] = font
    fig.add_annotation(**annotation)
    return fig


def _build_chart(df, chart_type, x_col, y_col, color_col):
    """Create the figure for one chart type, or a hint figure when inputs are missing."""
    if chart_type == 'scatter':
        if x_col and y_col:
            return px.scatter(df, x=x_col, y=y_col, color=color_col,
                              title=f"Scatter Plot: {y_col} vs {x_col}")
        return _message_figure("Select both X and Y columns for scatter plot")

    if chart_type == 'line':
        if x_col and y_col:
            return px.line(df, x=x_col, y=y_col, color=color_col,
                           title=f"Line Chart: {y_col} vs {x_col}")
        return _message_figure("Select both X and Y columns for line chart")

    if chart_type == 'bar':
        if x_col and y_col:
            return px.bar(df, x=x_col, y=y_col, color=color_col,
                          title=f"Bar Chart: {y_col} by {x_col}")
        if x_col:
            # Pass the counts as arrays rather than relying on the column
            # names produced by value_counts().reset_index(), which changed
            # in pandas 2.0.
            value_counts = df[x_col].value_counts()
            return px.bar(x=value_counts.index, y=value_counts.values,
                          labels={'x': x_col, 'y': 'count'},
                          title=f"Value Counts: {x_col}")
        return _message_figure("Select at least X column for bar chart")

    if chart_type == 'histogram':
        if x_col:
            return px.histogram(df, x=x_col, color=color_col,
                                title=f"Histogram: {x_col}")
        return _message_figure("Select X column for histogram")

    if chart_type == 'box':
        if y_col:
            return px.box(df, x=color_col, y=y_col,
                          title=f"Box Plot: {y_col}" + (f" by {color_col}" if color_col else ""))
        if x_col:
            return px.box(df, y=x_col,
                          title=f"Box Plot: {x_col}")
        return _message_figure("Select a column for box plot")

    if chart_type == 'heatmap':
        # The heatmap ignores the axis selections and correlates all numeric columns.
        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 1:
            corr_matrix = df[numeric_cols].corr()
            return px.imshow(corr_matrix,
                             text_auto=True,
                             aspect="auto",
                             title="Correlation Heatmap",
                             color_continuous_scale='RdBu_r')
        return _message_figure("Need at least 2 numeric columns for heatmap")

    if chart_type == 'pie':
        if x_col:
            value_counts = df[x_col].value_counts()
            return px.pie(values=value_counts.values,
                          names=value_counts.index,
                          title=f"Pie Chart: {x_col}")
        return _message_figure("Select X column for pie chart")

    return _message_figure("Select a chart type")


@app.callback(
    Output('main-graph', 'figure'),
    [Input('stored-data', 'data'),
     Input('chart-type', 'value'),
     Input('x-column', 'value'),
     Input('y-column', 'value'),
     Input('color-column', 'value')]
)
def update_main_graph(data, chart_type, x_col, y_col, color_col):
    """Update main visualization based on user selections.

    Args:
        data: stored dataset rows (list of dicts), or None.
        chart_type: one of 'scatter', 'line', 'bar', 'histogram', 'box',
            'heatmap', 'pie'; anything else yields a "Select a chart type" hint.
        x_col: selected X column name, or None.
        y_col: selected Y column name, or None.
        color_col: selected colour column name, or None.

    Returns:
        A Plotly figure: the requested chart, a hint figure when required
        inputs are missing, or a red error message if chart creation raises.
    """
    if not data:
        fig = _message_figure("Upload data to see visualizations", size=16, color="gray")
        fig.update_layout(template="plotly_white")
        return fig

    df = pd.DataFrame(data)

    # A dropdown can still hold a column name from the previous dataset;
    # treat such stale selections as "not selected" instead of letting
    # Plotly raise on an unknown column.
    x_col, y_col, color_col = (
        col if col in df.columns else None
        for col in (x_col, y_col, color_col)
    )

    # Handle cases where columns aren't selected yet
    if not x_col and not y_col:
        fig = _message_figure("Select columns to create visualization", size=16, color="gray")
        fig.update_layout(template="plotly_white")
        return fig

    try:
        # Create visualization based on chart type
        fig = _build_chart(df, chart_type, x_col, y_col, color_col)
        fig.update_layout(template="plotly_white", height=500)
        return fig
    except Exception as e:
        # Deliberately broad: any charting failure is shown in the graph area
        # rather than surfacing as a callback error.
        fig = _message_figure(f"Error creating chart: {str(e)}", color="red")
        fig.update_layout(template="plotly_white")
        return fig
if __name__ == '__main__':
    # Serve on every network interface, port 7860, with debug mode off.
    app.run(
        host='0.0.0.0',
        port=7860,
        debug=False,
    )