Spaces:

mic3333
/

dash-mcp

Sleeping

App Files Files Community

dash-mcp / app1.py

mic3333

update new version of app

20706fe 3 months ago

raw

history blame contribute delete

36.9 kB

	import os
	import base64
	import io
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from dash import Dash, html, dcc, Input, Output, State, callback_context
	import dash_bootstrap_components as dbc
	from typing import Optional
	from dotenv import load_dotenv
	from pydantic import Field, SecretStr
	import numpy as np

	# Langchain imports - simplified without embeddings
	from langchain_community.vectorstores import FAISS
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from langchain.schema import Document
	from langchain_core.prompts import PromptTemplate

	# Load environment variables
	# (python-dotenv; called with no arguments, so its default lookup applies)
	load_dotenv()

	# Simplified - no OpenRouter for now
	# NOTE(review): neither flag below is read anywhere in the code visible in
	# this file; they look like placeholders for a future LLM integration.
	AI_AVAILABLE = False
	openrouter_model = None

	# Initialize Dash app
	# The Bootstrap stylesheet is required by the dash_bootstrap_components
	# widgets used in the layout.
	app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
	# Module-level handle on the server object wrapped by Dash
	# (presumably for WSGI hosts that import `server` - confirm deployment).
	server = app.server

	# Global variables
	# NOTE(review): never assigned again in the code visible here;
	# create_vector_store() does not populate it.
	vector_store = None

	# Built-in datasets
	def create_builtin_datasets():
	    """Create the built-in sample datasets.

	    Every dataset is generated synthetically from NumPy's global random
	    generator (seeded with 42 for reproducibility), except Iris, which is
	    loaded from scikit-learn when that package is installed and replaced by
	    a synthetic stand-in otherwise.

	    Returns:
	        dict: Maps the display names ``'Gapminder'``, ``'Iris'``, ``'Tips'``,
	        ``'Stock Data'`` and ``'Wind Data'`` to ``pandas.DataFrame`` objects.
	    """
	    datasets = {}

	    # Gapminder dataset
	    np.random.seed(42)
	    # Insertion order matters: it fixes the order of the random draws below.
	    continent_by_country = {
	        'USA': 'Americas',
	        'China': 'Asia',
	        'India': 'Asia',
	        'Germany': 'Europe',
	        'UK': 'Europe',
	        'France': 'Europe',
	        'Japan': 'Asia',
	        'Brazil': 'Americas',
	        'Canada': 'Americas',
	        'Australia': 'Oceania',
	    }
	    years = range(2000, 2021)
	    gapminder_data = []
	    for country, continent in continent_by_country.items():
	        base_gdp = np.random.uniform(20000, 80000)
	        base_life_exp = np.random.uniform(70, 85)
	        base_pop = np.random.uniform(10000000, 100000000)
	        for year in years:
	            elapsed = year - 2000
	            gapminder_data.append({
	                'country': country,
	                'year': year,
	                'gdpPercap': base_gdp * (1 + np.random.uniform(-0.1, 0.15)) * (elapsed * 0.02 + 1),
	                'lifeExp': base_life_exp + np.random.uniform(-2, 3) + elapsed * 0.1,
	                'pop': base_pop * (1.01 + np.random.uniform(-0.005, 0.015)) ** elapsed,
	                'continent': continent,
	            })
	    datasets['Gapminder'] = pd.DataFrame(gapminder_data)

	    # Iris dataset
	    try:
	        # The import must live inside the try block; otherwise a missing
	        # scikit-learn raises before the fallback can ever be reached.
	        from sklearn.datasets import load_iris
	    except ImportError:
	        # Fallback if sklearn not available: random values, 50 rows per species.
	        # NOTE(review): these column names are not taken from sklearn's
	        # `feature_names`, so the two variants may expose different columns.
	        iris_data = {
	            'sepal_length': np.random.normal(5.8, 0.8, 150),
	            'sepal_width': np.random.normal(3.0, 0.4, 150),
	            'petal_length': np.random.normal(3.8, 1.8, 150),
	            'petal_width': np.random.normal(1.2, 0.8, 150),
	            'species': ['setosa'] * 50 + ['versicolor'] * 50 + ['virginica'] * 50,
	        }
	        datasets['Iris'] = pd.DataFrame(iris_data)
	    else:
	        iris = load_iris()
	        datasets['Iris'] = pd.DataFrame(iris.data, columns=iris.feature_names)
	        datasets['Iris']['species'] = [iris.target_names[i] for i in iris.target]

	    # Tips dataset
	    tips_data = {
	        'total_bill': np.random.uniform(10, 50, 200),
	        'tip': np.random.uniform(1, 10, 200),
	        'sex': np.random.choice(['Male', 'Female'], 200),
	        'smoker': np.random.choice(['Yes', 'No'], 200),
	        'day': np.random.choice(['Thur', 'Fri', 'Sat', 'Sun'], 200),
	        'time': np.random.choice(['Lunch', 'Dinner'], 200),
	        'size': np.random.choice([1, 2, 3, 4, 5, 6], 200),
	    }
	    datasets['Tips'] = pd.DataFrame(tips_data)

	    # Stock Data: a multiplicative random walk starting at 100.
	    dates = pd.date_range('2020-01-01', '2023-12-31', freq='D')
	    stock_price = 100
	    stock_data = []
	    for date in dates:
	        daily_return = np.random.normal(0.001, 0.02)
	        stock_price *= (1 + daily_return)
	        stock_data.append({
	            'date': date,
	            'price': stock_price,
	            'volume': np.random.randint(1000000, 5000000),
	            'high': stock_price * (1 + abs(np.random.normal(0, 0.01))),
	            'low': stock_price * (1 - abs(np.random.normal(0, 0.01))),
	            'open': stock_price * (1 + np.random.normal(0, 0.005)),
	        })
	    datasets['Stock Data'] = pd.DataFrame(stock_data)

	    # Wind Data: hourly rows for 12 months x 28 days, with one full sine
	    # cycle per day on wind speed and one full cosine cycle per year on
	    # temperature.
	    wind_data = []
	    for month in range(1, 13):
	        for day in range(1, 29):
	            for hour in range(24):
	                wind_data.append({
	                    'month': month,
	                    'day': day,
	                    'hour': hour,
	                    'wind_speed': abs(np.random.normal(15, 8)) + 5 * np.sin(hour / 24 * 2 * np.pi),
	                    'temperature': np.random.normal(20, 15) + 10 * np.cos(month / 12 * 2 * np.pi),
	                    'humidity': np.random.uniform(30, 90),
	                    'pressure': np.random.normal(1013, 20),
	                })
	    datasets['Wind Data'] = pd.DataFrame(wind_data)

	    return datasets

	# Initialize built-in datasets
	# Built once at import time and shared by every callback that reads it.
	builtin_datasets = create_builtin_datasets()

	# App layout
	# A fluid Bootstrap container holding a page header, four tabs (dataset
	# management, AI assistant, visualizations, data explorer) and four
	# dcc.Store components that carry state between callbacks.
	app.layout = dbc.Container([
	dbc.Row([
	dbc.Col([
	html.H1("🤖 AI-Powered Data Analytics", className="text-center mb-4"),
	html.P("Upload data, ask questions, and get AI-powered insights!",
	className="text-center text-muted"),
	html.Hr(),
	], width=12)
	]),

	# Tabbed interface
	dbc.Tabs([
	# Tab 1: Dataset Management
	# Left column (width 4): load/upload/select controls and status.
	# Right column (width 8): preview table and quick analytics.
	dbc.Tab(label="📁 Dataset Management", tab_id="dataset-management", children=[
	dbc.Row([
	dbc.Col([
	dbc.Card([
	dbc.CardBody([
	html.H4("Load Built-in Dataset", className="card-title"),
	# The option values match the keys returned by create_builtin_datasets().
	dcc.Dropdown(
	id="builtin-choice",
	options=[
	{"label": "Gapminder", "value": "Gapminder"},
	{"label": "Iris", "value": "Iris"},
	{"label": "Tips", "value": "Tips"},
	{"label": "Stock Data", "value": "Stock Data"},
	{"label": "Wind Data", "value": "Wind Data"}
	],
	value="Gapminder",
	className="mb-2"
	),
	dbc.Button("Load Dataset", id="load-builtin-btn", color="primary", className="mb-3"),

	html.Hr(),
	html.H4("Upload Custom Dataset", className="card-title"),
	# manage_datasets reacts to this component's `contents` property.
	dcc.Upload(
	id='file-upload',
	children=html.Div([
	'Drag and Drop or ',
	html.A('Select CSV/Excel Files')
	]),
	style={
	'width': '100%',
	'height': '60px',
	'lineHeight': '60px',
	'borderWidth': '1px',
	'borderStyle': 'dashed',
	'borderRadius': '5px',
	'textAlign': 'center',
	'margin': '10px'
	},
	multiple=False,
	accept='.csv,.xlsx,.xls'
	),

	dbc.Input(
	id="custom-name",
	placeholder="Dataset Name (optional)",
	type="text",
	className="mb-2"
	),
	# NOTE(review): no callback visible in this file uses `upload-btn`;
	# manage_datasets is triggered by `file-upload.contents` instead.
	dbc.Button("Upload", id="upload-btn", color="primary", className="mb-3"),

	html.Hr(),
	html.H4("Active Datasets", className="card-title"),
	# Starts with Gapminder only; manage_datasets outputs the `options`
	# of this dropdown from the dataset registry.
	dcc.Dropdown(
	id="dataset-selector",
	options=[{"label": "Gapminder", "value": "Gapminder"}],
	value="Gapminder",
	className="mb-2"
	),

	html.Hr(),
	html.Div(id="status-msg", children=[
	dbc.Alert("Ready to load data", color="info")
	]),
	html.Div(id="data-info")
	])
	])
	], width=4),

	dbc.Col([
	dbc.Card([
	dbc.CardBody([
	html.H4("Data Preview (First 10 rows)", className="card-title"),
	html.Div(id="data-preview", className="mb-4"),
	html.H4("Quick Analytics", className="card-title"),
	html.Div(id="auto-analytics")
	])
	])
	], width=8)
	], className="mt-4")
	]),

	# Tab 2: AI Assistant
	# A question box whose answer is rendered into `ai-response`.
	dbc.Tab(label="🤖 AI Assistant", tab_id="ai-assistant", children=[
	dbc.Row([
	dbc.Col([
	dbc.Card([
	dbc.CardBody([
	html.H4("🤖 AI Assistant", className="card-title"),
	# Placeholder warning; update_ai_dataset_info replaces these children.
	html.Div(id="ai-dataset-info", className="mb-3", children=[
	dbc.Alert("No dataset loaded. Please load a dataset in the Dataset Management tab first.",
	color="warning", className="mb-3")
	]),
	dbc.InputGroup([
	dbc.Input(
	id="ai-question",
	placeholder="Ask questions about your data...",
	type="text",
	style={"fontSize": "14px"}
	),
	dbc.Button(
	"Ask AI",
	id="ask-button",
	color="primary",
	n_clicks=0
	)
	]),

	html.Div(id="ai-response", className="mt-3")
	])
	])
	], width=12)
	], className="mt-4")
	]),

	# Tab 3: Visualizations
	# Chart controls feeding update_main_graph, plus the graph itself.
	dbc.Tab(label="📈 Visualizations", tab_id="visualizations", children=[
	dbc.Row([
	dbc.Col([
	dbc.Card([
	dbc.CardBody([
	html.H4("📈 Visualizations", className="card-title"),

	# Chart controls
	dbc.Row([
	dbc.Col([
	html.Label("Chart Type:", className="form-label"),
	# The option values are the strings update_main_graph branches on.
	dcc.Dropdown(
	id='chart-type',
	options=[
	{'label': 'Scatter Plot', 'value': 'scatter'},
	{'label': 'Line Chart', 'value': 'line'},
	{'label': 'Bar Chart', 'value': 'bar'},
	{'label': 'Histogram', 'value': 'histogram'},
	{'label': 'Box Plot', 'value': 'box'},
	{'label': 'Heatmap', 'value': 'heatmap'},
	{'label': 'Pie Chart', 'value': 'pie'}
	],
	value='scatter',
	className="mb-2"
	)
	], width=6),
	dbc.Col([
	html.Label("Color By:", className="form-label"),
	# manage_datasets outputs this dropdown's options but not its value.
	dcc.Dropdown(
	id='color-column',
	placeholder="Select column (optional)",
	className="mb-2"
	)
	], width=6)
	]),

	dbc.Row([
	dbc.Col([
	html.Label("X-Axis:", className="form-label"),
	dcc.Dropdown(
	id='x-column',
	placeholder="Select X column"
	)
	], width=6),
	dbc.Col([
	html.Label("Y-Axis:", className="form-label"),
	dcc.Dropdown(
	id='y-column',
	placeholder="Select Y column"
	)
	], width=6)
	], className="mb-3"),

	dcc.Graph(id='main-graph', style={'height': '500px'}),
	])
	])
	], width=12)
	], className="mt-4")
	]),

	# Tab 4: Data Explorer
	# update_data_table renders a table into `data-table`.
	dbc.Tab(label="🔍 Data Explorer", tab_id="data-explorer", children=[
	dbc.Row([
	dbc.Col([
	dbc.Card([
	dbc.CardBody([
	html.H4("🔍 Data Explorer", className="card-title"),
	html.Div(id='data-table')
	])
	])
	], width=12)
	], className="mt-4")
	])
	], id="main-tabs", active_tab="dataset-management"),

	# Store components
	# `stored-data` holds the active dataset as a list of row records.
	dcc.Store(id='stored-data'),
	# NOTE(review): `data-context` is not referenced by any callback visible
	# in this file.
	dcc.Store(id='data-context'),
	# Maps dataset name -> "builtin" or "custom"; maintained by manage_datasets.
	dcc.Store(id='dataset-registry', data={"Gapminder": "builtin"}),
	dcc.Store(id='current-dataset-name', data="Gapminder")
	], fluid=True)

	def create_vector_store(df):
	    """Placeholder for vector-store creation.

	    The argument is accepted but not inspected; the function always
	    reports success.

	    Args:
	        df: The dataset that would be indexed (currently unused).

	    Returns:
	        bool: Always ``True``.
	    """
	    _ = df  # intentionally unused for now
	    succeeded = True
	    return succeeded

	# Import AI assistant module
	from ai_assistant import get_ai_response

	def create_auto_analytics(df):
	    """Create the automatic analytics display for a dataset.

	    Builds, in order: summary statistics for numeric columns (when any
	    exist), a missing-data report, per-kind column counts, and up to five
	    of the strongest pairwise correlations whose absolute value exceeds 0.5.

	    Args:
	        df: ``pandas.DataFrame`` to summarise.

	    Returns:
	        list: Dash components ready to be used as a container's children.
	    """
	    analytics_components = []

	    # Summary Statistics
	    numeric_cols = df.select_dtypes(include=['number']).columns
	    if len(numeric_cols) > 0:
	        stats = df[numeric_cols].describe()
	        analytics_components.extend([
	            html.H6("📊 Summary Statistics", className="mt-2"),
	            dbc.Table.from_dataframe(
	                stats.reset_index().round(2),
	                size='sm',
	                striped=True,
	                hover=True
	            )
	        ])

	    # Missing Data Analysis
	    missing_data = df.isnull().sum()
	    missing_data = missing_data[missing_data > 0]
	    if not missing_data.empty:
	        analytics_components.extend([
	            html.H6("⚠️ Missing Data", className="mt-3"),
	            dbc.Alert([
	                html.Pre(missing_data.to_string())
	            ], color="warning")
	        ])
	    else:
	        analytics_components.extend([
	            html.H6("✅ Data Quality", className="mt-3"),
	            dbc.Alert("No missing values found!", color="success")
	        ])

	    # Data Types Analysis
	    analytics_components.extend([
	        html.H6("🔍 Data Types", className="mt-3"),
	        dbc.Alert([
	            html.P(f"📈 Numeric columns: {len(numeric_cols)}"),
	            html.P(f"📝 Text columns: {len(df.select_dtypes(include=['object']).columns)}"),
	            html.P(f"📅 DateTime columns: {len(df.select_dtypes(include=['datetime64']).columns)}"),
	            html.P(f"🔢 Boolean columns: {len(df.select_dtypes(include=['bool']).columns)}")
	        ], color="light")
	    ])

	    # Correlation Analysis for numeric columns
	    if len(numeric_cols) > 1:
	        corr_matrix = df[numeric_cols].corr()
	        names = list(corr_matrix.columns)
	        # Visit each unordered pair once (upper triangle). NaN correlations
	        # fail the comparison and are skipped.
	        corr_pairs = []
	        for i, first in enumerate(names):
	            for j in range(i + 1, len(names)):
	                corr_val = corr_matrix.iloc[i, j]
	                if abs(corr_val) > 0.5:  # Only show strong correlations
	                    corr_pairs.append((first, names[j], corr_val))

	        # Rank by strength so the five entries shown really are the top five
	        # rather than the first five in column order.
	        corr_pairs.sort(key=lambda pair: abs(pair[2]), reverse=True)

	        if corr_pairs:
	            analytics_components.extend([
	                html.H6("🔗 Strong Correlations (>0.5)", className="mt-3"),
	                dbc.Alert([
	                    html.P(f"{pair[0]} ↔ {pair[1]}: {pair[2]:.3f}") for pair in corr_pairs[:5]
	                ], color="info")
	            ])

	    return analytics_components

	def parse_contents(contents, filename):
	    """Parse uploaded file contents into a DataFrame.

	    Args:
	        contents: Upload payload of the form ``"<header>,<base64 data>"``.
	        filename: Original file name; its extension (case-insensitive)
	            selects the parser: ``.csv`` for CSV, ``.xls``/``.xlsx`` for Excel.

	    Returns:
	        tuple: ``(df, None)`` on success, or ``(None, message)`` when the
	        file type is unsupported or the payload cannot be parsed. The
	        function does not raise for bad input.
	    """
	    try:
	        # Split on the first comma only; everything after it is the payload.
	        _, separator, content_string = contents.partition(',')
	        if not separator:
	            raise ValueError("upload payload is not in '<header>,<base64>' form")
	        decoded = base64.b64decode(content_string)

	        # Dispatch on the real extension; a substring test would treat
	        # 'csv_export.xlsx' as CSV and reject 'DATA.CSV'.
	        extension = os.path.splitext(filename)[1].lower()
	        if extension == '.csv':
	            # utf-8-sig also accepts plain UTF-8 and strips a leading BOM,
	            # which would otherwise end up in the first column name.
	            df = pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')))
	        elif extension in ('.xls', '.xlsx'):
	            df = pd.read_excel(io.BytesIO(decoded))
	        else:
	            return None, "Unsupported file type"

	        return df, None
	    except Exception as e:
	        # Boundary with user-supplied data: report any failure as a message.
	        return None, f"Error processing file: {str(e)}"

	# Dataset management callbacks
	def _selector_options(registry):
	    """Build dropdown options listing every dataset name in *registry*."""
	    return [{"label": name, "value": name} for name in registry]


	def _empty_outputs(registry, status=""):
	    """Return the 13 callback outputs that clear all dataset-dependent components."""
	    return (None, status, "", "", "", [], [], [], None, None,
	            registry, _selector_options(registry), None)


	def _dataset_outputs(df, dataset_name, status_text, info_title, registry, color="success"):
	    """Return the 13 callback outputs that display *df* as the active dataset.

	    *status_text* is shown in the status alert (followed by the AI status),
	    *info_title* heads the dataset-information panel, and *color* is the
	    Bootstrap colour of the status alert.
	    """
	    # Create vector store for AI
	    vector_success = create_vector_store(df)
	    ai_status = "🤖 AI Ready" if vector_success else "⚠️ AI Limited"
	    status_msg = dbc.Alert(f"{status_text} {ai_status}", color=color)

	    # Create data table preview
	    table = dbc.Table.from_dataframe(
	        df.head(10),
	        striped=True,
	        bordered=True,
	        hover=True,
	        size='sm'
	    )

	    all_names = df.columns.tolist()
	    numeric_names = df.select_dtypes(include=['number']).columns.tolist()
	    categorical_count = len(df.select_dtypes(include=['object']).columns)

	    data_info = dbc.Alert([
	        html.H6(info_title),
	        html.P(f"Shape: {df.shape[0]} rows × {df.shape[1]} columns"),
	        html.P(f"Columns: {', '.join(all_names)}"),
	        html.P(f"Data types: {len(numeric_names)} numeric, {categorical_count} categorical")
	    ], color="light")

	    # Create automatic analytics
	    auto_analytics = create_auto_analytics(df)

	    # Create column options for dropdowns
	    all_columns = [{'label': col, 'value': col} for col in all_names]

	    # Set default values - prefer numeric columns for x and y
	    if numeric_names:
	        default_x = numeric_names[0]
	    elif all_names:
	        default_x = all_names[0]
	    else:
	        default_x = None

	    if len(numeric_names) > 1:
	        default_y = numeric_names[1]
	    elif numeric_names:
	        default_y = numeric_names[0]
	    elif len(all_names) > 1:
	        default_y = all_names[1]
	    else:
	        default_y = None

	    return (df.to_dict('records'), status_msg, table, data_info, auto_analytics,
	            all_columns, all_columns, all_columns, default_x, default_y,
	            registry, _selector_options(registry), dataset_name)


	# Dataset management callbacks
	@app.callback(
	    [Output('stored-data', 'data'),
	     Output('status-msg', 'children'),
	     Output('data-preview', 'children'),
	     Output('data-info', 'children'),
	     Output('auto-analytics', 'children'),
	     Output('x-column', 'options'),
	     Output('y-column', 'options'),
	     Output('color-column', 'options'),
	     Output('x-column', 'value'),
	     Output('y-column', 'value'),
	     Output('dataset-registry', 'data'),
	     Output('dataset-selector', 'options'),
	     Output('current-dataset-name', 'data')],
	    [Input('load-builtin-btn', 'n_clicks'),
	     Input('file-upload', 'contents'),
	     Input('dataset-selector', 'value')],
	    [State('builtin-choice', 'value'),
	     State('file-upload', 'filename'),
	     State('custom-name', 'value'),
	     State('dataset-registry', 'data')]
	)
	def manage_datasets(builtin_clicks, file_contents, selected_dataset, builtin_choice, filename, custom_name, registry):
	    """Handle dataset loading and switching.

	    Depending on which input triggered the callback, this loads a built-in
	    dataset, parses an uploaded file, or switches to a dataset chosen in
	    the selector. With no trigger (initial page load) Gapminder is loaded.

	    Args:
	        builtin_clicks: Click count of the "Load Dataset" button.
	        file_contents: Upload payload from ``file-upload``.
	        selected_dataset: Name chosen in ``dataset-selector``.
	        builtin_choice: Name chosen in ``builtin-choice``.
	        filename: Name of the uploaded file.
	        custom_name: Optional user-supplied name for the upload.
	        registry: Dict mapping dataset name to ``"builtin"`` or ``"custom"``.

	    Returns:
	        tuple: 13 values in the order of the declared outputs. When the
	        trigger cannot be served, the dataset-dependent outputs are cleared.
	    """
	    ctx = callback_context

	    # Initialize defaults
	    registry = registry or {"Gapminder": "builtin"}

	    if not ctx.triggered:
	        # Initial load - load Gapminder dataset
	        return _dataset_outputs(
	            builtin_datasets["Gapminder"], "Gapminder",
	            "✅ Gapminder dataset loaded!", "Dataset Information:", registry)

	    trigger_id = ctx.triggered[0]['prop_id'].split('.')[0]

	    if trigger_id == 'load-builtin-btn' and builtin_clicks:
	        # Load built-in dataset
	        if builtin_choice in builtin_datasets:
	            registry[builtin_choice] = "builtin"
	            return _dataset_outputs(
	                builtin_datasets[builtin_choice], builtin_choice,
	                f"✅ {builtin_choice} dataset loaded!",
	                f"{builtin_choice} Dataset Information:", registry)

	    elif trigger_id == 'file-upload' and file_contents:
	        # Upload custom dataset
	        df, error = parse_contents(file_contents, filename)
	        if error:
	            return _empty_outputs(registry, dbc.Alert(error, color="danger"))

	        # Determine dataset name: an explicit non-blank name wins; otherwise
	        # use the file name minus its final extension only, so that
	        # 'my.data.csv' becomes 'my.data' rather than 'my'.
	        cleaned_name = (custom_name or "").strip()
	        dataset_name = cleaned_name or os.path.splitext(filename)[0]
	        registry[dataset_name] = "custom"
	        return _dataset_outputs(
	            df, dataset_name,
	            f"✅ {dataset_name} uploaded successfully!",
	            f"{dataset_name} Dataset Information:", registry)

	    elif trigger_id == 'dataset-selector' and selected_dataset:
	        # Switch between datasets
	        if selected_dataset in registry:
	            if selected_dataset in builtin_datasets:
	                return _dataset_outputs(
	                    builtin_datasets[selected_dataset], selected_dataset,
	                    f"✅ Switched to {selected_dataset} dataset!",
	                    f"{selected_dataset} Dataset Information:", registry)

	            # Uploaded data is not stored server-side, so it cannot be
	            # restored here. Fall back to Gapminder, but say so instead of
	            # reporting a successful switch.
	            return _dataset_outputs(
	                builtin_datasets["Gapminder"], "Gapminder",
	                f"⚠️ {selected_dataset} is not kept after upload; showing Gapminder instead. "
	                "Re-upload the file to view it again.",
	                "Gapminder Dataset Information:", registry, color="warning")

	    # Default fallback
	    return _empty_outputs(registry)

	# Updated callback for data table (now shared across tabs)
	@app.callback(
	    Output('data-table', 'children'),
	    [Input('stored-data', 'data')]
	)
	def update_data_table(data):
	    """Render the Data Explorer table from the stored dataset.

	    Args:
	        data: Row records held in the ``stored-data`` store, or a falsy
	            value when nothing is loaded.

	    Returns:
	        A Bootstrap table of the first 20 rows, or a muted "No data loaded"
	        paragraph when *data* is empty.
	    """
	    if data:
	        first_rows = pd.DataFrame(data).head(20)
	        table_options = dict(
	            striped=True,
	            bordered=True,
	            hover=True,
	            size='sm',
	            responsive=True,
	        )
	        return dbc.Table.from_dataframe(first_rows, **table_options)

	    return html.P("No data loaded", className="text-muted")

	# Callback to update AI assistant tab with current dataset info
	@app.callback(
	    Output('ai-dataset-info', 'children'),
	    [Input('stored-data', 'data'),
	     Input('current-dataset-name', 'data')]
	)
	def update_ai_dataset_info(data, dataset_name):
	    """Summarise the active dataset at the top of the AI Assistant tab.

	    Args:
	        data: Row records from the ``stored-data`` store.
	        dataset_name: Name held in the ``current-dataset-name`` store.

	    Returns:
	        A warning alert when either argument is missing; otherwise a success
	        alert showing the name, shape, up to five column names and the
	        numeric/categorical column counts.
	    """
	    if not (data and dataset_name):
	        return dbc.Alert("No dataset loaded. Please load a dataset in the Dataset Management tab first.",
	                         color="warning", className="mb-3")

	    frame = pd.DataFrame(data)
	    row_count, column_count = frame.shape
	    column_names = frame.columns.tolist()

	    # Only the first five names are listed; an ellipsis marks the rest.
	    listed_names = ', '.join(column_names[:5])
	    ellipsis = '...' if len(column_names) > 5 else ''

	    numeric_total = len(frame.select_dtypes(include=['number']).columns)
	    categorical_total = len(frame.select_dtypes(include=['object']).columns)

	    summary = [
	        html.H6(f"📊 Current Dataset: {dataset_name}"),
	        html.P(f"Shape: {row_count:,} rows × {column_count} columns"),
	        html.P(f"Columns: {listed_names}{ellipsis}"),
	        html.P(f"Data types: {numeric_total} numeric, {categorical_total} categorical"),
	        html.Small("✨ AI is ready to answer questions about this data!", className="text-muted"),
	    ]
	    return dbc.Alert(summary, color="success", className="mb-3")

	@app.callback(
	    Output('ai-response', 'children'),
	    [Input('ask-button', 'n_clicks')],
	    [State('ai-question', 'value'),
	     State('stored-data', 'data'),
	     State('current-dataset-name', 'data')]
	)
	def handle_ai_question(n_clicks, question, data, dataset_name):
	    """Answer a question about the active dataset.

	    Args:
	        n_clicks: Click count of the "Ask AI" button.
	        question: Text typed into the question box.
	        data: Row records from the ``stored-data`` store.
	        dataset_name: Name held in the ``current-dataset-name`` store.

	    Returns:
	        An empty string when there is no click, question or data; a warning
	        alert when no dataset name is set; otherwise an info alert wrapping
	        the result of ``get_ai_response`` rendered as Markdown.
	    """
	    if not (n_clicks and question and data):
	        return ""

	    if not dataset_name:
	        return dbc.Alert("Please load a dataset first in the Dataset Management tab.", color="warning")

	    answer = get_ai_response(question, pd.DataFrame(data))
	    rendered = dcc.Markdown(answer)
	    return dbc.Alert(rendered, color="info")


	def _message_figure(text, **font):
	    """Return an empty figure whose only content is a text annotation at (0.5, 0.5)."""
	    fig = go.Figure()
	    annotation = dict(text=text, x=0.5, y=0.5, showarrow=False)
	    if font:
	        annotation['font'] = font
	    fig.add_annotation(**annotation)
	    return fig


	@app.callback(
	    Output('main-graph', 'figure'),
	    [Input('stored-data', 'data'),
	     Input('chart-type', 'value'),
	     Input('x-column', 'value'),
	     Input('y-column', 'value'),
	     Input('color-column', 'value')]
	)
	def update_main_graph(data, chart_type, x_col, y_col, color_col):
	    """Update main visualization based on user selections.

	    Args:
	        data: Row records from the ``stored-data`` store.
	        chart_type: One of ``'scatter'``, ``'line'``, ``'bar'``,
	            ``'histogram'``, ``'box'``, ``'heatmap'`` or ``'pie'``.
	        x_col: Column selected for the X axis, if any.
	        y_col: Column selected for the Y axis, if any.
	        color_col: Column selected for colouring/grouping, if any.

	    Returns:
	        A Plotly figure. When the inputs are insufficient for the requested
	        chart, or chart creation raises, the figure contains an explanatory
	        message instead of a chart.
	    """
	    if not data:
	        fig = _message_figure("Upload data to see visualizations", size=16, color="gray")
	        fig.update_layout(template="plotly_white")
	        return fig

	    df = pd.DataFrame(data)

	    # Selections can outlive the dataset they were made for (the colour
	    # dropdown's value is not reset when the dataset changes). Treat any
	    # selection that is not a column of the current data as "not selected"
	    # rather than letting Plotly raise.
	    x_col, y_col, color_col = (
	        col if col in df.columns else None
	        for col in (x_col, y_col, color_col)
	    )

	    # Handle cases where columns aren't selected yet
	    if not x_col and not y_col:
	        fig = _message_figure("Select columns to create visualization", size=16, color="gray")
	        fig.update_layout(template="plotly_white")
	        return fig

	    try:
	        # Create visualization based on chart type
	        if chart_type == 'scatter':
	            if x_col and y_col:
	                fig = px.scatter(df, x=x_col, y=y_col, color=color_col,
	                                 title=f"Scatter Plot: {y_col} vs {x_col}")
	            else:
	                fig = _message_figure("Select both X and Y columns for scatter plot")

	        elif chart_type == 'line':
	            if x_col and y_col:
	                fig = px.line(df, x=x_col, y=y_col, color=color_col,
	                              title=f"Line Chart: {y_col} vs {x_col}")
	            else:
	                fig = _message_figure("Select both X and Y columns for line chart")

	        elif chart_type == 'bar':
	            if x_col and y_col:
	                fig = px.bar(df, x=x_col, y=y_col, color=color_col,
	                             title=f"Bar Chart: {y_col} by {x_col}")
	            elif x_col:
	                # Name both columns explicitly: the names produced by
	                # value_counts().reset_index() differ between pandas
	                # versions, so relying on 'index' is not portable.
	                counts = (df[x_col].value_counts()
	                          .rename_axis(x_col)
	                          .reset_index(name='count'))
	                fig = px.bar(counts, x=x_col, y='count',
	                             title=f"Value Counts: {x_col}")
	            else:
	                fig = _message_figure("Select at least X column for bar chart")

	        elif chart_type == 'histogram':
	            if x_col:
	                fig = px.histogram(df, x=x_col, color=color_col,
	                                   title=f"Histogram: {x_col}")
	            else:
	                fig = _message_figure("Select X column for histogram")

	        elif chart_type == 'box':
	            if y_col:
	                # The colour column doubles as the grouping axis here.
	                fig = px.box(df, x=color_col, y=y_col,
	                             title=f"Box Plot: {y_col}" + (f" by {color_col}" if color_col else ""))
	            elif x_col:
	                fig = px.box(df, y=x_col,
	                             title=f"Box Plot: {x_col}")
	            else:
	                fig = _message_figure("Select a column for box plot")

	        elif chart_type == 'heatmap':
	            # Uses every numeric column; the axis selections are ignored.
	            numeric_cols = df.select_dtypes(include=['number']).columns
	            if len(numeric_cols) > 1:
	                corr_matrix = df[numeric_cols].corr()
	                fig = px.imshow(corr_matrix,
	                                text_auto=True,
	                                aspect="auto",
	                                title="Correlation Heatmap",
	                                color_continuous_scale='RdBu_r')
	            else:
	                fig = _message_figure("Need at least 2 numeric columns for heatmap")

	        elif chart_type == 'pie':
	            if x_col:
	                value_counts = df[x_col].value_counts()
	                fig = px.pie(values=value_counts.values,
	                             names=value_counts.index,
	                             title=f"Pie Chart: {x_col}")
	            else:
	                fig = _message_figure("Select X column for pie chart")

	        else:
	            fig = _message_figure("Select a chart type")

	        fig.update_layout(template="plotly_white", height=500)
	        return fig

	    except Exception as e:
	        # Callback boundary: show the failure in the graph area instead of
	        # letting the callback error out.
	        fig = _message_figure(f"Error creating chart: {str(e)}", color="red")
	        fig.update_layout(template="plotly_white")
	        return fig

	# Start the Dash server only when this file is executed as a script, not
	# when it is imported (e.g. by a host that uses the module-level `server`).
	if __name__ == '__main__':
	    # Listens on all interfaces, port 7860, with debug mode off.
	    # NOTE(review): presumably the port expected by the hosting platform - confirm.
	    app.run(host='0.0.0.0', port=7860, debug=False)