Spaces:

mic3333
/

dash-mcp

Sleeping

File size: 25,034 Bytes

20706fe

import os
import base64
import io
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, html, dcc, Input, Output, State, callback_context
import dash_bootstrap_components as dbc
import numpy as np
from scipy import stats
import re

# Initialize Dash app
# The Bootstrap theme stylesheet is loaded so the dash-bootstrap-components
# widgets used in the layout below are styled.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
# Module-level alias for the app's `server` attribute.
# NOTE(review): presumably exported so a WSGI host can import `server`
# directly - confirm against the deployment configuration.
server = app.server

class AIVisualizationEngine:
    """Rule-based helper that profiles a DataFrame and suggests visualizations.

    Everything here is a deterministic heuristic: dtype-based chart
    recommendations, IQR outlier detection, basic summary statistics and
    keyword matching for free-text queries.

    Attributes:
        df: The DataFrame being analysed (stored by reference, not copied).
        numeric_cols: Labels of columns with a numeric dtype.
        categorical_cols: Labels of columns with ``object`` dtype.
        datetime_cols: Labels of columns with ``datetime64`` dtype.
    """

    # Explicit chart names. These are checked first so that a chart the user
    # actually names always wins over a softer intent hint in the same query.
    _CHART_NAMES = (
        ('scatter', ('scatter',)),
        ('bar', ('bar',)),
        ('histogram', ('histogram',)),
        ('line', ('line',)),
        ('box', ('box',)),
        ('pie', ('pie',)),
        ('heatmap', ('heatmap', 'correlation matrix')),
    )

    # Softer intent hints, consulted only when no chart was named outright.
    _CHART_HINTS = (
        ('scatter', ('correlation', 'relationship')),
        ('bar', ('compare', 'comparison')),
        ('histogram', ('distribution', 'freq')),
        ('line', ('trend', 'over time', 'timeline')),
        ('box', ('quartile', 'median')),
        ('pie', ('proportion', 'percentage')),
    )

    # Regex tails appended to a keyword: chart names may carry a "plot"/"chart"
    # suffix and a plural; hints match as word prefixes ("freq" -> "frequency").
    _NAME_TAIL = r'(?:plot|chart)?(?:s|es)?'
    _HINT_TAIL = r'\w*'

    def __init__(self, df):
        """Store ``df`` and bucket its columns by dtype."""
        self.df = df
        self.numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
        self.categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
        self.datetime_cols = df.select_dtypes(include=['datetime64']).columns.tolist()

    @staticmethod
    def _mentions(text, phrase, tail=''):
        """Return True if ``phrase`` (+ optional regex ``tail``) is a whole token of ``text``.

        The lookarounds stop a keyword from matching inside a longer word,
        e.g. "line" in "deadline" or a column "age" in "percentage".
        """
        pattern = r'(?<!\w)' + re.escape(phrase) + tail + r'(?!\w)'
        return re.search(pattern, text) is not None

    @classmethod
    def _first_match(cls, text, table, tail):
        """Return the first chart type in ``table`` with a keyword found in ``text``."""
        for chart_type, keywords in table:
            if any(cls._mentions(text, keyword, tail) for keyword in keywords):
                return chart_type
        return None

    def recommend_chart_type(self, x_col=None, y_col=None):
        """Recommend chart types for the selected column(s).

        Args:
            x_col: Label of the X column, or ``None``.
            y_col: Label of the Y column, or ``None``.

        Returns:
            A list of ``{'type', 'confidence', 'reason'}`` dicts ordered from
            highest to lowest confidence. Empty when ``x_col`` is not given.
        """
        recommendations = []

        if x_col and y_col:
            # Anything that is not numeric (strings, dates, ...) is treated as
            # categorical for the purpose of picking a chart.
            x_type = 'numeric' if x_col in self.numeric_cols else 'categorical'
            y_type = 'numeric' if y_col in self.numeric_cols else 'categorical'

            if x_type == 'numeric' and y_type == 'numeric':
                recommendations = [
                    {'type': 'scatter', 'confidence': 0.9, 'reason': 'Both variables are numeric - scatter plot shows correlation'},
                    {'type': 'line', 'confidence': 0.7, 'reason': 'Line chart good for trends if X is ordered'},
                ]
            elif x_type == 'categorical' and y_type == 'numeric':
                recommendations = [
                    {'type': 'bar', 'confidence': 0.9, 'reason': 'Categorical vs numeric - bar chart shows comparisons'},
                    {'type': 'box', 'confidence': 0.8, 'reason': 'Box plot shows distribution across categories'},
                ]
            elif x_type == 'numeric' and y_type == 'categorical':
                # Previously fell through with no recommendation at all.
                recommendations = [
                    {'type': 'bar', 'confidence': 0.8, 'reason': 'Numeric vs categorical - horizontal bar chart shows comparisons'},
                ]
            else:
                recommendations = [
                    {'type': 'bar', 'confidence': 0.8, 'reason': 'Count relationships between categories'},
                ]
        elif x_col and not y_col:
            if x_col in self.numeric_cols:
                recommendations = [
                    {'type': 'histogram', 'confidence': 0.9, 'reason': 'Single numeric variable - histogram shows distribution'},
                    {'type': 'box', 'confidence': 0.7, 'reason': 'Box plot shows statistical summary'},
                ]
            else:
                recommendations = [
                    {'type': 'pie', 'confidence': 0.8, 'reason': 'Categorical variable - pie chart shows proportions'},
                    {'type': 'bar', 'confidence': 0.9, 'reason': 'Bar chart shows category frequencies'},
                ]

        # Callers take element 0 as "the" recommendation, so the list must be
        # ordered by confidence (sorted() is stable, ties keep listing order).
        return sorted(recommendations, key=lambda rec: rec['confidence'], reverse=True)

    def detect_outliers(self, column):
        """Detect outliers in ``column`` using the 1.5 * IQR rule.

        Returns:
            Index labels of rows whose value lies below ``Q1 - 1.5*IQR`` or
            above ``Q3 + 1.5*IQR``; an empty list for non-numeric columns.
        """
        if column not in self.numeric_cols:
            return []

        values = self.df[column]
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr

        outliers = self.df[(values < lower_bound) | (values > upper_bound)]
        return outliers.index.tolist()

    def generate_insights(self, x_col, y_col=None):
        """Build human-readable observations about ``x_col`` (and ``y_col``).

        Args:
            x_col: Label of the primary column.
            y_col: Optional second column; used for a correlation note when
                both columns are numeric.

        Returns:
            A list of display strings. Empty when ``x_col`` is not numeric.
        """
        insights = []
        x_is_numeric = x_col in self.numeric_cols

        if x_is_numeric:
            mean_val = self.df[x_col].mean()
            median_val = self.df[x_col].median()
            std_val = self.df[x_col].std()

            insights.append(f"📊 {x_col}: Mean = {mean_val:.2f}, Median = {median_val:.2f}")

            # Heuristic: mean and median more than half a standard deviation
            # apart is reported as skew.
            if abs(mean_val - median_val) > std_val * 0.5:
                insights.append(f"⚠️ {x_col} distribution appears skewed")

            outliers = self.detect_outliers(x_col)
            if outliers:
                insights.append(f"🎯 Found {len(outliers)} potential outliers in {x_col}")

        if y_col and x_is_numeric and y_col in self.numeric_cols:
            correlation = self.df[x_col].corr(self.df[y_col])
            # A NaN correlation fails both comparisons, so nothing is reported.
            if abs(correlation) > 0.7:
                strength = "strong" if abs(correlation) > 0.8 else "moderate"
                direction = "positive" if correlation > 0 else "negative"
                insights.append(f"🔗 {strength.title()} {direction} correlation ({correlation:.3f}) between {x_col} and {y_col}")
            elif abs(correlation) < 0.3:
                insights.append(f"📈 Weak correlation ({correlation:.3f}) between {x_col} and {y_col}")

        return insights

    def parse_natural_language_query(self, query):
        """Extract a chart type and column names from a free-text request.

        Matching is token-based. Resolution order for the chart type is:
        an explicitly named chart, then an intent hint, then the bare word
        "by" (read as "compare by", i.e. a bar chart).

        Args:
            query: The user's text; ``None`` is treated as an empty string.

        Returns:
            A dict with ``'chart_type'`` (a chart key or ``None``),
            ``'columns'`` (mentioned column labels in DataFrame order) and
            ``'query'`` (the lower-cased, stripped text).
        """
        query = (query or '').lower().strip()

        suggested_chart = self._first_match(query, self._CHART_NAMES, self._NAME_TAIL)
        if suggested_chart is None:
            suggested_chart = self._first_match(query, self._CHART_HINTS, self._HINT_TAIL)
        if suggested_chart is None and self._mentions(query, 'by'):
            suggested_chart = 'bar'

        # A column counts as mentioned when its name, or its name with
        # underscores written as spaces, appears as a whole token.
        mentioned_cols = []
        for col in self.df.columns:
            name = str(col).lower()
            variants = {name, name.replace('_', ' ')}
            if any(variant and self._mentions(query, variant) for variant in variants):
                mentioned_cols.append(col)

        return {
            'chart_type': suggested_chart,
            'columns': mentioned_cols,
            'query': query
        }

    def get_smart_color_scheme(self, chart_type, column=None):
        """Return the colour-scheme name associated with ``chart_type``.

        ``column`` is accepted for interface compatibility but is not used.
        Unknown chart types fall back to ``'Viridis'``.
        """
        color_schemes = {
            'scatter': 'Viridis',
            'line': 'Blues',
            'bar': 'Set3',
            'histogram': 'Plasma',
            'box': 'Set2',
            'pie': 'Pastel',
            'heatmap': 'RdBu_r'
        }
        return color_schemes.get(chart_type, 'Viridis')

# App layout with AI features
# Page structure: a header row, then a two-column row (left: upload, query box
# and quick-analytics buttons; right: chart controls, main graph and a data
# preview), followed by two dcc.Store components.
# The component ids declared here are the ones the callbacks below bind to.
app.layout = dbc.Container([
    # Header row: title and tagline.
    dbc.Row([
        dbc.Col([
            html.H1("🤖 AI-Enhanced Data Dashboard", className="text-center mb-4"),
            html.P("Upload data and let AI help you create intelligent visualizations!", 
                   className="text-center text-muted"),
            html.Hr(),
        ], width=12)
    ]),
    
    dbc.Row([
        # Left column (4/12): data upload, query input and quick analytics.
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H4("📁 Data Upload", className="card-title"),
                    # Single-file upload; its `contents` property triggers update_data.
                    dcc.Upload(
                        id='upload-data',
                        children=html.Div([
                            'Drag and Drop or ',
                            html.A('Select Files')
                        ]),
                        style={
                            'width': '100%',
                            'height': '60px',
                            'lineHeight': '60px',
                            'borderWidth': '1px',
                            'borderStyle': 'dashed',
                            'borderRadius': '5px',
                            'textAlign': 'center',
                            'margin': '10px'
                        },
                        multiple=False,
                        accept='.csv,.xlsx,.txt'
                    ),
                    
                    # Filled by update_data with a success or error alert.
                    html.Div(id='upload-status', className="mt-2"),
                    html.Hr(),
                    
                    html.H4("🎯 AI Query Interface", className="card-title"),
                    # Free-text request; read by handle_ai_query when the button is clicked.
                    dbc.InputGroup([
                        dbc.Input(
                            id="ai-query",
                            placeholder="Try: 'Show scatter plot of age vs salary' or 'Bar chart of departments'",
                            type="text",
                        ),
                        dbc.Button(
                            "🤖 AI Create", 
                            id="ai-create-btn", 
                            color="primary",
                            n_clicks=0
                        )
                    ]),
                    
                    # Filled by handle_ai_query with what it detected in the query.
                    html.Div(id="ai-recommendations", className="mt-3"),
                    html.Hr(),
                    
                    html.H4("📊 Quick Analytics", className="card-title"),
                    # Each button is an Input of quick_analytics.
                    dbc.ButtonGroup([
                        dbc.Button("Summary Stats", id="stats-btn", size="sm"),
                        dbc.Button("AI Insights", id="insights-btn", size="sm"),
                        dbc.Button("Outliers", id="outliers-btn", size="sm"),
                    ], className="w-100"),
                    
                    # Output area for quick_analytics.
                    html.Div(id="quick-analytics", className="mt-3")
                ])
            ])
        ], width=4),
        
        # Right column (8/12): chart controls, the graph and the data preview.
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H4("📈 AI-Enhanced Visualizations", className="card-title"),
                    
                    # Chart controls
                    dbc.Row([
                        dbc.Col([
                            html.Label("Chart Type:", className="form-label"),
                            # 'ai_recommend' is the default; update_main_graph
                            # resolves it to a concrete chart type.
                            dcc.Dropdown(
                                id='chart-type',
                                options=[
                                    {'label': 'AI Recommend', 'value': 'ai_recommend'},
                                    {'label': 'Scatter Plot', 'value': 'scatter'},
                                    {'label': 'Line Chart', 'value': 'line'},
                                    {'label': 'Bar Chart', 'value': 'bar'},
                                    {'label': 'Histogram', 'value': 'histogram'},
                                    {'label': 'Box Plot', 'value': 'box'},
                                    {'label': 'Heatmap', 'value': 'heatmap'},
                                    {'label': 'Pie Chart', 'value': 'pie'}
                                ],
                                value='ai_recommend',
                                className="mb-2"
                            )
                        ], width=6),
                        dbc.Col([
                            html.Label("Color By:", className="form-label"),
                            # Options are populated by update_data after an upload.
                            dcc.Dropdown(
                                id='color-column',
                                placeholder="AI will suggest colors",
                                className="mb-2"
                            )
                        ], width=6)
                    ]),
                    
                    # Axis selectors; options and default values come from update_data.
                    dbc.Row([
                        dbc.Col([
                            html.Label("X-Axis:", className="form-label"),
                            dcc.Dropdown(
                                id='x-column',
                                placeholder="Select X column"
                            )
                        ], width=6),
                        dbc.Col([
                            html.Label("Y-Axis:", className="form-label"),
                            dcc.Dropdown(
                                id='y-column',
                                placeholder="Select Y column"
                            )
                        ], width=6)
                    ], className="mb-3"),
                    
                    # Figure and insights area are both outputs of update_main_graph.
                    dcc.Graph(id='main-graph', style={'height': '500px'}),
                    
                    html.Div(id='ai-insights-display', className="mt-3")
                ])
            ]),
            
            dbc.Card([
                dbc.CardBody([
                    html.H4("🔍 Data Explorer", className="card-title"),
                    # Filled by update_data with a preview table of the first rows.
                    html.Div(id='data-table')
                ])
            ], className="mt-3")
        ], width=8)
    ], className="mt-4"),
    
    # Store components
    # 'stored-data' holds the uploaded rows as a list of record dicts.
    dcc.Store(id='stored-data'),
    # NOTE(review): no callback visible in this file reads or writes
    # 'ai-engine' - confirm whether it is still needed.
    dcc.Store(id='ai-engine'),
], fluid=True)

def parse_contents(contents, filename):
    """Parse an uploaded file into a DataFrame.

    Args:
        contents: Upload payload as a data URL, i.e. ``"<header>,<base64>"``.
        filename: Original file name; only its extension is used, and it is
            compared case-insensitively.

    Returns:
        ``(df, None)`` on success, or ``(None, message)`` when the file type
        is unsupported or the payload cannot be parsed. This function never
        raises for bad input; failures are reported through ``message``.
    """
    if not contents or not filename:
        return None, "Unsupported file type"

    # Decide by the real extension, not by substring: "csv_notes.pdf" is not
    # a CSV file and "DATA.CSV" is.
    extension = os.path.splitext(filename)[1].lower()

    try:
        # Everything after the first comma is the base64 payload. Decoding
        # lives inside the try so a malformed upload becomes an error message.
        _, _, content_string = contents.partition(',')
        decoded = base64.b64decode(content_string)

        if extension == '.csv':
            # utf-8-sig drops the BOM that spreadsheet exports often prepend.
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')))
        elif extension == '.txt':
            # The upload widget accepts .txt; sep=None lets pandas sniff the
            # delimiter (requires the python engine).
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')),
                             sep=None, engine='python')
        elif extension.startswith('.xls'):
            df = pd.read_excel(io.BytesIO(decoded))
        else:
            return None, "Unsupported file type"

        return df, None
    except Exception as e:
        # Deliberately broad: this is the user-facing boundary, and the
        # readers can raise many unrelated exception types.
        return None, f"Error processing file: {str(e)}"

@app.callback(
    [
        Output('stored-data', 'data'),
        Output('upload-status', 'children'),
        Output('data-table', 'children'),
        Output('x-column', 'options'),
        Output('y-column', 'options'),
        Output('color-column', 'options'),
        Output('x-column', 'value'),
        Output('y-column', 'value'),
    ],
    [Input('upload-data', 'contents')],
    [State('upload-data', 'filename')]
)
def update_data(contents, filename):
    """Refresh the stored data, preview table and column dropdowns after an upload.

    Returns an 8-tuple matching the callback outputs: stored records, status
    message, preview table, the option lists for the X / Y / colour dropdowns,
    and the default X and Y column values.
    """
    def _cleared(status):
        # Output tuple used whenever there is no usable data to show.
        return None, status, "", [], [], [], None, None

    if contents is None:
        return _cleared("")

    frame, problem = parse_contents(contents, filename)
    if problem:
        return _cleared(dbc.Alert(problem, color="danger"))

    # Preview of the first ten rows.
    preview = dbc.Table.from_dataframe(
        frame.head(10),
        striped=True,
        bordered=True,
        hover=True,
        size='sm'
    )

    engine = AIVisualizationEngine(frame)
    numeric = engine.numeric_cols

    status = dbc.Alert([
        html.H6(f"✅ File uploaded successfully! 🤖 AI Ready"),
        html.P(f"Shape: {frame.shape[0]} rows × {frame.shape[1]} columns"),
        html.P(f"📊 Numeric: {len(numeric)}, 📝 Categorical: {len(engine.categorical_cols)}")
    ], color="success")

    # The same option list feeds all three dropdowns.
    options = [{'label': name, 'value': name} for name in frame.columns]

    # Default axes: the first two numeric columns when any exist, otherwise
    # simply the first two columns of the file.
    if numeric:
        x_default = numeric[0]
        y_default = numeric[1] if len(numeric) > 1 else None
    else:
        x_default = options[0]['value'] if options else None
        y_default = options[1]['value'] if len(options) > 1 else None

    records = frame.to_dict('records')
    return records, status, preview, options, options, options, x_default, y_default

@app.callback(
    [Output('chart-type', 'value'),
     Output('ai-recommendations', 'children')],
    [Input('ai-create-btn', 'n_clicks')],
    [State('ai-query', 'value'),
     State('stored-data', 'data')]
)
def handle_ai_query(n_clicks, query, data):
    """Handle AI natural language queries.

    Parses the text in the query box and reports which chart type and
    columns were recognised.

    Args:
        n_clicks: Click count of the "AI Create" button.
        query: Text typed into the query input (may be ``None``).
        data: Stored dataset as a list of record dicts (may be ``None``).

    Returns:
        ``(chart_type_value, feedback)``: the chart-type dropdown value
        (``'ai_recommend'`` when nothing was recognised) and an info alert,
        or an empty string when there is nothing to process.
    """
    if not n_clicks or not query or not data:
        return 'ai_recommend', ""

    ai_engine = AIVisualizationEngine(pd.DataFrame(data))

    # Parse the natural language query
    parsed = ai_engine.parse_natural_language_query(query)

    # One html.Div per message so each renders on its own line. The chart
    # type is emphasised with html.Strong because an Alert shows its children
    # as plain text, so markdown-style "**" would appear literally.
    recommendations = []
    if parsed['chart_type']:
        recommendations.append(html.Div([
            "🎯 Suggested chart type: ",
            html.Strong(parsed['chart_type'].title()),
        ]))

    if parsed['columns']:
        detected = ', '.join(str(col) for col in parsed['columns'])
        recommendations.append(html.Div(f"📊 Detected columns: {detected}"))

    if not recommendations:
        recommendations.append(html.Div(
            "🤖 Try queries like: 'scatter age salary', 'bar chart departments', 'histogram of scores'"
        ))

    return parsed['chart_type'] or 'ai_recommend', dbc.Alert(recommendations, color="info")

@app.callback(
    Output('quick-analytics', 'children'),
    [Input('stats-btn', 'n_clicks'),
     Input('insights-btn', 'n_clicks'),
     Input('outliers-btn', 'n_clicks')],
    [State('stored-data', 'data'),
     State('x-column', 'value'),
     State('y-column', 'value')]
)
def quick_analytics(stats_clicks, insights_clicks, outliers_clicks, data, x_col, y_col):
    """Handle quick analytics buttons with AI insights.

    The click counts are only triggers; which button fired is read from the
    callback context.

    Args:
        stats_clicks, insights_clicks, outliers_clicks: Button click counts
            (unused beyond triggering the callback).
        data: Stored dataset as a list of record dicts (may be ``None``).
        x_col: Currently selected X column, or ``None``.
        y_col: Currently selected Y column, or ``None``.

    Returns:
        An alert component for the clicked button, or an empty string when
        there is no data or no button triggered the call.
    """
    if not data:
        return ""

    ctx = callback_context
    if not ctx.triggered:
        return ""

    # Build the frame only once we know a button was actually pressed.
    button_id = ctx.triggered[0]['prop_id'].split('.')[0]
    df = pd.DataFrame(data)

    if button_id == 'stats-btn':
        stats = df.describe()
        return dbc.Alert([
            html.H6("📊 Summary Statistics"),
            dbc.Table.from_dataframe(stats.reset_index(), size='sm')
        ], color="light")

    ai_engine = AIVisualizationEngine(df)

    if button_id == 'insights-btn':
        if not x_col:
            return dbc.Alert("Select columns to get AI insights", color="warning")
        insights = ai_engine.generate_insights(x_col, y_col)
        if not insights:
            # E.g. a non-numeric X column: say so instead of rendering a
            # heading above an empty list.
            return dbc.Alert(f"No notable insights for {x_col}", color="info")
        return dbc.Alert([
            html.H6("🤖 AI Insights"),
            html.Ul([html.Li(insight) for insight in insights])
        ], color="info")

    if button_id == 'outliers-btn':
        if not x_col or x_col not in ai_engine.numeric_cols:
            return dbc.Alert("Select a numeric column to detect outliers", color="warning")
        outliers = ai_engine.detect_outliers(x_col)
        if not outliers:
            return dbc.Alert(f"✅ No outliers detected in {x_col}", color="success")
        outlier_data = df.loc[outliers, [x_col]]
        return dbc.Alert([
            html.H6(f"🎯 Outliers in {x_col}"),
            dbc.Table.from_dataframe(outlier_data.reset_index(), size='sm')
        ], color="warning")

    return ""

def _message_figure(text, font=None):
    """Return an empty figure showing ``text`` as an annotation at (0.5, 0.5)."""
    fig = go.Figure()
    annotation = dict(text=text, x=0.5, y=0.5, showarrow=False)
    if font is not None:
        annotation['font'] = font
    fig.add_annotation(**annotation)
    return fig


@app.callback(
    [Output('main-graph', 'figure'),
     Output('ai-insights-display', 'children')],
    [Input('stored-data', 'data'),
     Input('chart-type', 'value'),
     Input('x-column', 'value'),
     Input('y-column', 'value'),
     Input('color-column', 'value')]
)
def update_main_graph(data, chart_type, x_col, y_col, color_col):
    """Update visualization with AI enhancements.

    Args:
        data: Stored dataset as a list of record dicts (may be ``None``).
        chart_type: Selected chart key; ``'ai_recommend'`` is resolved to the
            top recommendation for the chosen columns.
        x_col: Selected X column, or ``None``.
        y_col: Selected Y column, or ``None``.
        color_col: Column used for colouring / grouping, or ``None``.

    Returns:
        ``(figure, insights)``: the figure to display and either an insights
        alert (scatter plots only) or an empty string. Any exception raised
        while building the chart is shown as a red message inside the figure.
    """
    hint_font = dict(size=16, color="gray")

    if not data:
        fig = _message_figure("Upload data to see AI-powered visualizations", hint_font)
        fig.update_layout(template="plotly_white")
        return fig, ""

    df = pd.DataFrame(data)
    ai_engine = AIVisualizationEngine(df)

    # AI recommendation system
    if chart_type == 'ai_recommend' and x_col:
        recommendations = ai_engine.recommend_chart_type(x_col, y_col)
        if recommendations:
            chart_type = recommendations[0]['type']

    # Handle cases where columns aren't selected yet. The heatmap is exempt
    # because it is built from every numeric column, not from the selection.
    if chart_type != 'heatmap' and not x_col and not y_col:
        fig = _message_figure("Select columns or use AI Query to create visualization", hint_font)
        fig.update_layout(template="plotly_white")
        return fig, ""

    insights_display = ""

    try:
        # Get AI-powered color scheme
        color_scheme = ai_engine.get_smart_color_scheme(chart_type, color_col)

        # Create visualization based on chart type
        if chart_type == 'scatter':
            if x_col and y_col:
                fig = px.scatter(df, x=x_col, y=y_col, color=color_col,
                                 title=f"🤖 AI Scatter Plot: {y_col} vs {x_col}",
                                 color_continuous_scale=color_scheme)
                # Add AI insights
                insights = ai_engine.generate_insights(x_col, y_col)
                insights_display = dbc.Alert([
                    html.H6("🤖 AI Insights"),
                    html.Ul([html.Li(insight) for insight in insights])
                ], color="info")
            else:
                fig = _message_figure("Select both X and Y columns for scatter plot")

        elif chart_type == 'line':
            if x_col and y_col:
                fig = px.line(df, x=x_col, y=y_col, color=color_col,
                              title=f"🤖 AI Line Chart: {y_col} vs {x_col}",
                              color_discrete_sequence=px.colors.qualitative.Set3)
            else:
                fig = _message_figure("Select both X and Y columns for line chart")

        elif chart_type == 'bar':
            if x_col and y_col:
                fig = px.bar(df, x=x_col, y=y_col, color=color_col,
                             title=f"🤖 AI Bar Chart: {y_col} by {x_col}",
                             color_discrete_sequence=px.colors.qualitative.Set3)
            elif x_col:
                # Name both output columns explicitly: the column labels that
                # value_counts().reset_index() produces differ between pandas
                # versions, so relying on an 'index' column is not portable.
                count_label = 'count' if x_col != 'count' else 'n'
                counts = (df[x_col].value_counts()
                          .rename_axis(x_col)
                          .reset_index(name=count_label))
                fig = px.bar(counts, x=x_col, y=count_label,
                             title=f"🤖 AI Value Counts: {x_col}",
                             color_discrete_sequence=px.colors.qualitative.Set3)
            else:
                fig = _message_figure("Select at least X column for bar chart")

        elif chart_type == 'histogram':
            if x_col:
                fig = px.histogram(df, x=x_col, color=color_col,
                                   title=f"🤖 AI Histogram: {x_col}",
                                   color_discrete_sequence=px.colors.qualitative.Pastel)
                # Add statistical annotations. Test for a usable number rather
                # than truthiness: a mean of 0.0 is valid, a NaN mean is not.
                if x_col in ai_engine.numeric_cols:
                    mean_val = df[x_col].mean()
                    if pd.notna(mean_val):
                        fig.add_vline(x=mean_val, line_dash="dash", line_color="red",
                                      annotation_text=f"Mean: {mean_val:.2f}")
            else:
                fig = _message_figure("Select X column for histogram")

        elif chart_type == 'box':
            if y_col:
                # NOTE(review): boxes are grouped by the colour column and the
                # selected X column is ignored here - confirm this is intended.
                fig = px.box(df, x=color_col, y=y_col,
                             title=f"🤖 AI Box Plot: {y_col}" + (f" by {color_col}" if color_col else ""),
                             color_discrete_sequence=px.colors.qualitative.Set2)
            elif x_col:
                fig = px.box(df, y=x_col,
                             title=f"🤖 AI Box Plot: {x_col}",
                             color_discrete_sequence=px.colors.qualitative.Set2)
            else:
                fig = _message_figure("Select a column for box plot")

        elif chart_type == 'heatmap':
            numeric_cols = df.select_dtypes(include=['number']).columns
            if len(numeric_cols) > 1:
                corr_matrix = df[numeric_cols].corr()
                fig = px.imshow(corr_matrix,
                                text_auto=True,
                                aspect="auto",
                                title="🤖 AI Correlation Heatmap",
                                color_continuous_scale='RdBu_r')
            else:
                fig = _message_figure("Need at least 2 numeric columns for heatmap")

        elif chart_type == 'pie':
            if x_col:
                value_counts = df[x_col].value_counts()
                fig = px.pie(values=value_counts.values,
                             names=value_counts.index,
                             title=f"🤖 AI Pie Chart: {x_col}",
                             color_discrete_sequence=px.colors.qualitative.Pastel)
            else:
                fig = _message_figure("Select X column for pie chart")

        else:
            # Reached when 'ai_recommend' could not be resolved to a chart.
            fig = _message_figure("🤖 AI is analyzing... Select chart type or use AI Query")

        # Apply AI styling enhancements
        fig.update_layout(
            template="plotly_white",
            height=500,
            font=dict(size=12),
            title_font_size=16,
        )

        return fig, insights_display

    except Exception as e:
        # UI boundary: show the failure in the graph area instead of letting
        # the callback error out (e.g. a selected column missing from df).
        fig = _message_figure(f"AI Error: {str(e)}", dict(color="red"))
        fig.update_layout(template="plotly_white")
        return fig, ""

if __name__ == '__main__':
    # The server binds to every interface, so debug mode (which enables the
    # interactive in-browser debugger) must not be on by default. Set
    # DASH_DEBUG=1 to opt in during local development.
    debug_enabled = os.environ.get('DASH_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=8051, debug=debug_enabled)