Spaces:

Arif-Badhon
/

llm-data-analyzer

Sleeping

File size: 3,174 Bytes

e020ac8
38f9e13
 
e020ac8
 
38f9e13
 
 
e020ac8
38f9e13
3f44a73
 
 
38f9e13
 
 
3f44a73
 
38f9e13
 
e020ac8
38f9e13
 
 
e020ac8
38f9e13
 
 
e020ac8
38f9e13
 
 
 
e020ac8
38f9e13
e020ac8
38f9e13
 
 
e020ac8
38f9e13
 
e020ac8
38f9e13
 
 
 
 
e020ac8
38f9e13
 
e020ac8
38f9e13
 
e020ac8
38f9e13
 
 
e020ac8
38f9e13
 
 
3f44a73
38f9e13
 
 
 
 
 
 
e020ac8
38f9e13

"""
Data Processing Service
Handles CSV and Excel file uploads and processing
"""
import logging
import pandas as pd
from pathlib import Path
from fastapi import UploadFile

# Module-level logger named after this module; DataProcessor uses it for
# progress and error messages.
logger = logging.getLogger(__name__)


class DataProcessor:
    """Process uploaded data files (CSV, Excel) into lists of row dicts.

    Each uploaded file is written into ``./uploads`` (created on
    construction if missing) and then parsed with pandas.
    """

    # Lower-case extensions (without the dot) that process_file accepts.
    SUPPORTED_FORMATS = ["csv", "xlsx", "xls"]

    def __init__(self):
        """Create the upload directory if it does not exist yet."""
        self.temp_dir = Path("./uploads")
        self.temp_dir.mkdir(exist_ok=True)

    async def process_file(self, file: "UploadFile") -> tuple:
        """
        Process uploaded file (CSV or Excel)

        Args:
            file: The upload; its ``filename`` selects the parser and its
                bytes are read with ``await file.read()``.

        Returns:
            tuple: (data_list, file_type) where ``data_list`` is a list of
            row dicts with missing values as ``None`` and ``file_type`` is
            the lower-case extension.

        Raises:
            ValueError: For an unsupported extension, an unusable file name,
                or any failure while saving or parsing the file.
        """
        try:
            # Validate file type
            file_ext = self._get_file_extension(file.filename)
            if file_ext not in self.SUPPORTED_FORMATS:
                raise ValueError(f"Unsupported file type: {file_ext}")

            logger.info(f"🔄 Processing file: {file.filename}")

            # The file name comes from the client, so keep only its final
            # component; otherwise "../x.csv" or an absolute path would be
            # written outside the upload directory.
            file_path = self.temp_dir / self._safe_filename(file.filename)
            contents = await file.read()

            # NOTE(review): the saved file is left on disk afterwards, as
            # before; confirm whether anything relies on it before adding
            # cleanup.
            # NOTE(review): the write and the pandas parse below are
            # synchronous calls made from this coroutine.
            with open(file_path, "wb") as f:
                f.write(contents)

            # Process based on file type
            if file_ext == "csv":
                data = self._process_csv(str(file_path))
            else:  # xlsx or xls
                data = self._process_excel(str(file_path))

            logger.info(f"✅ File processed: {len(data)} rows")
            return data, file_ext

        except ValueError as e:
            logger.error(f"❌ Validation error: {e}")
            raise
        except Exception as e:
            logger.error(f"❌ File processing failed: {e}")
            raise ValueError(f"File processing failed: {e}") from e

    def _get_file_extension(self, filename: str) -> str:
        """Return the lower-cased text after the last dot, or "" if none.

        A missing/empty name or a name without a dot yields "" so that it
        can never match an entry in SUPPORTED_FORMATS by accident (a file
        literally named "csv" is not a CSV file).
        """
        if not filename:
            return ""
        _, dot, ext = filename.rpartition(".")
        return ext.lower() if dot else ""

    @staticmethod
    def _safe_filename(filename: str) -> str:
        """Reduce a client-supplied file name to a bare final component.

        Backslashes are treated as separators too, so Windows-style paths
        are stripped the same way as POSIX ones.

        Raises:
            ValueError: If nothing usable remains (empty, "." or "..").
        """
        name = Path(str(filename or "").replace("\\", "/")).name
        if name in ("", ".", ".."):
            raise ValueError(f"Invalid file name: {filename!r}")
        return name

    @staticmethod
    def _to_records(df) -> list:
        """Convert a DataFrame to a list of row dicts with None for missing.

        The frame is cast to object dtype first: on a float column
        ``where(..., None)`` keeps the float dtype and the None turns back
        into NaN, which is not valid JSON.
        """
        cleaned = df.astype(object).where(df.notna(), None)
        return cleaned.to_dict("records")

    def _process_csv(self, file_path: str) -> list:
        """Read a CSV file with pandas and return its rows as dicts.

        Raises:
            ValueError: If pandas cannot read or convert the file.
        """
        try:
            df = pd.read_csv(file_path)
            data = self._to_records(df)
            logger.info(f"📄 CSV processed: {len(data)} rows, {len(df.columns)} columns")
            return data
        except Exception as e:
            logger.error(f"❌ CSV processing failed: {e}")
            raise ValueError(f"CSV processing error: {e}") from e

    def _process_excel(self, file_path: str) -> list:
        """Read an Excel file with pandas and return its rows as dicts.

        Only what ``pd.read_excel`` returns by default is converted.

        Raises:
            ValueError: If pandas cannot read or convert the file.
        """
        try:
            df = pd.read_excel(file_path)
            data = self._to_records(df)
            logger.info(f"📊 Excel processed: {len(data)} rows, {len(df.columns)} columns")
            return data
        except Exception as e:
            logger.error(f"❌ Excel processing failed: {e}")
            raise ValueError(f"Excel processing error: {e}") from e