Spaces:

Arif-Badhon
/

llm-data-analyzer

Sleeping

llm-data-analyzer / backend /app /services /data_processor.py

Arif

Updated but not working

38f9e13 16 days ago

3.17 kB

	"""
	Data Processing Service
	Handles CSV and Excel file uploads and processing
	"""
	import logging
	import pandas as pd
	from pathlib import Path
	from fastapi import UploadFile

	logger = logging.getLogger(__name__)


	class DataProcessor:
	    """Load uploaded CSV / Excel files into lists of JSON-friendly row dicts.

	    Each upload is written into ``temp_dir`` and then parsed with pandas.
	    Missing cells are returned as ``None`` so they serialise to JSON ``null``.
	    """

	    # File extensions (lower-case, without the dot) that can be parsed.
	    SUPPORTED_FORMATS = ["csv", "xlsx", "xls"]

	    def __init__(self):
	        """Create the upload directory if it does not exist yet."""
	        self.temp_dir = Path("./uploads")
	        self.temp_dir.mkdir(parents=True, exist_ok=True)

	    # The annotation is quoted so it is never evaluated at definition time.
	    async def process_file(self, file: "UploadFile") -> tuple:
	        """Save an uploaded CSV or Excel file and parse it into row dicts.

	        Args:
	            file: Upload object exposing ``filename`` and an awaitable
	                ``read()`` that returns the raw bytes.

	        Returns:
	            tuple: ``(data_list, file_type)`` where ``data_list`` is a list of
	            one dict per row and ``file_type`` is the lower-case extension.

	        Raises:
	            ValueError: If the extension is not in ``SUPPORTED_FORMATS`` or if
	                saving / parsing the file fails.
	        """
	        filename = file.filename or ""
	        file_ext = self._get_file_extension(filename)
	        if file_ext not in self.SUPPORTED_FORMATS:
	            message = f"Unsupported file type: {file_ext}"
	            logger.error("❌ Validation error: %s", message)
	            raise ValueError(message)

	        logger.info("🔄 Processing file: %s", filename)

	        try:
	            # The name comes from the client: keep only its last component so
	            # something like "../../x.csv" cannot escape the upload directory.
	            file_path = self.temp_dir / self._base_name(filename)
	            file_path.write_bytes(await file.read())
	            # NOTE(review): the saved file is left on disk after parsing and
	            # same-named uploads overwrite each other -- confirm whether other
	            # code reads it back before adding cleanup or unique names.

	            if file_ext == "csv":
	                data = self._process_csv(str(file_path))
	            else:  # xlsx or xls
	                data = self._process_excel(str(file_path))
	        except ValueError:
	            # Parser failures arrive as ValueError and were logged there.
	            raise
	        except Exception as e:
	            logger.error("❌ File processing failed: %s", e)
	            raise ValueError(f"File processing failed: {e}") from e

	        logger.info("✅ File processed: %d rows", len(data))
	        return data, file_ext

	    @staticmethod
	    def _base_name(filename: str) -> str:
	        """Return the last path component, treating both / and \\ as separators."""
	        return filename.replace("\\", "/").rsplit("/", 1)[-1]

	    def _get_file_extension(self, filename: str) -> str:
	        """Return the lower-case extension of *filename* without the dot.

	        Returns an empty string when the name is empty/None or when its last
	        path component contains no dot, so a file literally named ``csv`` is
	        not mistaken for a CSV file.
	        """
	        if not filename:
	            return ""
	        _, dot, ext = self._base_name(filename).rpartition(".")
	        return ext.lower() if dot else ""

	    @staticmethod
	    def _frame_to_records(df) -> list:
	        """Convert a DataFrame to row dicts with missing values as ``None``."""
	        # Cast to object first: a numeric column cannot store None, so without
	        # the cast pandas turns the replacement straight back into NaN.
	        cleaned = df.astype(object).where(df.notna(), None)
	        return cleaned.to_dict("records")

	    def _process_csv(self, file_path: str) -> list:
	        """Parse the CSV file at *file_path* into a list of row dicts.

	        Raises:
	            ValueError: If pandas cannot read or convert the file.
	        """
	        try:
	            df = pd.read_csv(file_path)
	            data = self._frame_to_records(df)
	            logger.info(
	                "📄 CSV processed: %d rows, %d columns", len(data), len(df.columns)
	            )
	            return data
	        except Exception as e:
	            logger.error("❌ CSV processing failed: %s", e)
	            raise ValueError(f"CSV processing error: {e}") from e

	    def _process_excel(self, file_path: str) -> list:
	        """Parse the Excel file at *file_path* into a list of row dicts.

	        Raises:
	            ValueError: If pandas cannot read or convert the file.
	        """
	        try:
	            df = pd.read_excel(file_path)
	            data = self._frame_to_records(df)
	            logger.info(
	                "📊 Excel processed: %d rows, %d columns", len(data), len(df.columns)
	            )
	            return data
	        except Exception as e:
	            logger.error("❌ Excel processing failed: %s", e)
	            raise ValueError(f"Excel processing error: {e}") from e