Spaces:

angelsg213
/

TESTING22

Sleeping

App Files Files Community

angelsg213 committed 18 days ago

Commit

0a02adf

verified ·

1 Parent(s): c9bd63a

Update app.py

Browse files

Files changed (1) hide show

app.py +488 -228

app.py CHANGED Viewed

@@ -6,18 +6,12 @@ import pandas as pd
 import re
 from datetime import datetime
 from huggingface_hub import InferenceClient
-# Importar Google Drive solo si está disponible
-try:
-    from google.oauth2.credentials import Credentials
-    from google_auth_oauthlib.flow import InstalledAppFlow
-    from google.auth.transport.requests import Request
-    from googleapiclient.discovery import build
-    from googleapiclient.http import MediaFileUpload
-    import pickle
-    DRIVE_DISPONIBLE = True
-except ImportError:
-    DRIVE_DISPONIBLE = False
 # ============= EXTRAER TEXTO DEL PDF =============
 def extraer_texto_pdf(pdf_file):
@@ -38,10 +32,8 @@ def analizar_y_convertir_json(texto):
     if not token:
         return None, None, "Error: Falta configurar HF_TOKEN en Settings → Secrets"
-    # Limitar texto
     texto_limpio = texto[:8000]
-    # Prompt para que el LLM decida la estructura JSON
     prompt = f"""Eres un experto en análisis de facturas. Lee esta factura y conviértela a JSON.
 TEXTO DE LA FACTURA:
@@ -85,7 +77,6 @@ FORMATO JSON (ajusta según lo que encuentres):
 Responde SOLO con el JSON válido (sin explicaciones, sin markdown):"""
-    # Lista de modelos que funcionan
     modelos = [
         "Qwen/Qwen2.5-72B-Instruct",
         "meta-llama/Llama-3.2-3B-Instruct",
@@ -95,10 +86,9 @@ Responde SOLO con el JSON válido (sin explicaciones, sin markdown):"""
     for modelo in modelos:
         try:
-            print(f"\n🤖 Probando: {modelo}")
             client = InferenceClient(token=token)
-            # Llamar al modelo
             response = client.chat.completions.create(
                 model=modelo,
                 messages=[
@@ -108,36 +98,28 @@ Responde SOLO con el JSON válido (sin explicaciones, sin markdown):"""
                 temperature=0.1
             )
-            # Extraer respuesta
             resultado = response.choices[0].message.content
-            # Limpiar respuesta (quitar markdown si existe)
             resultado = resultado.strip()
             resultado = re.sub(r'```json\s*', '', resultado)
             resultado = re.sub(r'```\s*', '', resultado)
             resultado = resultado.strip()
-            # Buscar JSON en la respuesta
             match = re.search(r'\{.*\}', resultado, re.DOTALL)
             if match:
                 json_str = match.group(0)
                 try:
                     datos_json = json.loads(json_str)
-                    print(f"✅ JSON válido extraído con {modelo}")
-                    # Generar resumen de información útil
                     resumen_util = generar_resumen_util(texto_limpio, modelo, client)
                     return datos_json, resumen_util, f"Procesado con {modelo}"
                 except json.JSONDecodeError as e:
-                    print(f"⚠️ JSON inválido: {str(e)[:50]}")
                     continue
-            else:
-                print(f"⚠️ No se encontró JSON en la respuesta")
-                continue
         except Exception as e:
-            print(f"❌ {modelo} falló: {str(e)[:100]}")
             continue
     return None, None, "Ningún modelo LLM pudo extraer el JSON. Verifica tu HF_TOKEN."
@@ -179,12 +161,10 @@ def json_a_csv(datos_json):
     filas = []
-    # === INFORMACIÓN GENERAL ===
     filas.append({'Campo': '=== INFORMACIÓN GENERAL ===', 'Valor': ''})
     filas.append({'Campo': 'Número de Factura', 'Valor': datos_json.get('numero_factura', 'N/A')})
     filas.append({'Campo': 'Fecha', 'Valor': datos_json.get('fecha', 'N/A')})
-    # === EMISOR ===
     if 'emisor' in datos_json:
         filas.append({'Campo': '', 'Valor': ''})
         filas.append({'Campo': '=== EMISOR ===', 'Valor': ''})
@@ -195,7 +175,6 @@ def json_a_csv(datos_json):
         else:
             filas.append({'Campo': 'Nombre', 'Valor': str(emisor)})
-    # === CLIENTE ===
     if 'cliente' in datos_json:
         filas.append({'Campo': '', 'Valor': ''})
         filas.append({'Campo': '=== CLIENTE ===', 'Valor': ''})
@@ -206,7 +185,6 @@ def json_a_csv(datos_json):
         else:
             filas.append({'Campo': 'Nombre', 'Valor': str(cliente)})
-    # === PRODUCTOS/SERVICIOS ===
     productos = datos_json.get('productos', datos_json.get('conceptos', datos_json.get('items', [])))
     if productos and len(productos) > 0:
         filas.append({'Campo': '', 'Valor': ''})
@@ -219,13 +197,11 @@ def json_a_csv(datos_json):
             filas.append({'Campo': '  Total', 'Valor': f"{prod.get('total', 0)}€"})
             filas.append({'Campo': '', 'Valor': ''})
-    # === TOTALES ===
     totales = datos_json.get('totales', {})
     if totales or 'base_imponible' in datos_json or 'total' in datos_json:
         filas.append({'Campo': '', 'Valor': ''})
         filas.append({'Campo': '=== TOTALES ===', 'Valor': ''})
-        # Buscar totales en varios lugares del JSON
         base = totales.get('base_imponible', datos_json.get('base_imponible', 0))
         iva = totales.get('iva', datos_json.get('iva', 0))
         porcentaje_iva = totales.get('porcentaje_iva', datos_json.get('porcentaje_iva', 0))
@@ -240,166 +216,457 @@ def json_a_csv(datos_json):
     return pd.DataFrame(filas)
-# ============= AUTENTICAR GOOGLE DRIVE =============
-def autenticar_drive():
-    """Inicia el proceso de autenticación con Google Drive"""
-    if not DRIVE_DISPONIBLE:
-        return "Error: Librerías de Google Drive no instaladas.\n\nAgrega al requirements.txt:\ngoogle-auth-oauthlib\ngoogle-auth-httplib2\ngoogle-api-python-client", False
-    SCOPES = ['https://www.googleapis.com/auth/drive.file']
-    try:
-        # Verificar si existe credentials.json
-        if not os.path.exists('credentials.json'):
-            return "Error: Falta el archivo credentials.json.\n\nPasos:\n1. Ve a https://console.cloud.google.com/\n2. Crea un proyecto\n3. Activa Google Drive API\n4. Crea credenciales OAuth 2.0\n5. Descarga credentials.json\n6. Súbelo a tu aplicación", False
-        # Verificar si ya hay una sesión activa
-        if os.path.exists('token.pickle'):
-            with open('token.pickle', 'rb') as token:
-                creds = pickle.load(token)
-                if creds and creds.valid:
-                    return "Ya estás conectado a Google Drive", True
-        # Iniciar flujo de autenticación
-        flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
-        creds = flow.run_local_server(port=8080)
-        # Guardar credenciales
-        with open('token.pickle', 'wb') as token:
-            pickle.dump(creds, token)
-        return "Autenticación exitosa! Ahora puedes guardar archivos en Google Drive", True
-    except Exception as e:
-        return f"Error en la autenticación: {str(e)}", False
-# ============= VERIFICAR ESTADO DE DRIVE =============
-def verificar_sesion_drive():
-    """Verifica si hay una sesión activa de Google Drive"""
-    if not DRIVE_DISPONIBLE:
-        return "Librerías no instaladas", False
-    if not os.path.exists('token.pickle'):
-        return "No conectado", False
-    try:
-        with open('token.pickle', 'rb') as token:
-            creds = pickle.load(token)
-            if creds and creds.valid:
-                return "Conectado a Google Drive", True
-            else:
-                return "Sesión expirada", False
-    except:
-        return "Error al verificar sesión", False
-# ============= CERRAR SESIÓN DE DRIVE =============
-def cerrar_sesion_drive():
-    """Cierra la sesión de Google Drive"""
-    try:
-        if os.path.exists('token.pickle'):
-            os.remove('token.pickle')
-            return "Sesión cerrada correctamente", False
-        else:
-            return "No había sesión activa", False
-    except Exception as e:
-        return f"Error al cerrar sesión: {str(e)}", False
-def subir_a_drive(archivo_csv):
-    """Sube el archivo CSV a Google Drive"""
-    if not DRIVE_DISPONIBLE:
-        return "Error: Librerías de Google Drive no instaladas.\n\nAgrega al requirements.txt:\ngoogle-auth-oauthlib\ngoogle-auth-httplib2\ngoogle-api-python-client"
-    SCOPES = ['https://www.googleapis.com/auth/drive.file']
-    creds = None
-    try:
-        # Verificar si existen credenciales guardadas
-        if os.path.exists('token.pickle'):
-            with open('token.pickle', 'rb') as token:
-                creds = pickle.load(token)
-        # Si no hay credenciales válidas, solicitar login
-        if not creds or not creds.valid:
-            if creds and creds.expired and creds.refresh_token:
-                creds.refresh(Request())
-            else:
-                # Verificar si existe credentials.json
-                if not os.path.exists('credentials.json'):
-                    return "Error: Falta el archivo credentials.json.\n\nDescárgalo desde Google Cloud Console:\nhttps://console.cloud.google.com/"
-                flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
-                creds = flow.run_local_server(port=0)
-            # Guardar credenciales para la próxima vez
-            with open('token.pickle', 'wb') as token:
-                pickle.dump(creds, token)
-        # Crear servicio de Drive
-        service = build('drive', 'v3', credentials=creds)
-        # Metadatos del archivo
-        file_metadata = {
-            'name': os.path.basename(archivo_csv),
-            'mimeType': 'text/csv'
-        }
-        media = MediaFileUpload(archivo_csv, mimetype='text/csv', resumable=True)
-        # Subir archivo
-        file = service.files().create(
-            body=file_metadata,
-            media_body=media,
-            fields='id, webViewLink'
-        ).execute()
-        return f"Archivo subido exitosamente a Google Drive\n\nEnlace: {file.get('webViewLink')}"
-    except Exception as e:
-        return f"Error al subir a Google Drive: {str(e)}"
 # ============= FUNCIÓN PRINCIPAL =============
-def procesar_factura(pdf_file, guardar_en_drive):
     if pdf_file is None:
-        return "", None, None, "", "Sube un PDF primero", ""
-    # PASO 1: Extraer texto del PDF
     print("\n--- Extrayendo texto del PDF...")
     texto = extraer_texto_pdf(pdf_file)
     if texto.startswith("Error"):
-        return "", None, None, "", f"Error: {texto}", ""
-    # Mostrar preview del texto
     texto_preview = f"{texto[:1500]}..." if len(texto) > 1500 else texto
-    # PASO 2: LLM analiza y convierte a JSON
     print("--- El LLM está analizando la factura y creando el JSON...")
     datos_json, resumen_util, mensaje = analizar_y_convertir_json(texto)
     if not datos_json:
-        return texto_preview, None, None, "", mensaje, ""
-    # PASO 3: Convertir JSON a DataFrame
     print("--- Convirtiendo JSON a CSV...")
     df = json_a_csv(datos_json)
-    # PASO 4: Guardar CSV
     timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
     numero = datos_json.get('numero_factura', 'factura')
     numero = re.sub(r'[^\w\-]', '_', str(numero))
     csv_filename = f"{numero}_{timestamp}.csv"
     df.to_csv(csv_filename, index=False, encoding='utf-8-sig')
-    # PASO 5: Subir a Drive si está seleccionado
-    mensaje_drive = ""
-    if guardar_en_drive:
-        print("--- Subiendo a Google Drive...")
-        mensaje_drive = subir_a_drive(csv_filename)
-    # PASO 6: Crear resumen técnico
     resumen_tecnico = f"""## Factura Procesada Exitosamente
 **Modelo utilizado:** {mensaje}
@@ -431,98 +698,104 @@ def procesar_factura(pdf_file, guardar_en_drive):
 """
     print(f"--- CSV guardado: {csv_filename}")
-    return texto_preview, df, csv_filename, resumen_tecnico, resumen_util, mensaje_drive
 # ============= INTERFAZ GRADIO =============
-with gr.Blocks(title="Extractor IA de Facturas") as demo:
-    # Título principal
     gr.Markdown("""
-    # Extractor Inteligente de Facturas
-    ### Análisis automático de facturas PDF con Inteligencia Artificial
     """)
     gr.Markdown("---")
     with gr.Row():
-        # COLUMNA IZQUIERDA - INPUT
         with gr.Column(scale=1):
-            gr.Markdown("### Cargar Documento")
             gr.Markdown("")
             pdf_input = gr.File(
-                label="Seleccionar factura PDF",
                 file_types=[".pdf"],
                 type="filepath"
             )
             gr.Markdown("")
             gr.Markdown("---")
             gr.Markdown("")
-            # Sección de Google Drive
-            gr.Markdown("### Google Drive")
-            with gr.Row():
-                btn_conectar_drive = gr.Button(
-                    "Conectar con Google Drive",
-                    size="sm",
-                    variant="secondary"
-                )
-                btn_cerrar_sesion = gr.Button(
-                    "Cerrar Sesión",
-                    size="sm"
-                )
             gr.Markdown("")
-            drive_status_auth = gr.Textbox(
-                label="Estado de conexión",
-                value="No conectado",
-                interactive=False,
-                lines=2
-            )
             gr.Markdown("")
-            # Checkbox para Google Drive
-            drive_checkbox = gr.Checkbox(
-                label="Guardar en Google Drive",
-                value=False,
-                info="Primero debes conectar tu cuenta"
             )
-            gr.Markdown("")
-            gr.Markdown("---")
             gr.Markdown("")
-            btn = gr.Button(
-                "Procesar Factura",
-                variant="primary",
                 size="lg"
             )
-            gr.Markdown("")
-            gr.Markdown("---")
             gr.Markdown("")
-            csv_output = gr.File(label="Descargar archivo CSV generado")
-            gr.Markdown("")
-            # Estado de subida a Drive
-            drive_upload_status = gr.Textbox(
-                label="Estado de subida a Drive",
                 interactive=False,
-                lines=3
             )
-        # COLUMNA DERECHA - RESULTADOS
         with gr.Column(scale=2):
             gr.Markdown("### Resultados del Análisis")
             gr.Markdown("")
-            # Información útil destacada
             gr.Markdown("#### Información Útil para Administrativos")
             info_util = gr.Markdown(
                 value="*Aquí aparecerá información relevante una vez procesada la factura*"
@@ -532,7 +805,6 @@ with gr.Blocks(title="Extractor IA de Facturas") as demo:
             gr.Markdown("---")
             gr.Markdown("")
-            # Tabs para información detallada
             with gr.Tabs():
                 with gr.Tab("Vista Previa CSV"):
                     gr.Markdown("")
@@ -558,41 +830,29 @@ with gr.Blocks(title="Extractor IA de Facturas") as demo:
     gr.Markdown("---")
     gr.Markdown("")
-    # Footer con información
     gr.Markdown("""
-    **Sistema de extracción automática de datos mediante modelos de lenguaje**
-    *Configuración requerida:*
-    - *HF_TOKEN en Settings → Secrets (obligatorio)*
-    - *credentials.json de Google Cloud para usar Drive (opcional)*
-    **Pasos para configurar Google Drive:**
-    1. Ve a [Google Cloud Console](https://console.cloud.google.com/)
-    2. Crea un proyecto y activa Google Drive API
-    3. Crea credenciales OAuth 2.0 (Tipo: Aplicación de escritorio)
-    4. Descarga el archivo credentials.json
-    5. Súbelo al directorio raíz de tu aplicación
-    6. Haz clic en "Conectar con Google Drive"
     """)
     # Conectar botones
-    btn_conectar_drive.click(
-        fn=autenticar_drive,
-        inputs=[],
-        outputs=[drive_status_auth, drive_checkbox]
-    )
-    btn_cerrar_sesion.click(
-        fn=cerrar_sesion_drive,
-        inputs=[],
-        outputs=[drive_status_auth, drive_checkbox]
     )
-    # Conectar botón principal
-    btn.click(
-        fn=procesar_factura,
-        inputs=[pdf_input, drive_checkbox],
-        outputs=[texto_extraido, tabla_preview, csv_output, resumen_tecnico, info_util, drive_upload_status]
     )
 if __name__ == "__main__":

 import re
 from datetime import datetime
 from huggingface_hub import InferenceClient
+from reportlab.lib.pagesizes import letter, A4
+from reportlab.lib import colors
+from reportlab.lib.units import inch
+from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, Image
+from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+from reportlab.lib.enums import TA_CENTER, TA_RIGHT, TA_LEFT
 # ============= EXTRAER TEXTO DEL PDF =============
 def extraer_texto_pdf(pdf_file):
     if not token:
         return None, None, "Error: Falta configurar HF_TOKEN en Settings → Secrets"
     texto_limpio = texto[:8000]
     prompt = f"""Eres un experto en análisis de facturas. Lee esta factura y conviértela a JSON.
 TEXTO DE LA FACTURA:
 Responde SOLO con el JSON válido (sin explicaciones, sin markdown):"""
     modelos = [
         "Qwen/Qwen2.5-72B-Instruct",
         "meta-llama/Llama-3.2-3B-Instruct",
     for modelo in modelos:
         try:
+            print(f"\nProbando: {modelo}")
             client = InferenceClient(token=token)
             response = client.chat.completions.create(
                 model=modelo,
                 messages=[
                 temperature=0.1
             )
             resultado = response.choices[0].message.content
             resultado = resultado.strip()
             resultado = re.sub(r'```json\s*', '', resultado)
             resultado = re.sub(r'```\s*', '', resultado)
             resultado = resultado.strip()
             match = re.search(r'\{.*\}', resultado, re.DOTALL)
             if match:
                 json_str = match.group(0)
                 try:
                     datos_json = json.loads(json_str)
+                    print(f"JSON válido extraído con {modelo}")
                     resumen_util = generar_resumen_util(texto_limpio, modelo, client)
                     return datos_json, resumen_util, f"Procesado con {modelo}"
                 except json.JSONDecodeError as e:
+                    print(f"JSON inválido: {str(e)[:50]}")
                     continue
         except Exception as e:
+            print(f"{modelo} falló: {str(e)[:100]}")
             continue
     return None, None, "Ningún modelo LLM pudo extraer el JSON. Verifica tu HF_TOKEN."
     filas = []
     filas.append({'Campo': '=== INFORMACIÓN GENERAL ===', 'Valor': ''})
     filas.append({'Campo': 'Número de Factura', 'Valor': datos_json.get('numero_factura', 'N/A')})
     filas.append({'Campo': 'Fecha', 'Valor': datos_json.get('fecha', 'N/A')})
     if 'emisor' in datos_json:
         filas.append({'Campo': '', 'Valor': ''})
         filas.append({'Campo': '=== EMISOR ===', 'Valor': ''})
         else:
             filas.append({'Campo': 'Nombre', 'Valor': str(emisor)})
     if 'cliente' in datos_json:
         filas.append({'Campo': '', 'Valor': ''})
         filas.append({'Campo': '=== CLIENTE ===', 'Valor': ''})
         else:
             filas.append({'Campo': 'Nombre', 'Valor': str(cliente)})
     productos = datos_json.get('productos', datos_json.get('conceptos', datos_json.get('items', [])))
     if productos and len(productos) > 0:
         filas.append({'Campo': '', 'Valor': ''})
             filas.append({'Campo': '  Total', 'Valor': f"{prod.get('total', 0)}€"})
             filas.append({'Campo': '', 'Valor': ''})
     totales = datos_json.get('totales', {})
     if totales or 'base_imponible' in datos_json or 'total' in datos_json:
         filas.append({'Campo': '', 'Valor': ''})
         filas.append({'Campo': '=== TOTALES ===', 'Valor': ''})
         base = totales.get('base_imponible', datos_json.get('base_imponible', 0))
         iva = totales.get('iva', datos_json.get('iva', 0))
         porcentaje_iva = totales.get('porcentaje_iva', datos_json.get('porcentaje_iva', 0))
     return pd.DataFrame(filas)
+# ============= GENERAR PDF DESDE CSV - TEMPLATE CLÁSICO =============
+def generar_pdf_clasico(csv_file, datos_json):
+    """Render the invoice as a PDF using the classic corporate template.
+
+    Args:
+        csv_file: Accepted so all template generators share one signature;
+            this function does not read it.
+        datos_json: Invoice dict parsed from the LLM response. Because the
+            model chooses the shape, amounts may arrive as numbers, strings
+            ("12,50 €") or None, and nested sections may be missing or not
+            dicts; all of these are tolerated.
+
+    Returns:
+        Relative filename of the PDF that was written
+        (``factura_clasica_<timestamp>.pdf``).
+    """
+    def _importe(valor):
+        """Coerce an amount to float; unparseable values count as 0.0."""
+        if isinstance(valor, (int, float)):
+            return float(valor)
+        texto = re.sub(r'[^\d,.\-]', '', str(valor or ''))
+        if ',' in texto and texto.rfind(',') > texto.rfind('.'):
+            # European format: "." groups thousands, "," marks decimals.
+            texto = texto.replace('.', '').replace(',', '.')
+        else:
+            texto = texto.replace(',', '')
+        try:
+            return float(texto)
+        except ValueError:
+            return 0.0
+
+    def _nombre_y_nif(parte):
+        """Return (name, tax id) for a party given as a dict or a bare value."""
+        if isinstance(parte, dict):
+            return str(parte.get('nombre') or 'N/A'), str(parte.get('nif') or '')
+        return str(parte), ''
+
+    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+    pdf_filename = f"factura_clasica_{timestamp}.pdf"
+    doc = SimpleDocTemplate(pdf_filename, pagesize=A4)
+    story = []
+    styles = getSampleStyleSheet()
+    # Custom styles
+    titulo_style = ParagraphStyle(
+        'CustomTitle',
+        parent=styles['Heading1'],
+        fontSize=24,
+        textColor=colors.HexColor('#1a1a1a'),
+        spaceAfter=30,
+        alignment=TA_CENTER
+    )
+    # Title
+    story.append(Paragraph("FACTURA", titulo_style))
+    story.append(Spacer(1, 0.3*inch))
+    # Basic information
+    info_data = [
+        ['Número de Factura:', str(datos_json.get('numero_factura', 'N/A'))],
+        ['Fecha:', str(datos_json.get('fecha', 'N/A'))]
+    ]
+    info_table = Table(info_data, colWidths=[2*inch, 4*inch])
+    info_table.setStyle(TableStyle([
+        ('FONTNAME', (0, 0), (-1, -1), 'Helvetica'),
+        ('FONTSIZE', (0, 0), (-1, -1), 11),
+        ('TEXTCOLOR', (0, 0), (0, -1), colors.HexColor('#666666')),
+        ('ALIGN', (0, 0), (0, -1), 'RIGHT'),
+        ('ALIGN', (1, 0), (1, -1), 'LEFT'),
+        ('BOTTOMPADDING', (0, 0), (-1, -1), 12),
+    ]))
+    story.append(info_table)
+    story.append(Spacer(1, 0.3*inch))
+    # Issuer and customer
+    emisor_nombre, emisor_nif = _nombre_y_nif(datos_json.get('emisor', {}))
+    cliente_nombre, cliente_nif = _nombre_y_nif(datos_json.get('cliente', {}))
+    partes_data = [
+        ['EMISOR', 'CLIENTE'],
+        [emisor_nombre, cliente_nombre],
+        [emisor_nif, cliente_nif]
+    ]
+    partes_table = Table(partes_data, colWidths=[3*inch, 3*inch])
+    partes_table.setStyle(TableStyle([
+        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
+        ('FONTSIZE', (0, 0), (-1, 0), 12),
+        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#e0e0e0')),
+        ('TEXTCOLOR', (0, 0), (-1, 0), colors.HexColor('#1a1a1a')),
+        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
+        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
+        ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
+        ('FONTSIZE', (0, 1), (-1, -1), 10),
+        ('GRID', (0, 0), (-1, -1), 1, colors.HexColor('#cccccc')),
+        ('TOPPADDING', (0, 0), (-1, -1), 10),
+        ('BOTTOMPADDING', (0, 0), (-1, -1), 10),
+        ('LEFTPADDING', (0, 0), (-1, -1), 10),
+    ]))
+    story.append(partes_table)
+    story.append(Spacer(1, 0.4*inch))
+    # Line items; 'items' is accepted too, matching json_a_csv's lookup order.
+    productos = datos_json.get('productos', datos_json.get('conceptos', datos_json.get('items', [])))
+    if not isinstance(productos, list):
+        productos = []
+    if productos:
+        productos_data = [['Descripción', 'Cantidad', 'Precio Unit.', 'Total']]
+        for prod in productos:
+            if not isinstance(prod, dict):
+                # The model sometimes lists items as bare strings.
+                prod = {'descripcion': prod}
+            productos_data.append([
+                str(prod.get('descripcion', '')),
+                str(prod.get('cantidad', '')),
+                f"{_importe(prod.get('precio_unitario', 0)):.2f} €",
+                f"{_importe(prod.get('total', 0)):.2f} €"
+            ])
+        productos_table = Table(productos_data, colWidths=[3*inch, 1*inch, 1.5*inch, 1.5*inch])
+        productos_table.setStyle(TableStyle([
+            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
+            ('FONTSIZE', (0, 0), (-1, 0), 11),
+            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#4a4a4a')),
+            ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
+            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
+            ('ALIGN', (1, 0), (-1, -1), 'RIGHT'),
+            ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
+            ('FONTSIZE', (0, 1), (-1, -1), 10),
+            ('GRID', (0, 0), (-1, -1), 1, colors.HexColor('#cccccc')),
+            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.HexColor('#f5f5f5')]),
+            ('TOPPADDING', (0, 0), (-1, -1), 8),
+            ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
+        ]))
+        story.append(productos_table)
+        story.append(Spacer(1, 0.3*inch))
+    # Totals: prefer the nested 'totales' section, fall back to top-level keys.
+    totales = datos_json.get('totales')
+    if not isinstance(totales, dict):
+        totales = {}
+    base = _importe(totales.get('base_imponible', datos_json.get('base_imponible', 0)))
+    iva = _importe(totales.get('iva', datos_json.get('iva', 0)))
+    porcentaje_iva = totales.get('porcentaje_iva', datos_json.get('porcentaje_iva', 0))
+    total = _importe(totales.get('total', datos_json.get('total', 0)))
+    totales_data = [
+        ['Base Imponible:', f"{base:.2f} €"],
+        [f'IVA ({porcentaje_iva}%):', f"{iva:.2f} €"],
+        ['TOTAL:', f"{total:.2f} €"]
+    ]
+    totales_table = Table(totales_data, colWidths=[4.5*inch, 1.5*inch])
+    totales_table.setStyle(TableStyle([
+        ('FONTNAME', (0, 0), (-1, 1), 'Helvetica'),
+        ('FONTNAME', (0, 2), (-1, 2), 'Helvetica-Bold'),
+        ('FONTSIZE', (0, 0), (-1, 1), 11),
+        ('FONTSIZE', (0, 2), (-1, 2), 14),
+        ('ALIGN', (0, 0), (0, -1), 'RIGHT'),
+        ('ALIGN', (1, 0), (1, -1), 'RIGHT'),
+        ('BACKGROUND', (0, 2), (-1, 2), colors.HexColor('#4a4a4a')),
+        ('TEXTCOLOR', (0, 2), (-1, 2), colors.white),
+        ('TOPPADDING', (0, 0), (-1, -1), 8),
+        ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
+        ('RIGHTPADDING', (0, 0), (-1, -1), 10),
+    ]))
+    story.append(totales_table)
+    doc.build(story)
+    return pdf_filename
+# ============= GENERAR PDF - TEMPLATE MODERNO =============
+def generar_pdf_moderno(csv_file, datos_json):
+    """Render the invoice as a PDF using the modern, minimalist template.
+
+    Args:
+        csv_file: Accepted so all template generators share one signature;
+            this function does not read it.
+        datos_json: Invoice dict parsed from the LLM response. Amounts may
+            arrive as numbers, strings ("12,50 €") or None, and nested
+            sections may be missing or not dicts; all of these are tolerated.
+
+    Returns:
+        Relative filename of the PDF that was written
+        (``factura_moderna_<timestamp>.pdf``).
+    """
+    # Local import: Paragraph parses its text as XML-like markup, so any
+    # model-provided text must be escaped before interpolation.
+    from xml.sax.saxutils import escape
+
+    def _importe(valor):
+        """Coerce an amount to float; unparseable values count as 0.0."""
+        if isinstance(valor, (int, float)):
+            return float(valor)
+        texto = re.sub(r'[^\d,.\-]', '', str(valor or ''))
+        if ',' in texto and texto.rfind(',') > texto.rfind('.'):
+            # European format: "." groups thousands, "," marks decimals.
+            texto = texto.replace('.', '').replace(',', '.')
+        else:
+            texto = texto.replace(',', '')
+        try:
+            return float(texto)
+        except ValueError:
+            return 0.0
+
+    def _caja(etiqueta, parte):
+        """Build the Paragraph for one party box (label, name, tax id)."""
+        if isinstance(parte, dict):
+            nombre, nif = parte.get('nombre') or 'N/A', parte.get('nif') or ''
+        else:
+            nombre, nif = parte, ''
+        return Paragraph(
+            f"<b>{etiqueta}</b><br/>{escape(str(nombre))}<br/>{escape(str(nif))}",
+            styles['Normal']
+        )
+
+    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+    pdf_filename = f"factura_moderna_{timestamp}.pdf"
+    doc = SimpleDocTemplate(pdf_filename, pagesize=A4)
+    story = []
+    styles = getSampleStyleSheet()
+    # Modern title
+    titulo_style = ParagraphStyle(
+        'ModernTitle',
+        parent=styles['Heading1'],
+        fontSize=32,
+        textColor=colors.HexColor('#2196F3'),
+        spaceAfter=10,
+        alignment=TA_LEFT,
+        fontName='Helvetica-Bold'
+    )
+    story.append(Paragraph("FACTURA", titulo_style))
+    # Subtitle: invoice number and date
+    numero = escape(str(datos_json.get('numero_factura', 'N/A')))
+    fecha = escape(str(datos_json.get('fecha', 'N/A')))
+    subtitulo = f"No. {numero} | {fecha}"
+    subtitulo_style = ParagraphStyle(
+        'Subtitle',
+        parent=styles['Normal'],
+        fontSize=11,
+        textColor=colors.HexColor('#757575'),
+        spaceAfter=30
+    )
+    story.append(Paragraph(subtitulo, subtitulo_style))
+    story.append(Spacer(1, 0.3*inch))
+    # Issuer and customer in side-by-side boxes
+    info_boxes = [
+        [
+            _caja("DE:", datos_json.get('emisor', {})),
+            _caja("PARA:", datos_json.get('cliente', {}))
+        ]
+    ]
+    boxes_table = Table(info_boxes, colWidths=[3*inch, 3*inch])
+    boxes_table.setStyle(TableStyle([
+        ('BACKGROUND', (0, 0), (0, 0), colors.HexColor('#E3F2FD')),
+        ('BACKGROUND', (1, 0), (1, 0), colors.HexColor('#FFF3E0')),
+        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
+        ('TOPPADDING', (0, 0), (-1, -1), 15),
+        ('BOTTOMPADDING', (0, 0), (-1, -1), 15),
+        ('LEFTPADDING', (0, 0), (-1, -1), 15),
+        ('RIGHTPADDING', (0, 0), (-1, -1), 15),
+    ]))
+    story.append(boxes_table)
+    story.append(Spacer(1, 0.4*inch))
+    # Line items; 'items' is accepted too, matching json_a_csv's lookup order.
+    productos = datos_json.get('productos', datos_json.get('conceptos', datos_json.get('items', [])))
+    if not isinstance(productos, list):
+        productos = []
+    if productos:
+        productos_data = [['DESCRIPCIÓN', 'CANT.', 'PRECIO', 'TOTAL']]
+        for prod in productos:
+            if not isinstance(prod, dict):
+                # The model sometimes lists items as bare strings.
+                prod = {'descripcion': prod}
+            productos_data.append([
+                str(prod.get('descripcion', '')),
+                str(prod.get('cantidad', '')),
+                f"{_importe(prod.get('precio_unitario', 0)):.2f} €",
+                f"{_importe(prod.get('total', 0)):.2f} €"
+            ])
+        productos_table = Table(productos_data, colWidths=[3*inch, 0.8*inch, 1.5*inch, 1.7*inch])
+        productos_table.setStyle(TableStyle([
+            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
+            ('FONTSIZE', (0, 0), (-1, 0), 9),
+            ('TEXTCOLOR', (0, 0), (-1, 0), colors.HexColor('#757575')),
+            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
+            ('ALIGN', (1, 0), (-1, -1), 'RIGHT'),
+            ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
+            ('FONTSIZE', (0, 1), (-1, -1), 10),
+            ('LINEBELOW', (0, 0), (-1, 0), 2, colors.HexColor('#2196F3')),
+            ('LINEBELOW', (0, 1), (-1, -2), 0.5, colors.HexColor('#e0e0e0')),
+            ('TOPPADDING', (0, 0), (-1, -1), 10),
+            ('BOTTOMPADDING', (0, 0), (-1, -1), 10),
+        ]))
+        story.append(productos_table)
+        story.append(Spacer(1, 0.4*inch))
+    # Totals: prefer the nested 'totales' section, fall back to top-level keys.
+    totales = datos_json.get('totales')
+    if not isinstance(totales, dict):
+        totales = {}
+    base = _importe(totales.get('base_imponible', datos_json.get('base_imponible', 0)))
+    iva = _importe(totales.get('iva', datos_json.get('iva', 0)))
+    porcentaje_iva = totales.get('porcentaje_iva', datos_json.get('porcentaje_iva', 0))
+    total = _importe(totales.get('total', datos_json.get('total', 0)))
+    totales_data = [
+        ['Subtotal', f"{base:.2f} €"],
+        [f'IVA {porcentaje_iva}%', f"{iva:.2f} €"],
+        ['', ''],  # spacer row above the highlighted total
+        ['TOTAL', f"{total:.2f} €"]
+    ]
+    totales_table = Table(totales_data, colWidths=[5*inch, 2*inch])
+    totales_table.setStyle(TableStyle([
+        ('FONTNAME', (0, 0), (-1, 2), 'Helvetica'),
+        ('FONTNAME', (0, 3), (-1, 3), 'Helvetica-Bold'),
+        ('FONTSIZE', (0, 0), (-1, 2), 11),
+        ('FONTSIZE', (0, 3), (-1, 3), 16),
+        ('ALIGN', (0, 0), (-1, -1), 'RIGHT'),
+        ('TEXTCOLOR', (0, 3), (-1, 3), colors.HexColor('#2196F3')),
+        ('LINEABOVE', (0, 3), (-1, 3), 2, colors.HexColor('#2196F3')),
+        ('TOPPADDING', (0, 0), (-1, -1), 8),
+        ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
+    ]))
+    story.append(totales_table)
+    doc.build(story)
+    return pdf_filename
+# ============= GENERAR PDF - TEMPLATE ELEGANTE =============
def generar_pdf_elegante(csv_file, datos_json):
    """Render the invoice with the "elegant" (premium, indigo-accented) template.

    The invoice data comes from JSON produced by a language model, so every
    field is treated defensively: missing or ``None`` values fall back to a
    placeholder, and amounts given as text (e.g. ``"1.234,56 €"``) are parsed
    instead of crashing the ``.2f`` formatting.

    Args:
        csv_file: Path of the CSV created during extraction. It is not read
            by this function.
        datos_json: Dict with the invoice data. Keys read here:
            ``numero_factura``, ``fecha``, ``emisor``, ``cliente``,
            ``productos`` (or ``conceptos``), ``totales`` and, as fallbacks
            for the totals, the top-level ``base_imponible``, ``iva``,
            ``porcentaje_iva`` and ``total``.

    Returns:
        The (relative) file name of the PDF that was written.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    pdf_filename = f"factura_elegante_{timestamp}.pdf"
    doc = SimpleDocTemplate(pdf_filename, pagesize=A4)
    story = []
    styles = getSampleStyleSheet()

    # Elegant header
    header_style = ParagraphStyle(
        'ElegantHeader',
        parent=styles['Heading1'],
        fontSize=28,
        textColor=colors.HexColor('#1a237e'),
        spaceAfter=5,
        alignment=TA_CENTER,
        fontName='Helvetica-Bold'
    )
    story.append(Paragraph("F A C T U R A", header_style))

    # Decorative rule: a one-cell table whose only visible part is its bottom border
    line_table = Table([['']], colWidths=[6.5*inch])
    line_table.setStyle(TableStyle([
        ('LINEBELOW', (0, 0), (-1, 0), 3, colors.HexColor('#7986cb')),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 20),
    ]))
    story.append(line_table)

    # Invoice number and date
    info_data = [[
        f"No. {_elegante_texto(datos_json.get('numero_factura', 'N/A'), 'N/A')}",
        f"Fecha: {_elegante_texto(datos_json.get('fecha', 'N/A'), 'N/A')}"
    ]]
    info_table = Table(info_data, colWidths=[3.25*inch, 3.25*inch])
    info_table.setStyle(TableStyle([
        ('FONTNAME', (0, 0), (-1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 0), (-1, -1), 10),
        ('TEXTCOLOR', (0, 0), (-1, -1), colors.HexColor('#424242')),
        ('ALIGN', (0, 0), (0, -1), 'LEFT'),
        ('ALIGN', (1, 0), (1, -1), 'RIGHT'),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 15),
    ]))
    story.append(info_table)
    story.append(Spacer(1, 0.2*inch))

    # Issuer and customer
    partes_data = [
        ['Emisor', 'Cliente'],
        [
            _elegante_parte(datos_json.get('emisor', {}), ('nombre', 'nif', 'direccion')),
            _elegante_parte(datos_json.get('cliente', {}), ('nombre', 'nif')),
        ]
    ]
    partes_table = Table(partes_data, colWidths=[3.25*inch, 3.25*inch])
    partes_table.setStyle(TableStyle([
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, 0), 11),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.HexColor('#1a237e')),
        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#e8eaf6')),
        ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 1), (-1, -1), 9),
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('BOX', (0, 0), (-1, -1), 1.5, colors.HexColor('#7986cb')),
        ('INNERGRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#c5cae9')),
        ('TOPPADDING', (0, 0), (-1, -1), 12),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 12),
        ('LEFTPADDING', (0, 0), (-1, -1), 12),
    ]))
    story.append(partes_table)
    story.append(Spacer(1, 0.3*inch))

    # Line items: accept either key, and fall back to 'conceptos' when
    # 'productos' is present but empty/None
    productos = datos_json.get('productos') or datos_json.get('conceptos') or []
    if not isinstance(productos, (list, tuple)):
        # Anything that is not a sequence of items cannot be laid out as rows
        productos = []
    if productos:
        productos_data = [['Descripción', 'Cant.', 'Precio Unitario', 'Total']]
        productos_data.extend(_elegante_fila_producto(prod) for prod in productos)
        productos_table = Table(productos_data, colWidths=[2.8*inch, 0.8*inch, 1.4*inch, 1.5*inch])
        productos_table.setStyle(TableStyle([
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 10),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#5c6bc0')),
            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
            ('ALIGN', (1, 0), (-1, -1), 'RIGHT'),
            ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
            ('FONTSIZE', (0, 1), (-1, -1), 9),
            ('BOX', (0, 0), (-1, -1), 1, colors.HexColor('#7986cb')),
            ('LINEBELOW', (0, 0), (-1, 0), 1.5, colors.HexColor('#3f51b5')),
            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.HexColor('#fafafa')]),
            ('TOPPADDING', (0, 0), (-1, -1), 10),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 10),
        ]))
        story.append(productos_table)
        story.append(Spacer(1, 0.3*inch))

    # Totals: prefer the nested 'totales' dict, fall back to top-level keys
    totales = datos_json.get('totales', {})
    if not isinstance(totales, dict):
        totales = {}
    base = totales.get('base_imponible', datos_json.get('base_imponible', 0))
    iva = totales.get('iva', datos_json.get('iva', 0))
    porcentaje_iva = totales.get('porcentaje_iva', datos_json.get('porcentaje_iva', 0))
    total = totales.get('total', datos_json.get('total', 0))
    totales_data = [
        ['', 'Base Imponible:', _elegante_importe(base)],
        ['', f"IVA ({_elegante_texto(porcentaje_iva, '0')}%):", _elegante_importe(iva)],
        ['', '', ''],  # empty spacer row between the breakdown and the grand total
        ['', 'TOTAL A PAGAR:', _elegante_importe(total)]
    ]
    totales_table = Table(totales_data, colWidths=[2.5*inch, 2.5*inch, 1.5*inch])
    totales_table.setStyle(TableStyle([
        ('FONTNAME', (1, 0), (-1, 2), 'Helvetica'),
        ('FONTNAME', (1, 3), (-1, 3), 'Helvetica-Bold'),
        ('FONTSIZE', (1, 0), (-1, 2), 10),
        ('FONTSIZE', (1, 3), (-1, 3), 14),
        ('ALIGN', (1, 0), (1, -1), 'RIGHT'),
        ('ALIGN', (2, 0), (2, -1), 'RIGHT'),
        ('BACKGROUND', (1, 3), (-1, 3), colors.HexColor('#1a237e')),
        ('TEXTCOLOR', (1, 3), (-1, 3), colors.white),
        ('TOPPADDING', (0, 0), (-1, -1), 8),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
        ('RIGHTPADDING', (0, 0), (-1, -1), 12),
        ('LEFTPADDING', (1, 3), (-1, 3), 12),
    ]))
    story.append(totales_table)

    doc.build(story)
    return pdf_filename


def _elegante_texto(valor, defecto=''):
    """Return ``str(valor)``, or ``defecto`` when the value is ``None``."""
    return defecto if valor is None else str(valor)


def _elegante_importe(valor):
    """Format an amount as ``'N.NN €'`` without raising on non-numeric input.

    ``None`` is shown as ``0.00 €``. Text is parsed leniently: the currency
    sign and spaces are dropped and, when a comma is present, the right-most
    of ``,`` / ``.`` is taken as the decimal separator. Text that still cannot
    be parsed is returned unchanged so no information is lost.
    """
    if valor is None:
        return f"{0:.2f} €"
    if isinstance(valor, str):
        limpio = valor.replace('€', '').replace(' ', '').strip()
        if ',' in limpio:
            if limpio.rfind(',') > limpio.rfind('.'):
                # European style: '.' groups thousands, ',' marks decimals
                limpio = limpio.replace('.', '').replace(',', '.')
            else:
                # English style: ',' groups thousands
                limpio = limpio.replace(',', '')
        try:
            return f"{float(limpio):.2f} €"
        except ValueError:
            return valor
    try:
        return f"{valor:.2f} €"
    except (TypeError, ValueError):
        # Lists, dicts and other objects do not support the 'f' format
        return str(valor)


def _elegante_parte(parte, campos):
    """Build the multi-line cell text for the issuer or the customer.

    A dict contributes one line per field in ``campos``; any other value is
    shown as-is on the first line. The first line defaults to ``'N/A'`` and
    the remaining ones to an empty line, so the cell height stays constant.
    """
    if isinstance(parte, dict):
        valores = [parte.get(campo) for campo in campos]
    else:
        valores = [parte] + [None] * (len(campos) - 1)
    return "\n".join(
        _elegante_texto(valor, 'N/A' if indice == 0 else '')
        for indice, valor in enumerate(valores)
    )


def _elegante_fila_producto(prod):
    """Return the four table cells (description, qty, unit price, total) of one item."""
    if not isinstance(prod, dict):
        # A bare string/number item: show it as the description only
        return [_elegante_texto(prod), '', '', '']
    return [
        _elegante_texto(prod.get('descripcion', '')),
        _elegante_texto(prod.get('cantidad', '')),
        _elegante_importe(prod.get('precio_unitario', 0)),
        _elegante_importe(prod.get('total', 0)),
    ]
 # ============= FUNCIÓN PRINCIPAL =============
def procesar_factura(pdf_file):
    """Run the extraction pipeline for one uploaded invoice PDF.

    Steps: extract the PDF text, have the LLM turn it into JSON, flatten the
    JSON into a DataFrame and save that DataFrame as a CSV file.

    Args:
        pdf_file: Path of the uploaded PDF, or ``None`` when nothing was uploaded.

    Returns:
        A 7-tuple ``(text preview, DataFrame, CSV file name, technical summary,
        useful summary, invoice JSON, CSV file name)``. On failure the
        DataFrame, file names and JSON are ``None`` and the fifth element
        carries the error message.
    """
    if pdf_file is None:
        return "", None, None, "", "", None, None

    print("\n--- Extrayendo texto del PDF...")
    texto = extraer_texto_pdf(pdf_file)
    if texto.startswith("Error"):
        return "", None, None, "", f"Error: {texto}", None, None

    # Only the first part of the text is shown as a preview
    limite_preview = 1500
    if len(texto) > limite_preview:
        texto_preview = f"{texto[:limite_preview]}..."
    else:
        texto_preview = texto

    print("--- El LLM está analizando la factura y creando el JSON...")
    datos_json, resumen_util, mensaje = analizar_y_convertir_json(texto)
    if not datos_json:
        return texto_preview, None, None, "", mensaje, None, None

    print("--- Convirtiendo JSON a CSV...")
    df = json_a_csv(datos_json)

    # File name: sanitized invoice number plus a timestamp
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    numero_seguro = re.sub(r'[^\w\-]', '_', str(datos_json.get('numero_factura', 'factura')))
    csv_filename = f"{numero_seguro}_{timestamp}.csv"
    df.to_csv(csv_filename, index=False, encoding='utf-8-sig')

    resumen_tecnico = (
        "## Factura Procesada Exitosamente\n"
        f"**Modelo utilizado:** {mensaje}\n"
    )
    print(f"--- CSV guardado: {csv_filename}")

    # The CSV name is returned twice: once for the download widget, once for the state
    return (
        texto_preview,
        df,
        csv_filename,
        resumen_tecnico,
        resumen_util,
        datos_json,
        csv_filename,
    )
+# ============= GENERAR PDF CON TEMPLATE SELECCIONADO =============
def generar_pdf_con_template(template, csv_file, datos_json):
    """Generate the invoice PDF with the template selected in the UI.

    Args:
        template: Template name; one of "Clásico", "Moderno" or "Elegante".
        csv_file: CSV path forwarded to the template generator.
        datos_json: Invoice data forwarded to the template generator.

    Returns:
        ``(pdf_file, status message)``. ``pdf_file`` is ``None`` when there is
        no invoice data, the template name is unknown or generation fails.
    """
    if not datos_json:
        return None, "Error: Primero debes procesar una factura"

    # Each generator is wrapped in a lambda so it is only looked up and
    # called for the template that actually matches
    opciones = (
        ("Clásico", lambda: generar_pdf_clasico(csv_file, datos_json)),
        ("Moderno", lambda: generar_pdf_moderno(csv_file, datos_json)),
        ("Elegante", lambda: generar_pdf_elegante(csv_file, datos_json)),
    )
    try:
        for nombre, generar in opciones:
            if template == nombre:
                pdf_file = generar()
                return pdf_file, f"PDF generado exitosamente: {pdf_file}"
        return None, "Template no válido"
    except Exception as e:
        # Any failure is reported as a status message instead of propagating
        return None, f"Error al generar PDF: {str(e)}"
 # ============= INTERFAZ GRADIO =============
+with gr.Blocks(title="Extractor y Generador de Facturas") as demo:
+    datos_json_state = gr.State()
+    csv_file_state = gr.State()
     gr.Markdown("""
+    # Extractor y Generador de Facturas
+    ### Extrae datos de facturas PDF y genera nuevas facturas con diferentes estilos
     """)
     gr.Markdown("---")
     with gr.Row():
+        # COLUMNA IZQUIERDA
         with gr.Column(scale=1):
+            gr.Markdown("### Paso 1: Extraer Datos")
             gr.Markdown("")
             pdf_input = gr.File(
+                label="Subir factura PDF para extraer datos",
                 file_types=[".pdf"],
                 type="filepath"
             )
+            gr.Markdown("")
+            btn_extraer = gr.Button(
+                "Extraer Datos de la Factura",
+                variant="primary",
+                size="lg"
+            )
             gr.Markdown("")
             gr.Markdown("---")
             gr.Markdown("")
+            csv_output = gr.File(label="Descargar CSV con los datos extraídos")
+            gr.Markdown("")
+            gr.Markdown("---")
             gr.Markdown("")
+            # Generador de PDF
+            gr.Markdown("### Paso 2: Generar PDF")
             gr.Markdown("")
+            template_selector = gr.Radio(
+                choices=["Clásico", "Moderno", "Elegante"],
+                value="Moderno",
+                label="Seleccionar estilo de factura",
+                info="Elige el diseño que prefieras"
             )
             gr.Markdown("")
+            btn_generar_pdf = gr.Button(
+                "Generar Factura PDF",
+                variant="secondary",
                 size="lg"
             )
             gr.Markdown("")
+            pdf_output = gr.File(label="Descargar factura PDF generada")
+            pdf_status = gr.Textbox(
+                label="Estado",
                 interactive=False,
+                lines=2
             )
+        # COLUMNA DERECHA
         with gr.Column(scale=2):
             gr.Markdown("### Resultados del Análisis")
             gr.Markdown("")
             gr.Markdown("#### Información Útil para Administrativos")
             info_util = gr.Markdown(
                 value="*Aquí aparecerá información relevante una vez procesada la factura*"
             gr.Markdown("---")
             gr.Markdown("")
             with gr.Tabs():
                 with gr.Tab("Vista Previa CSV"):
                     gr.Markdown("")
     gr.Markdown("---")
     gr.Markdown("")
     gr.Markdown("""
+    **Sistema de extracción y generación de facturas con IA**
+    **Características:**
+    - Extrae datos automáticamente de facturas PDF
+    - Genera CSV estructurado
+    - Crea facturas PDF profesionales con 3 templates diferentes
+    - Análisis inteligente con modelos de lenguaje
+    *Configuración requerida: HF_TOKEN en Settings → Secrets*
     """)
     # Conectar botones
+    btn_extraer.click(
+        fn=procesar_factura,
+        inputs=[pdf_input],
+        outputs=[texto_extraido, tabla_preview, csv_output, resumen_tecnico, info_util, datos_json_state, csv_file_state]
     )
+    btn_generar_pdf.click(
+        fn=generar_pdf_con_template,
+        inputs=[template_selector, csv_file_state, datos_json_state],
+        outputs=[pdf_output, pdf_status]
     )
 if __name__ == "__main__":