Spaces:

angelsg213
/

TESTING22

Sleeping

App Files Community

angelsg213 committed 11 days ago

Commit

4bbc0ce

verified ·

1 Parent(s): 9e4ee14

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -78

app.py CHANGED Viewed

@@ -1,52 +1,21 @@
-import gradio as gr
-import PyPDF2
-import requests
-import json
-import re
-import os  # Necesario para leer los secretos
-# ==========================================
-# CONFIGURACIÓN SEGURA (SECRETS)
-# ==========================================
-# Reads the Hugging Face token from the environment variable "aa" (configure it under Settings -> Secrets)
-HF_TOKEN = os.getenv("aa")
-# Verificación de seguridad
-if not HF_TOKEN:
-    print("⚠️ ADVERTENCIA: No se encontró el HF_TOKEN. Configúralo en Settings -> Secrets.")
 # ==========================================
-# 1. EXTRACCIÓN DE TEXTO (PDF -> String)
-# ==========================================
-def extraer_texto_pdf(pdf_file):
-    if not pdf_file:
-        return "No se subió ningún archivo."
-    try:
-        pdf_reader = PyPDF2.PdfReader(pdf_file)
-        texto_completo = ""
-        for pagina in pdf_reader.pages:
-            texto_completo += pagina.extract_text() + "\n"
-        return texto_completo
-    except Exception as e:
-        return f"Error al leer PDF: {str(e)}"
-# ==========================================
-# 2. CONSULTA AL LLM (API Hugging Face)
 # ==========================================
 def consultar_llm(texto_factura):
-    # Si no hay token, devolver error inmediato
-    if not HF_TOKEN:
         return {"error": "Falta configurar HF_TOKEN en Settings -> Secrets"}
-    # Recortar texto para no exceder tokens
     texto_limpio = texto_factura[:6000]
-    # URL del modelo (Usamos Mistral v0.3 que es excelente siguiendo instrucciones)
-    API_URL = "https://router.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
     headers = {
         "Content-Type": "application/json",
-        "Authorization": f"Bearer {HF_TOKEN}"
     }
     prompt = f"""
@@ -78,8 +47,11 @@ def consultar_llm(texto_factura):
         "inputs": prompt,
         "parameters": {
             "max_new_tokens": 1500,
-            "temperature": 0.1,  # Bajo para precisión
             "return_full_text": False
         }
     }
@@ -91,14 +63,12 @@ def consultar_llm(texto_factura):
         resultado = response.json()
-        # Obtener el texto generado (maneja si devuelve lista o dict)
         texto_generado = ""
         if isinstance(resultado, list) and len(resultado) > 0:
             texto_generado = resultado[0].get('generated_text', '')
         elif isinstance(resultado, dict):
             texto_generado = resultado.get('generated_text', '')
-        # Buscar el bloque JSON dentro de la respuesta (por si el modelo añade texto extra)
         match = re.search(r'\{.*\}', texto_generado, re.DOTALL)
         if match:
             return json.loads(match.group(0))
@@ -106,39 +76,4 @@ def consultar_llm(texto_factura):
             return {"error": "El modelo no generó un JSON válido", "respuesta_cruda": texto_generado}
     except Exception as e:
-        return {"error": f"Error interno: {str(e)}"}
-# ==========================================
-# 3. INTERFAZ GRÁFICA (Gradio)
-# ==========================================
-def procesar_factura(pdf_file):
-    # 1. Extraer
-    texto = extraer_texto_pdf(pdf_file)
-    if texto.startswith("Error"):
-        return texto, {"error": "Fallo al leer PDF"}
-    # 2. Consultar IA
-    datos_json = consultar_llm(texto)
-    # 3. Mostrar bonito
-    return texto, json.dumps(datos_json, indent=4, ensure_ascii=False)
-# Diseño de la interfaz
-with gr.Blocks(title="Extractor Facturas AI") as demo:
-    gr.Markdown("# 🤖 Extractor de Facturas Seguro")
-    gr.Markdown("Este espacio usa tu token de forma privada para extraer datos de facturas PDF.")
-    with gr.Row():
-        with gr.Column(scale=1):
-            input_pdf = gr.File(label="Sube tu Factura (PDF)", file_types=[".pdf"])
-            btn = gr.Button("Extraer Datos", variant="primary")
-        with gr.Column(scale=2):
-            output_json = gr.Code(label="Datos Extraídos (JSON)", language="json")
-            with gr.Accordion("Ver texto crudo extraído del PDF", open=False):
-                output_text = gr.Textbox(label="Texto Original", lines=10)
-    btn.click(fn=procesar_factura, inputs=input_pdf, outputs=[output_text, output_json])
-if __name__ == "__main__":
-    demo.launch()

 # ==========================================
+# 2. CONSULTA AL LLM (CORREGIDO)
 # ==========================================
 def consultar_llm(texto_factura):
+    # CORRECCIÓN 1: Asegúrate de que esto coincida con el nombre en tus Secrets
+    token = os.getenv("aa")
+    if not token:
         return {"error": "Falta configurar HF_TOKEN en Settings -> Secrets"}
     texto_limpio = texto_factura[:6000]
+    # CORRECCIÓN 2: Usamos la URL estándar y la versión v0.2 que es más estable
+    API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
     headers = {
         "Content-Type": "application/json",
+        "Authorization": f"Bearer {token}"
     }
     prompt = f"""
         "inputs": prompt,
         "parameters": {
             "max_new_tokens": 1500,
+            "temperature": 0.1,
             "return_full_text": False
+        },
+        "options": {
+            "wait_for_model": True  # Esperar si el modelo está cargando
         }
     }
         resultado = response.json()
         texto_generado = ""
         if isinstance(resultado, list) and len(resultado) > 0:
             texto_generado = resultado[0].get('generated_text', '')
         elif isinstance(resultado, dict):
             texto_generado = resultado.get('generated_text', '')
         match = re.search(r'\{.*\}', texto_generado, re.DOTALL)
         if match:
             return json.loads(match.group(0))
             return {"error": "El modelo no generó un JSON válido", "respuesta_cruda": texto_generado}
     except Exception as e:
+        return {"error": f"Error interno: {str(e)}"}