Lucas Gagneten
committed on
Commit
·
bef8785
1
Parent(s):
546c83b
Descargar Excel
Browse files- batch_processor.py +129 -7
- interface.py +167 -11
- requirements.txt +3 -1
batch_processor.py
CHANGED
|
@@ -5,6 +5,7 @@ Procesamiento por lotes y navegación de resultados
|
|
| 5 |
|
| 6 |
import os
|
| 7 |
from PIL import Image
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
class BatchProcessor:
|
|
@@ -18,42 +19,103 @@ class BatchProcessor:
|
|
| 18 |
invoice_processor: Instancia de InvoiceProcessor
|
| 19 |
"""
|
| 20 |
self.invoice_processor = invoice_processor
|
|
|
|
| 21 |
|
| 22 |
def process_batch(self, file_list):
|
| 23 |
"""
|
| 24 |
-
Procesa un lote de archivos de facturas.
|
| 25 |
|
| 26 |
Args:
|
| 27 |
file_list: Lista de archivos cargados
|
| 28 |
|
| 29 |
Returns:
|
| 30 |
-
tuple: (results, initial_index, status, first_image, first_table, first_filename)
|
| 31 |
"""
|
| 32 |
if not file_list:
|
| 33 |
-
return [], 0, "Por favor, carga al menos un archivo.", None, [], ""
|
| 34 |
|
| 35 |
results = []
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
try:
|
| 38 |
image = Image.open(file.name).convert("RGB")
|
| 39 |
filename = os.path.basename(file.name)
|
| 40 |
|
|
|
|
|
|
|
| 41 |
_, annotated_image, table_data, _ = self.invoice_processor.process_invoice(
|
| 42 |
image, filename
|
| 43 |
)
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
results.append({
|
| 46 |
"filename": filename,
|
| 47 |
"image": annotated_image,
|
| 48 |
-
"table": table_data
|
|
|
|
| 49 |
})
|
|
|
|
|
|
|
|
|
|
| 50 |
except Exception as e:
|
|
|
|
| 51 |
results.append({
|
| 52 |
"filename": os.path.basename(file.name),
|
| 53 |
"image": None,
|
| 54 |
-
"table": [["ERROR FATAL", f"No se pudo cargar o procesar: {e}"]]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
})
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
# Preparar resultados iniciales
|
| 58 |
initial_index = 0
|
| 59 |
first_result = results[initial_index]
|
|
@@ -66,8 +128,68 @@ class BatchProcessor:
|
|
| 66 |
status,
|
| 67 |
first_result["image"],
|
| 68 |
first_result["table"],
|
| 69 |
-
first_result["filename"]
|
|
|
|
| 70 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
|
| 73 |
|
|
|
|
| 5 |
|
| 6 |
import os
|
| 7 |
from PIL import Image
|
| 8 |
+
import pandas as pd
|
| 9 |
|
| 10 |
|
| 11 |
class BatchProcessor:
|
|
|
|
| 19 |
invoice_processor: Instancia de InvoiceProcessor
|
| 20 |
"""
|
| 21 |
self.invoice_processor = invoice_processor
|
| 22 |
+
self.master_dataframe = None # DataFrame maestro con todas las facturas
|
| 23 |
|
| 24 |
def process_batch(self, file_list):
|
| 25 |
"""
|
| 26 |
+
Procesa un lote de archivos de facturas y crea el DataFrame maestro.
|
| 27 |
|
| 28 |
Args:
|
| 29 |
file_list: Lista de archivos cargados
|
| 30 |
|
| 31 |
Returns:
|
| 32 |
+
tuple: (results, initial_index, status, first_image, first_table, first_filename, master_df)
|
| 33 |
"""
|
| 34 |
if not file_list:
|
| 35 |
+
return [], 0, "Por favor, carga al menos un archivo.", None, [], "", None
|
| 36 |
|
| 37 |
results = []
|
| 38 |
+
df_data = [] # Lista para construir el DataFrame
|
| 39 |
+
|
| 40 |
+
print("\n" + "=" * 80)
|
| 41 |
+
print("PROCESAMIENTO POR LOTES - CREANDO DATAFRAME MAESTRO")
|
| 42 |
+
print("=" * 80)
|
| 43 |
+
|
| 44 |
+
for idx, file in enumerate(file_list):
|
| 45 |
try:
|
| 46 |
image = Image.open(file.name).convert("RGB")
|
| 47 |
filename = os.path.basename(file.name)
|
| 48 |
|
| 49 |
+
print(f"\n[{idx + 1}/{len(file_list)}] Procesando: {filename}")
|
| 50 |
+
|
| 51 |
_, annotated_image, table_data, _ = self.invoice_processor.process_invoice(
|
| 52 |
image, filename
|
| 53 |
)
|
| 54 |
|
| 55 |
+
# Convertir tabla a diccionario para el DataFrame
|
| 56 |
+
row_dict = {'Nombre_Archivo': filename}
|
| 57 |
+
for etiqueta, valor in table_data:
|
| 58 |
+
row_dict[etiqueta] = valor
|
| 59 |
+
|
| 60 |
+
df_data.append(row_dict)
|
| 61 |
+
|
| 62 |
results.append({
|
| 63 |
"filename": filename,
|
| 64 |
"image": annotated_image,
|
| 65 |
+
"table": table_data,
|
| 66 |
+
"df_index": idx # Índice en el DataFrame
|
| 67 |
})
|
| 68 |
+
|
| 69 |
+
print(f" ✓ Agregado al DataFrame: {filename}")
|
| 70 |
+
|
| 71 |
except Exception as e:
|
| 72 |
+
print(f" ✗ Error procesando {os.path.basename(file.name)}: {e}")
|
| 73 |
results.append({
|
| 74 |
"filename": os.path.basename(file.name),
|
| 75 |
"image": None,
|
| 76 |
+
"table": [["ERROR FATAL", f"No se pudo cargar o procesar: {e}"]],
|
| 77 |
+
"df_index": idx
|
| 78 |
+
})
|
| 79 |
+
# Agregar fila con errores al DataFrame
|
| 80 |
+
df_data.append({
|
| 81 |
+
'Nombre_Archivo': os.path.basename(file.name),
|
| 82 |
+
'ERROR': str(e)
|
| 83 |
})
|
| 84 |
|
| 85 |
+
# Crear DataFrame maestro
|
| 86 |
+
self.master_dataframe = pd.DataFrame(df_data)
|
| 87 |
+
|
| 88 |
+
# Reordenar columnas
|
| 89 |
+
columnas_ordenadas = [
|
| 90 |
+
'Nombre_Archivo',
|
| 91 |
+
'PROVEEDOR_RAZON_SOCIAL',
|
| 92 |
+
'PROVEEDOR_CUIT',
|
| 93 |
+
'COMPROBANTE_NUMERO',
|
| 94 |
+
'FECHA',
|
| 95 |
+
'JURISDICCION_GASTO',
|
| 96 |
+
'TIPO',
|
| 97 |
+
'CONCEPTO_GASTO',
|
| 98 |
+
'ALICUOTA',
|
| 99 |
+
'IVA',
|
| 100 |
+
'NETO',
|
| 101 |
+
'TOTAL'
|
| 102 |
+
]
|
| 103 |
+
|
| 104 |
+
# Filtrar columnas existentes
|
| 105 |
+
columnas_existentes = [col for col in columnas_ordenadas if col in self.master_dataframe.columns]
|
| 106 |
+
# Agregar columnas no previstas
|
| 107 |
+
for col in self.master_dataframe.columns:
|
| 108 |
+
if col not in columnas_existentes:
|
| 109 |
+
columnas_existentes.append(col)
|
| 110 |
+
|
| 111 |
+
self.master_dataframe = self.master_dataframe[columnas_existentes]
|
| 112 |
+
|
| 113 |
+
print("\n" + "=" * 80)
|
| 114 |
+
print("DATAFRAME MAESTRO CREADO")
|
| 115 |
+
print(f"Dimensiones: {self.master_dataframe.shape}")
|
| 116 |
+
print(f"Columnas: {list(self.master_dataframe.columns)}")
|
| 117 |
+
print("=" * 80 + "\n")
|
| 118 |
+
|
| 119 |
# Preparar resultados iniciales
|
| 120 |
initial_index = 0
|
| 121 |
first_result = results[initial_index]
|
|
|
|
| 128 |
status,
|
| 129 |
first_result["image"],
|
| 130 |
first_result["table"],
|
| 131 |
+
first_result["filename"],
|
| 132 |
+
self.master_dataframe
|
| 133 |
)
|
| 134 |
+
|
| 135 |
+
def update_dataframe(self, df, df_index, edited_table):
    """
    Return a copy of the master DataFrame with one row overwritten by edited values.

    A new DataFrame object is built for every successful update; per the
    original author's note this is what lets Gradio detect the state change.

    Args:
        df: Master DataFrame (one row per processed file).
        df_index: Zero-based position of the row to update.
        edited_table: Edited label/value pairs, either a list of
            ``[label, value]`` rows or a two-column pandas DataFrame
            (the form a Gradio ``Dataframe`` input delivers by default).

    Returns:
        pd.DataFrame: The updated copy, or ``df`` itself (unchanged) when it is
        None/empty or ``df_index`` is out of range.
    """
    if df is None or df.empty:
        print("⚠️ DataFrame está vacío o es None")
        return df

    # Reject negative positions too: ``.at`` with an unknown label such as -1
    # would silently append a new row instead of failing.
    if not 0 <= df_index < len(df):
        print(f"⚠️ Índice {df_index} fuera de rango (DataFrame tiene {len(df)} filas)")
        return df

    # Iterating a DataFrame yields its column names, not its rows, so convert
    # it to a plain list of rows before looping.
    if edited_table is None:
        rows = []
    elif isinstance(edited_table, pd.DataFrame):
        rows = edited_table.to_numpy().tolist()
    else:
        rows = edited_table

    print(f"\n{'='*80}")
    print(f"[DATAFRAME UPDATE] Actualizando fila {df_index}")
    print(f"{'='*80}")
    print(f"Antes de actualizar:")
    print(df.iloc[df_index].to_dict())

    # Independent copy with a fresh 0..n-1 index (so position == label) and
    # object dtype (so string values can be written into any column).
    df_updated = df.astype(object).reset_index(drop=True)

    # Apply every edit whose label matches an existing column.
    cambios = []
    for row in rows:
        if len(row) >= 2:
            etiqueta = str(row[0]).strip()
            valor = str(row[1]).strip()

            if etiqueta in df_updated.columns:
                df_updated.at[df_index, etiqueta] = valor
                cambios.append(f"  ✓ {etiqueta}: '{valor}'")
            else:
                print(f"  ⚠️ Columna '{etiqueta}' no existe en DataFrame")

    print("\nCambios aplicados:")
    for cambio in cambios:
        print(cambio)

    print(f"\nDespués de actualizar:")
    print(df_updated.iloc[df_index].to_dict())
    print(f"{'='*80}\n")

    return df_updated
|
| 189 |
+
|
| 190 |
+
def get_master_dataframe(self):
    """Return the object currently stored in ``self.master_dataframe``."""
    master = self.master_dataframe
    return master
|
| 193 |
|
| 194 |
|
| 195 |
|
interface.py
CHANGED
|
@@ -21,10 +21,114 @@ class GradioInterface:
|
|
| 21 |
self.batch_processor = batch_processor
|
| 22 |
self.navigator = result_navigator
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
def enable_buttons(self, all_results):
|
| 25 |
-
"""Habilita los botones de navegación si hay resultados."""
|
| 26 |
has_results = len(all_results) > 0
|
| 27 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
def build_interface(self):
|
| 30 |
"""
|
|
@@ -45,6 +149,7 @@ class GradioInterface:
|
|
| 45 |
# Estados
|
| 46 |
all_results_state = gr.State(value=[])
|
| 47 |
current_index_state = gr.State(value=0)
|
|
|
|
| 48 |
|
| 49 |
# Sección de carga y procesamiento
|
| 50 |
with gr.Row():
|
|
@@ -57,13 +162,22 @@ class GradioInterface:
|
|
| 57 |
interactive=True
|
| 58 |
)
|
| 59 |
|
| 60 |
-
# Columna derecha:
|
| 61 |
with gr.Column(scale=1):
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
status_output = gr.Textbox(
|
| 68 |
label="📊 Estado del Procesamiento",
|
| 69 |
value="Carga tus facturas y haz clic en 'Procesar'",
|
|
@@ -72,6 +186,9 @@ class GradioInterface:
|
|
| 72 |
|
| 73 |
gr.Markdown("---")
|
| 74 |
|
|
|
|
|
|
|
|
|
|
| 75 |
# Sección de resultados: Imagen a la izquierda, datos a la derecha
|
| 76 |
with gr.Row():
|
| 77 |
# Columna izquierda: Imagen
|
|
@@ -97,6 +214,12 @@ class GradioInterface:
|
|
| 97 |
interactive=False,
|
| 98 |
size="lg"
|
| 99 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
next_button = gr.Button(
|
| 101 |
"Siguiente ➡️",
|
| 102 |
interactive=False,
|
|
@@ -106,7 +229,7 @@ class GradioInterface:
|
|
| 106 |
table_output = gr.Dataframe(
|
| 107 |
headers=["Etiqueta", "Valor"],
|
| 108 |
label="📋 Resultados de NER",
|
| 109 |
-
interactive=
|
| 110 |
col_count=(2, "fixed"),
|
| 111 |
datatype=["str", "str"]
|
| 112 |
)
|
|
@@ -121,12 +244,45 @@ class GradioInterface:
|
|
| 121 |
status_output,
|
| 122 |
image_output,
|
| 123 |
table_output,
|
| 124 |
-
filename_output
|
|
|
|
| 125 |
]
|
| 126 |
).then(
|
| 127 |
fn=self.enable_buttons,
|
| 128 |
inputs=[all_results_state],
|
| 129 |
-
outputs=[prev_button, next_button]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
)
|
| 131 |
|
| 132 |
prev_button.click(
|
|
|
|
| 21 |
self.batch_processor = batch_processor
|
| 22 |
self.navigator = result_navigator
|
| 23 |
|
| 24 |
+
def update_current_result(self, all_results, current_index, edited_table, master_df):
    """
    Store the user's table edits in the current result and in the master DataFrame.

    Args:
        all_results: List of per-file result dicts; the entry at
            ``current_index`` is updated in place.
        current_index: Position of the result currently being edited.
        edited_table: Edited label/value table, either a list of
            ``[label, value]`` rows or a pandas DataFrame (the form a Gradio
            ``Dataframe`` input delivers by default).
        master_df: Master DataFrame to update.

    Returns:
        tuple: ``(all_results, updated_master_df)``. Both inputs are returned
        untouched when there is nothing to update.
    """
    if not all_results or current_index >= len(all_results):
        print("⚠️ No hay resultados para actualizar")
        return all_results, master_df

    # Normalise to a list of rows. Iterating a DataFrame yields its column
    # names, so without this neither the log below nor the row-wise update
    # would see the actual label/value pairs.
    if edited_table is None:
        rows = []
    elif hasattr(edited_table, "to_numpy"):
        rows = edited_table.to_numpy().tolist()
    else:
        rows = edited_table

    print(f"\n{'='*80}")
    print(f"GUARDANDO CAMBIOS MANUALES")
    print(f"{'='*80}")
    print(f"Factura: {all_results[current_index]['filename']}")
    print(f"Índice: {current_index}")

    # Keep the stored table as a list of rows whatever form the edit arrived in.
    all_results[current_index]['table'] = rows

    # Fall back to the result's own position when no explicit row index was recorded.
    df_index = all_results[current_index].get('df_index', current_index)

    print(f"\nActualizando DataFrame en fila {df_index}:")
    for row in rows:
        if len(row) >= 2:
            print(f"  {row[0]}: {row[1]}")

    updated_df = self.batch_processor.update_dataframe(master_df, df_index, rows)

    print(f"\n✓ Cambios guardados en el DataFrame maestro")
    print(f"{'='*80}\n")

    return all_results, updated_df
|
| 64 |
+
|
| 65 |
+
def verify_dataframe_state(self, master_df):
    """Summarise the master DataFrame as a one-line status message.

    Returns an error message when the frame is None or empty, otherwise a
    confirmation that includes its row and column counts.
    """
    if master_df is not None and not master_df.empty:
        n_rows, n_cols = master_df.shape
        return f"✓ DataFrame OK: {n_rows} filas, {n_cols} columnas"
    return "❌ DataFrame es None" if master_df is None else "❌ DataFrame está vacío"
|
| 72 |
+
|
| 73 |
+
def generate_excel(self, master_df):
    """
    Write the master DataFrame to a timestamped .xlsx file in the working directory.

    Args:
        master_df: Master DataFrame to export.

    Returns:
        str: Absolute path of the written file, or None when ``master_df`` is
        None or empty (nothing is written in that case).
    """
    from datetime import datetime
    import os

    rule = "=" * 80

    print("\n" + rule)
    print("GENERANDO EXCEL DESDE DATAFRAME MAESTRO")
    print(rule)

    # Guard clauses: nothing to export.
    if master_df is None:
        print("❌ ERROR: master_df es None")
        return None
    if master_df.empty:
        print("❌ ERROR: master_df está vacío")
        return None

    # Diagnostic dump of what is about to be exported.
    print(f"✓ DataFrame recibido correctamente")
    print(f"Tipo: {type(master_df)}")
    print(f"Dimensiones: {master_df.shape}")
    print(f"Columnas: {list(master_df.columns)}")

    print("\n" + rule)
    print("CONTENIDO COMPLETO DEL DATAFRAME:")
    print(rule)
    print(master_df.to_string())
    print(rule + "\n")

    # File name carries a second-resolution timestamp.
    filepath = os.path.join(
        os.getcwd(),
        f"facturas_lote_{datetime.now():%Y%m%d_%H%M%S}.xlsx",
    )

    print(f"Guardando Excel en: {filepath}")
    master_df.to_excel(filepath, index=False, engine='openpyxl')

    print(f"✓ Excel generado exitosamente con {len(master_df)} facturas")
    print(rule + "\n")

    return filepath
|
| 122 |
+
|
| 123 |
def enable_buttons(self, all_results):
    """Build the interactivity updates for the four action buttons.

    All four buttons are enabled when ``all_results`` is non-empty and
    disabled otherwise.
    """
    has_results = len(all_results) > 0
    # One update per wired output, in this order:
    # prev_button, save_button, next_button, download_button.
    return tuple(gr.update(interactive=has_results) for _ in range(4))
|
| 132 |
|
| 133 |
def build_interface(self):
|
| 134 |
"""
|
|
|
|
| 149 |
# Estados
|
| 150 |
all_results_state = gr.State(value=[])
|
| 151 |
current_index_state = gr.State(value=0)
|
| 152 |
+
master_dataframe_state = gr.State(value=None) # DataFrame maestro
|
| 153 |
|
| 154 |
# Sección de carga y procesamiento
|
| 155 |
with gr.Row():
|
|
|
|
| 162 |
interactive=True
|
| 163 |
)
|
| 164 |
|
| 165 |
+
# Columna derecha: Botones y estado
|
| 166 |
with gr.Column(scale=1):
|
| 167 |
+
with gr.Row():
|
| 168 |
+
process_button = gr.Button(
|
| 169 |
+
"🚀 Procesar Lote de Facturas",
|
| 170 |
+
variant="primary",
|
| 171 |
+
size="lg",
|
| 172 |
+
scale=2
|
| 173 |
+
)
|
| 174 |
+
download_button = gr.Button(
|
| 175 |
+
"📥 Descargar XLSX",
|
| 176 |
+
variant="secondary",
|
| 177 |
+
size="lg",
|
| 178 |
+
scale=1,
|
| 179 |
+
interactive=False
|
| 180 |
+
)
|
| 181 |
status_output = gr.Textbox(
|
| 182 |
label="📊 Estado del Procesamiento",
|
| 183 |
value="Carga tus facturas y haz clic en 'Procesar'",
|
|
|
|
| 186 |
|
| 187 |
gr.Markdown("---")
|
| 188 |
|
| 189 |
+
# Componente para descarga de Excel
|
| 190 |
+
excel_file = gr.File(label="📥 Archivo Excel Generado", visible=False)
|
| 191 |
+
|
| 192 |
# Sección de resultados: Imagen a la izquierda, datos a la derecha
|
| 193 |
with gr.Row():
|
| 194 |
# Columna izquierda: Imagen
|
|
|
|
| 214 |
interactive=False,
|
| 215 |
size="lg"
|
| 216 |
)
|
| 217 |
+
save_button = gr.Button(
|
| 218 |
+
"💾 Guardar Cambios",
|
| 219 |
+
variant="secondary",
|
| 220 |
+
size="lg",
|
| 221 |
+
interactive=False
|
| 222 |
+
)
|
| 223 |
next_button = gr.Button(
|
| 224 |
"Siguiente ➡️",
|
| 225 |
interactive=False,
|
|
|
|
| 229 |
table_output = gr.Dataframe(
|
| 230 |
headers=["Etiqueta", "Valor"],
|
| 231 |
label="📋 Resultados de NER",
|
| 232 |
+
interactive=True, # ✅ Permitir edición
|
| 233 |
col_count=(2, "fixed"),
|
| 234 |
datatype=["str", "str"]
|
| 235 |
)
|
|
|
|
| 244 |
status_output,
|
| 245 |
image_output,
|
| 246 |
table_output,
|
| 247 |
+
filename_output,
|
| 248 |
+
master_dataframe_state # ✅ Captura el DataFrame maestro
|
| 249 |
]
|
| 250 |
).then(
|
| 251 |
fn=self.enable_buttons,
|
| 252 |
inputs=[all_results_state],
|
| 253 |
+
outputs=[prev_button, save_button, next_button, download_button]
|
| 254 |
+
)
|
| 255 |
+
|
| 256 |
+
# Evento para guardar cambios manualmente
|
| 257 |
+
save_button.click(
|
| 258 |
+
fn=self.update_current_result,
|
| 259 |
+
inputs=[all_results_state, current_index_state, table_output, master_dataframe_state],
|
| 260 |
+
outputs=[all_results_state, master_dataframe_state]
|
| 261 |
+
).then(
|
| 262 |
+
fn=lambda idx, df: f"✓ Cambios guardados en factura {idx + 1}. DataFrame tiene {len(df) if df is not None else 0} filas",
|
| 263 |
+
inputs=[current_index_state, master_dataframe_state],
|
| 264 |
+
outputs=[status_output]
|
| 265 |
+
)
|
| 266 |
+
|
| 267 |
+
# Evento para actualizar resultados cuando se edita la tabla (auto-save)
|
| 268 |
+
table_output.change(
|
| 269 |
+
fn=self.update_current_result,
|
| 270 |
+
inputs=[all_results_state, current_index_state, table_output, master_dataframe_state],
|
| 271 |
+
outputs=[all_results_state, master_dataframe_state]
|
| 272 |
+
)
|
| 273 |
+
|
| 274 |
+
# Evento de descarga usando el DataFrame maestro
|
| 275 |
+
download_button.click(
|
| 276 |
+
fn=self.verify_dataframe_state,
|
| 277 |
+
inputs=[master_dataframe_state],
|
| 278 |
+
outputs=[status_output]
|
| 279 |
+
).then(
|
| 280 |
+
fn=self.generate_excel,
|
| 281 |
+
inputs=[master_dataframe_state],
|
| 282 |
+
outputs=[excel_file]
|
| 283 |
+
).then(
|
| 284 |
+
fn=lambda: gr.update(visible=True),
|
| 285 |
+
outputs=[excel_file]
|
| 286 |
)
|
| 287 |
|
| 288 |
prev_button.click(
|
requirements.txt
CHANGED
|
@@ -4,4 +4,6 @@ torch>=2.0.0
|
|
| 4 |
torchvision>=0.15.0
|
| 5 |
python-doctr>=0.6.0
|
| 6 |
Pillow>=9.0.0
|
| 7 |
-
numpy>=1.24.0
|
|
|
|
|
|
|
|
|
| 4 |
torchvision>=0.15.0
|
| 5 |
python-doctr>=0.6.0
|
| 6 |
Pillow>=9.0.0
|
| 7 |
+
numpy>=1.24.0
|
| 8 |
+
pandas>=2.0.0
|
| 9 |
+
openpyxl>=3.1.0
|