Lucas Gagneten
committed on
Commit
·
809b92e
1
Parent(s):
e5619d9
Interfaz mejorada
Browse files- README.md +150 -1
- app.py +33 -404
- batch_processor.py +144 -0
- config.py +61 -0
- interface.py +155 -0
- invoice_processor.py +279 -0
- model_loader.py +75 -0
- requirements.txt +7 -14
README.md
CHANGED
|
@@ -11,4 +11,153 @@ license: mit
|
|
| 11 |
short_description: LayoutLMv3 fine-tuned - Ner Facturas Extractor
|
| 12 |
---
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
short_description: LayoutLMv3 fine-tuned - Ner Facturas Extractor
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# 🇦🇷 Extractor de Datos de Facturas Argentinas
|
| 15 |
+
|
| 16 |
+
Aplicación de extracción automática de datos de facturas argentinas usando LayoutLMv3 y DocTR.
|
| 17 |
+
|
| 18 |
+
## 📁 Estructura del Proyecto
|
| 19 |
+
|
| 20 |
+
```
|
| 21 |
+
layoutlmv3-facturas-extractor/
|
| 22 |
+
│
|
| 23 |
+
├── app.py # Punto de entrada principal
|
| 24 |
+
├── config.py # Configuración y constantes
|
| 25 |
+
├── model_loader.py # Carga de modelos (LayoutLMv3 y DocTR)
|
| 26 |
+
├── invoice_processor.py # Procesamiento de facturas individuales
|
| 27 |
+
├── batch_processor.py # Procesamiento por lotes y navegación
|
| 28 |
+
├── interface.py # Interfaz Gradio
|
| 29 |
+
├── requirements.txt # Dependencias
|
| 30 |
+
└── README.md # Este archivo
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
## 🏗️ Arquitectura
|
| 34 |
+
|
| 35 |
+
### 1. **config.py**
|
| 36 |
+
- Define etiquetas NER
|
| 37 |
+
- Mapeo de colores para visualización
|
| 38 |
+
- Constantes de configuración
|
| 39 |
+
|
| 40 |
+
### 2. **model_loader.py**
|
| 41 |
+
- Clase `ModelManager`: Carga y gestiona modelos
|
| 42 |
+
- LayoutLMv3 para NER
|
| 43 |
+
- DocTR para OCR
|
| 44 |
+
- Manejo de dispositivos (CPU/GPU)
|
| 45 |
+
|
| 46 |
+
### 3. **invoice_processor.py**
|
| 47 |
+
- Clase `InvoiceProcessor`: Procesamiento completo de facturas
|
| 48 |
+
- `extract_ocr_data()`: Extracción de texto con DocTR
|
| 49 |
+
- `perform_ner()`: Predicción de entidades con LayoutLMv3
|
| 50 |
+
- `group_entities()`: Agrupación BIO y desduplicación
|
| 51 |
+
- `draw_annotations()`: Visualización de resultados
|
| 52 |
+
- `process_invoice()`: Pipeline completo
|
| 53 |
+
|
| 54 |
+
### 4. **batch_processor.py**
|
| 55 |
+
- Clase `BatchProcessor`: Procesa múltiples facturas
|
| 56 |
+
- Clase `ResultNavigator`: Navegación entre resultados
|
| 57 |
+
- `go_next()`: Siguiente factura
|
| 58 |
+
- `go_prev()`: Factura anterior
|
| 59 |
+
|
| 60 |
+
### 5. **interface.py**
|
| 61 |
+
- Clase `GradioInterface`: Construcción de UI
|
| 62 |
+
- Carga de archivos
|
| 63 |
+
- Visualización de resultados
|
| 64 |
+
- Controles de navegación
|
| 65 |
+
|
| 66 |
+
### 6. **app.py**
|
| 67 |
+
- Inicializa todos los componentes
|
| 68 |
+
- Lanza la aplicación
|
| 69 |
+
|
| 70 |
+
## 🚀 Uso
|
| 71 |
+
|
| 72 |
+
### Instalación
|
| 73 |
+
|
| 74 |
+
```bash
|
| 75 |
+
# Crear entorno virtual
|
| 76 |
+
python -m venv venv
|
| 77 |
+
|
| 78 |
+
# Activar entorno (Windows)
|
| 79 |
+
venv\Scripts\activate
|
| 80 |
+
|
| 81 |
+
# Activar entorno (Linux/Mac)
|
| 82 |
+
source venv/bin/activate
|
| 83 |
+
|
| 84 |
+
# Instalar dependencias
|
| 85 |
+
pip install -r requirements.txt
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
### Ejecución
|
| 89 |
+
|
| 90 |
+
```bash
|
| 91 |
+
python app.py
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
La aplicación se abrirá en `http://localhost:7860`
|
| 95 |
+
|
| 96 |
+
## 📦 Dependencias Principales
|
| 97 |
+
|
| 98 |
+
- `gradio`: Interfaz de usuario
|
| 99 |
+
- `transformers`: LayoutLMv3
|
| 100 |
+
- `torch`: Framework de deep learning
|
| 101 |
+
- `python-doctr`: OCR
|
| 102 |
+
- `Pillow`: Procesamiento de imágenes
|
| 103 |
+
- `numpy`: Operaciones numéricas
|
| 104 |
+
|
| 105 |
+
## 🔧 Personalización
|
| 106 |
+
|
| 107 |
+
### Cambiar modelo
|
| 108 |
+
Edita `HUGGINGFACE_MODEL` en `config.py`:
|
| 109 |
+
```python
|
| 110 |
+
HUGGINGFACE_MODEL = "tu-usuario/tu-modelo"
|
| 111 |
+
```
|
| 112 |
+
|
| 113 |
+
### Añadir nuevas etiquetas
|
| 114 |
+
Modifica `LABEL_LIST` en `config.py`:
|
| 115 |
+
```python
|
| 116 |
+
LABEL_LIST = [
|
| 117 |
+
'B-TU_NUEVA_ETIQUETA',
|
| 118 |
+
'I-TU_NUEVA_ETIQUETA',
|
| 119 |
+
# ...
|
| 120 |
+
]
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
### Cambiar colores
|
| 124 |
+
Ajusta `COLOR_PALETTE` en `config.py`
|
| 125 |
+
|
| 126 |
+
## 🎯 Etiquetas Soportadas
|
| 127 |
+
|
| 128 |
+
- ALICUOTA
|
| 129 |
+
- COMPROBANTE_NUMERO
|
| 130 |
+
- CONCEPTO_GASTO
|
| 131 |
+
- FECHA
|
| 132 |
+
- IVA
|
| 133 |
+
- JURISDICCION_GASTO
|
| 134 |
+
- NETO
|
| 135 |
+
- PROVEEDOR_CUIT
|
| 136 |
+
- PROVEEDOR_RAZON_SOCIAL
|
| 137 |
+
- TIPO
|
| 138 |
+
- TOTAL
|
| 139 |
+
|
| 140 |
+
## 📝 Flujo de Procesamiento
|
| 141 |
+
|
| 142 |
+
1. **Carga**: Usuario sube hasta 10 facturas
|
| 143 |
+
2. **OCR**: DocTR extrae texto y coordenadas
|
| 144 |
+
3. **NER**: LayoutLMv3 predice entidades
|
| 145 |
+
4. **Agrupación**: Sistema BIO agrupa tokens
|
| 146 |
+
5. **Desduplicación**: Selección de mejores candidatos
|
| 147 |
+
6. **Visualización**: Anotación de imagen con bounding boxes
|
| 148 |
+
7. **Navegación**: Usuario explora resultados
|
| 149 |
+
|
| 150 |
+
## 🐛 Solución de Problemas
|
| 151 |
+
|
| 152 |
+
### Error de memoria
|
| 153 |
+
Reduce el tamaño del lote o las imágenes
|
| 154 |
+
|
| 155 |
+
### Modelo no encontrado
|
| 156 |
+
Verifica conexión a internet para descargar desde HuggingFace
|
| 157 |
+
|
| 158 |
+
### Fuente no encontrada
|
| 159 |
+
El sistema usará la fuente por defecto si `arial.ttf` no está disponible
|
| 160 |
+
|
| 161 |
+
## 📄 Licencia
|
| 162 |
+
|
| 163 |
+
Este proyecto utiliza modelos pre-entrenados sujetos a sus respectivas licencias.
|
app.py
CHANGED
|
@@ -1,416 +1,45 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
from transformers import AutoProcessor, LayoutLMv3ForTokenClassification
|
| 6 |
-
from doctr.models import ocr_predictor
|
| 7 |
-
from doctr.io import DocumentFile
|
| 8 |
-
import os
|
| 9 |
-
import warnings
|
| 10 |
-
from io import BytesIO
|
| 11 |
-
warnings.filterwarnings('ignore')
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
|
|
|
| 16 |
|
| 17 |
-
# Define el dispositivo como CPU
|
| 18 |
-
device = torch.device("cpu")
|
| 19 |
-
print(f"Inferencia forzada al dispositivo: {device}")
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
'B-ALICUOTA',
|
| 24 |
-
'B-COMPROBANTE_NUMERO',
|
| 25 |
-
'B-CONCEPTO_GASTO',
|
| 26 |
-
'B-FECHA',
|
| 27 |
-
'B-IVA',
|
| 28 |
-
'B-JURISDICCION_GASTO',
|
| 29 |
-
'B-NETO',
|
| 30 |
-
'B-PROVEEDOR_CUIT',
|
| 31 |
-
'B-PROVEEDOR_RAZON_SOCIAL',
|
| 32 |
-
'B-TIPO',
|
| 33 |
-
'B-TOTAL',
|
| 34 |
-
'I-COMPROBANTE_NUMERO',
|
| 35 |
-
'I-CONCEPTO_GASTO',
|
| 36 |
-
'I-JURISDICCION_GASTO',
|
| 37 |
-
'I-PROVEEDOR_CUIT',
|
| 38 |
-
'I-PROVEEDOR_RAZON_SOCIAL',
|
| 39 |
-
'I-TOTAL',
|
| 40 |
-
'O'
|
| 41 |
-
]
|
| 42 |
-
id2label = {i: label for i, label in enumerate(label_list)}
|
| 43 |
-
label2id = {label: i for i, label in enumerate(label_list)}
|
| 44 |
-
|
| 45 |
-
# Configuración de colores para las cajas delimitadoras
|
| 46 |
-
color_palette = [
|
| 47 |
-
'red', 'blue', 'green', 'purple', 'orange', 'brown', 'pink', 'cyan',
|
| 48 |
-
'lime', 'olive', 'teal', 'magenta', 'navy', 'maroon', 'gold', 'silver',
|
| 49 |
-
'indigo', 'turquoise'
|
| 50 |
-
]
|
| 51 |
-
|
| 52 |
-
root_labels = set()
|
| 53 |
-
for label in label_list:
|
| 54 |
-
if label != 'O':
|
| 55 |
-
root_label = label.split('-', 1)[-1]
|
| 56 |
-
root_labels.add(root_label)
|
| 57 |
-
|
| 58 |
-
label2color = {}
|
| 59 |
-
for i, root_label in enumerate(sorted(list(root_labels))):
|
| 60 |
-
label2color[root_label] = color_palette[i % len(color_palette)]
|
| 61 |
-
|
| 62 |
-
# Cargar el modelo/procesador
|
| 63 |
-
try:
|
| 64 |
-
loaded_processor = AutoProcessor.from_pretrained(HUGGINGFACE_MODEL, apply_ocr=False)
|
| 65 |
-
loaded_model = LayoutLMv3ForTokenClassification.from_pretrained(HUGGINGFACE_MODEL).to(device)
|
| 66 |
-
loaded_model.config.id2label = id2label
|
| 67 |
-
loaded_model.config.label2id = label2id
|
| 68 |
-
print(f"Modelo fine-tuneado cargado exitosamente desde Hugging Face: {HUGGINGFACE_MODEL} en CPU.")
|
| 69 |
-
except Exception as e:
|
| 70 |
-
print(f"Error fatal al cargar el modelo o procesador desde Hugging Face: {e}")
|
| 71 |
-
|
| 72 |
-
# Cargar el predictor OCR de DocTR
|
| 73 |
-
doctr_model = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
|
| 74 |
-
|
| 75 |
-
# --- 2. Funciones de Procesamiento y Navegación ---
|
| 76 |
-
|
| 77 |
-
def process_single_invoice(image: Image.Image, image_filename: str):
|
| 78 |
-
"""
|
| 79 |
-
Realiza OCR, NER, y devuelve los resultados y la imagen anotada para una sola factura.
|
| 80 |
-
Retorna: nombre_archivo, imagen_anotada, tabla_resultados, json_resultados
|
| 81 |
-
"""
|
| 82 |
-
# 1. OCR con DocTR (obtener texto y bboxes)
|
| 83 |
-
try:
|
| 84 |
-
rgb_image = image.convert("RGB")
|
| 85 |
-
img_byte_arr = BytesIO()
|
| 86 |
-
rgb_image.save(img_byte_arr, format='JPEG')
|
| 87 |
-
img_byte_arr.seek(0)
|
| 88 |
-
image_bytes = img_byte_arr.read()
|
| 89 |
-
|
| 90 |
-
doctr_doc = DocumentFile.from_images([image_bytes])
|
| 91 |
-
except Exception as e:
|
| 92 |
-
return image_filename, None, [["ERROR", f"DocTR Error: {e}"]], []
|
| 93 |
-
|
| 94 |
-
doctr_result = doctr_model(doctr_doc)
|
| 95 |
-
|
| 96 |
-
if not doctr_result.pages:
|
| 97 |
-
return image_filename, None, [["ERROR", "DocTR no pudo extraer ninguna página."]], []
|
| 98 |
-
|
| 99 |
-
page = doctr_result.pages[0]
|
| 100 |
-
|
| 101 |
-
words_data = []
|
| 102 |
-
for block in page.blocks:
|
| 103 |
-
for line in block.lines:
|
| 104 |
-
for word in line.words:
|
| 105 |
-
text = word.value
|
| 106 |
-
geom = np.array(word.geometry) * 1000
|
| 107 |
-
xmin, ymin = map(int, geom[0])
|
| 108 |
-
xmax, ymax = map(int, geom[1])
|
| 109 |
-
words_data.append({"text": text, "box": [xmin, ymin, xmax, ymax]})
|
| 110 |
-
|
| 111 |
-
words = [wd["text"] for wd in words_data]
|
| 112 |
-
boxes = [wd["box"] for wd in words_data]
|
| 113 |
-
image_width, image_height = image.size
|
| 114 |
-
|
| 115 |
-
# 2. Preprocesamiento para LayoutLMv3
|
| 116 |
-
encoding = loaded_processor(
|
| 117 |
-
image, words, boxes=boxes, max_length=512, truncation=True,
|
| 118 |
-
padding="max_length", return_tensors="pt"
|
| 119 |
-
)
|
| 120 |
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
pixel_values = encoding["pixel_values"].to(device)
|
| 125 |
-
|
| 126 |
-
# 3. Inferencia del Modelo LayoutLMv3
|
| 127 |
-
loaded_model.eval()
|
| 128 |
-
with torch.no_grad():
|
| 129 |
-
outputs = loaded_model(
|
| 130 |
-
input_ids=input_ids, attention_mask=attention_mask,
|
| 131 |
-
bbox=bbox, pixel_values=pixel_values
|
| 132 |
-
)
|
| 133 |
-
|
| 134 |
-
predictions = outputs.logits.argmax(dim=-1).squeeze().tolist()
|
| 135 |
-
|
| 136 |
-
# Mapeo Correcto de Predicciones a Palabras del OCR
|
| 137 |
-
word_ids = encoding.word_ids()
|
| 138 |
-
predictions_final = []
|
| 139 |
-
current_word_index = None
|
| 140 |
-
|
| 141 |
-
for idx, pred_id in enumerate(predictions):
|
| 142 |
-
word_idx = word_ids[idx]
|
| 143 |
-
if word_idx is not None:
|
| 144 |
-
if word_idx != current_word_index:
|
| 145 |
-
if len(predictions_final) < len(words):
|
| 146 |
-
predictions_final.append(loaded_model.config.id2label[pred_id])
|
| 147 |
-
current_word_index = word_idx
|
| 148 |
|
| 149 |
-
#
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
current_label = None
|
| 153 |
-
current_bbox_group = []
|
| 154 |
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
# Calcular el Bounding Box de la entidad a partir de los BBoxes de las palabras
|
| 159 |
-
all_x = [b[0] for b in bbox_list] + [b[2] for b in bbox_list]
|
| 160 |
-
all_y = [b[1] for b in bbox_list] + [b[3] for b in bbox_list]
|
| 161 |
-
bbox_normalized = [min(all_x), min(all_y), max(all_x), max(all_y)]
|
| 162 |
-
|
| 163 |
-
if label not in ner_candidates:
|
| 164 |
-
ner_candidates[label] = []
|
| 165 |
-
|
| 166 |
-
ner_candidates[label].append({
|
| 167 |
-
'valor': " ".join(entity_list),
|
| 168 |
-
'bbox_entity': bbox_normalized
|
| 169 |
-
})
|
| 170 |
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
tag_parts = pred_label.split('-', 1)
|
| 175 |
-
tag_type = tag_parts[0]
|
| 176 |
-
root_label = tag_parts[1] if len(tag_parts) > 1 else None
|
| 177 |
-
|
| 178 |
-
if tag_type == 'B':
|
| 179 |
-
save_current_entity(current_entity, current_label, current_bbox_group)
|
| 180 |
-
current_label = root_label
|
| 181 |
-
current_entity = [word_text]
|
| 182 |
-
current_bbox_group = [word_box]
|
| 183 |
-
elif tag_type == 'I':
|
| 184 |
-
if current_label == root_label:
|
| 185 |
-
current_entity.append(word_text)
|
| 186 |
-
current_bbox_group.append(word_box)
|
| 187 |
-
else:
|
| 188 |
-
save_current_entity(current_entity, current_label, current_bbox_group)
|
| 189 |
-
current_label = root_label
|
| 190 |
-
current_entity = [word_text]
|
| 191 |
-
current_bbox_group = [word_box]
|
| 192 |
-
elif tag_type == 'O':
|
| 193 |
-
save_current_entity(current_entity, current_label, current_bbox_group)
|
| 194 |
-
current_entity = []
|
| 195 |
-
current_label = None
|
| 196 |
-
current_bbox_group = []
|
| 197 |
-
|
| 198 |
-
save_current_entity(current_entity, current_label, current_bbox_group)
|
| 199 |
-
|
| 200 |
-
# Desduplicación (Seleccionar el valor más largo)
|
| 201 |
-
final_ner_results = []
|
| 202 |
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
best_candidate = sorted_candidates[0]
|
| 208 |
-
|
| 209 |
-
final_ner_results.append({
|
| 210 |
-
'etiqueta': label,
|
| 211 |
-
'valor': best_candidate['valor'],
|
| 212 |
-
'bbox_entity': best_candidate['bbox_entity']
|
| 213 |
-
})
|
| 214 |
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
except IOError:
|
| 222 |
-
font = ImageFont.load_default()
|
| 223 |
-
|
| 224 |
-
for res in final_ner_results:
|
| 225 |
-
label = res['etiqueta']
|
| 226 |
-
min_x_norm, min_y_norm, max_x_norm, max_y_norm = res['bbox_entity']
|
| 227 |
-
|
| 228 |
-
# Desnormalizar el bbox [0-1000] a píxeles
|
| 229 |
-
min_x = int(min_x_norm * image_width / 1000)
|
| 230 |
-
min_y = int(min_y_norm * image_height / 1000)
|
| 231 |
-
max_x = int(max_x_norm * image_width / 1000)
|
| 232 |
-
max_y = int(max_y_norm * image_height / 1000)
|
| 233 |
-
|
| 234 |
-
color = label2color.get(label, 'yellow')
|
| 235 |
-
|
| 236 |
-
draw.rectangle([min_x, min_y, max_x, max_y], outline=color, width=3)
|
| 237 |
-
draw.text((min_x, min_y - 20), label, fill=color, font=font)
|
| 238 |
-
|
| 239 |
-
# 6. Devolver resultados
|
| 240 |
-
table_data = [[res['etiqueta'], res['valor']] for res in final_ner_results]
|
| 241 |
-
json_data = [
|
| 242 |
-
{'etiqueta': r['etiqueta'], 'valor': r['valor'], 'bbox_entity': r['bbox_entity']}
|
| 243 |
-
for r in final_ner_results
|
| 244 |
-
]
|
| 245 |
-
return image_filename, annotated_image, table_data, json_data
|
| 246 |
-
|
| 247 |
-
# --- Funciones de Lote y Navegación (CORREGIDAS) ---
|
| 248 |
-
|
| 249 |
-
def load_and_process_batch(file_list):
|
| 250 |
-
"""
|
| 251 |
-
Carga un lote de archivos, los procesa y devuelve una lista de resultados,
|
| 252 |
-
y los valores crudos para la primera imagen.
|
| 253 |
-
"""
|
| 254 |
-
if not file_list:
|
| 255 |
-
return [], 0, "Por favor, carga al menos un archivo.", None, [], ""
|
| 256 |
-
|
| 257 |
-
results = []
|
| 258 |
-
for i, file in enumerate(file_list):
|
| 259 |
-
try:
|
| 260 |
-
image = Image.open(file.name).convert("RGB")
|
| 261 |
-
filename = os.path.basename(file.name)
|
| 262 |
-
|
| 263 |
-
_, annotated_image, table_data, _ = process_single_invoice(image, filename)
|
| 264 |
-
|
| 265 |
-
results.append({
|
| 266 |
-
"filename": filename,
|
| 267 |
-
"image": annotated_image,
|
| 268 |
-
"table": table_data
|
| 269 |
-
})
|
| 270 |
-
except Exception as e:
|
| 271 |
-
results.append({
|
| 272 |
-
"filename": os.path.basename(file.name),
|
| 273 |
-
"image": None,
|
| 274 |
-
"table": [["ERROR FATAL", f"No se pudo cargar o procesar el archivo: {e}"]]
|
| 275 |
-
})
|
| 276 |
-
|
| 277 |
-
initial_index = 0
|
| 278 |
-
first_result = results[initial_index]
|
| 279 |
-
total_count = len(results)
|
| 280 |
-
|
| 281 |
-
status = f"Procesamiento completado para {total_count} facturas. Mostrando la factura 1 de {total_count}."
|
| 282 |
-
|
| 283 |
-
return (
|
| 284 |
-
results, # all_results_state
|
| 285 |
-
initial_index, # current_index_state
|
| 286 |
-
status, # status_output
|
| 287 |
-
first_result["image"],
|
| 288 |
-
first_result["table"],
|
| 289 |
-
first_result["filename"]
|
| 290 |
-
)
|
| 291 |
-
|
| 292 |
-
def update_ui(all_results, current_index):
|
| 293 |
-
""" Función auxiliar que devuelve los 4 elementos de la interfaz como valores crudos. """
|
| 294 |
-
if not all_results:
|
| 295 |
-
return None, [["Resultado", "Lista vacía"]], "Sin datos", "Sin nombre"
|
| 296 |
-
|
| 297 |
-
current_result = all_results[current_index]
|
| 298 |
-
total_count = len(all_results)
|
| 299 |
-
|
| 300 |
-
status = f"Factura {current_index + 1} de {total_count}."
|
| 301 |
-
|
| 302 |
-
return (
|
| 303 |
-
current_result["image"],
|
| 304 |
-
current_result["table"],
|
| 305 |
-
status,
|
| 306 |
-
current_result["filename"],
|
| 307 |
-
)
|
| 308 |
-
|
| 309 |
-
def go_next(all_results, current_index):
|
| 310 |
-
"""Avanza a la siguiente factura en el lote."""
|
| 311 |
-
if not all_results:
|
| 312 |
-
return 0, None, [["ERROR", "No hay facturas cargadas."]], "Sin datos", "Sin nombre"
|
| 313 |
-
|
| 314 |
-
new_index = (current_index + 1) % len(all_results)
|
| 315 |
-
image, table, status, filename = update_ui(all_results, new_index)
|
| 316 |
-
|
| 317 |
-
return new_index, image, table, status, filename
|
| 318 |
-
|
| 319 |
-
def go_prev(all_results, current_index):
|
| 320 |
-
"""Retrocede a la factura anterior en el lote."""
|
| 321 |
-
if not all_results:
|
| 322 |
-
return 0, None, [["ERROR", "No hay facturas cargadas."]], "Sin datos", "Sin nombre"
|
| 323 |
-
|
| 324 |
-
new_index = (current_index - 1) % len(all_results)
|
| 325 |
-
image, table, status, filename = update_ui(all_results, new_index)
|
| 326 |
-
|
| 327 |
-
return new_index, image, table, status, filename
|
| 328 |
-
|
| 329 |
-
def enable_buttons(all_results):
|
| 330 |
-
"""Habilita los botones de navegación si hay resultados."""
|
| 331 |
-
has_results = len(all_results) > 0
|
| 332 |
-
return gr.update(interactive=has_results), gr.update(interactive=has_results)
|
| 333 |
-
|
| 334 |
-
# --- 3. Interfaz Gradio ---
|
| 335 |
-
|
| 336 |
-
with gr.Blocks(title="NER de Facturas Argentinas por Lote") as demo:
|
| 337 |
-
gr.Markdown(
|
| 338 |
-
f"""
|
| 339 |
-
# 🇦🇷 Extracción de Datos de Facturas Argentinas (Procesamiento por Lote)
|
| 340 |
-
Carga hasta **10 facturas** para su procesamiento.
|
| 341 |
-
Se utiliza **LayoutLMv3** (`{HUGGINGFACE_MODEL}`) y **DocTR** forzando la **ejecución en CPU**.
|
| 342 |
-
"""
|
| 343 |
-
)
|
| 344 |
-
|
| 345 |
-
# Elementos de estado
|
| 346 |
-
all_results_state = gr.State(value=[])
|
| 347 |
-
current_index_state = gr.State(value=0)
|
| 348 |
-
|
| 349 |
-
with gr.Row():
|
| 350 |
-
with gr.Column(scale=1):
|
| 351 |
-
file_input = gr.Files(
|
| 352 |
-
file_count="multiple",
|
| 353 |
-
type="filepath",
|
| 354 |
-
label="Cargar hasta 10 Facturas (Máx. 10 archivos)",
|
| 355 |
-
interactive=True
|
| 356 |
-
)
|
| 357 |
-
process_button = gr.Button("🚀 Procesar Lote de Facturas", variant="primary")
|
| 358 |
-
status_output = gr.Textbox(
|
| 359 |
-
label="Estado del Lote",
|
| 360 |
-
value="Carga tus facturas y haz clic en 'Procesar'",
|
| 361 |
-
interactive=False
|
| 362 |
-
)
|
| 363 |
-
with gr.Column(scale=2):
|
| 364 |
-
filename_output = gr.Textbox(
|
| 365 |
-
label="Nombre de Archivo",
|
| 366 |
-
value="",
|
| 367 |
-
interactive=False,
|
| 368 |
-
visible=True
|
| 369 |
-
)
|
| 370 |
-
image_output = gr.Image(type="pil", label="Factura con Entidades Resaltadas")
|
| 371 |
-
|
| 372 |
-
# Controles de navegación
|
| 373 |
-
with gr.Row():
|
| 374 |
-
prev_button = gr.Button("⬅️ Anterior", interactive=False)
|
| 375 |
-
next_button = gr.Button("Siguiente ➡️", interactive=False)
|
| 376 |
-
|
| 377 |
-
table_output = gr.Dataframe(
|
| 378 |
-
headers=["Etiqueta", "Valor"],
|
| 379 |
-
label="Resultados de NER",
|
| 380 |
-
interactive=False,
|
| 381 |
-
col_count=(2, "fixed")
|
| 382 |
-
)
|
| 383 |
-
|
| 384 |
-
# Lógica de procesamiento de lote
|
| 385 |
-
process_button.click(
|
| 386 |
-
fn=load_and_process_batch,
|
| 387 |
-
inputs=[file_input],
|
| 388 |
-
outputs=[
|
| 389 |
-
all_results_state,
|
| 390 |
-
current_index_state,
|
| 391 |
-
status_output,
|
| 392 |
-
image_output,
|
| 393 |
-
table_output,
|
| 394 |
-
filename_output
|
| 395 |
-
]
|
| 396 |
-
).then(
|
| 397 |
-
fn=enable_buttons,
|
| 398 |
-
inputs=[all_results_state],
|
| 399 |
-
outputs=[prev_button, next_button]
|
| 400 |
-
)
|
| 401 |
-
|
| 402 |
-
# Lógica de navegación
|
| 403 |
-
prev_button.click(
|
| 404 |
-
fn=go_prev,
|
| 405 |
-
inputs=[all_results_state, current_index_state],
|
| 406 |
-
outputs=[current_index_state, image_output, table_output, status_output, filename_output]
|
| 407 |
-
)
|
| 408 |
|
| 409 |
-
next_button.click(
|
| 410 |
-
fn=go_next,
|
| 411 |
-
inputs=[all_results_state, current_index_state],
|
| 412 |
-
outputs=[current_index_state, image_output, table_output, status_output, filename_output]
|
| 413 |
-
)
|
| 414 |
|
| 415 |
-
|
| 416 |
-
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
"""
|
| 3 |
+
Punto de entrada principal de la aplicación
|
| 4 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
+
from model_loader import ModelManager
|
| 7 |
+
from invoice_processor import InvoiceProcessor
|
| 8 |
+
from batch_processor import BatchProcessor, ResultNavigator
|
| 9 |
+
from interface import GradioInterface
|
| 10 |
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
def main():
    """Build every application component in order and launch the Gradio UI.

    Progress is reported on stdout in four numbered steps: model loading,
    invoice processor, batch processor and interface construction.
    """
    rule = "=" * 60

    print(rule)
    print("Iniciando aplicación de extracción de datos de facturas")
    print(rule)

    # Step 1: load the models (CPU execution is requested explicitly).
    print("\n[1/4] Cargando modelos...")
    models = ModelManager(force_cpu=True)

    # Step 2: single-invoice pipeline built on top of the loaded models.
    print("\n[2/4] Inicializando procesador de facturas...")
    single_processor = InvoiceProcessor(models)

    # Step 3: batch wrapper around the single-invoice pipeline.
    print("\n[3/4] Inicializando procesador de lotes...")
    batches = BatchProcessor(single_processor)

    # Step 4: the interface receives the batch processor and the navigator
    # class itself (not an instance).
    print("\n[4/4] Construyendo interfaz Gradio...")
    demo = GradioInterface(batches, ResultNavigator).build_interface()

    print("\n" + rule)
    print("✓ Aplicación lista")
    print(rule + "\n")

    # Start serving the UI.
    demo.launch()


if __name__ == "__main__":
    main()
|
batch_processor.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# batch_processor.py
|
| 2 |
+
"""
|
| 3 |
+
Procesamiento por lotes y navegación de resultados
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
from PIL import Image
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class BatchProcessor:
    """Run the invoice pipeline over a batch of uploaded files.

    Each entry is opened with Pillow, converted to RGB and handed to the
    wrapped invoice processor. A failure on one file is recorded as an error
    row for that file instead of aborting the whole batch.
    """

    def __init__(self, invoice_processor):
        """Store the processor used for every file of a batch.

        Args:
            invoice_processor: Object exposing
                ``process_invoice(image, filename)`` that returns a 4-tuple
                whose second and third items are the annotated image and the
                result table.
        """
        self.invoice_processor = invoice_processor

    @staticmethod
    def _resolve_path(file):
        """Return the filesystem path of one upload entry as a string.

        Plain strings and ``os.PathLike`` objects are used as they are; any
        other object is expected to carry its path in a ``name`` attribute.
        """
        if isinstance(file, (str, os.PathLike)):
            return os.fspath(file)
        return str(getattr(file, "name", file))

    def process_batch(self, file_list):
        """Process every file of the batch and return the data for the UI.

        Args:
            file_list: Iterable of uploaded files. Each entry may be a path
                string, an ``os.PathLike`` or an object with a ``name``
                attribute holding the path. An empty or ``None`` value
                yields the "please upload a file" response.

        Returns:
            tuple: ``(results, initial_index, status, first_image,
            first_table, first_filename)`` where ``results`` is a list of
            dicts with the keys ``"filename"``, ``"image"`` and ``"table"``.
        """
        if not file_list:
            return [], 0, "Por favor, carga al menos un archivo.", None, [], ""

        results = []
        for file in file_list:
            # Resolve the name before the try block so the error branch can
            # never fail while building its own report.
            path = self._resolve_path(file)
            filename = os.path.basename(path)
            try:
                # The context manager closes the file handle right after the
                # pixel data has been copied by convert().
                with Image.open(path) as source:
                    image = source.convert("RGB")

                _, annotated_image, table_data, _ = self.invoice_processor.process_invoice(
                    image, filename
                )
            except Exception as e:
                # Deliberate per-file boundary: one unreadable invoice must
                # not discard the results of the rest of the batch.
                results.append({
                    "filename": filename,
                    "image": None,
                    "table": [["ERROR FATAL", f"No se pudo cargar o procesar: {e}"]]
                })
            else:
                results.append({
                    "filename": filename,
                    "image": annotated_image,
                    "table": table_data
                })

        # The UI starts on the first processed invoice.
        initial_index = 0
        first_result = results[initial_index]
        total_count = len(results)
        status = f"Procesamiento completado para {total_count} facturas. Mostrando factura 1 de {total_count}."

        return (
            results,
            initial_index,
            status,
            first_result["image"],
            first_result["table"],
            first_result["filename"]
        )
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
class ResultNavigator:
    """Stateless helpers for moving through a list of processed results.

    Every result is a dict with the keys ``"image"``, ``"table"`` and
    ``"filename"``. Navigation wraps around at both ends of the list.
    """

    @staticmethod
    def get_result_at_index(all_results, index):
        """Return the display data of the result at ``index``.

        The index is normalised modulo the number of results, so a negative
        or past-the-end value selects the wrapped position and the status
        text always reports a position between 1 and the total.

        Args:
            all_results: List of result dicts.
            index: Position of the wanted result.

        Returns:
            tuple: ``(image, table, status, filename)``; placeholder values
            when ``all_results`` is empty.
        """
        if not all_results:
            return None, [["Resultado", "Lista vacía"]], "Sin datos", "Sin nombre"

        total_count = len(all_results)
        # Keep the lookup and the reported position consistent for any int.
        index %= total_count
        current_result = all_results[index]
        status = f"Factura {index + 1} de {total_count}."

        return (
            current_result["image"],
            current_result["table"],
            status,
            current_result["filename"]
        )

    @staticmethod
    def _step(all_results, current_index, offset):
        """Move ``offset`` positions from ``current_index`` with wrap-around.

        Returns:
            tuple: ``(new_index, image, table, status, filename)``; index 0
            and an error row when ``all_results`` is empty.
        """
        if not all_results:
            return 0, None, [["ERROR", "No hay facturas cargadas."]], "Sin datos", "Sin nombre"

        new_index = (current_index + offset) % len(all_results)
        image, table, status, filename = ResultNavigator.get_result_at_index(
            all_results, new_index
        )
        return new_index, image, table, status, filename

    @staticmethod
    def go_next(all_results, current_index):
        """Advance to the next invoice, wrapping from the last to the first.

        Args:
            all_results: List of result dicts.
            current_index: Index currently shown.

        Returns:
            tuple: ``(new_index, image, table, status, filename)``.
        """
        return ResultNavigator._step(all_results, current_index, 1)

    @staticmethod
    def go_prev(all_results, current_index):
        """Go back to the previous invoice, wrapping from the first to the last.

        Args:
            all_results: List of result dicts.
            current_index: Index currently shown.

        Returns:
            tuple: ``(new_index, image, table, status, filename)``.
        """
        return ResultNavigator._step(all_results, current_index, -1)
|
config.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# config.py
"""
Project configuration and constants.
"""

# Fine-tuned Hugging Face model
HUGGINGFACE_MODEL = "lucasgagneten/layoutlmv3-argentine-invoices"

# Labels used during training. Each label's position in this list becomes
# its class id in ID2LABEL / LABEL2ID, so reordering changes the mapping.
LABEL_LIST = [
    'B-ALICUOTA',
    'B-COMPROBANTE_NUMERO',
    'B-CONCEPTO_GASTO',
    'B-FECHA',
    'B-IVA',
    'B-JURISDICCION_GASTO',
    'B-NETO',
    'B-PROVEEDOR_CUIT',
    'B-PROVEEDOR_RAZON_SOCIAL',
    'B-TIPO',
    'B-TOTAL',
    'I-COMPROBANTE_NUMERO',
    'I-CONCEPTO_GASTO',
    'I-JURISDICCION_GASTO',
    'I-PROVEEDOR_CUIT',
    'I-PROVEEDOR_RAZON_SOCIAL',
    'I-TOTAL',
    'O'
]

# Label mappings
ID2LABEL = {i: label for i, label in enumerate(LABEL_LIST)}
LABEL2ID = {label: i for i, label in enumerate(LABEL_LIST)}

# Colors available for the bounding boxes
COLOR_PALETTE = [
    'red', 'blue', 'green', 'purple', 'orange', 'brown', 'pink', 'cyan',
    'lime', 'olive', 'teal', 'magenta', 'navy', 'maroon', 'gold', 'silver',
    'indigo', 'turquoise'
]


def get_label_colors(label_list=None, palette=None):
    """
    Build the mapping from root labels to colors.

    A root label is a BIO label without its ``B-``/``I-`` prefix; the ``'O'``
    label is skipped. Root labels are sorted alphabetically and colors are
    assigned in that order, cycling through the palette when there are more
    labels than colors.

    Args:
        label_list: Labels in BIO format. Defaults to LABEL_LIST.
        palette: Non-empty sequence of color names. Defaults to COLOR_PALETTE.

    Returns:
        dict: root label -> color name
    """
    labels = LABEL_LIST if label_list is None else label_list
    colors = COLOR_PALETTE if palette is None else palette

    # A set removes the duplicates produced by B-/I- variants of one entity.
    root_labels = {label.split('-', 1)[-1] for label in labels if label != 'O'}

    return {
        root_label: colors[i % len(colors)]
        for i, root_label in enumerate(sorted(root_labels))
    }


LABEL2COLOR = get_label_colors()

# Processing settings
MAX_LENGTH = 512
NORMALIZATION_FACTOR = 1000  # Scale used to normalize bbox coordinates
|
interface.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# interface.py
|
| 2 |
+
"""
|
| 3 |
+
Interfaz de usuario con Gradio
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import gradio as gr
|
| 7 |
+
from config import HUGGINGFACE_MODEL
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class GradioInterface:
    """Builds and wires the Gradio user interface."""

    def __init__(self, batch_processor, result_navigator):
        """
        Initialize the interface.

        Args:
            batch_processor: BatchProcessor instance
            result_navigator: ResultNavigator class (not an instance)
        """
        self.batch_processor = batch_processor
        self.navigator = result_navigator

    def enable_buttons(self, all_results):
        """
        Enable the navigation buttons when there are results to browse.

        Args:
            all_results: Current value of the results state; an empty list or
                None leaves both buttons disabled.

        Returns:
            tuple: Gradio updates for (prev_button, next_button)
        """
        # Truthiness also covers a None state, where len() would raise.
        has_results = bool(all_results)
        return gr.update(interactive=has_results), gr.update(interactive=has_results)

    def build_interface(self):
        """
        Build and return the Gradio interface.

        Returns:
            gr.Blocks: Configured Gradio interface
        """
        with gr.Blocks(title="NER de Facturas Argentinas por Lote") as demo:
            gr.Markdown(
                f"""
                # 🇦🇷 Extracción de Datos de Facturas Argentinas (Procesamiento por Lote)
                Carga hasta **10 facturas** para su procesamiento.
                Se utiliza **LayoutLMv3** (`{HUGGINGFACE_MODEL}`) y **DocTR** forzando la **ejecución en CPU**.
                """
            )

            # Session state: every processed result and the one being shown
            all_results_state = gr.State(value=[])
            current_index_state = gr.State(value=0)

            # Upload and processing section
            with gr.Row():
                # Left column: file upload
                with gr.Column(scale=1):
                    file_input = gr.Files(
                        file_count="multiple",
                        type="filepath",
                        label="📂 Cargar hasta 10 Facturas (Máx. 10 archivos)",
                        interactive=True
                    )

                # Right column: process button and status
                with gr.Column(scale=1):
                    process_button = gr.Button(
                        "🚀 Procesar Lote de Facturas",
                        variant="primary",
                        size="lg"
                    )
                    status_output = gr.Textbox(
                        label="📊 Estado del Procesamiento",
                        value="Carga tus facturas y haz clic en 'Procesar'",
                        interactive=False
                    )

            gr.Markdown("---")

            # Results section: image on the left, data on the right
            with gr.Row():
                # Left column: annotated image
                with gr.Column(scale=1):
                    image_output = gr.Image(
                        type="pil",
                        label="🖼️ Factura con Entidades Resaltadas"
                    )

                # Right column: file name, navigation and extracted table
                with gr.Column(scale=1):
                    filename_output = gr.Textbox(
                        label="📄 Nombre de Archivo",
                        value="",
                        interactive=False,
                        visible=True
                    )

                    # Navigation controls; disabled until a batch is processed
                    with gr.Row():
                        prev_button = gr.Button(
                            "⬅️ Anterior",
                            interactive=False,
                            size="lg"
                        )
                        next_button = gr.Button(
                            "Siguiente ➡️",
                            interactive=False,
                            size="lg"
                        )

                    table_output = gr.Dataframe(
                        headers=["Etiqueta", "Valor"],
                        label="📋 Resultados de NER",
                        interactive=False,
                        col_count=(2, "fixed")
                    )

            # Events
            process_button.click(
                fn=self.batch_processor.process_batch,
                inputs=[file_input],
                outputs=[
                    all_results_state,
                    current_index_state,
                    status_output,
                    image_output,
                    table_output,
                    filename_output
                ]
            ).then(
                # Runs after processing so the buttons reflect the new state.
                fn=self.enable_buttons,
                inputs=[all_results_state],
                outputs=[prev_button, next_button]
            )

            # Both navigation handlers return
            # (new_index, image, table, status, filename), so they share the
            # same input and output wiring.
            nav_inputs = [all_results_state, current_index_state]
            nav_outputs = [
                current_index_state,
                image_output,
                table_output,
                status_output,
                filename_output
            ]

            prev_button.click(
                fn=self.navigator.go_prev,
                inputs=nav_inputs,
                outputs=nav_outputs
            )

            next_button.click(
                fn=self.navigator.go_next,
                inputs=nav_inputs,
                outputs=nav_outputs
            )

        return demo
|
invoice_processor.py
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# invoice_processor.py
|
| 2 |
+
"""
|
| 3 |
+
Procesamiento de facturas: OCR, NER y visualización
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 8 |
+
import torch
|
| 9 |
+
from doctr.io import DocumentFile
|
| 10 |
+
from io import BytesIO
|
| 11 |
+
from config import LABEL2COLOR, MAX_LENGTH, NORMALIZATION_FACTOR
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class InvoiceProcessor:
    """Processes invoice images: OCR, NER and visualization."""

    def __init__(self, model_manager):
        """
        Initialize the invoice processor.

        Args:
            model_manager: ModelManager instance with the models already loaded
        """
        self.model_manager = model_manager
        self.processor = model_manager.get_processor()
        self.model = model_manager.get_model()
        self.ocr_model = model_manager.get_ocr_model()
        self.device = model_manager.get_device()

    def extract_ocr_data(self, image: Image.Image):
        """
        Extract words and bounding boxes with DocTR.

        Only the first page of the OCR result is read. Box coordinates are
        scaled by NORMALIZATION_FACTOR and clamped to
        [0, NORMALIZATION_FACTOR].

        Args:
            image: PIL image of the invoice

        Returns:
            tuple: (words_data, image_width, image_height), or
            (None, None, None) when OCR fails or yields no pages.
            words_data is a list of {"text": str, "box": [xmin, ymin, xmax, ymax]}.
        """
        try:
            # The image is handed to DocTR as JPEG bytes; JPEG has no alpha
            # channel, hence the RGB conversion.
            rgb_image = image.convert("RGB")
            img_byte_arr = BytesIO()
            rgb_image.save(img_byte_arr, format='JPEG')
            image_bytes = img_byte_arr.getvalue()

            doctr_doc = DocumentFile.from_images([image_bytes])
            doctr_result = self.ocr_model(doctr_doc)

            if not doctr_result.pages:
                return None, None, None

            page = doctr_result.pages[0]
            words_data = []

            for block in page.blocks:
                for line in block.lines:
                    for word in line.words:
                        # Clamp so a box slightly outside the page cannot
                        # produce coordinates outside the normalized range.
                        geom = np.clip(
                            np.array(word.geometry) * NORMALIZATION_FACTOR,
                            0,
                            NORMALIZATION_FACTOR,
                        )
                        xmin, ymin = map(int, geom[0])
                        xmax, ymax = map(int, geom[1])
                        words_data.append({"text": word.value, "box": [xmin, ymin, xmax, ymax]})

            image_width, image_height = image.size
            return words_data, image_width, image_height

        except Exception as e:
            # Best-effort: the caller turns the None result into an error row.
            print(f"Error en OCR: {e}")
            return None, None, None

    def perform_ner(self, image: Image.Image, words_data: list):
        """
        Run NER over the extracted words.

        Each word takes the label predicted for its first sub-token. Words
        that received no token (for example, those dropped by truncation at
        MAX_LENGTH) are labelled 'O'.

        Args:
            image: PIL image
            words_data: List of dicts with 'text' and 'box'

        Returns:
            list: One label string per entry of words_data, in the same order
        """
        words = [wd["text"] for wd in words_data]
        boxes = [wd["box"] for wd in words_data]

        # Preprocessing
        encoding = self.processor(
            image, words, boxes=boxes, max_length=MAX_LENGTH,
            truncation=True, padding="max_length", return_tensors="pt"
        )

        input_ids = encoding["input_ids"].to(self.device)
        attention_mask = encoding["attention_mask"].to(self.device)
        bbox = encoding["bbox"].to(self.device)
        pixel_values = encoding["pixel_values"].to(self.device)

        # Inference
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                bbox=bbox,
                pixel_values=pixel_values
            )

        # Index the single batch element explicitly instead of squeeze().
        token_predictions = outputs.logits.argmax(dim=-1)[0].tolist()

        # Map token predictions back to words by word index, so a word that
        # produced no tokens cannot shift the labels of the words after it.
        word_ids = encoding.word_ids()
        predictions_final = ['O'] * len(words)
        seen_words = set()

        for word_idx, pred_id in zip(word_ids, token_predictions):
            if word_idx is None or word_idx in seen_words:
                continue
            seen_words.add(word_idx)
            predictions_final[word_idx] = self.model.config.id2label[pred_id]

        return predictions_final

    def group_entities(self, words_data: list, predictions: list):
        """
        Group words into entities using the BIO scheme, then deduplicate.

        Consecutive words are merged while their tags continue the same
        entity. When a label ends up with several candidate entities, only
        the one with the longest text is kept (the earliest wins a tie).

        Args:
            words_data: List of words with their bounding boxes
            predictions: NER label for each word

        Returns:
            list: Final entities as dicts with 'etiqueta', 'valor' and
            'bbox_entity'
        """
        ner_candidates = {}
        current_entity = []
        current_label = None
        current_bbox_group = []

        def save_current_entity(entity_list, label, bbox_list):
            """Store the entity being built as a candidate for its label."""
            if not entity_list or not label:
                return

            # The entity box is the smallest box enclosing every word box.
            all_x = [b[0] for b in bbox_list] + [b[2] for b in bbox_list]
            all_y = [b[1] for b in bbox_list] + [b[3] for b in bbox_list]
            bbox_normalized = [min(all_x), min(all_y), max(all_x), max(all_y)]

            ner_candidates.setdefault(label, []).append({
                'valor': " ".join(entity_list),
                'bbox_entity': bbox_normalized
            })

        for word_data, pred_label in zip(words_data, predictions):
            word_text = word_data["text"]
            word_box = word_data["box"]
            tag_parts = pred_label.split('-', 1)
            tag_type = tag_parts[0]
            root_label = tag_parts[1] if len(tag_parts) > 1 else None

            if tag_type == 'B':
                # A B- tag always starts a new entity.
                save_current_entity(current_entity, current_label, current_bbox_group)
                current_label = root_label
                current_entity = [word_text]
                current_bbox_group = [word_box]
            elif tag_type == 'I':
                if current_label == root_label:
                    current_entity.append(word_text)
                    current_bbox_group.append(word_box)
                else:
                    # An I- tag without a matching open entity is treated as
                    # the start of a new one.
                    save_current_entity(current_entity, current_label, current_bbox_group)
                    current_label = root_label
                    current_entity = [word_text]
                    current_bbox_group = [word_box]
            elif tag_type == 'O':
                save_current_entity(current_entity, current_label, current_bbox_group)
                current_entity = []
                current_label = None
                current_bbox_group = []

        # Flush the entity still open when the words run out.
        save_current_entity(current_entity, current_label, current_bbox_group)

        # Deduplication: keep the longest value per label
        final_ner_results = []
        for label, candidates in ner_candidates.items():
            if not candidates:
                continue
            # max() returns the first maximal element, so ties go to the
            # earliest candidate.
            best_candidate = max(candidates, key=lambda x: len(x['valor']))
            final_ner_results.append({
                'etiqueta': label,
                'valor': best_candidate['valor'],
                'bbox_entity': best_candidate['bbox_entity']
            })

        return final_ner_results

    def draw_annotations(self, image: Image.Image, entities: list):
        """
        Draw bounding boxes and labels on a copy of the image.

        Args:
            image: Original PIL image (left unmodified)
            entities: List of entities with 'etiqueta' and 'bbox_entity'

        Returns:
            Image: Annotated copy of the image
        """
        annotated_image = image.copy()
        draw = ImageDraw.Draw(annotated_image)
        image_width, image_height = image.size

        try:
            font = ImageFont.truetype("arial.ttf", 20)
        except OSError:
            # Fall back to the built-in font when arial.ttf cannot be loaded.
            font = ImageFont.load_default()

        for entity in entities:
            label = entity['etiqueta']
            min_x_norm, min_y_norm, max_x_norm, max_y_norm = entity['bbox_entity']

            # Convert normalized coordinates back to pixels
            min_x = int(min_x_norm * image_width / NORMALIZATION_FACTOR)
            min_y = int(min_y_norm * image_height / NORMALIZATION_FACTOR)
            max_x = int(max_x_norm * image_width / NORMALIZATION_FACTOR)
            max_y = int(max_y_norm * image_height / NORMALIZATION_FACTOR)

            color = LABEL2COLOR.get(label, 'yellow')

            draw.rectangle([min_x, min_y, max_x, max_y], outline=color, width=3)
            # The label goes just above the box; clamp it so boxes near the
            # top edge do not push the text off the image.
            draw.text((min_x, max(min_y - 20, 0)), label, fill=color, font=font)

        return annotated_image

    def process_invoice(self, image: Image.Image, filename: str):
        """
        Process one invoice end to end: OCR + NER + visualization.

        Args:
            image: PIL image of the invoice
            filename: Name of the file

        Returns:
            tuple: (filename, annotated_image, table_data, json_data). On
            failure annotated_image is None, table_data holds a single
            ["ERROR", message] row and json_data is empty.
        """
        # 1. OCR
        words_data, image_width, image_height = self.extract_ocr_data(image)
        if words_data is None:
            return filename, None, [["ERROR", "No se pudo realizar OCR"]], []

        if not words_data:
            return filename, None, [["ERROR", "No se encontró texto en la imagen"]], []

        # 2. NER
        try:
            predictions = self.perform_ner(image, words_data)
        except Exception as e:
            return filename, None, [["ERROR", f"Error en NER: {e}"]], []

        # 3. Group entities
        entities = self.group_entities(words_data, predictions)

        # 4. Draw annotations
        annotated_image = self.draw_annotations(image, entities)

        # 5. Prepare results
        table_data = [[e['etiqueta'], e['valor']] for e in entities]
        json_data = [
            {
                'etiqueta': e['etiqueta'],
                'valor': e['valor'],
                'bbox_entity': e['bbox_entity']
            }
            for e in entities
        ]

        return filename, annotated_image, table_data, json_data
|
model_loader.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# model_loader.py
|
| 2 |
+
"""
|
| 3 |
+
Carga y gestión de modelos (LayoutLMv3 y DocTR)
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
from transformers import AutoProcessor, LayoutLMv3ForTokenClassification
|
| 8 |
+
from doctr.models import ocr_predictor
|
| 9 |
+
import warnings
|
| 10 |
+
from config import HUGGINGFACE_MODEL, ID2LABEL, LABEL2ID
|
| 11 |
+
|
| 12 |
+
# Silence every Python warning for the whole process, not only for this module.
warnings.filterwarnings('ignore')
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class ModelManager:
    """Loads the LayoutLMv3 and DocTR models and gives access to them."""

    def __init__(self, force_cpu=True):
        """
        Pick the inference device and load every required model.

        Args:
            force_cpu (bool): When True, inference always runs on the CPU;
                otherwise CUDA is used if it is available.
        """
        if force_cpu or not torch.cuda.is_available():
            device_name = "cpu"
        else:
            device_name = "cuda"
        self.device = torch.device(device_name)
        print(f"Inferencia en dispositivo: {self.device}")

        # LayoutLMv3 (processor + token-classification model)
        self.processor, self.model = self._load_layoutlmv3()

        # DocTR OCR predictor
        self.ocr_model = self._load_doctr()

    def _load_layoutlmv3(self):
        """
        Load the LayoutLMv3 model together with its processor.

        Returns:
            tuple: (processor, model), with the model moved to self.device

        Raises:
            Exception: Any loading error is printed and re-raised unchanged.
        """
        try:
            # apply_ocr=False: words and boxes are supplied by the caller.
            layout_processor = AutoProcessor.from_pretrained(HUGGINGFACE_MODEL, apply_ocr=False)
            layout_model = LayoutLMv3ForTokenClassification.from_pretrained(HUGGINGFACE_MODEL)
            layout_model = layout_model.to(self.device)
            # Use the project's label mappings instead of the stored ones.
            layout_model.config.id2label = ID2LABEL
            layout_model.config.label2id = LABEL2ID
            print(f"✓ Modelo LayoutLMv3 cargado: {HUGGINGFACE_MODEL}")
        except Exception as error:
            print(f"✗ Error al cargar LayoutLMv3: {error}")
            raise
        return layout_processor, layout_model

    def _load_doctr(self):
        """
        Load the DocTR OCR predictor.

        Returns:
            The pretrained predictor built by ``ocr_predictor``

        Raises:
            Exception: Any loading error is printed and re-raised unchanged.
        """
        predictor_options = {
            "det_arch": 'db_resnet50',
            "reco_arch": 'crnn_vgg16_bn',
            "pretrained": True,
        }
        try:
            predictor = ocr_predictor(**predictor_options)
            print("✓ Modelo DocTR cargado")
        except Exception as error:
            print(f"✗ Error al cargar DocTR: {error}")
            raise
        return predictor

    def get_processor(self):
        """Return the LayoutLMv3 processor."""
        return self.processor

    def get_model(self):
        """Return the LayoutLMv3 model."""
        return self.model

    def get_ocr_model(self):
        """Return the DocTR OCR model."""
        return self.ocr_model

    def get_device(self):
        """Return the device used for inference."""
        return self.device
|
requirements.txt
CHANGED
|
@@ -1,14 +1,7 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
transformers>=4.30.0 # Librería principal para LayoutLMv3
|
| 9 |
-
|
| 10 |
-
# --- Requerimientos de PyTorch (Ajustado) ---
|
| 11 |
-
# Se recomienda encarecidamente instalar PyTorch CPU por separado
|
| 12 |
-
# para asegurar la versión correcta, pero si usas pip, puedes usar:
|
| 13 |
-
torch>=2.0.0 # PyTorch (necesario para la inferencia de modelos)
|
| 14 |
-
matplotlib # Para visualización (ya añadida)
|
|
|
|
| 1 |
+
gradio>=4.0.0
|
| 2 |
+
transformers>=4.30.0
|
| 3 |
+
torch>=2.0.0
|
| 4 |
+
torchvision>=0.15.0
|
| 5 |
+
python-doctr>=0.6.0
|
| 6 |
+
Pillow>=9.0.0
|
| 7 |
+
numpy>=1.24.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|