Spaces:

lightonai
/

LightOnOCR-1B-Demo-zero

Running on Zero

App Files Files Community

Bapt120 committed on Nov 13

Commit

3654ed1

verified ·

1 Parent(s): 6807791

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -27

app.py CHANGED Viewed

@@ -1,13 +1,11 @@
 #!/usr/bin/env python3
 import subprocess
 import sys
 import spaces
 import torch
 import gradio as gr
 from PIL import Image
 from io import BytesIO
@@ -15,6 +13,7 @@ import pypdfium2 as pdfium
 from transformers import (
     LightOnOCRForConditionalGeneration,
     LightOnOCRProcessor,
 )
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -68,8 +67,35 @@ def process_pdf(pdf_path, page_num=1):
     return img, total_pages, page_idx + 1
 @spaces.GPU
-def extract_text_from_image(image, temperature=0.2):
     """Extract text from image using LightOnOCR model."""
     # Prepare the chat format
     chat = [
@@ -98,26 +124,55 @@ def extract_text_from_image(image, temperature=0.2):
         for k, v in inputs.items()
     }
-    # Generate text with appropriate settings
-    with torch.no_grad():  # Disable gradients for inference
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=2048,
-            temperature=temperature if temperature > 0 else 0.0,
-            use_cache=True,
-            do_sample=temperature > 0,
-        )
-    # Decode the output
-    output_text = processor.decode(outputs[0], skip_special_tokens=True)
-    return output_text
 def process_input(file_input, temperature, page_num):
-    """Process uploaded file (image or PDF) and extract text."""
     if file_input is None:
-        return "Please upload an image or PDF first.", "", "", None, gr.update()
     image_to_process = None
     page_info = ""
@@ -130,24 +185,25 @@ def process_input(file_input, temperature, page_num):
             image_to_process, total_pages, actual_page = process_pdf(file_path, int(page_num))
             page_info = f"Processing page {actual_page} of {total_pages}"
         except Exception as e:
-            return f"Error processing PDF: {str(e)}", "", "", None, gr.update()
     # Handle image files
     else:
         try:
             image_to_process = Image.open(file_path)
             page_info = "Processing image"
         except Exception as e:
-            return f"Error opening image: {str(e)}", "", "", None, gr.update()
     try:
-        # Extract text using LightOnOCR
-        extracted_text = extract_text_from_image(image_to_process, temperature)
-        return extracted_text, extracted_text, page_info, image_to_process, gr.update()
     except Exception as e:
         error_msg = f"Error during text extraction: {str(e)}"
-        return error_msg, error_msg, page_info, image_to_process, gr.update()
 def update_slider(file_input):
@@ -178,7 +234,7 @@ with gr.Blocks(title="📖 Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft(
 1. Upload an image or PDF
 2. For PDFs: select which page to extract (1-20)
 3. Adjust temperature if needed (0.0 for deterministic, higher for more varied output)
-4. Click "Extract Text"
 **Note:** The Markdown rendering for tables may not always be perfect. Check the raw output for complex tables!

 #!/usr/bin/env python3
 import subprocess
 import sys
+import threading
 import spaces
 import torch
 import gradio as gr
 from PIL import Image
 from io import BytesIO
 from transformers import (
     LightOnOCRForConditionalGeneration,
     LightOnOCRProcessor,
+    TextIteratorStreamer,
 )
 device = "cuda" if torch.cuda.is_available() else "cpu"
     return img, total_pages, page_idx + 1
def clean_output_text(text: str) -> str:
    """Remove chat template artifacts from decoded model output.

    Decoding with ``skip_special_tokens=True`` can leave the bare role names
    of the chat template ("system", "user", "assistant") behind as lines of
    their own. This helper strips them so only the generated text remains.

    Behavior:
      * If a line consisting solely of ``assistant`` is present, everything
        after the first such line is returned (the prompt echo is dropped).
      * Otherwise, lines that consist solely of a role name (compared
        case-insensitively) are removed and the remaining lines are kept.

    The word "assistant" occurring *inside* a line of content (for example
    "The assistant manager") is never treated as a marker, so document text
    is not truncated.

    Args:
        text: Decoded (possibly partial, when streaming) model output.

    Returns:
        The cleaned text with leading/trailing whitespace stripped.
    """
    role_markers = {"system", "user", "assistant"}
    lines = text.split('\n')

    # Only a standalone "assistant" line marks the start of the response.
    # Matching the bare substring would cut any document that merely
    # mentions the word.
    for index, line in enumerate(lines):
        if line.strip() == "assistant":
            return '\n'.join(lines[index + 1:]).strip()

    # No response marker found: drop any lines that are just role names.
    kept_lines = [
        line for line in lines
        if line.strip().lower() not in role_markers
    ]
    return '\n'.join(kept_lines).strip()
 @spaces.GPU
+def extract_text_from_image(image, temperature=0.2, stream=False):
     """Extract text from image using LightOnOCR model."""
     # Prepare the chat format
     chat = [
         for k, v in inputs.items()
     }
+    generation_kwargs = dict(
+        **inputs,
+        max_new_tokens=2048,
+        temperature=temperature if temperature > 0 else 0.0,
+        use_cache=True,
+        do_sample=temperature > 0,
+    )
+    if stream:
+        # Setup streamer for streaming generation
+        streamer = TextIteratorStreamer(
+            processor.tokenizer,
+            skip_prompt=True,
+            skip_special_tokens=True
+        )
+        generation_kwargs["streamer"] = streamer
+        # Run generation in a separate thread
+        thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
+        thread.start()
+        # Yield chunks as they arrive
+        full_text = ""
+        for new_text in streamer:
+            full_text += new_text
+            # Clean the accumulated text
+            cleaned_text = clean_output_text(full_text)
+            yield cleaned_text
+        thread.join()
+    else:
+        # Non-streaming generation
+        with torch.no_grad():
+            outputs = model.generate(**generation_kwargs)
+        # Decode the output
+        output_text = processor.decode(outputs[0], skip_special_tokens=True)
+        # Clean the output
+        cleaned_text = clean_output_text(output_text)
+        yield cleaned_text
 def process_input(file_input, temperature, page_num):
+    """Process uploaded file (image or PDF) and extract text with streaming."""
     if file_input is None:
+        yield "Please upload an image or PDF first.", "", "", None, gr.update()
+        return
     image_to_process = None
     page_info = ""
             image_to_process, total_pages, actual_page = process_pdf(file_path, int(page_num))
             page_info = f"Processing page {actual_page} of {total_pages}"
         except Exception as e:
+            yield f"Error processing PDF: {str(e)}", "", "", None, gr.update()
+            return
     # Handle image files
     else:
         try:
             image_to_process = Image.open(file_path)
             page_info = "Processing image"
         except Exception as e:
+            yield f"Error opening image: {str(e)}", "", "", None, gr.update()
+            return
     try:
+        # Extract text using LightOnOCR with streaming
+        for extracted_text in extract_text_from_image(image_to_process, temperature, stream=True):
+            yield extracted_text, extracted_text, page_info, image_to_process, gr.update()
     except Exception as e:
         error_msg = f"Error during text extraction: {str(e)}"
+        yield error_msg, error_msg, page_info, image_to_process, gr.update()
 def update_slider(file_input):
 1. Upload an image or PDF
 2. For PDFs: select which page to extract (1-20)
 3. Adjust temperature if needed (0.0 for deterministic, higher for more varied output)
+4. Click "Extract Text" (now with streaming! ✨)
 **Note:** The Markdown rendering for tables may not always be perfect. Check the raw output for complex tables!