Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -245,101 +245,122 @@ def update_slider(file_input):
|
|
| 245 |
|
| 246 |
|
| 247 |
# Create Gradio interface
|
| 248 |
-
with gr.Blocks(title="π Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft()) as demo:
|
| 249 |
-
|
| 250 |
-
# π Image/PDF to Text Extraction with LightOnOCR
|
| 251 |
-
|
| 252 |
-
**💡 How to use:**
|
| 253 |
-
1. Upload an image or PDF
|
| 254 |
-
2. For PDFs: select which page to extract (1-20)
|
| 255 |
-
3. Adjust temperature if needed
|
| 256 |
-
4. Click "Extract Text"
|
| 257 |
-
|
| 258 |
-
**Note:** The Markdown rendering for tables may not always be perfect. Check the raw output for complex tables!
|
| 259 |
-
|
| 260 |
-
**Model:** LightOnOCR-1B-1025 by LightOn AI
|
| 261 |
-
**Device:** {device.upper()}
|
| 262 |
-
**Attention:** {attn_implementation}
|
| 263 |
-
""")
|
| 264 |
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
)
|
| 315 |
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
|
| 326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
submit_btn.click(
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
)
|
| 332 |
-
|
| 333 |
-
file_input.change(
|
| 334 |
-
fn=update_slider,
|
| 335 |
-
inputs=[file_input],
|
| 336 |
-
outputs=[num_pages]
|
| 337 |
)
|
|
|
|
| 338 |
|
| 339 |
-
clear_btn.click(
|
| 340 |
-
fn=lambda: (None, "*Extracted text will appear here...*", "", "", None, 1),
|
| 341 |
-
outputs=[file_input, output_text, raw_output, page_info, rendered_image, num_pages]
|
| 342 |
-
)
|
| 343 |
|
| 344 |
|
| 345 |
if __name__ == "__main__":
|
|
|
|
| 245 |
|
| 246 |
|
| 247 |
# Create Gradio interface
|
| 248 |
+
# with gr.Blocks(title="π Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft()) as demo:
|
| 249 |
+
# gr.Markdown(f"""
|
| 250 |
+
# # π Image/PDF to Text Extraction with LightOnOCR
|
| 251 |
+
|
| 252 |
+
# **💡 How to use:**
|
| 253 |
+
# 1. Upload an image or PDF
|
| 254 |
+
# 2. For PDFs: select which page to extract (1-20)
|
| 255 |
+
# 3. Adjust temperature if needed
|
| 256 |
+
# 4. Click "Extract Text"
|
| 257 |
+
|
| 258 |
+
# **Note:** The Markdown rendering for tables may not always be perfect. Check the raw output for complex tables!
|
| 259 |
+
|
| 260 |
+
# **Model:** LightOnOCR-1B-1025 by LightOn AI
|
| 261 |
+
# **Device:** {device.upper()}
|
| 262 |
+
# **Attention:** {attn_implementation}
|
| 263 |
+
# """)
|
| 264 |
|
| 265 |
+
# with gr.Row():
|
| 266 |
+
# with gr.Column(scale=1):
|
| 267 |
+
# file_input = gr.File(
|
| 268 |
+
# label="🖼️ Upload Image or PDF",
|
| 269 |
+
# file_types=[".pdf", ".png", ".jpg", ".jpeg"],
|
| 270 |
+
# type="filepath"
|
| 271 |
+
# )
|
| 272 |
+
# rendered_image = gr.Image(
|
| 273 |
+
# label="π Preview",
|
| 274 |
+
# type="pil",
|
| 275 |
+
# height=400,
|
| 276 |
+
# interactive=False
|
| 277 |
+
# )
|
| 278 |
+
# num_pages = gr.Slider(
|
| 279 |
+
# minimum=1,
|
| 280 |
+
# maximum=20,
|
| 281 |
+
# value=1,
|
| 282 |
+
# step=1,
|
| 283 |
+
# label="PDF: Page Number",
|
| 284 |
+
# info="Select which page to extract"
|
| 285 |
+
# )
|
| 286 |
+
# page_info = gr.Textbox(
|
| 287 |
+
# label="Processing Info",
|
| 288 |
+
# value="",
|
| 289 |
+
# interactive=False
|
| 290 |
+
# )
|
| 291 |
+
# temperature = gr.Slider(
|
| 292 |
+
# minimum=0.0,
|
| 293 |
+
# maximum=1.0,
|
| 294 |
+
# value=0.2,
|
| 295 |
+
# step=0.05,
|
| 296 |
+
# label="Temperature",
|
| 297 |
+
# info="0.0 = deterministic, Higher = more varied"
|
| 298 |
+
# )
|
| 299 |
+
# submit_btn = gr.Button("Extract Text", variant="primary")
|
| 300 |
+
# clear_btn = gr.Button("Clear", variant="secondary")
|
| 301 |
|
| 302 |
+
# with gr.Column(scale=2):
|
| 303 |
+
# output_text = gr.Markdown(
|
| 304 |
+
# label="π Extracted Text (Rendered)",
|
| 305 |
+
# value="*Extracted text will appear here...*"
|
| 306 |
+
# )
|
| 307 |
+
# medications_output = gr.Textbox(
|
| 308 |
+
# label="π Extracted Medicines/Drugs",
|
| 309 |
+
# placeholder="Medicine/drug names will appear here...",
|
| 310 |
+
# lines=2,
|
| 311 |
+
# max_lines=5,
|
| 312 |
+
# interactive=False,
|
| 313 |
+
# show_copy_button=True
|
| 314 |
+
# )
|
| 315 |
|
| 316 |
+
# with gr.Row():
|
| 317 |
+
# with gr.Column():
|
| 318 |
+
# raw_output = gr.Textbox(
|
| 319 |
+
# label="Raw Markdown Output",
|
| 320 |
+
# placeholder="Raw text will appear here...",
|
| 321 |
+
# lines=20,
|
| 322 |
+
# max_lines=30,
|
| 323 |
+
# show_copy_button=True
|
| 324 |
+
# )
|
| 325 |
|
| 326 |
+
# # Event handlers
|
| 327 |
+
# submit_btn.click(
|
| 328 |
+
# fn=process_input,
|
| 329 |
+
# inputs=[file_input, temperature, num_pages, ],
|
| 330 |
+
# outputs=[output_text, medications_output, raw_output, page_info, rendered_image, num_pages]
|
| 331 |
+
# )
|
| 332 |
+
|
| 333 |
+
with gr.Blocks(title="π Medicine Extraction", theme=gr.themes.Soft()) as demo:
|
| 334 |
+
file_input = gr.File(
|
| 335 |
+
label="🖼️ Upload Image or PDF",
|
| 336 |
+
file_types=[".pdf", ".png", ".jpg", ".jpeg"],
|
| 337 |
+
type="filepath"
|
| 338 |
+
)
|
| 339 |
+
temperature = gr.Slider(
|
| 340 |
+
minimum=0.0,
|
| 341 |
+
maximum=1.0,
|
| 342 |
+
value=0.2,
|
| 343 |
+
step=0.05,
|
| 344 |
+
label="Temperature",
|
| 345 |
+
info="0.0 = deterministic, Higher = more varied"
|
| 346 |
+
)
|
| 347 |
+
medicines_output = gr.Textbox(
|
| 348 |
+
label="π Extracted Medicines/Drugs",
|
| 349 |
+
placeholder="Medicine/drug names will appear here...",
|
| 350 |
+
lines=2,
|
| 351 |
+
max_lines=5,
|
| 352 |
+
interactive=False,
|
| 353 |
+
show_copy_button=True
|
| 354 |
+
)
|
| 355 |
+
submit_btn = gr.Button("Extract Medicines", variant="primary")
|
| 356 |
+
|
| 357 |
submit_btn.click(
|
| 358 |
+
fn=process_input, # already yields medicines as second output
|
| 359 |
+
inputs=[file_input, temperature, 1], # fix page=1 or expose slider
|
| 360 |
+
outputs=[gr.update(), medicines_output, gr.update(), gr.update(), gr.update(), gr.update()]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
)
|
| 362 |
+
|
| 363 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
|
| 365 |
|
| 366 |
if __name__ == "__main__":
|