Update app.py
app.py CHANGED
@@ -129,6 +129,7 @@ MODEL_ID_V = "nanonets/Nanonets-OCR2-3B"
 processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
 model_v = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_V,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
@@ -138,6 +139,7 @@ MODEL_ID_X = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
 model_x = Qwen2VLForConditionalGeneration.from_pretrained(
     MODEL_ID_X,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
@@ -147,6 +149,7 @@ MODEL_ID_A = "CohereForAI/aya-vision-8b"
 processor_a = AutoProcessor.from_pretrained(MODEL_ID_A, trust_remote_code=True)
 model_a = AutoModelForImageTextToText.from_pretrained(
     MODEL_ID_A,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
@@ -156,6 +159,7 @@ MODEL_ID_W = "allenai/olmOCR-7B-0725"
 processor_w = AutoProcessor.from_pretrained(MODEL_ID_W, trust_remote_code=True)
 model_w = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_W,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
@@ -165,6 +169,7 @@ MODEL_ID_M = "reducto/RolmOCR"
 processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
 model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
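The five hunks above all make the same change: they insert attn_implementation="flash_attention_2" into each from_pretrained call so every vision-language model runs on the FlashAttention-2 kernels. FlashAttention-2 needs the flash-attn package installed and generally an Ampere-or-newer GPU. Below is a minimal sketch of a guarded variant of that pattern; the load_vlm helper and its try/except fallback are illustrative assumptions, not part of this commit.

import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"

def load_vlm(model_id):
    # Hypothetical helper, not in app.py: request FlashAttention-2 and fall
    # back to the default attention backend if flash-attn is missing or the
    # GPU does not support it.
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    try:
        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            model_id,
            attn_implementation="flash_attention_2",
            trust_remote_code=True,
            torch_dtype=torch.float16,
        )
    except (ImportError, ValueError):
        # transformers raises when flash_attention_2 is requested but unavailable.
        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            model_id,
            trust_remote_code=True,
            torch_dtype=torch.float16,
        )
    return processor, model.to(device).eval()

processor_v, model_v = load_vlm("nanonets/Nanonets-OCR2-3B")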
@@ -245,8 +250,8 @@ image_examples = [
 ]
 
 # Create the Gradio Interface
-with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
-    gr.Markdown("# **Multimodal
+with gr.Blocks() as demo:
+    gr.Markdown("# **Multimodal OCR**", elem_id="main-title")
     with gr.Row():
         with gr.Column(scale=2):
             image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
@@ -267,7 +272,7 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
 
         with gr.Column(scale=3):
             gr.Markdown("## Output", elem_id="output-title")
-            output = gr.Textbox(label="Raw Output Stream", interactive=
+            output = gr.Textbox(label="Raw Output Stream", interactive=True, lines=11)
             with gr.Accordion("(Result.md)", open=False):
                 markdown_output = gr.Markdown(label="(Result.Md)")
 
@@ -285,4 +290,4 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     )
 
 if __name__ == "__main__":
-    demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)
+    demo.queue(max_size=50).launch(css=css, theme=steel_blue_theme, mcp_server=True, ssr_mode=False, show_error=True)
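The UI hunks move css and theme out of the gr.Blocks() constructor and onto launch(), make the raw-output Textbox editable with a fixed height (interactive=True, lines=11), and keep the request queue capped at 50 pending jobs. A self-contained sketch of that layout follows, with a stand-in streaming handler: echo_stream and the Run button are assumptions for illustration, and the real app's model callbacks, css, and theme are omitted.

import time
import gradio as gr

def echo_stream(query: str):
    # Hypothetical handler, not in app.py: streams the query back word by
    # word, the way the app streams raw model output into the Textbox.
    buffer = ""
    for word in query.split():
        buffer += word + " "
        time.sleep(0.05)
        yield buffer

with gr.Blocks() as demo:
    gr.Markdown("# **Multimodal OCR**", elem_id="main-title")
    with gr.Row():
        with gr.Column(scale=2):
            image_query = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
            run = gr.Button("Run")
        with gr.Column(scale=3):
            gr.Markdown("## Output", elem_id="output-title")
            output = gr.Textbox(label="Raw Output Stream", interactive=True, lines=11)
            with gr.Accordion("(Result.md)", open=False):
                markdown_output = gr.Markdown()  # filled by the real app's handlers
    run.click(echo_stream, inputs=image_query, outputs=output)

if __name__ == "__main__":
    # Generator handlers stream through the queue; max_size bounds pending jobs.
    demo.queue(max_size=50).launch(show_error=True)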