Spaces:
Running
Running
Push from the phone
Browse files
app.py
CHANGED
|
@@ -3,16 +3,15 @@ import base64
|
|
| 3 |
import gradio as gr
|
| 4 |
from mistralai import Mistral
|
| 5 |
from mistralai.models import OCRResponse
|
| 6 |
-
from mistralai.exceptions import MistralException
|
| 7 |
from pathlib import Path
|
| 8 |
-
from pydantic import BaseModel
|
| 9 |
import pycountry
|
| 10 |
import json
|
| 11 |
import logging
|
| 12 |
-
from tenacity import retry, stop_after_attempt, wait_fixed
|
| 13 |
import tempfile
|
| 14 |
from typing import Union, Dict, List
|
| 15 |
from contextlib import contextmanager
|
|
|
|
| 16 |
|
| 17 |
# Constants
|
| 18 |
DEFAULT_LANGUAGE = "English"
|
|
@@ -32,7 +31,7 @@ class OCRProcessor:
|
|
| 32 |
self.client = Mistral(api_key=self.api_key)
|
| 33 |
try:
|
| 34 |
self.client.models.list() # Validate API key
|
| 35 |
-
except
|
| 36 |
raise ValueError(f"Invalid API key: {str(e)}")
|
| 37 |
|
| 38 |
@staticmethod
|
|
@@ -52,26 +51,33 @@ class OCRProcessor:
|
|
| 52 |
if os.path.exists(temp_file.name):
|
| 53 |
os.unlink(temp_file.name)
|
| 54 |
|
| 55 |
-
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2)
|
| 56 |
def _call_ocr_api(self, document: Dict) -> OCRResponse:
|
| 57 |
try:
|
| 58 |
return self.client.ocr.process(model="mistral-ocr-latest", document=document)
|
| 59 |
-
except
|
| 60 |
logger.error(f"OCR API call failed: {str(e)}")
|
| 61 |
raise
|
| 62 |
|
| 63 |
-
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2)
|
| 64 |
def _call_chat_complete(self, model: str, messages: List[Dict], **kwargs) -> Dict:
|
| 65 |
try:
|
| 66 |
return self.client.chat.complete(model=model, messages=messages, **kwargs)
|
| 67 |
-
except
|
| 68 |
logger.error(f"Chat complete API call failed: {str(e)}")
|
| 69 |
raise
|
| 70 |
|
| 71 |
def _get_file_content(self, file_input: Union[str, bytes]) -> bytes:
|
| 72 |
if isinstance(file_input, str):
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
return file_input.read() if hasattr(file_input, 'read') else file_input
|
| 76 |
|
| 77 |
def ocr_pdf_url(self, pdf_url: str) -> str:
|
|
@@ -158,7 +164,8 @@ class OCRProcessor:
|
|
| 158 |
temperature=0
|
| 159 |
)
|
| 160 |
|
| 161 |
-
|
|
|
|
| 162 |
return self._format_structured_response(temp_path, content)
|
| 163 |
except Exception as e:
|
| 164 |
return self._handle_error("structured OCR", e)
|
|
@@ -176,7 +183,7 @@ class OCRProcessor:
|
|
| 176 |
def _format_structured_response(file_path: str, content: Dict) -> str:
|
| 177 |
languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
|
| 178 |
valid_langs = [l for l in content.get("languages", [DEFAULT_LANGUAGE]) if l in languages.values()]
|
| 179 |
-
|
| 180 |
response = {
|
| 181 |
"file_name": Path(file_path).name,
|
| 182 |
"topics": content.get("topics", []),
|
|
@@ -189,13 +196,14 @@ def create_interface():
|
|
| 189 |
with gr.Blocks(title="Mistral OCR & Structured Output App") as demo:
|
| 190 |
gr.Markdown("# Mistral OCR & Structured Output App")
|
| 191 |
gr.Markdown("Enter your Mistral API key below to use the app. Extract text from PDFs and images or get structured JSON output.")
|
|
|
|
| 192 |
|
| 193 |
api_key_input = gr.Textbox(
|
| 194 |
label="Mistral API Key",
|
| 195 |
placeholder="Enter your Mistral API key here",
|
| 196 |
type="password"
|
| 197 |
)
|
| 198 |
-
|
| 199 |
def initialize_processor(api_key):
|
| 200 |
try:
|
| 201 |
processor = OCRProcessor(api_key)
|
|
@@ -263,4 +271,5 @@ def create_interface():
|
|
| 263 |
return demo
|
| 264 |
|
| 265 |
if __name__ == "__main__":
|
|
|
|
| 266 |
create_interface().launch(share=True, debug=True)
|
|
|
|
| 3 |
import gradio as gr
|
| 4 |
from mistralai import Mistral
|
| 5 |
from mistralai.models import OCRResponse
|
|
|
|
| 6 |
from pathlib import Path
|
|
|
|
| 7 |
import pycountry
|
| 8 |
import json
|
| 9 |
import logging
|
| 10 |
+
from tenacity import retry, stop_after_attempt, wait_fixed
|
| 11 |
import tempfile
|
| 12 |
from typing import Union, Dict, List
|
| 13 |
from contextlib import contextmanager
|
| 14 |
+
import requests
|
| 15 |
|
| 16 |
# Constants
|
| 17 |
DEFAULT_LANGUAGE = "English"
|
|
|
|
| 31 |
self.client = Mistral(api_key=self.api_key)
|
| 32 |
try:
|
| 33 |
self.client.models.list() # Validate API key
|
| 34 |
+
except Exception as e:
|
| 35 |
raise ValueError(f"Invalid API key: {str(e)}")
|
| 36 |
|
| 37 |
@staticmethod
|
|
|
|
| 51 |
if os.path.exists(temp_file.name):
|
| 52 |
os.unlink(temp_file.name)
|
| 53 |
|
| 54 |
+
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
|
| 55 |
def _call_ocr_api(self, document: Dict) -> OCRResponse:
|
| 56 |
try:
|
| 57 |
return self.client.ocr.process(model="mistral-ocr-latest", document=document)
|
| 58 |
+
except Exception as e:
|
| 59 |
logger.error(f"OCR API call failed: {str(e)}")
|
| 60 |
raise
|
| 61 |
|
| 62 |
+
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
|
| 63 |
def _call_chat_complete(self, model: str, messages: List[Dict], **kwargs) -> Dict:
|
| 64 |
try:
|
| 65 |
return self.client.chat.complete(model=model, messages=messages, **kwargs)
|
| 66 |
+
except Exception as e:
|
| 67 |
logger.error(f"Chat complete API call failed: {str(e)}")
|
| 68 |
raise
|
| 69 |
|
| 70 |
def _get_file_content(self, file_input: Union[str, bytes]) -> bytes:
|
| 71 |
if isinstance(file_input, str):
|
| 72 |
+
if file_input.startswith("http"):
|
| 73 |
+
# Handle URLs
|
| 74 |
+
response = requests.get(file_input)
|
| 75 |
+
response.raise_for_status()
|
| 76 |
+
return response.content
|
| 77 |
+
else:
|
| 78 |
+
# Handle local file paths
|
| 79 |
+
with open(file_input, "rb") as f:
|
| 80 |
+
return f.read()
|
| 81 |
return file_input.read() if hasattr(file_input, 'read') else file_input
|
| 82 |
|
| 83 |
def ocr_pdf_url(self, pdf_url: str) -> str:
|
|
|
|
| 164 |
temperature=0
|
| 165 |
)
|
| 166 |
|
| 167 |
+
response_content = chat_response.choices[0].message.content
|
| 168 |
+
content = json.loads(response_content)
|
| 169 |
return self._format_structured_response(temp_path, content)
|
| 170 |
except Exception as e:
|
| 171 |
return self._handle_error("structured OCR", e)
|
|
|
|
| 183 |
def _format_structured_response(file_path: str, content: Dict) -> str:
|
| 184 |
languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
|
| 185 |
valid_langs = [l for l in content.get("languages", [DEFAULT_LANGUAGE]) if l in languages.values()]
|
| 186 |
+
|
| 187 |
response = {
|
| 188 |
"file_name": Path(file_path).name,
|
| 189 |
"topics": content.get("topics", []),
|
|
|
|
| 196 |
with gr.Blocks(title="Mistral OCR & Structured Output App") as demo:
|
| 197 |
gr.Markdown("# Mistral OCR & Structured Output App")
|
| 198 |
gr.Markdown("Enter your Mistral API key below to use the app. Extract text from PDFs and images or get structured JSON output.")
|
| 199 |
+
gr.Markdown("**Note:** After entering your API key, click 'Set API Key' to validate and use it.")
|
| 200 |
|
| 201 |
api_key_input = gr.Textbox(
|
| 202 |
label="Mistral API Key",
|
| 203 |
placeholder="Enter your Mistral API key here",
|
| 204 |
type="password"
|
| 205 |
)
|
| 206 |
+
|
| 207 |
def initialize_processor(api_key):
|
| 208 |
try:
|
| 209 |
processor = OCRProcessor(api_key)
|
|
|
|
| 271 |
return demo
|
| 272 |
|
| 273 |
if __name__ == "__main__":
|
| 274 |
+
print(f"===== Application Startup at {os.environ.get('START_TIME', 'Unknown')} =====")
|
| 275 |
create_interface().launch(share=True, debug=True)
|