ocr_mcp_1

Sleeping

App Files Community

vachaspathi committed 28 days ago

Commit

54efa47

verified ·

1 Parent(s): 168e3cd

Update app.py

Browse files

Files changed (1) hide show

app.py +146 -308

app.py CHANGED Viewed

@@ -7,10 +7,8 @@ import os
 import gradio as gr
 import json
 import time
-import traceback
 import re
 import logging
-import base64
 import asyncio
 import gc
@@ -19,390 +17,230 @@ try:
     from ocr_engine import extract_text_from_file
     from prompts import get_ocr_extraction_prompt, get_agent_prompt
 except ImportError:
-    # Fallback
-    def extract_text_from_file(path): return "OCR Engine not loaded."
     def get_ocr_extraction_prompt(txt): return txt
     def get_agent_prompt(h, c, u): return u
-# Setup logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("mcp_server")
-# Attempt to import transformers
-TRANSFORMERS_AVAILABLE = False
-try:
-    from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
-    TRANSFORMERS_AVAILABLE = True
-except Exception as e:
-    logger.warning("transformers not available: %s", e)
-    TRANSFORMERS_AVAILABLE = False
-# ----------------------------
-# Load config
-# ----------------------------
 try:
     from config import (
         CLIENT_ID, CLIENT_SECRET, REFRESH_TOKEN, API_BASE,
         INVOICE_API_BASE, ORGANIZATION_ID, LOCAL_MODEL
     )
-except Exception as e:
-    raise SystemExit("Config missing. Check config.py.")
 mcp = FastMCP("ZohoCRMAgent")
-# ----------------------------
-# Analytics
-# ----------------------------
-ANALYTICS_PATH = "mcp_analytics.json"
-def _init_analytics():
-    if not os.path.exists(ANALYTICS_PATH):
-        with open(ANALYTICS_PATH, "w") as f: json.dump({}, f)
-_init_analytics()
-# ----------------------------
-# Regex JSON Extractor
-# ----------------------------
 def extract_json_safely(text: str) -> Optional[Any]:
     try:
         return json.loads(text)
     except:
-        pass
-    try:
         match = re.search(r'(\{.*\}|\[.*\])', text, re.DOTALL)
-        if match:
-            json_str = match.group(0)
-            return json.loads(json_str)
-    except:
-        pass
-    return None
-# ----------------------------
-# Local LLM loader (Lazy Loading Fix)
-# ----------------------------
-LLM_PIPELINE = None
-TOKENIZER = None
-LOADED_MODEL_NAME = None
 def init_local_model():
-    global LLM_PIPELINE, TOKENIZER, LOADED_MODEL_NAME
-    # FIX 1: Check if already loaded to prevent double-memory usage
-    if LLM_PIPELINE is not None:
-        return
-    if not LOCAL_MODEL or not TRANSFORMERS_AVAILABLE:
-        return
     try:
-        logger.info(f"Loading model: {LOCAL_MODEL}...")
-        TOKENIZER = AutoTokenizer.from_pretrained(LOCAL_MODEL, trust_remote_code=True)
-        model = AutoModelForCausalLM.from_pretrained(LOCAL_MODEL, trust_remote_code=True, device_map="auto")
         LLM_PIPELINE = pipeline("text-generation", model=model, tokenizer=TOKENIZER)
-        LOADED_MODEL_NAME = LOCAL_MODEL
-        logger.info("Model loaded successfully.")
     except Exception as e:
-        logger.error(f"Model load failed: {e}")
-# FIX 2: Removed global call to init_local_model() here.
-# It will now be called only in __main__ or lazily.
 def local_llm_generate(prompt: str, max_tokens: int = 512) -> Dict[str, Any]:
-    # FIX 3: Lazy load if accessed before main (safety net)
     if LLM_PIPELINE is None:
         init_local_model()
-    if LLM_PIPELINE is None:
-        return {"text": "LLM not loaded.", "raw": None}
     try:
-        # Includes Cache Fix from previous step
         out = LLM_PIPELINE(
             prompt,
             max_new_tokens=max_tokens,
-            return_full_text=False,
-            use_cache=False
         )
         text = out[0]["generated_text"] if out else ""
         return {"text": text, "raw": out}
     except Exception as e:
-        logger.error(f"Generation Error: {e}")
         return {"text": f"Error: {e}", "raw": None}
-# ----------------------------
-# Helper: normalize local file_path args
-# ----------------------------
-def _normalize_local_path_args(args: Any) -> Any:
-    if not isinstance(args, dict): return args
-    fp = args.get("file_path") or args.get("path")
-    if isinstance(fp, str) and fp.startswith("/mnt/data/") and os.path.exists(fp):
-        args["file_url"] = f"file://{fp}"
-    return args
-# ----------------------------
-# Zoho Auth & Tools
-# ----------------------------
 def _get_valid_token_headers() -> dict:
-    token_url = "https://accounts.zoho.in/oauth/v2/token"
-    params = {
         "refresh_token": REFRESH_TOKEN, "client_id": CLIENT_ID,
         "client_secret": CLIENT_SECRET, "grant_type": "refresh_token"
-    }
-    r = requests.post(token_url, params=params, timeout=20)
     if r.status_code == 200:
         return {"Authorization": f"Zoho-oauthtoken {r.json().get('access_token')}"}
-    # Don't crash entire app on token fail, just return None so tool can report it
-    logger.error(f"Token Refresh Failed: {r.text}")
     return {}
-@mcp.tool()
-def authenticate_zoho() -> str:
-    h = _get_valid_token_headers()
-    return "Zoho token refreshed." if h else "Failed to refresh token."
 @mcp.tool()
 def create_record(module_name: str, record_data: dict) -> str:
-    headers = _get_valid_token_headers()
-    if not headers: return "Auth Error"
-    url = f"{API_BASE}/{module_name}"
-    r = requests.post(url, headers=headers, json={"data": [record_data]}, timeout=20)
-    if r.status_code in (200, 201): return json.dumps(r.json(), ensure_ascii=False)
-    return f"Error: {r.text}"
-@mcp.tool()
-def get_records(module_name: str, page: int = 1, per_page: int = 200) -> list:
-    headers = _get_valid_token_headers()
-    if not headers: return []
-    r = requests.get(f"{API_BASE}/{module_name}", headers=headers, params={"page": page, "per_page": per_page})
-    return r.json().get("data", []) if r.status_code == 200 else []
-@mcp.tool()
-def update_record(module_name: str, record_id: str, data: dict) -> str:
-    headers = _get_valid_token_headers()
-    if not headers: return "Auth Error"
-    r = requests.put(f"{API_BASE}/{module_name}/{record_id}", headers=headers, json={"data": [data]})
-    return json.dumps(r.json()) if r.status_code == 200 else r.text
-@mcp.tool()
-def delete_record(module_name: str, record_id: str) -> str:
-    headers = _get_valid_token_headers()
-    if not headers: return "Auth Error"
-    r = requests.delete(f"{API_BASE}/{module_name}/{record_id}", headers=headers)
-    return json.dumps(r.json()) if r.status_code == 200 else r.text
-def _ensure_invoice_config():
-    if not INVOICE_API_BASE or not ORGANIZATION_ID: raise RuntimeError("Invoice Config Missing")
 @mcp.tool()
 def create_invoice(data: dict) -> str:
-    _ensure_invoice_config()
-    headers = _get_valid_token_headers()
-    if not headers: return "Auth Error"
-    params = {"organization_id": ORGANIZATION_ID}
-    r = requests.post(f"{INVOICE_API_BASE}/invoices", headers=headers, params=params, json=data)
-    if r.status_code in (200, 201): return json.dumps(r.json(), ensure_ascii=False)
-    return f"Error creating invoice: {r.text}"
-def upload_invoice_attachment(invoice_id: str, file_path: str) -> str:
-    if not os.path.exists(file_path): return "File not found"
-    headers = _get_valid_token_headers()
-    if not headers: return "Auth Error"
-    headers.pop("Content-Type", None)
-    url = f"{INVOICE_API_BASE}/invoices/{invoice_id}/attachments"
-    with open(file_path, "rb") as f:
-        files = {"attachment": (os.path.basename(file_path), f)}
-        r = requests.post(url, headers=headers, params={"organization_id": ORGANIZATION_ID}, files=files)
     return json.dumps(r.json()) if r.status_code in (200, 201) else r.text
 @mcp.tool()
 def process_document(file_path: str, target_module: Optional[str] = "Contacts") -> dict:
-    try:
-        if not os.path.exists(file_path):
-             return {"status": "error", "error": "file not found"}
-        # 1. Perform OCR
-        raw_text = extract_text_from_file(file_path)
-        if not raw_text or len(raw_text) < 5:
-            return {"status": "error", "error": "OCR failed to extract text."}
-        # 2. Use Prompt Template
-        prompt = get_ocr_extraction_prompt(raw_text)
-        # 3. Generate
-        llm_out = local_llm_generate(prompt, max_tokens=300)
-        extracted_text = llm_out.get("text", "")
-        # 4. Extract JSON
-        extracted_data = extract_json_safely(extracted_text)
-        if not extracted_data:
-            extracted_data = {"raw_llm_text": extracted_text}
-        return {
-            "status": "success",
-            "file": os.path.basename(file_path),
-            "extracted_data": extracted_data
-        }
-    except Exception as e:
-        return {"status": "error", "error": str(e)}
-# ----------------------------
-# Helpers: map LLM args -> Zoho payloads
-# ----------------------------
-def _extract_created_id_from_zoho_response(resp_json) -> Optional[str]:
-    try:
-        if isinstance(resp_json, str): resp_json = json.loads(resp_json)
-        data = resp_json.get("data") or resp_json.get("result")
-        if data and isinstance(data, list):
-             d = data[0].get("details") or data[0]
-             return str(d.get("id") or d.get("ID") or d.get("Id"))
-        if "invoice" in resp_json: return str(resp_json["invoice"].get("invoice_id"))
-    except: pass
-    return None
-def _map_contact_args_to_zoho_payload(args: dict) -> dict:
-    p = {}
-    if "contact" in args: p["Last_Name"] = args["contact"]
-    if "email" in args: p["Email"] = args["email"]
-    for k,v in args.items():
-        if k not in ["contact", "email", "items"]: p[k] = v
-    return p
-def _build_invoice_payload_for_zoho(contact_id: str, invoice_items: List[dict], currency: str = None, vat_pct: float = 0.0) -> dict:
-    line_items = []
-    for it in invoice_items:
-        qty = int(it.get("quantity", 1))
-        rate = float(str(it.get("rate", 0)).replace("$",""))
-        line_items.append({"name": it.get("name","Item"), "rate": rate, "quantity": qty})
-    payload = {"customer_id": contact_id, "line_items": line_items}
-    if currency: payload["currency_code"] = currency
-    return payload
-# ----------------------------
-# Parse & Execute
-# ----------------------------
-def parse_and_execute_model_tool_output(model_text: str, history: Optional[List] = None) -> str:
     payload = extract_json_safely(model_text)
-    if not payload:
-        return "(Parse) Model output was not valid JSON tool instruction."
-    instructions = [payload] if isinstance(payload, dict) else payload
     results = []
-    contact_id = None
-    for instr in instructions:
-        if not isinstance(instr, dict): continue
-        tool = instr.get("tool")
-        args = instr.get("args", {})
-        args = _normalize_local_path_args(args)
         if tool == "create_record":
-            res = create_record(args.get("module", "Contacts"), _map_contact_args_to_zoho_payload(args))
-            results.append(f"create_record -> {res}")
-            contact_id = _extract_created_id_from_zoho_response(res)
         elif tool == "create_invoice":
-            if not contact_id: contact_id = args.get("customer_id")
-            if contact_id:
-                inv_payload = _build_invoice_payload_for_zoho(contact_id, args.get("line_items", []))
-                res = create_invoice(inv_payload)
-                results.append(f"create_invoice -> {res}")
-            else:
-                results.append("Skipped invoice: missing contact_id")
-        elif tool == "process_document":
-             res = process_document(args.get("file_path"))
-             results.append(f"process -> {res}")
-    return "\n".join(results) if results else "No tools executed."
-# ----------------------------
-# Command Parser (Debug)
-# ----------------------------
-def try_parse_and_invoke_command(text: str):
-    if text.startswith("/mnt/data/"): return str(process_document(text))
-    return None
-# ----------------------------
-# Chat Logic
-# ----------------------------
-def deepseek_response(message: str, file_path: Optional[str] = None, history: list = []) -> str:
-    # 1. Handle File (OCR)
-    ocr_context = ""
     if file_path:
-        logger.info(f"Processing file: {file_path}")
-        doc_result = process_document(file_path)
-        if doc_result.get("status") == "success":
-            data = doc_result["extracted_data"]
-            ocr_context = json.dumps(data, ensure_ascii=False)
-            if not message:
-                message = "I uploaded a file. Create the contact and invoice."
         else:
-            return f"Error processing file: {doc_result.get('error')}"
-    # 2. Build Prompt
-    history_text = "\n".join([f"User: {h[0]}\nBot: {h[1]}" for h in history])
-    prompt = get_agent_prompt(history_text, ocr_context, message)
-    # 3. Generate
-    gen = local_llm_generate(prompt, max_tokens=256)
-    response_text = gen["text"]
-    # 4. Check for JSON Tool Call
-    tool_json = extract_json_safely(response_text)
-    if tool_json and isinstance(tool_json, (dict, list)):
-        try:
-            return parse_and_execute_model_tool_output(json.dumps(tool_json), history)
-        except Exception as e:
-            return f"(Execute) Error: {e}"
-    return response_text
-# ----------------------------
-# Chat Handler
-# ----------------------------
-def chat_handler(message, history):
-    user_text = ""
-    uploaded_file_path = None
-    if isinstance(message, dict):
-        user_text = message.get("text", "")
-        files = message.get("files", [])
-        if files: uploaded_file_path = files[0]
-    else:
-        user_text = str(message)
-    if not uploaded_file_path:
-        cmd = try_parse_and_invoke_command(user_text)
-        if cmd: return cmd
-    return deepseek_response(user_text, uploaded_file_path, history)
-# ----------------------------
-# Cleanup
-# ----------------------------
-def cleanup_event_loop():
-    gc.collect()
-    try:
-        loop = asyncio.get_event_loop()
-        if loop.is_closed():
-            asyncio.set_event_loop(asyncio.new_event_loop())
-    except RuntimeError:
-        asyncio.set_event_loop(asyncio.new_event_loop())
 if __name__ == "__main__":
-    cleanup_event_loop()
-    # FIX: Explicitly load model once here to prevent concurrent load attempts by Gradio
-    init_local_model()
-    demo = gr.ChatInterface(
-        fn=chat_handler,
-        multimodal=True,
-        textbox=gr.MultimodalTextbox(interactive=True, file_count="single", placeholder="Upload Invoice or ask to create records...")
-    )
     demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
 import json
 import time
 import re
 import logging
 import asyncio
 import gc
     from ocr_engine import extract_text_from_file
     from prompts import get_ocr_extraction_prompt, get_agent_prompt
 except ImportError:
+    def extract_text_from_file(path): return ""
     def get_ocr_extraction_prompt(txt): return txt
     def get_agent_prompt(h, c, u): return u
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("mcp_server")

 # --- Load Config ---
 # Zoho credentials, API endpoints and the local model id all come from config.py.
 try:
     from config import (
         CLIENT_ID, CLIENT_SECRET, REFRESH_TOKEN, API_BASE,
         INVOICE_API_BASE, ORGANIZATION_ID, LOCAL_MODEL
     )
 except Exception as exc:
     # Record the underlying cause before exiting: the exit message alone cannot
     # distinguish a missing config.py from a missing name or an error inside it.
     logger.error("Could not load config.py: %r", exc)
     raise SystemExit("Config missing.") from exc

 mcp = FastMCP("ZohoCRMAgent")

 # --- Globals ---
 # Both are filled in by init_local_model(); None means "not loaded yet".
 LLM_PIPELINE = None
 TOKENIZER = None
+# --- Helpers ---
 def extract_json_safely(text: str) -> Optional[Any]:
     """Parse JSON from *text*, tolerating surrounding non-JSON chatter.

     The whole string is tried first. If that fails, the widest ``{...}`` or
     ``[...]`` span found in the text is tried instead.

     Returns the decoded value, or ``None`` when nothing decodable is found.
     Never raises for malformed or non-string input.
     """
     try:
         return json.loads(text)
     except (TypeError, ValueError):
         # json.JSONDecodeError subclasses ValueError; TypeError covers None etc.
         pass

     if not isinstance(text, str):
         return None

     match = re.search(r'(\{.*\}|\[.*\])', text, re.DOTALL)
     if match is None:
         return None
     try:
         return json.loads(match.group(0))
     except ValueError:
         # The bracketed span looked like JSON but is not valid JSON.
         return None
+def _normalize_local_path_args(args: Any) -> Any:
+    if not isinstance(args, dict): return args
+    fp = args.get("file_path") or args.get("path")
+    if isinstance(fp, str) and fp.startswith("/mnt/data/") and os.path.exists(fp):
+        args["file_url"] = f"file://{fp}"
+    return args
+# --- Model Loading (Lazy & Light) ---
 def init_local_model():
     """Load the local text-generation pipeline once and cache it in module globals.

     Does nothing when a pipeline is already cached or when ``LOCAL_MODEL`` is
     empty. Load failures are logged and leave both ``LLM_PIPELINE`` and
     ``TOKENIZER`` unchanged, so callers must still check ``LLM_PIPELINE``.
     """
     global LLM_PIPELINE, TOKENIZER
     if LLM_PIPELINE is not None:
         return
     if not LOCAL_MODEL:
         # Without a model id the load below can only fail; skip it instead of
         # retrying the failing load on every request.
         logger.error("Model load error: LOCAL_MODEL is not configured")
         return
     try:
         # Imported here so the module can start without transformers loaded.
         from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
         logger.info("Loading lighter model: %s...", LOCAL_MODEL)
         tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL)
         model = AutoModelForCausalLM.from_pretrained(
             LOCAL_MODEL,
             device_map="auto",
             torch_dtype="auto"
         )
         generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
     except Exception as e:
         logger.error("Model load error: %s", e)
         return
     # Publish both globals together, only after every step succeeded.
     TOKENIZER = tokenizer
     LLM_PIPELINE = generator
     logger.info("Model loaded.")
 def local_llm_generate(prompt: str, max_tokens: int = 512) -> Dict[str, Any]:
     """Run *prompt* through the cached local pipeline with greedy decoding.

     Triggers ``init_local_model()`` on first use.

     Returns a dict with ``text`` (the generated continuation, an error
     message, or ``"Model not loaded."``) and ``raw`` (the pipeline output, or
     ``None`` when generation did not run or failed). Never raises.
     """
     if LLM_PIPELINE is None:
         init_local_model()
     if LLM_PIPELINE is None:
         return {"text": "Model not loaded.", "raw": None}
     try:
         out = LLM_PIPELINE(
             prompt,
             max_new_tokens=max_tokens,
             return_full_text=False,
             # Greedy decoding keeps tool-call output deterministic. No
             # temperature is passed: it has no effect when do_sample is False.
             # NOTE(review): transformers also appears to warn about it -- confirm.
             do_sample=False,
         )
         text = out[0]["generated_text"] if out else ""
         return {"text": text, "raw": out}
     except Exception as e:
         logger.error("Generation error: %s", e)
         return {"text": f"Error: {e}", "raw": None}
+# --- Tools (Zoho) ---
 def _get_valid_token_headers() -> dict:
     """Exchange the configured refresh token for a Zoho access token.

     Returns an ``Authorization`` header dict on success, or ``{}`` on any
     failure (network error, non-200 status, unreadable body, or a body without
     ``access_token``). Callers treat an empty dict as "auth failed".
     """
     try:
         r = requests.post("https://accounts.zoho.in/oauth/v2/token", params={
             "refresh_token": REFRESH_TOKEN, "client_id": CLIENT_ID,
             "client_secret": CLIENT_SECRET, "grant_type": "refresh_token"
         }, timeout=10)
     except requests.RequestException as exc:
         logger.error("Token refresh request failed: %s", exc)
         return {}

     if r.status_code != 200:
         logger.error("Token refresh failed with HTTP %s", r.status_code)
         return {}

     try:
         token = r.json().get("access_token")
     except (ValueError, AttributeError):
         token = None
     if not token:
         # A 200 body without a token would otherwise produce the header
         # "Zoho-oauthtoken None" and fail later in a confusing way.
         logger.error("Token refresh response did not contain an access_token")
         return {}
     return {"Authorization": f"Zoho-oauthtoken {token}"}
 @mcp.tool()
 def create_record(module_name: str, record_data: dict) -> str:
     """Create one record in the Zoho module *module_name*.

     Returns ``"Auth Failed"`` when no token is available. On HTTP 200/201 it
     returns a JSON string with ``status``, the new record ``id`` (taken from
     ``data[0].details.id``) and the full ``response``; if the body does not
     have that shape the body is returned as JSON unchanged. On any other
     outcome the raw response text or a short error string is returned.
     """
     h = _get_valid_token_headers()
     if not h:
         return "Auth Failed"
     try:
         r = requests.post(
             f"{API_BASE}/{module_name}",
             headers=h,
             json={"data": [record_data]},
             timeout=20,  # do not let a stalled API call hang the chat request
         )
     except requests.RequestException as exc:
         return f"Request failed: {exc}"
     if r.status_code not in (200, 201):
         return r.text

     try:
         body = r.json()
     except ValueError:
         # Success status but a non-JSON body: hand back what the server sent.
         return r.text

     # Extract ID for downstream use
     try:
         d = body.get("data", [{}])[0].get("details", {})
         return json.dumps({"status": "success", "id": d.get("id"), "response": body})
     except (AttributeError, IndexError, KeyError, TypeError):
         return json.dumps(body)
 @mcp.tool()
 def create_invoice(data: dict) -> str:
     """Create an invoice by POSTing *data* to ``{INVOICE_API_BASE}/invoices``.

     The request is scoped with ``organization_id=ORGANIZATION_ID``. Returns
     ``"Auth Failed"`` when no token is available, the response body as a JSON
     string on HTTP 200/201, and otherwise the raw response text or a short
     error string.
     """
     h = _get_valid_token_headers()
     if not h:
         return "Auth Failed"
     try:
         r = requests.post(
             f"{INVOICE_API_BASE}/invoices",
             headers=h,
             params={"organization_id": ORGANIZATION_ID},
             json=data,
             timeout=20,  # do not let a stalled API call hang the chat request
         )
     except requests.RequestException as exc:
         return f"Request failed: {exc}"
     if r.status_code not in (200, 201):
         return r.text
     try:
         return json.dumps(r.json())
     except ValueError:
         # Success status but a non-JSON body: hand back what the server sent.
         return r.text
 @mcp.tool()
 def process_document(file_path: str, target_module: Optional[str] = "Contacts") -> dict:
     """OCR a local file and ask the local LLM to turn the text into structured data.

     Args:
         file_path: Path of the document on local disk.
         target_module: Accepted for interface compatibility; not used here.

     Returns:
         On success ``{"status": "success", "file": <basename>,
         "extracted_data": ...}``, where ``extracted_data`` is the JSON parsed
         from the LLM output, or ``{"raw": <llm text>}`` when none could be
         parsed. On failure ``{"status": "error", "error": <reason>}``.
     """
     # The isinstance check keeps a missing/None path from raising in os.path.
     if not isinstance(file_path, str) or not os.path.exists(file_path):
         return {"status": "error", "error": "File not found"}

     # 1. OCR
     raw_text = extract_text_from_file(file_path)
     if not raw_text:
         return {"status": "error", "error": "OCR empty"}

     # 2. LLM Extraction
     prompt = get_ocr_extraction_prompt(raw_text)
     res = local_llm_generate(prompt, max_tokens=300)
     llm_text = res["text"]
     data = extract_json_safely(llm_text)
     return {
         "status": "success",
         "file": os.path.basename(file_path),
         "extracted_data": data or {"raw": llm_text}
     }
+# --- Executor ---
def _to_invoice_line_items(raw_items: Any) -> list:
    """Convert model-supplied line items into name/rate/quantity dicts.

    Raises AttributeError, TypeError or ValueError when an item is not a dict
    or its rate/quantity cannot be converted to a number.
    """
    items = []
    for it in raw_items or []:
        items.append({
            "name": it.get("name", "Item"),
            # Rates may arrive as strings with a dollar sign, e.g. "$12.50".
            "rate": float(str(it.get("rate", 0)).replace("$", "")),
            "quantity": int(it.get("quantity", 1)),
        })
    return items


def parse_and_execute(model_text: str, history: list) -> str:
    """Run the tool call(s) described by the JSON found in *model_text*.

    The JSON may be a single ``{"tool": ..., "args": {...}}`` object or a list
    of them. Supported tools are ``create_record``, ``create_invoice`` and
    ``process_document``; other entries are ignored. The id of a record created
    earlier in the same batch is used as ``customer_id`` for a later invoice
    that does not carry one.

    Args:
        model_text: Raw LLM output expected to contain the tool-call JSON.
        history: Chat history; accepted for interface compatibility, not used.

    Returns:
        One result line per executed tool joined by newlines, or a short
        message when no tool call could be parsed or none was executed.
    """
    parsed = extract_json_safely(model_text)
    if not parsed:
        return "No valid tool call found."
    # Normalize
    cmds = [parsed] if isinstance(parsed, dict) else parsed
    if not isinstance(cmds, list):
        # A bare number or string is valid JSON but not a tool call.
        return "No valid tool call found."

    results = []
    # Context State
    last_contact_id = None
    for cmd in cmds:
        if not isinstance(cmd, dict):
            continue
        tool = cmd.get("tool")
        args = cmd.get("args")
        if not isinstance(args, dict):
            # Model output is untrusted; a null/list/str "args" means no args.
            args = {}
        args = _normalize_local_path_args(args)

        if tool == "create_record":
            res = create_record(args.get("module", "Contacts"), args)
            results.append(f"Record: {res}")
            # Try capture ID
            try:
                rj = json.loads(res)
            except (TypeError, ValueError):
                rj = None
            if isinstance(rj, dict) and "id" in rj:
                last_contact_id = rj["id"]

        elif tool == "create_invoice":
            # Auto-fill contact_id if we just created one
            if not args.get("customer_id") and last_contact_id:
                args["customer_id"] = last_contact_id
            # Map Items
            try:
                items = _to_invoice_line_items(args.get("line_items"))
            except (AttributeError, TypeError, ValueError) as exc:
                # One malformed item must not abort the rest of the batch.
                results.append(f"Invoice: invalid line_items ({exc})")
                continue
            invoice = {"customer_id": args.get("customer_id"), "line_items": items}
            if args.get("currency"):
                invoice["currency_code"] = args["currency"]
            res = create_invoice(invoice)
            results.append(f"Invoice: {res}")

        elif tool == "process_document":
            doc_path = args.get("file_path")
            if not isinstance(doc_path, str):
                results.append("Processed: missing file_path")
                continue
            res = process_document(doc_path)
            results.append(f"Processed: {res}")

    return "\n".join(results) if results else "No tools executed."
+# --- Chat Core ---
def chat_logic(message: str, file_path: str, history: list) -> str:
    """Answer one chat turn, optionally grounded in an uploaded document.

    Args:
        message: The user's text; replaced by a default instruction when empty
            and a file was processed successfully.
        file_path: Path of an uploaded file, or a falsy value when there is none.
        history: Earlier turns, either as (user, bot) pairs or as dicts with
            ``role``/``content`` keys.

    Returns:
        The tool execution report when the model emitted a JSON tool call,
        otherwise the model's text. A failed file ingest or tool execution is
        reported as a string rather than raised.
    """
    # 1. Ingest File
    file_context = ""
    if file_path:
        doc = process_document(file_path)
        if doc.get("status") != "success":
            return f"OCR Failed: {doc}"
        file_context = json.dumps(doc["extracted_data"])
        if not message:
            message = "Create records from this file."

    # 2. Decision
    turns = []
    for h in history or []:
        if isinstance(h, dict):
            # NOTE(review): assumes dict entries carry "role"/"content" -- confirm
            # against the Gradio version in use.
            speaker = "U" if h.get("role") == "user" else "A"
            turns.append(f"{speaker}: {h.get('content')}")
        else:
            turns.append(f"U: {h[0]}\nA: {h[1]}")
    hist_txt = "\n".join(turns)
    prompt = get_agent_prompt(hist_txt, file_context, message)

    # 3. Gen & Execute
    gen = local_llm_generate(prompt, max_tokens=200)
    if extract_json_safely(gen["text"]):
        try:
            return parse_and_execute(gen["text"], history)
        except Exception as exc:
            # Show the failure in the chat instead of breaking the request.
            logger.error("Tool execution failed: %s", exc)
            return f"(Execute) Error: {exc}"
    return gen["text"]
+# --- UI ---
def chat_handler(msg, hist):
    """Gradio chat callback: unpack the incoming message and route it.

    *msg* is normally a dict with ``text`` and ``files``; a plain string (or
    ``None``) is also accepted and treated as text with no files. Only the
    first uploaded file is used.
    """
    if isinstance(msg, dict):
        txt = msg.get("text") or ""
        files = msg.get("files") or []
    else:
        txt = "" if msg is None else str(msg)
        files = []

    path = files[0] if files else None
    if isinstance(path, dict):
        # NOTE(review): some Gradio versions appear to deliver file entries as
        # dicts with a "path" key rather than plain strings -- confirm.
        path = path.get("path")

    # Direct path bypass for debugging
    if not path and txt.startswith("/mnt/data"):
        return str(process_document(txt))
    return chat_logic(txt, path, hist)
 if __name__ == "__main__":
     # Run a collection pass before the UI is built.
     gc.collect()
     # The model is not loaded here: local_llm_generate() loads it on the
     # first request, which keeps startup memory low.
     gr.ChatInterface(fn=chat_handler, multimodal=True).launch(
         server_name="0.0.0.0",
         server_port=7860,
     )