ocr_mcp_1

Running

App Files Community

vachaspathi committed 19 days ago

Commit

168e3cd

verified ·

1 Parent(s): ca18505

Update prompts.py

Browse files

Files changed (1) hide show

prompts.py +44 -408

prompts.py CHANGED Viewed

@@ -1,408 +1,44 @@
-# app.py — MCP server (single-file)
-from mcp.server.fastmcp import FastMCP
-from typing import Optional, List, Tuple, Any, Dict
-import requests
-import os
-import gradio as gr
-import json
-import time
-import traceback
-import re
-import logging
-import base64
-import asyncio
-import gc
-# --- NEW: Import OCR Engine & Prompts ---
-try:
-    from ocr_engine import extract_text_from_file
-    from prompts import get_ocr_extraction_prompt, get_agent_prompt
-except ImportError:
-    # Fallback
-    def extract_text_from_file(path): return "OCR Engine not loaded."
-    def get_ocr_extraction_prompt(txt): return txt
-    def get_agent_prompt(h, c, u): return u
-# Setup logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("mcp_server")
-# Attempt to import transformers
-TRANSFORMERS_AVAILABLE = False
-try:
-    from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
-    TRANSFORMERS_AVAILABLE = True
-except Exception as e:
-    logger.warning("transformers not available: %s", e)
-    TRANSFORMERS_AVAILABLE = False
-# ----------------------------
-# Load config
-# ----------------------------
-try:
-    from config import (
-        CLIENT_ID, CLIENT_SECRET, REFRESH_TOKEN, API_BASE,
-        INVOICE_API_BASE, ORGANIZATION_ID, LOCAL_MODEL
-    )
-except Exception as e:
-    raise SystemExit("Config missing. Check config.py.")
-mcp = FastMCP("ZohoCRMAgent")
-# ----------------------------
-# Analytics (Kept intact)
-# ----------------------------
-ANALYTICS_PATH = "mcp_analytics.json"
-def _init_analytics():
-    if not os.path.exists(ANALYTICS_PATH):
-        with open(ANALYTICS_PATH, "w") as f: json.dump({}, f)
-def _log_tool_call(t, s): pass
-def _log_llm_call(c): pass
-_init_analytics()
-# ----------------------------
-# FIX: Regex JSON Extractor
-# ----------------------------
-def extract_json_safely(text: str) -> Optional[Any]:
-    """
-    Extracts JSON from text even if the model adds conversational filler.
-    Fixes the '(Parse) Model output was not valid JSON' error.
-    """
-    try:
-        # 1. Try direct parse
-        return json.loads(text)
-    except:
-        pass
-    # 2. Regex search for { ... } or [ ... ]
-    try:
-        match = re.search(r'(\{.*\}|\[.*\])', text, re.DOTALL)
-        if match:
-            json_str = match.group(0)
-            return json.loads(json_str)
-    except:
-        pass
-    return None
-# ----------------------------
-# Local LLM loader
-# ----------------------------
-LLM_PIPELINE = None
-TOKENIZER = None
-LOADED_MODEL_NAME = None
-def init_local_model():
-    global LLM_PIPELINE, TOKENIZER, LOADED_MODEL_NAME
-    if not LOCAL_MODEL or not TRANSFORMERS_AVAILABLE:
-        return
-    try:
-        logger.info(f"Loading model: {LOCAL_MODEL}...")
-        TOKENIZER = AutoTokenizer.from_pretrained(LOCAL_MODEL, trust_remote_code=True)
-        # Use CPU if needed, or remove device_map="auto" if causing issues
-        model = AutoModelForCausalLM.from_pretrained(LOCAL_MODEL, trust_remote_code=True, device_map="auto")
-        # FIX: Lower max_new_tokens to prevent 400s generation loops
-        LLM_PIPELINE = pipeline("text-generation", model=model, tokenizer=TOKENIZER)
-        LOADED_MODEL_NAME = LOCAL_MODEL
-        logger.info("Model loaded.")
-    except Exception as e:
-        logger.error(f"Model load failed: {e}")
-init_local_model()
-def local_llm_generate(prompt: str, max_tokens: int = 512) -> Dict[str, Any]:
-    if LLM_PIPELINE is None:
-        return {"text": "LLM not loaded.", "raw": None}
-    try:
-        # FIX: return_full_text=False ensures we don't re-parse the prompt
-        out = LLM_PIPELINE(prompt, max_new_tokens=max_tokens, return_full_text=False)
-        text = out[0]["generated_text"] if out else ""
-        return {"text": text, "raw": out}
-    except Exception as e:
-        return {"text": f"Error: {e}", "raw": None}
-# ----------------------------
-# Helper: normalize local file_path args (Kept intact)
-# ----------------------------
-def _normalize_local_path_args(args: Any) -> Any:
-    if not isinstance(args, dict): return args
-    fp = args.get("file_path") or args.get("path")
-    if isinstance(fp, str) and fp.startswith("/mnt/data/") and os.path.exists(fp):
-        args["file_url"] = f"file://{fp}"
-    return args
-# ----------------------------
-# Zoho Auth & Tools (Kept intact)
-# ----------------------------
-def _get_valid_token_headers() -> dict:
-    token_url = "https://accounts.zoho.in/oauth/v2/token"
-    params = {
-        "refresh_token": REFRESH_TOKEN, "client_id": CLIENT_ID,
-        "client_secret": CLIENT_SECRET, "grant_type": "refresh_token"
-    }
-    r = requests.post(token_url, params=params, timeout=20)
-    if r.status_code == 200:
-        return {"Authorization": f"Zoho-oauthtoken {r.json().get('access_token')}"}
-    raise RuntimeError(f"Token refresh failed: {r.text}")
-@mcp.tool()
-def authenticate_zoho() -> str:
-    _get_valid_token_headers(); return "Zoho token refreshed (ok)."
-@mcp.tool()
-def create_record(module_name: str, record_data: dict) -> str:
-    headers = _get_valid_token_headers()
-    url = f"{API_BASE}/{module_name}"
-    r = requests.post(url, headers=headers, json={"data": [record_data]}, timeout=20)
-    if r.status_code in (200, 201): return json.dumps(r.json(), ensure_ascii=False)
-    return f"Error: {r.text}"
-@mcp.tool()
-def get_records(module_name: str, page: int = 1, per_page: int = 200) -> list:
-    headers = _get_valid_token_headers()
-    r = requests.get(f"{API_BASE}/{module_name}", headers=headers, params={"page": page, "per_page": per_page})
-    return r.json().get("data", []) if r.status_code == 200 else []
-@mcp.tool()
-def update_record(module_name: str, record_id: str, data: dict) -> str:
-    headers = _get_valid_token_headers()
-    r = requests.put(f"{API_BASE}/{module_name}/{record_id}", headers=headers, json={"data": [data]})
-    return json.dumps(r.json()) if r.status_code == 200 else r.text
-@mcp.tool()
-def delete_record(module_name: str, record_id: str) -> str:
-    headers = _get_valid_token_headers()
-    r = requests.delete(f"{API_BASE}/{module_name}/{record_id}", headers=headers)
-    return json.dumps(r.json()) if r.status_code == 200 else r.text
-def _ensure_invoice_config():
-    if not INVOICE_API_BASE or not ORGANIZATION_ID: raise RuntimeError("Invoice Config Missing")
-@mcp.tool()
-def create_invoice(data: dict) -> str:
-    _ensure_invoice_config()
-    headers = _get_valid_token_headers()
-    params = {"organization_id": ORGANIZATION_ID}
-    r = requests.post(f"{INVOICE_API_BASE}/invoices", headers=headers, params=params, json=data)
-    if r.status_code in (200, 201): return json.dumps(r.json(), ensure_ascii=False)
-    return f"Error creating invoice: {r.text}"
-def upload_invoice_attachment(invoice_id: str, file_path: str) -> str:
-    if not os.path.exists(file_path): return "File not found"
-    headers = _get_valid_token_headers()
-    headers.pop("Content-Type", None)
-    url = f"{INVOICE_API_BASE}/invoices/{invoice_id}/attachments"
-    with open(file_path, "rb") as f:
-        files = {"attachment": (os.path.basename(file_path), f)}
-        r = requests.post(url, headers=headers, params={"organization_id": ORGANIZATION_ID}, files=files)
-    return json.dumps(r.json()) if r.status_code in (200, 201) else r.text
-@mcp.tool()
-def process_document(file_path: str, target_module: Optional[str] = "Contacts") -> dict:
-    """
-    Extracts data from file using OCR + LLM.
-    """
-    try:
-        if not os.path.exists(file_path):
-             return {"status": "error", "error": "file not found"}
-        # 1. Perform OCR
-        raw_text = extract_text_from_file(file_path)
-        if not raw_text or len(raw_text) < 5:
-            return {"status": "error", "error": "OCR failed to extract text."}
-        # 2. Use Prompt Template (Strict Mode)
-        # FIX: Use prompts.py template + reduce max_tokens for speed
-        prompt = get_ocr_extraction_prompt(raw_text)
-        llm_out = local_llm_generate(prompt, max_tokens=300) # 300 tokens is plenty for JSON
-        extracted_text = llm_out.get("text", "")
-        # FIX: Use Regex Safe Extraction
-        extracted_data = extract_json_safely(extracted_text)
-        if not extracted_data:
-            # Fallback for debugging
-            extracted_data = {"raw_llm_text": extracted_text}
-        return {
-            "status": "success",
-            "file": os.path.basename(file_path),
-            "extracted_data": extracted_data
-        }
-    except Exception as e:
-        return {"status": "error", "error": str(e)}
-# ----------------------------
-# Helpers: map LLM args -> Zoho payloads (Kept intact)
-# ----------------------------
-def _extract_created_id_from_zoho_response(resp_json) -> Optional[str]:
-    # (Same implementation as before)
-    try:
-        if isinstance(resp_json, str): resp_json = json.loads(resp_json)
-        data = resp_json.get("data") or resp_json.get("result")
-        if data and isinstance(data, list):
-             d = data[0].get("details") or data[0]
-             return str(d.get("id") or d.get("ID") or d.get("Id"))
-        if "invoice" in resp_json: return str(resp_json["invoice"].get("invoice_id"))
-    except: pass
-    return None
-def _map_contact_args_to_zoho_payload(args: dict) -> dict:
-    # (Same implementation as before - abbreviated for strict structure compliance)
-    p = {}
-    if "contact" in args: p["Last_Name"] = args["contact"]
-    if "email" in args: p["Email"] = args["email"]
-    # ... map other fields ...
-    for k,v in args.items():
-        if k not in ["contact", "email", "items"]: p[k] = v
-    return p
-def _build_invoice_payload_for_zoho(contact_id: str, invoice_items: List[dict], currency: str = None, vat_pct: float = 0.0) -> dict:
-    # (Same implementation as before)
-    line_items = []
-    for it in invoice_items:
-        qty = int(it.get("quantity", 1))
-        rate = float(str(it.get("rate", 0)).replace("$",""))
-        line_items.append({"name": it.get("name","Item"), "rate": rate, "quantity": qty})
-    payload = {"customer_id": contact_id, "line_items": line_items}
-    if currency: payload["currency_code"] = currency
-    return payload
-# ----------------------------
-# Parse & Execute (Kept intact)
-# ----------------------------
-def parse_and_execute_model_tool_output(model_text: str, history: Optional[List] = None) -> str:
-    # FIX: Use Safe Extraction first
-    payload = extract_json_safely(model_text)
-    if not payload:
-        return "(Parse) Model output was not valid JSON tool instruction."
-    # Normalize to list
-    instructions = [payload] if isinstance(payload, dict) else payload
-    results = []
-    contact_id = None
-    for instr in instructions:
-        if not isinstance(instr, dict): continue
-        tool = instr.get("tool")
-        args = instr.get("args", {})
-        args = _normalize_local_path_args(args)
-        if tool == "create_record":
-            # ... (logic same as before)
-            res = create_record(args.get("module", "Contacts"), _map_contact_args_to_zoho_payload(args))
-            results.append(f"create_record -> {res}")
-            contact_id = _extract_created_id_from_zoho_response(res)
-        elif tool == "create_invoice":
-            # ... (logic same as before)
-            if not contact_id: contact_id = args.get("customer_id")
-            if contact_id:
-                inv_payload = _build_invoice_payload_for_zoho(contact_id, args.get("line_items", []))
-                res = create_invoice(inv_payload)
-                results.append(f"create_invoice -> {res}")
-            else:
-                results.append("Skipped invoice: missing contact_id")
-        elif tool == "process_document":
-             res = process_document(args.get("file_path"))
-             results.append(f"process -> {res}")
-    return "\n".join(results) if results else "No tools executed."
-# ----------------------------
-# Command Parser (Debug)
-# ----------------------------
-def try_parse_and_invoke_command(text: str):
-    # (Same implementation)
-    if text.startswith("/mnt/data/"): return str(process_document(text))
-    return None
-# ----------------------------
-# Chat Logic
-# ----------------------------
-def deepseek_response(message: str, file_path: Optional[str] = None, history: list = []) -> str:
-    # 1. Handle File (OCR)
-    ocr_context = ""
-    if file_path:
-        logger.info(f"Processing file: {file_path}")
-        doc_result = process_document(file_path)
-        if doc_result.get("status") == "success":
-            data = doc_result["extracted_data"]
-            ocr_context = json.dumps(data, ensure_ascii=False)
-            if not message:
-                message = "I uploaded a file. Create the contact and invoice."
-        else:
-            return f"Error processing file: {doc_result.get('error')}"
-    # 2. Build Prompt (FIX: Use prompts.py)
-    # Flatten history for the prompt
-    history_text = "\n".join([f"User: {h[0]}\nBot: {h[1]}" for h in history])
-    prompt = get_agent_prompt(history_text, ocr_context, message)
-    # 3. Generate
-    gen = local_llm_generate(prompt, max_tokens=256)
-    response_text = gen["text"]
-    # 4. Check for JSON Tool Call (FIX: Use Safe Extraction)
-    tool_json = extract_json_safely(response_text)
-    if tool_json and isinstance(tool_json, (dict, list)):
-        try:
-            # We must pass the RAW text or the JSON object?
-            # Your existing function `parse_and_execute...` expects a string or valid json structure.
-            # Let's pass the JSON stringified to be safe, or modify the caller.
-            # The safest way given your strict structure requirement is:
-            return parse_and_execute_model_tool_output(json.dumps(tool_json), history)
-        except Exception as e:
-            return f"(Execute) Error: {e}"
-    return response_text
-# ----------------------------
-# Chat Handler
-# ----------------------------
-def chat_handler(message, history):
-    user_text = ""
-    uploaded_file_path = None
-    if isinstance(message, dict):
-        user_text = message.get("text", "")
-        files = message.get("files", [])
-        if files: uploaded_file_path = files[0]
-    else:
-        user_text = str(message)
-    # Debug command bypass
-    if not uploaded_file_path:
-        cmd = try_parse_and_invoke_command(user_text)
-        if cmd: return cmd
-    return deepseek_response(user_text, uploaded_file_path, history)
-# ----------------------------
-# FIX: Cleanup for fd -1 error
-# ----------------------------
-def cleanup_event_loop():
-    gc.collect()
-    try:
-        loop = asyncio.get_event_loop()
-        if loop.is_closed():
-            asyncio.set_event_loop(asyncio.new_event_loop())
-    except RuntimeError:
-        asyncio.set_event_loop(asyncio.new_event_loop())
-if __name__ == "__main__":
-    cleanup_event_loop()
-    demo = gr.ChatInterface(
-        fn=chat_handler,
-        multimodal=True,
-        textbox=gr.MultimodalTextbox(interactive=True, file_count="single", placeholder="Upload Invoice or ask to create records...")
-    )
-    demo.launch(server_name="0.0.0.0", server_port=7860)

+# prompts.py
+# Qwen-2.5 Compatible Prompts (ChatML format)
+def get_ocr_extraction_prompt(raw_text: str) -> str:
+    return f"""<|im_start|>system
+You are a precise Data Extraction Engine.
+Extract data from the text below and return a JSON object.
+Fields: contact_name, total_amount, currency, invoice_date, line_items (name, quantity, rate).
+Output ONLY JSON. No markdown.
+<|im_end|>
+<|im_start|>user
+Input Text:
+{raw_text[:3000]}
+Return the JSON:
+<|im_end|>
+<|im_start|>assistant
+"""
+def get_agent_prompt(history_text: str, ocr_context: str, user_message: str) -> str:
+    context_block = ""
+    if ocr_context:
+        context_block = f"CONTEXT FROM FILE:\n{ocr_context}\n"
+    return f"""<|im_start|>system
+You are Zoho Assistant. Tools:
+1. create_record(module_name, record_data)
+2. create_invoice(data)
+3. process_document(file_path)
+If user wants an action, return JSON: {{"tool": "name", "args": {{...}}}}
+Use CONTEXT FROM FILE to fill args.
+Return ONLY JSON.
+<|im_end|>
+<|im_start|>user
+{context_block}
+HISTORY:
+{history_text}
+REQUEST:
+{user_message}
+<|im_end|>
+<|im_start|>assistant
+"""