vachaspathi committed on
Commit
5a38a8f
·
verified ·
1 Parent(s): d3333ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -43
app.py CHANGED
@@ -6,12 +6,9 @@ import requests
6
  import os
7
  import gradio as gr
8
  import json
9
- import time
10
  import re
11
  import logging
12
- import asyncio
13
  import gc
14
- import shutil
15
 
16
  # --- Import OCR Engine & Prompts ---
17
  try:
@@ -20,7 +17,7 @@ try:
20
  except ImportError:
21
  def extract_text_from_file(path): return ""
22
  def get_ocr_extraction_prompt(txt): return txt
23
- def get_agent_prompt(h, c, u): return u
24
 
25
  logging.basicConfig(level=logging.INFO)
26
  logger = logging.getLogger("mcp_server")
@@ -83,12 +80,11 @@ def local_llm_generate(prompt: str, max_tokens: int = 512) -> Dict[str, Any]:
83
  return {"text": "Model not loaded.", "raw": None}
84
 
85
  try:
86
- # FIX: Removed invalid flags 'temperature', 'top_p', etc. when do_sample is False
87
  out = LLM_PIPELINE(
88
  prompt,
89
  max_new_tokens=max_tokens,
90
  return_full_text=False,
91
- do_sample=False # Deterministic
92
  )
93
  text = out[0]["generated_text"] if out else ""
94
  return {"text": text, "raw": out}
@@ -113,7 +109,7 @@ def create_record(module_name: str, record_data: dict) -> str:
113
  if r.status_code in (200, 201):
114
  try:
115
  d = r.json().get("data", [{}])[0].get("details", {})
116
- return json.dumps({"status": "success", "id": d.get("id"), "response": r.json()})
117
  except:
118
  return json.dumps(r.json())
119
  return r.text
@@ -129,7 +125,6 @@ def create_invoice(data: dict) -> str:
129
  @mcp.tool()
130
  def process_document(file_path: str, target_module: Optional[str] = "Contacts") -> dict:
131
  if not os.path.exists(file_path):
132
- logger.error(f"process_document: File not found at {file_path}")
133
  return {"error": f"File not found at path: {file_path}"}
134
 
135
  # 1. OCR
@@ -163,7 +158,7 @@ def parse_and_execute(model_text: str, history: list) -> str:
163
  args = _normalize_local_path_args(cmd.get("args", {}))
164
 
165
  if tool == "create_record":
166
- res = create_record(args.get("module", "Contacts"), args)
167
  results.append(f"Record: {res}")
168
  try:
169
  rj = json.loads(res)
@@ -172,57 +167,56 @@ def parse_and_execute(model_text: str, history: list) -> str:
172
  except: pass
173
 
174
  elif tool == "create_invoice":
 
175
  if not args.get("customer_id") and last_contact_id:
176
  args["customer_id"] = last_contact_id
177
 
178
- items = []
179
- for it in args.get("line_items", []):
180
- items.append({
181
- "name": it.get("name", "Item"),
182
- "rate": float(str(it.get("rate", 0)).replace("$", "")),
183
- "quantity": int(it.get("quantity", 1))
184
- })
185
-
186
- payload = {"customer_id": args.get("customer_id"), "line_items": items}
187
- if args.get("currency"): payload["currency_code"] = args["currency"]
188
-
189
- res = create_invoice(payload)
190
- results.append(f"Invoice: {res}")
191
 
192
- elif tool == "process_document":
193
- # NOTE: Prompts try to prevent this, but if it happens, we rely on args being correct
194
- res = process_document(args.get("file_path"))
195
- results.append(f"Processed: {res}")
196
 
197
  return "\n".join(results)
198
 
199
  # --- Chat Core ---
200
  def chat_logic(message: str, file_path: str, history: list) -> str:
201
 
202
- # 1. Ingest File IMMEDIATELY
203
- file_context = ""
204
  if file_path:
205
- logger.info(f"Ingesting file from path: {file_path}")
206
  doc = process_document(file_path)
 
207
  if doc.get("status") == "success":
208
- file_context = json.dumps(doc["extracted_data"])
209
- if not message: message = "Create records from this file."
 
 
 
 
 
 
210
  else:
211
- return f"OCR Failed: {doc}"
212
 
213
- # 2. Decision Prompt (With context injected)
214
  hist_txt = "\n".join([f"U: {h[0]}\nA: {h[1]}" for h in history])
215
- prompt = get_agent_prompt(hist_txt, file_context, message)
216
 
217
- # 3. Gen & Execute
218
- gen = local_llm_generate(prompt, max_tokens=200)
 
 
219
  logger.info(f"LLM Decision: {gen['text']}")
220
 
221
  tool_data = extract_json_safely(gen["text"])
222
 
223
  if tool_data:
 
224
  return parse_and_execute(gen["text"], history)
225
 
 
226
  return gen["text"]
227
 
228
  # --- UI ---
@@ -231,13 +225,6 @@ def chat_handler(msg, hist):
231
  files = msg.get("files", [])
232
  path = files[0] if files else None
233
 
234
- if path:
235
- logger.info(f"UI received file: {path}")
236
-
237
- # Direct path bypass for debugging
238
- if not path and txt.startswith("/mnt/data"):
239
- return str(process_document(txt))
240
-
241
  return chat_logic(txt, path, hist)
242
 
243
  if __name__ == "__main__":
 
6
  import os
7
  import gradio as gr
8
  import json
 
9
  import re
10
  import logging
 
11
  import gc
 
12
 
13
  # --- Import OCR Engine & Prompts ---
14
  try:
 
17
  except ImportError:
18
  def extract_text_from_file(path): return ""
19
  def get_ocr_extraction_prompt(txt): return txt
20
+ def get_agent_prompt(h, u): return u
21
 
22
  logging.basicConfig(level=logging.INFO)
23
  logger = logging.getLogger("mcp_server")
 
80
  return {"text": "Model not loaded.", "raw": None}
81
 
82
  try:
 
83
  out = LLM_PIPELINE(
84
  prompt,
85
  max_new_tokens=max_tokens,
86
  return_full_text=False,
87
+ do_sample=False
88
  )
89
  text = out[0]["generated_text"] if out else ""
90
  return {"text": text, "raw": out}
 
109
  if r.status_code in (200, 201):
110
  try:
111
  d = r.json().get("data", [{}])[0].get("details", {})
112
+ return json.dumps({"status": "success", "id": d.get("id"), "zoho_response": r.json()})
113
  except:
114
  return json.dumps(r.json())
115
  return r.text
 
125
  @mcp.tool()
126
  def process_document(file_path: str, target_module: Optional[str] = "Contacts") -> dict:
127
  if not os.path.exists(file_path):
 
128
  return {"error": f"File not found at path: {file_path}"}
129
 
130
  # 1. OCR
 
158
  args = _normalize_local_path_args(cmd.get("args", {}))
159
 
160
  if tool == "create_record":
161
+ res = create_record(args.get("module_name", "Contacts"), args.get("record_data", {}))
162
  results.append(f"Record: {res}")
163
  try:
164
  rj = json.loads(res)
 
167
  except: pass
168
 
169
  elif tool == "create_invoice":
170
+ # Auto-fill contact_id if we just created one
171
  if not args.get("customer_id") and last_contact_id:
172
  args["customer_id"] = last_contact_id
173
 
174
+ # Map Items from strict structure
175
+ invoice_payload = args # Assuming LLM passes correct structure, or map here
176
+ if last_contact_id and "customer_id" not in invoice_payload:
177
+ invoice_payload["customer_id"] = last_contact_id
 
 
 
 
 
 
 
 
 
178
 
179
+ res = create_invoice(invoice_payload)
180
+ results.append(f"Invoice: {res}")
 
 
181
 
182
  return "\n".join(results)
183
 
184
  # --- Chat Core ---
185
  def chat_logic(message: str, file_path: str, history: list) -> str:
186
 
187
+ # PHASE 1: File Upload -> Extraction Only (No Zoho Auth yet)
 
188
  if file_path:
189
+ logger.info(f"Processing file: {file_path}")
190
  doc = process_document(file_path)
191
+
192
  if doc.get("status") == "success":
193
+ extracted_json = json.dumps(doc["extracted_data"], indent=2)
194
+ # We return this text. It gets added to history.
195
+ # The User must then say "Yes, push it" to trigger Phase 2.
196
+ return (
197
+ f"I extracted the following data from **{doc['file']}**:\n\n"
198
+ f"```json\n{extracted_json}\n```\n\n"
199
+ "Please review it. If it looks correct, type **'Create Invoice'** or **'Push to Zoho'**."
200
+ )
201
  else:
202
+ return f"OCR Failed: {doc.get('error')}"
203
 
204
+ # PHASE 2: Text Interaction (Check History for JSON + Intent)
205
  hist_txt = "\n".join([f"U: {h[0]}\nA: {h[1]}" for h in history])
 
206
 
207
+ # The Prompt now checks history for JSON and waits for explicit "save/push" keywords
208
+ prompt = get_agent_prompt(hist_txt, message)
209
+
210
+ gen = local_llm_generate(prompt, max_tokens=256)
211
  logger.info(f"LLM Decision: {gen['text']}")
212
 
213
  tool_data = extract_json_safely(gen["text"])
214
 
215
  if tool_data:
216
+ # User confirmed -> Execute Tool (Triggers Zoho Auth)
217
  return parse_and_execute(gen["text"], history)
218
 
219
+ # Just chat/clarification
220
  return gen["text"]
221
 
222
  # --- UI ---
 
225
  files = msg.get("files", [])
226
  path = files[0] if files else None
227
 
 
 
 
 
 
 
 
228
  return chat_logic(txt, path, hist)
229
 
230
  if __name__ == "__main__":