Spaces:
Running
Running
| # prompts.py | |
| # Qwen-2.5 Compatible Prompts | |
| # Strict SOP for "Document Processing SOP for Zoho Invoice Integration" | |
| # Outputs MUST be wrapped between <<<JSON>>> and <<<END_JSON>>> markers. | |
| from typing import Optional | |
def get_ocr_extraction_prompt(raw_text: str, page_count: int = 1) -> str:
    """
    Build the strict ChatML extraction prompt for one OCR'd document.

    The prompt instructs the model to:
    - classify the doc
    - extract fields into a fixed JSON schema
    - validate totals and dates
    - output ONLY the JSON between <<<JSON>>> and <<<END_JSON>>>

    Args:
        raw_text: OCR text of the document. Only the first 3000 characters
            are embedded in the prompt, whether or not the caller has
            already truncated it.
        page_count: Number of pages, reported to the model in the
            MULTI-PAGE section.

    Returns:
        The prompt string, ending with an opened assistant turn
        (``<|im_start|>assistant`` followed by a newline).
    """
    # Raw string: the schema is inserted as a value via {schema}, so its
    # literal braces need no f-string escaping.
    schema = r'''
Top-level schema (use null for unknown fields):
{
"document_type": "", "document_id": "", "invoice_date": "", "due_date": "", "currency": "",
"totals": { "sub_total": null, "tax_total": null, "round_off": null, "grand_total": null },
"seller": { "company": null, "address": null, "city": null, "state": null, "zip": null, "country": null, "gstin": null, "pan": null, "bank_details": null },
"buyer": { "contact_name": null, "company_name": null, "billing_address": null, "shipping_address": null, "email": null, "phone": null, "gstin": null, "pan": null },
"line_items": [ { "name": null, "description": null, "hsn_or_sac": null, "sku": null, "quantity": null, "unit": null, "rate": null, "amount": null, "taxes": [ { "type": null, "rate": null, "amount": null, "tax_id": null } ] } ],
"tax_breakdown": [ { "tax_type": null, "cgst": null, "sgst": null, "igst": null, "cess": null } ],
"references": { "reference_invoice_number": null, "po_number": null, "delivery_challan": null },
"payment_terms": null, "notes": null, "qr_codes": [ { "type": null, "value": null } ],
"raw_text_sample": null,
"validation": { "amounts_balanced": null, "missing_critical_fields": [] }
}
'''
    # NOTE(review): raw_text is interpolated verbatim. If OCR output can
    # contain ChatML control tokens (e.g. "<|im_end|>"), confirm the caller
    # sanitizes it before it reaches this prompt.
    # The final turn marker must be exactly "<|im_start|>assistant"; the
    # previous "<|im_start|assistant" was missing the closing "|>".
    return f"""<|im_start|>system
You are an invoice & document data extraction assistant. Follow instructions exactly.
OUTPUT RULES (MUST FOLLOW):
- Produce ONE valid JSON object and NOTHING else.
- Wrap JSON between EXACT markers with no extra commentary:
<<<JSON>>>
{{ ... }}
<<<END_JSON>>>
- Use double quotes for all JSON strings. No trailing commas.
- Dates must be ISO YYYY-MM-DD or null. Numeric fields must be numbers or null.
- If unknown, use null or empty list/object as appropriate.
SCHEMA:
{schema}
VALIDATION:
- Normalize and validate dates; if unparseable set null and add to validation.missing_critical_fields.
- Normalize numeric values (remove commas/currency symbols). If conversion fails set null and add to missing_critical_fields.
- Set validation.amounts_balanced = true only if sum(line_items.amount) + totals.tax_total ± totals.round_off equals totals.grand_total (tolerance 0.5).
- Include up to first 3000 chars of raw text in raw_text_sample.
MULTI-PAGE:
- page_count = {page_count}. Merge line_items across pages.
Do NOT call external APIs. Output only the JSON between the markers.
<|im_end|>
<|im_start|>user
Input Text (first 3000 chars):
{raw_text[:3000]}
<|im_end|>
<|im_start|>assistant
"""
def get_agent_prompt(history_text: str, user_message: str) -> str:
    """
    Build the ChatML orchestrator prompt for the Zoho assistant.

    When asked to persist, the model is told to output EXACT tool-call JSON:
    { "tool": "<tool_name>", "args": { ... } }
    Otherwise it is told to produce a human-friendly summary (no tool JSON).

    Args:
        history_text: Prior conversation text, inserted under "HISTORY:".
        user_message: The current user request, inserted under
            "CURRENT REQUEST:".

    Returns:
        The prompt string, ending with an opened assistant turn
        (``<|im_start|>assistant`` followed by a newline).
    """
    # NOTE(review): history_text and user_message are interpolated verbatim;
    # confirm upstream code strips ChatML control tokens from user input.
    # The final turn marker must be exactly "<|im_start|>assistant"; the
    # previous "<|im_start|assistant" was missing the closing "|>".
    return f"""<|im_start|>system
You are the Zoho CRM / Zoho Invoice Orchestrator Assistant.
TOOLS (only call when user explicitly requests persist/save/create/push/upload):
- create_contact(contact_json)
- create_item(item_json)
- create_invoice(invoice_json)
- create_creditnote(creditnote_json)
MANDATES:
- If calling a tool, output ONLY a single JSON object:
{{ "tool": "<tool_name>", "args": {{ ... }} }}
and nothing else.
- If not calling a tool, return a human-readable summary and recommended next steps (no tool JSON).
- If validation.amounts_balanced is false or critical fields missing, DO NOT call tools; ask for manual review.
<|im_end|>
<|im_start|>user
HISTORY:
{history_text}
CURRENT REQUEST:
{user_message}
<|im_end|>
<|im_start|>assistant
"""
# Lightweight prompt the app uses to sanity-check already-parsed JSON.
def get_quick_extraction_check_prompt(summary: str) -> str:
    """
    Return a plain (non-ChatML) prompt asking the model to validate JSON.

    The prompt names the required fields to check and the exact shape of
    the JSON verdict to return, followed by ``summary`` under "Input:".
    """
    prompt_lines = [
        "You are a JSON validator. Check the JSON below for required fields: "
        "document_id, invoice_date, totals.grand_total, buyer.contact_name.",
        'Return only a JSON: { "missing_fields": [...], "parse_warnings": [...], "ok": true|false }',
        "Input:",
        f"{summary}",
    ]
    # Every line, including the last, is newline-terminated.
    return "\n".join(prompt_lines) + "\n"