TrueWrite-Scan-Backend / pdf_reports.py
GopalKrushnaMahapatra's picture
Update pdf_reports.py
e1acb44 verified
# pdf_reports.py
import os
import uuid
from datetime import datetime
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.enums import TA_CENTER, TA_RIGHT, TA_JUSTIFY, TA_LEFT
from reportlab.lib import colors
from reportlab.platypus import (
BaseDocTemplate, PageTemplate, Frame, Paragraph, Spacer, Table, TableStyle
)
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfmetrics
# Use DejaVu Serif for body text when its TTF can be registered; if
# registration fails for any reason, fall back to Times-Roman.
try:
    pdfmetrics.registerFont(
        TTFont('DejaVuSerif', '/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf')
    )
except Exception:
    BODY_FONT = 'Times-Roman'
else:
    BODY_FONT = 'DejaVuSerif'
def _build_doc(filepath, title_text, tiles, counts, sections, matched_sources, footer_text):
    """Lay out a scan report and write it as an A4 PDF to *filepath*.

    Args:
        filepath: Destination path of the PDF.
        title_text: Title drawn centred in the header of every page.
        tiles: Sequence of dicts with ``'value'`` and ``'label'`` keys; one
            summary tile (column) is drawn per entry. Skipped when empty.
        counts: Mapping rendered as a two-row table (keys over values).
            Skipped when empty.
        sections: List of dicts ``{'heading': str, 'paragraphs': [...]}``.
            Each paragraph is either a string or a dict
            ``{'text': str, 'highlight': 'yellow' | 'red' | None}``.
        matched_sources: List of dicts with ``'title'``, ``'url'`` and
            ``'similarity'`` keys, rendered as a table. Skipped when empty.
        footer_text: Body of the "Matched Source Overview" box. Skipped
            when empty.

    Note:
        Section text, source titles and ``footer_text`` are handed to
        ``Paragraph`` unchanged, so they are interpreted as ReportLab
        paragraph markup. URLs are treated as data and escaped.
    """
    # Function-scope import so the block stays self-contained.
    from xml.sax.saxutils import escape

    PAGE_WIDTH, PAGE_HEIGHT = A4
    MARGIN = 36
    usable_width = PAGE_WIDTH - 2 * MARGIN

    def as_text(value):
        """Return *value* as a string, mapping None to an empty string."""
        return '' if value is None else str(value)

    styles = getSampleStyleSheet()
    styles.add(ParagraphStyle(name='ReportTitle', fontName=BODY_FONT, fontSize=18, alignment=TA_CENTER, leading=22))
    styles.add(ParagraphStyle(name='SmallRight', fontName=BODY_FONT, fontSize=9, alignment=TA_RIGHT, textColor=colors.HexColor("#555555")))
    styles.add(ParagraphStyle(name='TileBig', fontName=BODY_FONT, fontSize=30, alignment=TA_CENTER, leading=32))
    styles.add(ParagraphStyle(name='TileLabel', fontName=BODY_FONT, fontSize=10, alignment=TA_CENTER, textColor=colors.HexColor("#666666")))
    styles.add(ParagraphStyle(name='SectionHeading', fontName=BODY_FONT, fontSize=13, spaceBefore=8, spaceAfter=4, leading=15))
    styles.add(ParagraphStyle(name='Body', fontName=BODY_FONT, fontSize=11, leading=15, alignment=TA_JUSTIFY))
    styles.add(ParagraphStyle(name='HighlightYellow', fontName=BODY_FONT, fontSize=11, leading=15, backColor=colors.HexColor("#fff3b0"), alignment=TA_JUSTIFY))
    styles.add(ParagraphStyle(name='HighlightRed', fontName=BODY_FONT, fontSize=11, leading=15, backColor=colors.HexColor("#ffd6d6"), alignment=TA_JUSTIFY))
    styles.add(ParagraphStyle(name='Footer', fontName=BODY_FONT, fontSize=9, alignment=TA_RIGHT, textColor=colors.HexColor("#666666")))
    styles.add(ParagraphStyle(name='MatchedHeader', fontName=BODY_FONT, fontSize=12, leading=14, alignment=TA_LEFT, spaceBefore=6, spaceAfter=6))

    # Computed once so every page of the document shows the same timestamp.
    date_str = datetime.now().strftime("%d %B %Y, %H:%M")

    def header_footer(canvas, doc):
        """Draw the date and title above the frame and the page number below it."""
        canvas.saveState()
        canvas.setFont(BODY_FONT, 9)
        canvas.setFillColor(colors.HexColor("#555555"))
        canvas.drawString(MARGIN, PAGE_HEIGHT - MARGIN + 8, f"Date: {date_str}")
        canvas.setFont(BODY_FONT, 16)
        canvas.setFillColor(colors.black)
        canvas.drawCentredString(PAGE_WIDTH / 2.0, PAGE_HEIGHT - MARGIN + 4, title_text)
        canvas.setFont(BODY_FONT, 9)
        canvas.setFillColor(colors.HexColor("#666666"))
        canvas.drawRightString(PAGE_WIDTH - MARGIN, MARGIN - 10, f"Page {doc.page}")
        canvas.restoreState()

    doc = BaseDocTemplate(filepath, pagesize=A4,
                          leftMargin=MARGIN, rightMargin=MARGIN,
                          topMargin=MARGIN, bottomMargin=MARGIN)
    frame = Frame(MARGIN, MARGIN, usable_width, PAGE_HEIGHT - 2 * MARGIN, id='normal')
    template = PageTemplate(id='report', frames=[frame], onPage=header_footer)
    doc.addPageTemplates([template])

    story = []

    # Summary tiles: one equal-width column per tile supplied (typically 4),
    # rather than assuming exactly four entries.
    tile_values = list(tiles or [])
    if tile_values:
        tiles_data = [
            [Paragraph(f"<b>{as_text(tile.get('value'))}</b>", styles['TileBig']) for tile in tile_values],
            [Paragraph(as_text(tile.get('label')), styles['TileLabel']) for tile in tile_values],
        ]
        tiles_table = Table(
            tiles_data,
            colWidths=[usable_width / float(len(tile_values))] * len(tile_values),
            rowHeights=[46, 18],
        )
        tiles_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f7f7f9")),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('BOX', (0, 0), (-1, -1), 0.6, colors.HexColor("#dddddd")),
        ]))
        story.append(tiles_table)
        story.append(Spacer(1, 12))

    # Counts row: keys form the header row, values the row beneath.
    if counts:
        counts_table = Table([list(counts.keys()), list(counts.values())],
                             colWidths=[usable_width / len(counts)] * len(counts))
        counts_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f4f6f7")),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('BOX', (0, 0), (-1, -1), 0.5, colors.HexColor("#e6e6e6")),
        ]))
        story.append(counts_table)
        story.append(Spacer(1, 12))

    # Sections with optional per-paragraph highlighting.
    highlight_styles = {'yellow': 'HighlightYellow', 'red': 'HighlightRed'}
    for sec in sections or []:
        heading = sec.get('heading')
        if heading:
            story.append(Paragraph(as_text(heading), styles['SectionHeading']))
        for para in sec.get('paragraphs') or []:
            if isinstance(para, dict):
                # Dict form: {'text': '...', 'highlight': 'yellow'/'red'/None}
                text = as_text(para.get('text'))
                style_name = highlight_styles.get(para.get('highlight'), 'Body')
            else:
                text = as_text(para)
                style_name = 'Body'
            story.append(Paragraph(text, styles[style_name]))
            story.append(Spacer(1, 6))
        story.append(Spacer(1, 10))

    # Matched Sources table (if any)
    if matched_sources:
        story.append(Paragraph("Matched Sources", styles['MatchedHeader']))
        ms_table_data = [["#", "Source Title", "URL", "Similarity"]]
        for i, ms in enumerate(matched_sources, start=1):
            title_par = Paragraph(as_text(ms.get('title')), styles['Body'])
            url = as_text(ms.get('url'))
            if url:
                # The URL is data, not markup: escape it for the attribute
                # value and for the visible text so characters such as
                # '"', '<' or '&' cannot break the paragraph markup.
                href = escape(url, {'"': '&quot;'})
                url_par = Paragraph(f'<link href="{href}">{escape(url)}</link>', styles['Body'])
            else:
                url_par = Paragraph('', styles['Body'])
            ms_table_data.append([str(i), title_par, url_par, ms.get('similarity', '')])

        # Fixed-width index column; the remaining width is shared between
        # title / URL / similarity in a 0.35 : 0.45 : 0.15 ratio, rescaled so
        # that the columns add up to exactly usable_width.
        index_width = 30
        weights = (0.35, 0.45, 0.15)
        remaining = usable_width - index_width
        ms_col_widths = [index_width] + [remaining * w / sum(weights) for w in weights]

        ms_table = Table(ms_table_data, colWidths=ms_col_widths)
        ms_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f2f4f5")),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.HexColor("#333333")),
            ('ALIGN', (0, 0), (-1, 0), 'CENTER'),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('BOX', (0, 0), (-1, -1), 0.6, colors.HexColor("#e0e0e0")),
            ('INNERGRID', (0, 0), (-1, -1), 0.4, colors.HexColor("#efefef")),
            ('LEFTPADDING', (1, 1), (1, -1), 6),
            ('LEFTPADDING', (2, 1), (2, -1), 6),
        ]))
        story.append(ms_table)
        story.append(Spacer(1, 14))

    # Matched Source Overview block (footer_text)
    if footer_text:
        matched_table = Table(
            [[Paragraph("<b>Matched Source Overview</b>", styles['Body'])],
             [Paragraph(footer_text, styles['Body'])]],
            colWidths=[usable_width]
        )
        matched_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#f7fafb")),
            ('BOX', (0, 0), (-1, -1), 0.5, colors.HexColor("#e6e6e6")),
            ('LEFTPADDING', (0, 0), (-1, -1), 8),
            ('RIGHTPADDING', (0, 0), (-1, -1), 8),
            ('TOPPADDING', (0, 0), (-1, -1), 6),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
        ]))
        story.append(matched_table)

    story.append(Spacer(1, 24))
    story.append(Paragraph("Generated by TrueWrite Scan • https://gopalkrushnamahapatra-truewrite-scan.static.hf.space", styles['Footer']))

    doc.build(story)
def generate_report(report_type: str, out_dir: str = "/tmp", **kwargs) -> str:
    """Generate a PDF report and return the path of the written file.

    The file is created inside *out_dir* (created if missing) and named
    ``<report_type>_report_<8 hex chars>.pdf``.

    Args:
        report_type: "ai" | "grammar" | "plagiarism"; used as the filename
            prefix.
        out_dir: Directory the PDF is written to.
        **kwargs: Optional report content. A missing or ``None`` value
            falls back to its default; for every key except ``title_text``
            an empty value does too:
            - title_text: str, page header title (default "Report")
            - tiles: list of dicts [{'value': '12%', 'label': 'Plagiarism'}, ...]
              (default: four plagiarism tiles showing 0% / 0% / 0% / 100%)
            - counts: dict {'Words': 950, ...}
            - sections: list [{'heading': '', 'paragraphs': [...]}]
            - matched_sources: list [{'title', 'url', 'similarity'}]
            - footer_text: str

    Returns:
        Path to the generated PDF.
    """
    os.makedirs(out_dir, exist_ok=True)
    filename = f"{report_type}_report_{uuid.uuid4().hex[:8]}.pdf"
    filepath = os.path.join(out_dir, filename)

    # An explicit title_text=None must not reach the canvas drawing calls;
    # treat it like a missing key. An empty string is still honoured.
    title_text = kwargs.get('title_text')
    if title_text is None:
        title_text = "Report"

    tiles = kwargs.get('tiles') or [
        {'value': '0%', 'label': 'Plagiarism'},
        {'value': '0%', 'label': 'Exact Match'},
        {'value': '0%', 'label': 'Partial Match'},
        {'value': '100%', 'label': 'Unique'},
    ]
    counts = kwargs.get('counts') or {}
    sections = kwargs.get('sections') or []
    matched_sources = kwargs.get('matched_sources') or []
    footer_text = kwargs.get('footer_text') or ''

    _build_doc(filepath, title_text, tiles, counts, sections, matched_sources, footer_text)
    return filepath