feat: Use LibreOffice for DOCX to PDF conversion in board document preview
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
- Converts DOCX/DOC to PDF using soffice --headless
- Caches converted PDFs alongside originals
- Falls back to mammoth HTML if LibreOffice fails
- Preserves full document formatting and graphics

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
4c20e17855
commit
e41187478b
@ -13,6 +13,9 @@ Endpoints:
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from flask import (
|
||||
render_template, request, redirect, url_for, flash,
|
||||
@ -27,6 +30,48 @@ from utils.decorators import rada_member_required, office_manager_required
|
||||
from services.document_upload_service import DocumentUploadService
|
||||
|
||||
|
||||
def convert_docx_to_pdf(docx_path: str) -> str | None:
|
||||
"""
|
||||
Convert DOCX to PDF using LibreOffice headless.
|
||||
Returns path to generated PDF or None on failure.
|
||||
PDF is stored alongside the original with .pdf extension.
|
||||
"""
|
||||
# Generate PDF path (same location, .pdf extension)
|
||||
pdf_path = docx_path.rsplit('.', 1)[0] + '.pdf'
|
||||
|
||||
# Check if PDF already exists (cached)
|
||||
if os.path.exists(pdf_path):
|
||||
return pdf_path
|
||||
|
||||
# Convert using LibreOffice
|
||||
try:
|
||||
output_dir = os.path.dirname(docx_path)
|
||||
result = subprocess.run(
|
||||
[
|
||||
'soffice',
|
||||
'--headless',
|
||||
'--convert-to', 'pdf',
|
||||
'--outdir', output_dir,
|
||||
docx_path
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=60 # 60 seconds timeout
|
||||
)
|
||||
|
||||
if result.returncode == 0 and os.path.exists(pdf_path):
|
||||
# Ensure www-data can read it
|
||||
os.chmod(pdf_path, 0o644)
|
||||
return pdf_path
|
||||
else:
|
||||
return None
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
@bp.route('/')
|
||||
@login_required
|
||||
@rada_member_required
|
||||
@ -126,30 +171,41 @@ def view(doc_id):
|
||||
)
|
||||
|
||||
elif document.file_extension in ('docx', 'doc'):
|
||||
# Convert DOCX to HTML using mammoth
|
||||
try:
|
||||
import mammoth
|
||||
# Convert DOCX to PDF using LibreOffice (preserves formatting)
|
||||
pdf_path = convert_docx_to_pdf(file_path)
|
||||
|
||||
with open(file_path, 'rb') as docx_file:
|
||||
result = mammoth.convert_to_html(docx_file)
|
||||
html_content = result.value
|
||||
|
||||
# Render in template with styling
|
||||
return render_template(
|
||||
'board/view_document.html',
|
||||
document=document,
|
||||
html_content=html_content,
|
||||
conversion_messages=result.messages
|
||||
if pdf_path and os.path.exists(pdf_path):
|
||||
# Serve the converted PDF inline
|
||||
pdf_filename = document.original_filename.rsplit('.', 1)[0] + '.pdf'
|
||||
return send_file(
|
||||
pdf_path,
|
||||
as_attachment=False,
|
||||
download_name=pdf_filename,
|
||||
mimetype='application/pdf'
|
||||
)
|
||||
else:
|
||||
# Fallback to mammoth HTML conversion
|
||||
current_app.logger.warning(
|
||||
f"LibreOffice conversion failed for {document.title}, falling back to mammoth"
|
||||
)
|
||||
try:
|
||||
import mammoth
|
||||
|
||||
except ImportError:
|
||||
current_app.logger.error("mammoth library not installed")
|
||||
flash('Podgląd dokumentów DOCX nie jest dostępny.', 'error')
|
||||
return redirect(url_for('board.index'))
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Failed to convert DOCX: {e}")
|
||||
flash('Błąd podczas konwersji dokumentu.', 'error')
|
||||
return redirect(url_for('board.index'))
|
||||
with open(file_path, 'rb') as docx_file:
|
||||
result = mammoth.convert_to_html(docx_file)
|
||||
html_content = result.value
|
||||
|
||||
return render_template(
|
||||
'board/view_document.html',
|
||||
document=document,
|
||||
html_content=html_content,
|
||||
conversion_messages=result.messages
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Failed to convert DOCX: {e}")
|
||||
flash('Błąd podczas konwersji dokumentu.', 'error')
|
||||
return redirect(url_for('board.index'))
|
||||
|
||||
else:
|
||||
# Unknown format - redirect to download
|
||||
|
||||
Loading…
Reference in New Issue
Block a user