feat: Use LibreOffice for DOCX to PDF conversion in board document preview
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

- Converts DOCX/DOC to PDF using soffice --headless
- Caches converted PDFs alongside originals
- Falls back to mammoth HTML if LibreOffice fails
- Preserves full document formatting and graphics

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-03 19:05:32 +01:00
parent 4c20e17855
commit e41187478b

View File

@ -13,6 +13,9 @@ Endpoints:
"""
import os
import subprocess
import tempfile
import shutil
from datetime import datetime
from flask import (
render_template, request, redirect, url_for, flash,
@ -27,6 +30,48 @@ from utils.decorators import rada_member_required, office_manager_required
from services.document_upload_service import DocumentUploadService
def convert_docx_to_pdf(docx_path: str) -> str | None:
"""
Convert DOCX to PDF using LibreOffice headless.
Returns path to generated PDF or None on failure.
PDF is stored alongside the original with .pdf extension.
"""
# Generate PDF path (same location, .pdf extension)
pdf_path = docx_path.rsplit('.', 1)[0] + '.pdf'
# Check if PDF already exists (cached)
if os.path.exists(pdf_path):
return pdf_path
# Convert using LibreOffice
try:
output_dir = os.path.dirname(docx_path)
result = subprocess.run(
[
'soffice',
'--headless',
'--convert-to', 'pdf',
'--outdir', output_dir,
docx_path
],
capture_output=True,
text=True,
timeout=60 # 60 seconds timeout
)
if result.returncode == 0 and os.path.exists(pdf_path):
# Ensure www-data can read it
os.chmod(pdf_path, 0o644)
return pdf_path
else:
return None
except subprocess.TimeoutExpired:
return None
except Exception:
return None
@bp.route('/')
@login_required
@rada_member_required
@ -126,30 +171,41 @@ def view(doc_id):
)
elif document.file_extension in ('docx', 'doc'):
# Convert DOCX to HTML using mammoth
try:
import mammoth
# Convert DOCX to PDF using LibreOffice (preserves formatting)
pdf_path = convert_docx_to_pdf(file_path)
with open(file_path, 'rb') as docx_file:
result = mammoth.convert_to_html(docx_file)
html_content = result.value
# Render in template with styling
return render_template(
'board/view_document.html',
document=document,
html_content=html_content,
conversion_messages=result.messages
if pdf_path and os.path.exists(pdf_path):
# Serve the converted PDF inline
pdf_filename = document.original_filename.rsplit('.', 1)[0] + '.pdf'
return send_file(
pdf_path,
as_attachment=False,
download_name=pdf_filename,
mimetype='application/pdf'
)
else:
# Fallback to mammoth HTML conversion
current_app.logger.warning(
f"LibreOffice conversion failed for {document.title}, falling back to mammoth"
)
try:
import mammoth
except ImportError:
current_app.logger.error("mammoth library not installed")
flash('Podgląd dokumentów DOCX nie jest dostępny.', 'error')
return redirect(url_for('board.index'))
except Exception as e:
current_app.logger.error(f"Failed to convert DOCX: {e}")
flash('Błąd podczas konwersji dokumentu.', 'error')
return redirect(url_for('board.index'))
with open(file_path, 'rb') as docx_file:
result = mammoth.convert_to_html(docx_file)
html_content = result.value
return render_template(
'board/view_document.html',
document=document,
html_content=html_content,
conversion_messages=result.messages
)
except Exception as e:
current_app.logger.error(f"Failed to convert DOCX: {e}")
flash('Błąd podczas konwersji dokumentu.', 'error')
return redirect(url_for('board.index'))
else:
# Unknown format - redirect to download