nordabiz/services/document_upload_service.py
Maciej Pienczyn 5030b71beb
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
chore: update Author to Maciej Pienczyn, InPi sp. z o.o. across all files
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 08:20:47 +02:00

238 lines
7.3 KiB
Python

"""
Board Document Upload Service
=============================
Secure file upload handling for Rada Izby (Board Council) documents.
Supports PDF, DOCX, DOC files up to 50MB.
Features:
- File type validation (magic bytes + extension)
- Size limits
- UUID-based filenames for security
- Date-organized storage structure
- Protected storage outside webroot
Author: Maciej Pienczyn, InPi sp. z o.o.
Created: 2026-02-03
"""
import os
import uuid
import logging
from datetime import datetime
from typing import Tuple, Optional
from werkzeug.datastructures import FileStorage
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Extension -> MIME type table. The two allow-lists below are derived from it
# so the accepted extensions and MIME types cannot drift apart.
MIME_TYPES = {
    'pdf': 'application/pdf',
    'doc': 'application/msword',
    'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
}
ALLOWED_EXTENSIONS = set(MIME_TYPES)
ALLOWED_MIME_TYPES = set(MIME_TYPES.values())

# Upload size ceiling in bytes (50MB).
MAX_FILE_SIZE = 50 * 1024 ** 2

# Storage root; meant to live outside the webroot so files are never served
# directly.
UPLOAD_BASE_PATH = '/data/board-docs'

# Leading "magic" bytes mapped to the document type they indicate.
DOCUMENT_SIGNATURES = {
    b'%PDF': 'pdf',  # PDF files
    b'PK\x03\x04': 'docx',  # DOCX (ZIP-based Office format)
    b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1': 'doc',  # DOC (OLE Compound Document)
}
class DocumentUploadService:
    """Secure file upload service for board documents.

    Every method is static; the class only namespaces the helpers.
    Files are stored as ``UPLOAD_BASE_PATH/<year>/<month>/<uuid>.<ext>``.
    """

    @staticmethod
    def validate_file(file: "FileStorage") -> Tuple[bool, str]:
        """
        Validate an uploaded document file.

        Checks, in order: a file was selected, its extension is allowed,
        its size is non-zero and at most MAX_FILE_SIZE, its leading bytes
        match one of DOCUMENT_SIGNATURES, and the detected type agrees with
        the extension. Whenever the stream is moved it is rewound to the
        start before the method continues or returns.

        Args:
            file: Werkzeug FileStorage object

        Returns:
            Tuple of (is_valid, error_message); error_message is '' on success.
        """
        # No upload at all, or an upload without a filename
        if not file or not file.filename:
            return False, 'Nie wybrano pliku'

        # Extension is whatever follows the last dot, case-insensitively
        filename = file.filename
        ext = filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''
        if ext not in ALLOWED_EXTENSIONS:
            return False, f'Niedozwolony format pliku. Dozwolone: {", ".join(sorted(ALLOWED_EXTENSIONS))}'

        # Measure the size by seeking to the end, then rewind
        file.seek(0, os.SEEK_END)
        size = file.tell()
        file.seek(0)
        if size > MAX_FILE_SIZE:
            return False, f'Plik jest za duży (max {MAX_FILE_SIZE // 1024 // 1024}MB)'
        if size == 0:
            return False, 'Plik jest pusty'

        # Verify magic bytes (actual file type).
        # NOTE: only the leading bytes are inspected; the 'docx' signature is
        # the generic ZIP header and the 'doc' signature the generic OLE
        # header, so the container's contents are not checked here.
        header = file.read(16)
        file.seek(0)
        detected_type = next(
            (
                file_type
                for signature, file_type in DOCUMENT_SIGNATURES.items()
                if header.startswith(signature)
            ),
            None,
        )
        if not detected_type:
            return False, 'Plik nie jest prawidłowym dokumentem (PDF, DOCX lub DOC)'

        # The claimed extension must match what the content looks like
        if detected_type != ext:
            return False, f'Rozszerzenie pliku ({ext}) nie odpowiada zawartości ({detected_type})'

        return True, ''

    @staticmethod
    def generate_stored_filename(original_filename: str) -> str:
        """
        Generate a secure UUID-based filename preserving the extension.

        The extension is taken from the text after the last dot, lowercased.
        It is kept only when it consists solely of ASCII letters/digits;
        otherwise (no dot, empty extension, path separators or other
        unexpected characters, or no filename at all) 'bin' is used, so the
        result is always a single safe path component.

        Args:
            original_filename: Original filename from upload

        Returns:
            UUID-based filename with a sanitized extension
        """
        ext = ''
        if original_filename and '.' in original_filename:
            ext = original_filename.rsplit('.', 1)[-1].lower()
        # Never let client-controlled characters such as '/' reach the
        # stored name
        if not (ext.isascii() and ext.isalnum()):
            ext = 'bin'
        return f"{uuid.uuid4()}.{ext}"

    @staticmethod
    def get_upload_path() -> str:
        """
        Get the upload directory path with date-based organization.

        The directory is UPLOAD_BASE_PATH/<year>/<month> for the current
        local time (datetime.now()) and is created if it does not exist.

        Returns:
            Full path to the upload directory
        """
        now = datetime.now()
        path = os.path.join(UPLOAD_BASE_PATH, str(now.year), f"{now.month:02d}")
        os.makedirs(path, exist_ok=True)
        return path

    @staticmethod
    def save_file(file: "FileStorage") -> Tuple[str, str, int, str]:
        """
        Save a document file under a generated name.

        This method does not validate the upload itself; see validate_file.

        Args:
            file: Werkzeug FileStorage object

        Returns:
            Tuple of (stored_filename, file_path, file_size, mime_type)
        """
        stored_filename = DocumentUploadService.generate_stored_filename(file.filename)
        upload_dir = DocumentUploadService.get_upload_path()
        file_path = os.path.join(upload_dir, stored_filename)

        # MIME type follows from the stored extension; unknown -> octet-stream
        ext = stored_filename.rsplit('.', 1)[-1].lower()
        mime_type = MIME_TYPES.get(ext, 'application/octet-stream')

        # Rewind first: validation may have left the stream mid-file
        file.seek(0)
        file.save(file_path)
        file_size = os.path.getsize(file_path)

        logger.info(f"Saved board document: {stored_filename} ({file_size} bytes)")
        return stored_filename, file_path, file_size, mime_type

    @staticmethod
    def delete_file(stored_filename: str, uploaded_at: Optional[datetime] = None) -> bool:
        """
        Delete a document file from storage.

        When uploaded_at is given, the year/month directory derived from it
        is tried first; if the file is not there (or no timestamp is given),
        every directory under UPLOAD_BASE_PATH is searched.

        Args:
            stored_filename: UUID-based filename (must be a bare filename)
            uploaded_at: Upload timestamp to determine path

        Returns:
            True if deleted, False otherwise (not found, invalid name, or
            the removal failed)
        """
        # Only a bare filename is acceptable: anything with a directory part
        # (or '.'/'..') could point outside the storage tree once joined.
        if (
            not stored_filename
            or stored_filename in ('.', '..')
            or os.path.basename(stored_filename) != stored_filename
        ):
            logger.warning(f"Refusing to delete invalid document name: {stored_filename!r}")
            return False

        if uploaded_at:
            # Try exact path first
            path = os.path.join(
                UPLOAD_BASE_PATH,
                str(uploaded_at.year), f"{uploaded_at.month:02d}",
                stored_filename
            )
            try:
                os.remove(path)
            except (FileNotFoundError, NotADirectoryError):
                # Not at the expected location; fall back to the full search
                pass
            except OSError as e:
                logger.error(f"Failed to delete {stored_filename}: {e}")
                return False
            else:
                logger.info(f"Deleted board document: {stored_filename}")
                return True

        # Search in all date directories
        for root, _dirs, files in os.walk(UPLOAD_BASE_PATH):
            if stored_filename not in files:
                continue
            try:
                os.remove(os.path.join(root, stored_filename))
            except OSError as e:
                logger.error(f"Failed to delete {stored_filename}: {e}")
                return False
            logger.info(f"Deleted board document: {stored_filename}")
            return True

        logger.warning(f"Document not found for deletion: {stored_filename}")
        return False

    @staticmethod
    def get_file_path(stored_filename: str, uploaded_at: datetime) -> str:
        """
        Get the full path to the stored file.

        The path is computed from the timestamp's year and month only; the
        file system is not consulted.

        Args:
            stored_filename: UUID-based filename
            uploaded_at: Upload timestamp

        Returns:
            Full path to the file
        """
        return os.path.join(
            UPLOAD_BASE_PATH,
            str(uploaded_at.year), f"{uploaded_at.month:02d}",
            stored_filename
        )

    @staticmethod
    def file_exists(stored_filename: str, uploaded_at: datetime) -> bool:
        """
        Check if the file exists in storage.

        Args:
            stored_filename: UUID-based filename
            uploaded_at: Upload timestamp

        Returns:
            True if a file exists at the path given by get_file_path,
            False otherwise
        """
        path = DocumentUploadService.get_file_path(stored_filename, uploaded_at)
        return os.path.exists(path)