feat(ai): Upgrade to Gemini 3 Flash + add 503 fallback resilience
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
- Switch primary model from flash-lite (2.5) to 3-flash (Gemini 3 Flash Preview) for better reasoning and thinking mode across all AI features
- Add _is_retryable() method to handle 503 UNAVAILABLE (server overload) in addition to existing 429 rate limit fallback
- Fallback chain: 3-flash → 2.5-flash-lite → 2.5-flash

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
aa49c18f7a
commit
3d26ea6119
2
app.py
2
app.py
@ -308,7 +308,7 @@ login_manager.login_message = 'Zaloguj się, aby uzyskać dostęp do tej strony.
|
||||
|
||||
# Initialize Gemini service
|
||||
try:
|
||||
gemini_service.init_gemini_service(model='flash-lite') # Paid tier: Unlimited RPD, fallback: 3-flash (10K RPD) → 2.5-flash (10K RPD)
|
||||
gemini_service.init_gemini_service(model='3-flash') # Paid tier: 10K RPD, thinking mode, fallback: 2.5-flash-lite (Unlimited) → 2.5-flash (10K)
|
||||
logger.info("Gemini service initialized successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize Gemini service: {e}")
|
||||
|
||||
@ -193,6 +193,13 @@ class GeminiService:
|
||||
error_str = str(error)
|
||||
return '429' in error_str or 'RESOURCE_EXHAUSTED' in error_str
|
||||
|
||||
@staticmethod
|
||||
def _is_retryable(error: Exception) -> bool:
|
||||
"""Check if error is retryable (rate limit or server overload)."""
|
||||
error_str = str(error)
|
||||
return ('429' in error_str or 'RESOURCE_EXHAUSTED' in error_str or
|
||||
'503' in error_str or 'UNAVAILABLE' in error_str)
|
||||
|
||||
def _build_generation_config(self, model: str, temperature: float,
|
||||
max_tokens: Optional[int],
|
||||
thinking_level: Optional[str]) -> types.GenerateContentConfig:
|
||||
@ -324,12 +331,12 @@ class GeminiService:
|
||||
return response_text
|
||||
|
||||
except Exception as e:
|
||||
if self._is_rate_limited(e) and model != models_to_try[-1]:
|
||||
logger.warning(f"Rate limited on {model}, trying next fallback...")
|
||||
if self._is_retryable(e) and model != models_to_try[-1]:
|
||||
logger.warning(f"Retryable error on {model} ({type(e).__name__}), trying next fallback...")
|
||||
last_error = e
|
||||
continue
|
||||
|
||||
# Non-429 error or last model in chain — fail
|
||||
# Non-retryable error or last model in chain — fail
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
self._log_api_cost(
|
||||
|
||||
Loading…
Reference in New Issue
Block a user