feat: update Gemini models — migrate 3-pro to 3.1-pro, add 3.1-flash-lite, remove old SDK
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
- Replace gemini-3-pro-preview with gemini-3.1-pro-preview (old model deprecated March 9)
- Add gemini-3.1-flash-lite-preview as quality fallback in chain
- Remove last google.generativeai import from zopk_knowledge_service.py
- Update pricing, thinking models, and preview models sets
- Keep '3-pro' alias for backward compatibility across codebase

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
917d686a10
commit
3bc69f9455
@ -967,7 +967,7 @@ def _log_json_parse_failure(company_id: int, audit_type: str, details: str):
|
||||
try:
|
||||
log = AIUsageLog(
|
||||
request_type='audit_json_parse_failure',
|
||||
model='gemini-3-pro-preview',
|
||||
model='gemini-3.1-pro-preview',
|
||||
tokens_input=0,
|
||||
tokens_output=0,
|
||||
cost_cents=0,
|
||||
|
||||
@ -195,7 +195,7 @@ def chat_send_message(conversation_id):
|
||||
# Map model choice to actual model name and thinking level
|
||||
model_map = {
|
||||
'flash': '3-flash', # Gemini 3 Flash - 10K RPD, thinking mode
|
||||
'pro': '3-pro' # Gemini 3 Pro - 250 RPD, premium
|
||||
'pro': '3-pro' # Gemini 3.1 Pro - premium reasoning
|
||||
}
|
||||
thinking_map = {
|
||||
'flash': 'high',
|
||||
|
||||
@ -38,27 +38,29 @@ except ImportError:
|
||||
|
||||
# Available Gemini models (2026 - Gemini 3 generation available)
|
||||
GEMINI_MODELS = {
|
||||
'flash': 'gemini-2.5-flash', # Best for general use - balanced cost/quality
|
||||
'flash-lite': 'gemini-2.5-flash-lite', # Ultra cheap - $0.10/$0.40 per 1M tokens
|
||||
'pro': 'gemini-2.5-pro', # High quality - best reasoning/coding
|
||||
'flash-2.0': 'gemini-2.0-flash', # Second generation - 1M context window (wycofywany 31.03.2026)
|
||||
'3-flash': 'gemini-3-flash-preview', # Gemini 3 Flash - 7x lepszy reasoning, thinking mode
|
||||
'3-pro': 'gemini-3-pro-preview', # Gemini 3 Pro - najlepszy reasoning, 2M context
|
||||
'flash': 'gemini-2.5-flash', # Balanced cost/quality
|
||||
'flash-lite': 'gemini-2.5-flash-lite', # Ultra cheap - $0.10/$0.40 per 1M tokens
|
||||
'pro': 'gemini-2.5-pro', # High quality 2.5 gen
|
||||
'flash-2.0': 'gemini-2.0-flash', # Second generation (wycofywany 31.03.2026)
|
||||
'3-flash': 'gemini-3-flash-preview', # Gemini 3 Flash - thinking mode
|
||||
'3-pro': 'gemini-3.1-pro-preview', # Gemini 3.1 Pro (alias zachowany dla kompatybilności)
|
||||
'3.1-pro': 'gemini-3.1-pro-preview', # Gemini 3.1 Pro - najlepszy reasoning
|
||||
'3.1-flash-lite': 'gemini-3.1-flash-lite-preview', # Gemini 3.1 Flash Lite - szybki, tani
|
||||
}
|
||||
|
||||
# Models that support thinking mode
|
||||
THINKING_MODELS = {'gemini-3-flash-preview', 'gemini-3-pro-preview'}
|
||||
THINKING_MODELS = {'gemini-3-flash-preview', 'gemini-3.1-pro-preview', 'gemini-3.1-flash-lite-preview'}
|
||||
|
||||
# Preview models — monitor for GA release to switch for better stability
|
||||
# Track at: https://ai.google.dev/gemini-api/docs/models
|
||||
PREVIEW_MODELS = {'gemini-3-flash-preview', 'gemini-3-pro-preview'}
|
||||
PREVIEW_MODELS = {'gemini-3-flash-preview', 'gemini-3.1-pro-preview', 'gemini-3.1-flash-lite-preview'}
|
||||
|
||||
# Fallback chain for rate limit (429) resilience — Paid Tier 1
|
||||
# Order: primary → fast fallback → backup
|
||||
# Order: primary → quality fallback → cheapest fallback
|
||||
MODEL_FALLBACK_CHAIN = [
|
||||
'gemini-3-flash-preview', # 10K RPD paid tier - thinking mode
|
||||
'gemini-2.5-flash-lite', # Unlimited RPD paid tier - fast fallback
|
||||
'gemini-2.5-flash', # 10K RPD paid tier - backup
|
||||
'gemini-3-flash-preview', # 10K RPD - thinking mode, primary
|
||||
'gemini-3.1-flash-lite-preview', # Quality fallback - gen 3.1
|
||||
'gemini-2.5-flash-lite', # Unlimited RPD - cheapest, last resort
|
||||
]
|
||||
|
||||
# Available thinking levels for Gemini 3 Flash
|
||||
@ -77,7 +79,8 @@ GEMINI_PRICING = {
|
||||
'gemini-2.5-pro': {'input': 1.25, 'output': 10.00, 'thinking': 0},
|
||||
'gemini-2.0-flash': {'input': 0.10, 'output': 0.40, 'thinking': 0},
|
||||
'gemini-3-flash-preview': {'input': 0.50, 'output': 3.00, 'thinking': 1.00}, # Paid tier
|
||||
'gemini-3-pro-preview': {'input': 2.00, 'output': 12.00, 'thinking': 4.00}, # Paid tier
|
||||
'gemini-3.1-pro-preview': {'input': 2.00, 'output': 12.00, 'thinking': 4.00}, # Paid tier
|
||||
'gemini-3.1-flash-lite-preview': {'input': 0.25, 'output': 1.50, 'thinking': 0}, # Paid tier
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -2653,7 +2653,7 @@ def categorize_milestones_with_ai(
|
||||
|
||||
Adds AI-improved titles, categories, and extracts dates more accurately.
|
||||
"""
|
||||
import google.generativeai as genai
|
||||
from gemini_service import GeminiService
|
||||
import json
|
||||
|
||||
if not suggestions:
|
||||
@ -2684,11 +2684,11 @@ Odpowiedz TYLKO jako JSON array:
|
||||
[{{"id": 1, "category": "...", "short_title": "...", "target_date": "YYYY-MM-DD lub null", "status": "..."}}]"""
|
||||
|
||||
try:
|
||||
model = genai.GenerativeModel(model_name)
|
||||
response = model.generate_content(prompt)
|
||||
service = GeminiService(model=model_name)
|
||||
response_text = service.generate_text(prompt)
|
||||
|
||||
# Parse response
|
||||
response_text = response.text.strip()
|
||||
response_text = response_text.strip()
|
||||
if response_text.startswith('```'):
|
||||
response_text = response_text.split('```')[1]
|
||||
if response_text.startswith('json'):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user