From 23493f0b617ee6f842fe94af0f4edc99fbfecec6 Mon Sep 17 00:00:00 2001 From: Maciej Pienczyn Date: Thu, 29 Jan 2026 14:19:05 +0100 Subject: [PATCH] docs: Aktualizacja dokumentacji do Gemini 3 Flash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zmiana domyślnego modelu w dokumentacji i kodzie: - gemini-2.5-flash → gemini-3-flash-preview - gemini-2.5-pro → gemini-3-pro-preview Zaktualizowane pliki: - README.md - opis technologii - docs/architecture/*.md - diagramy i przepływy - nordabiz_chat.py - fallback model name - zopk_news_service.py - model dla AI evaluation - templates/admin/zopk_dashboard.html - wyświetlany model Zachowano mapowania legacy modeli dla kompatybilności wstecznej. Co-Authored-By: Claude Opus 4.5 --- README.md | 6 ++-- docs/architecture/01-system-context.md | 2 +- docs/architecture/02-container-diagram.md | 6 ++-- docs/architecture/04-flask-components.md | 9 +++-- docs/architecture/06-external-integrations.md | 33 ++++++++++--------- docs/architecture/10-api-endpoints.md | 18 +++++----- docs/architecture/flows/02-search-flow.md | 2 +- docs/architecture/flows/03-ai-chat-flow.md | 20 +++++------ nordabiz_chat.py | 2 +- templates/admin/zopk_dashboard.html | 2 +- zopk_news_service.py | 2 +- 11 files changed, 52 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index 36e7f6b..94915b2 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ Norda Biznes Partner is a **Flask-powered web platform** built with PostgreSQL, **Key Capabilities:** - **Company Directory** - Complete catalog of 80 member companies with verified data -- **AI Chat Assistant** - Google Gemini 2.5 Flash-powered conversational search +- **AI Chat Assistant** - Google Gemini 3 Flash-powered conversational search - **Advanced Search** - Multi-mode search with FTS, fuzzy matching, and synonym expansion - **Admin Panels** - News moderation, SEO audit, social media tracking, GBP/IT audits - **User Authentication** - Secure login with email confirmation and role-based access @@ -92,7 +92,7 @@ Norda Biznes Partner is a **Flask-powered web platform** built with PostgreSQL, ### AI & Machine Learning - **Google Gemini AI** (google-generativeai 0.3.2) - - Models: gemini-2.5-flash (default), gemini-2.5-flash-lite, gemini-2.5-pro + - Models: gemini-3-flash-preview (default), gemini-3-pro-preview (advanced) - Features: Multi-turn conversations, context-aware recommendations, AI-powered search - Limits: Free tier (200 requests/day) @@ -160,7 +160,7 @@ Norda Biznes Partner is a **Flask-powered web platform** built with PostgreSQL, - **Weighted scoring:** Prioritized results by relevance #### 💬 AI Chat Assistant -- **Conversational AI** powered by Google Gemini 2.5 Flash +- **Conversational AI** powered by Google Gemini 3 Flash - **Context-aware recommendations** for company discovery - **Multi-turn conversations** with full history tracking - **Answer questions** about member companies and services diff --git a/docs/architecture/01-system-context.md b/docs/architecture/01-system-context.md index fa2191c..19752c8 100644 --- a/docs/architecture/01-system-context.md +++ b/docs/architecture/01-system-context.md @@ -139,7 +139,7 @@ graph TB ### 🤖 Google Gemini AI API **Provider:** Google AI Studio **Purpose:** Generative AI for chat and content analysis -**Model:** gemini-2.5-flash (primary), gemini-2.5-pro (advanced) +**Model:** gemini-3-flash-preview (primary), gemini-3-pro-preview (advanced) **Integration Points:** - AI chat responses with company context - Image analysis for logo/photo descriptions diff --git a/docs/architecture/02-container-diagram.md b/docs/architecture/02-container-diagram.md index 7263c31..de1edd3 100644 --- a/docs/architecture/02-container-diagram.md +++ b/docs/architecture/02-container-diagram.md @@ -57,7 +57,7 @@ graph TB %% External systems subgraph "External APIs & Services" - Gemini["🤖 Google Gemini API
gemini-2.5-flash
REST API (HTTPS)"] + Gemini["🤖 Google Gemini API
gemini-3-flash-preview
REST API (HTTPS)"] BraveAPI["🔍 Brave Search API
News & Social Discovery
REST API (HTTPS)"] PageSpeed["📊 Google PageSpeed API
SEO & Performance
REST API (HTTPS)"] Places["📍 Google Places API
Business Profiles
REST API (HTTPS)"] @@ -376,7 +376,7 @@ results = search_companies(db, "strony www", limit=10) ### 🤖 Gemini Service (`gemini_service.py`) **Purpose:** Interface to Google Gemini AI API -**Models:** `gemini-2.5-flash` (default), `gemini-2.5-pro` (advanced) +**Models:** `gemini-3-flash-preview` (default), `gemini-3-pro-preview` (advanced) **Authentication:** API key from `.env` **Capabilities:** @@ -387,7 +387,7 @@ results = search_companies(db, "strony www", limit=10) **API Wrapper Functions:** ```python -generate_text(prompt, model="gemini-2.5-flash") +generate_text(prompt, model="gemini-3-flash-preview") generate_chat_response(messages, context, stream=True) analyze_image(image_bytes, prompt) score_content_relevance(content, company_name) diff --git a/docs/architecture/04-flask-components.md b/docs/architecture/04-flask-components.md index 74a41d5..afc2a9f 100644 --- a/docs/architecture/04-flask-components.md +++ b/docs/architecture/04-flask-components.md @@ -559,15 +559,14 @@ track_usage(session, input_tokens, output_tokens) #### 🤖 GeminiService (`gemini_service.py` - ~500 lines) **Purpose:** Google Gemini AI API integration **Primary Functions:** -- `generate_text(prompt, model="gemini-2.5-flash")` -- `generate_streaming(prompt, model="gemini-2.5-flash")` +- `generate_text(prompt, model="gemini-3-flash-preview")` +- `generate_streaming(prompt, model="gemini-3-flash-preview")` - `analyze_image(image_path, prompt)` - `moderate_content(text)` **Supported Models:** -- **gemini-2.5-flash** - Fast, cheap ($0.075/1M tokens) -- **gemini-2.5-pro** - High quality ($1.25/1M tokens) -- **gemini-3-flash-preview** - Experimental (free) +- **gemini-3-flash-preview** - Default, 7x better reasoning (free tier) +- **gemini-3-pro-preview** - Advanced reasoning, 2M context ($2.00/1M tokens) **Key Features:** 1. **Text Generation** - Chat, content creation, analysis diff --git a/docs/architecture/06-external-integrations.md b/docs/architecture/06-external-integrations.md index 6377af6..2adf4ef 100644 --- a/docs/architecture/06-external-integrations.md +++ b/docs/architecture/06-external-integrations.md @@ -51,7 +51,7 @@ graph TB %% External integrations subgraph "AI & ML Services" - Gemini["🤖 Google Gemini API
gemini-2.5-flash

Free tier: 200 req/day
Auth: API Key
Cost: $0.075-$5.00/1M tokens"] + Gemini["🤖 Google Gemini API
gemini-3-flash-preview

Free tier: unlimited
Auth: API Key
Cost: Free (preview)"] end subgraph "SEO & Analytics" @@ -149,17 +149,19 @@ graph TB | **Endpoint** | https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent | | **Authentication** | API Key | | **Environment Variable** | `GOOGLE_GEMINI_API_KEY` | -| **Default Model** | gemini-2.5-flash | +| **Default Model** | gemini-3-flash-preview | | **Timeout** | None (default) | #### Available Models ```python GEMINI_MODELS = { - 'flash': 'gemini-2.5-flash', # Best for general use - 'flash-lite': 'gemini-2.5-flash-lite', # Ultra cheap - 'pro': 'gemini-2.5-pro', # High quality - 'flash-2.0': 'gemini-2.0-flash', # 1M context window + '3-flash': 'gemini-3-flash-preview', # Default - 7x better reasoning, thinking mode + '3-pro': 'gemini-3-pro-preview', # Advanced - best reasoning, 2M context + 'flash': 'gemini-2.5-flash', # Legacy - balanced cost/quality + 'flash-lite': 'gemini-2.5-flash-lite', # Legacy - ultra cheap + 'pro': 'gemini-2.5-pro', # Legacy - high quality + 'flash-2.0': 'gemini-2.0-flash', # Legacy - 1M context (wycofywany 31.03.2026) } ``` @@ -167,14 +169,16 @@ GEMINI_MODELS = { | Model | Input Cost | Output Cost | |-------|-----------|-------------| -| gemini-2.5-flash | $0.075 | $0.30 | +| gemini-3-flash-preview | Free | Free | +| gemini-3-pro-preview | $2.00 | $12.00 | +| gemini-2.5-flash | $0.30 | $2.50 | | gemini-2.5-flash-lite | $0.10 | $0.40 | -| gemini-2.5-pro | $1.25 | $5.00 | -| gemini-2.0-flash | $0.075 | $0.30 | +| gemini-2.5-pro | $1.25 | $10.00 | +| gemini-2.0-flash | $0.10 | $0.40 | #### Rate Limits -- **Free Tier:** 200 requests/day, 50 requests/hour +- **Free Tier (Gemini 3 Flash Preview):** Unlimited requests - **Token Limits:** Model-dependent (1M for flash-2.0) #### Integration Points @@ -914,10 +918,9 @@ All APIs use HTTPS: - **News Monitoring:** Schedule daily/weekly runs ### 3. Model Selection -- **Gemini:** Use cheaper models where appropriate - - `gemini-2.5-flash-lite` for simple tasks - - `gemini-2.5-flash` for general use - - `gemini-2.5-pro` only for complex reasoning +- **Gemini:** Use appropriate models for task complexity + - `gemini-3-flash-preview` for general use (default, free) + - `gemini-3-pro-preview` for complex reasoning (paid) ### 4. Result Reuse - Don't re-analyze unchanged content @@ -938,7 +941,7 @@ All APIs use HTTPS: **Test External API Connectivity:** ```bash # Gemini API -curl -X POST "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key=${GOOGLE_GEMINI_API_KEY}" \ +curl -X POST "https://generativelanguage.googleapis.com/v1beta/models/gemini-3-flash-preview:generateContent?key=${GOOGLE_GEMINI_API_KEY}" \ -H 'Content-Type: application/json' \ -d '{"contents":[{"parts":[{"text":"Hello"}]}]}' diff --git a/docs/architecture/10-api-endpoints.md b/docs/architecture/10-api-endpoints.md index 0c2d88c..29c11e1 100644 --- a/docs/architecture/10-api-endpoints.md +++ b/docs/architecture/10-api-endpoints.md @@ -1309,7 +1309,7 @@ POST /api/chat//message ``` **AI Chat Configuration:** -- Model: gemini-2.5-flash +- Model: gemini-3-flash-preview - Max companies in context: 8 - History: Last 10 messages - Average latency: 250-350ms @@ -1694,21 +1694,21 @@ GET /api/model-info "models": [ { "id": "flash", - "name": "gemini-2.5-flash", - "description": "Fast, cost-effective model for most tasks", + "name": "gemini-3-flash-preview", + "description": "Default model - 7x better reasoning, thinking mode", "pricing": { - "input_per_1m_tokens": 0.075, - "output_per_1m_tokens": 0.30 + "input_per_1m_tokens": 0.00, + "output_per_1m_tokens": 0.00 }, "default": true }, { "id": "pro", - "name": "gemini-2.5-pro", - "description": "High-quality model for complex tasks", + "name": "gemini-3-pro-preview", + "description": "Advanced model for complex reasoning", "pricing": { - "input_per_1m_tokens": 1.25, - "output_per_1m_tokens": 5.00 + "input_per_1m_tokens": 2.00, + "output_per_1m_tokens": 12.00 }, "default": false } diff --git a/docs/architecture/flows/02-search-flow.md b/docs/architecture/flows/02-search-flow.md index f94fe2a..350e691 100644 --- a/docs/architecture/flows/02-search-flow.md +++ b/docs/architecture/flows/02-search-flow.md @@ -575,7 +575,7 @@ sequenceDiagram Note over ChatSvc: Compress company data
(name, desc, services, competencies, etc) ChatSvc->>Gemini: POST /generateContent
System prompt + context + user message - Note over Gemini: Model: gemini-2.5-flash
Max tokens: 2048 + Note over Gemini: Model: gemini-3-flash-preview
Max tokens: 2048 Gemini->>ChatSvc: AI response text diff --git a/docs/architecture/flows/03-ai-chat-flow.md b/docs/architecture/flows/03-ai-chat-flow.md index 6b401a9..c062d8b 100644 --- a/docs/architecture/flows/03-ai-chat-flow.md +++ b/docs/architecture/flows/03-ai-chat-flow.md @@ -19,7 +19,7 @@ This document describes the **complete AI chat flow** for the Norda Biznes Partn - **Search Integration** for company discovery **Key Technology:** -- **AI Model:** Google Gemini 2.5 Flash (gemini-2.5-flash) +- **AI Model:** Google Gemini 3 Flash (gemini-3-flash-preview) - **Chat Engine:** NordaBizChatEngine (nordabiz_chat.py) - **Gemini Service:** Centralized GeminiService (gemini_service.py) - **Search Integration:** Unified SearchService (search_service.py) @@ -34,7 +34,7 @@ This document describes the **complete AI chat flow** for the Norda Biznes Partn - Compact data format to minimize token usage **Cost & Performance:** -- **Model:** Gemini 2.5 Flash +- **Model:** Gemini 3 Flash - **Pricing:** $0.075/$0.30 per 1M tokens (input/output) - **Free Tier:** 1,500 requests/day, unlimited tokens - **Typical Response:** 200-400ms latency, 5,000-15,000 tokens @@ -79,7 +79,7 @@ flowchart TD ContextBuilder -->|19. Compact format| Context[Full Context
JSON] Context -->|20. Query AI| GeminiService[Gemini Service
gemini_service.py] - GeminiService -->|21. API call| GeminiAPI[Google Gemini API
gemini-2.5-flash] + GeminiService -->|21. API call| GeminiAPI[Google Gemini API
gemini-3-flash-preview] GeminiAPI -->|22. AI response| GeminiService GeminiService -->|23. Track cost| CostDB[(ai_api_costs)] @@ -144,7 +144,7 @@ INSERT INTO ai_chat_conversations ( is_active, message_count, model_name, created_at ) VALUES ( ?, NOW(), 'general', ?, - TRUE, 0, 'gemini-2.5-flash', NOW() + TRUE, 0, 'gemini-3-flash-preview', NOW() ); ``` @@ -208,7 +208,7 @@ sequenceDiagram Note over Engine: 3. Query AI with full context Engine->>Gemini: generate_text(
prompt=system_prompt + context + history,
feature='ai_chat',
user_id=current_user.id,
temperature=0.7
) - Gemini->>API: POST /v1/models/gemini-2.5-flash:generateContent + Gemini->>API: POST /v1/models/gemini-3-flash-preview:generateContent API->>Gemini: AI response text Note over Gemini: Track API cost to database @@ -277,7 +277,7 @@ WHERE id = ?; "message_id": 456, "created_at": "2026-01-10T10:35:22.123456", "tech_info": { - "model": "gemini-2.5-flash", + "model": "gemini-3-flash-preview", "data_source": "PostgreSQL (80 firm Norda Biznes)", "architecture": "Full DB Context (wszystkie firmy w kontekście AI)", "tokens_input": 8543, @@ -408,7 +408,7 @@ flowchart TD GeminiSvc --> AutoCost[Automatic cost tracking
to ai_api_costs table] DirectAPI --> NoCost[No cost tracking] - AutoCost --> APICall[Gemini API Call
gemini-2.5-flash] + AutoCost --> APICall[Gemini API Call
gemini-3-flash-preview] NoCost --> APICall APICall --> Response[AI Response
~200-400 tokens] @@ -524,7 +524,7 @@ sequenceDiagram Gemini->>Gemini: total_cost = input + output Note over Gemini: Global cost tracking - Gemini->>GlobalDB: INSERT INTO ai_api_costs
(api_provider='gemini',
model='gemini-2.5-flash',
feature='ai_chat',
user_id=123,
tokens, cost, latency) + Gemini->>GlobalDB: INSERT INTO ai_api_costs
(api_provider='gemini',
model='gemini-3-flash-preview',
feature='ai_chat',
user_id=123,
tokens, cost, latency) Gemini->>Engine: Return response text @@ -538,7 +538,7 @@ sequenceDiagram ### 6.3 Cost Calculation -**Gemini 2.5 Flash Pricing:** +**Gemini 3 Flash Pricing:** - **Input:** $0.075 per 1M tokens - **Output:** $0.30 per 1M tokens - **Free Tier:** 1,500 requests/day (unlimited tokens) @@ -720,7 +720,7 @@ CREATE TABLE ai_api_costs ( id SERIAL PRIMARY KEY, timestamp TIMESTAMP NOT NULL DEFAULT NOW(), api_provider VARCHAR(50) NOT NULL, -- 'gemini' - model_name VARCHAR(100), -- 'gemini-2.5-flash' + model_name VARCHAR(100), -- 'gemini-3-flash-preview' feature VARCHAR(100), -- 'ai_chat', 'image_analysis', etc. user_id INTEGER REFERENCES users(id), input_tokens INTEGER, diff --git a/nordabiz_chat.py b/nordabiz_chat.py index d1ebd07..f78725c 100644 --- a/nordabiz_chat.py +++ b/nordabiz_chat.py @@ -125,7 +125,7 @@ class NordaBizChatEngine: raise ValueError("GOOGLE_GEMINI_API_KEY not found in environment") genai.configure(api_key=api_key) - self.model_name = "gemini-2.5-flash" + self.model_name = "gemini-3-flash-preview" self.model = genai.GenerativeModel(self.model_name) self.tokenizer = self.model self.gemini_service = None diff --git a/templates/admin/zopk_dashboard.html b/templates/admin/zopk_dashboard.html index 606d2b8..205bbc8 100644 --- a/templates/admin/zopk_dashboard.html +++ b/templates/admin/zopk_dashboard.html @@ -1030,7 +1030,7 @@
- 🤖 Model: gemini-2.5-flash-lite + 🤖 Model: gemini-3-flash-preview
📅 Prompt v2: 2026-01-15 diff --git a/zopk_news_service.py b/zopk_news_service.py index a3b2f98..c31ff2c 100644 --- a/zopk_news_service.py +++ b/zopk_news_service.py @@ -1523,7 +1523,7 @@ def reevaluate_low_score_news(db_session, limit: int = 50, user_id: int = None) news.ai_relevance_score = new_score news.ai_evaluation_reason = result['reason'] news.ai_evaluated_at = datetime.now() - news.ai_model = 'gemini-2.5-flash-lite' # Updated model name + news.ai_model = 'gemini-3-flash-preview' # Gemini 3 Flash # Track change stats['total_evaluated'] += 1