diff --git a/CLAUDE.md b/CLAUDE.md index 5e44375..fabbf54 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -347,3 +347,92 @@ Szczegóły: `docs/DEVELOPMENT.md#audyt-seo` ## Plan rozwoju Roadmap, priorytety i strategia monetyzacji: `docs/ROADMAP.md` + +## NordaGPT - Konfiguracja AI + +### Aktualny model (stan: 2026-01-29) +- **Model:** `gemini-3-flash-preview` (Gemini 3 Flash Preview) +- **SDK:** `google-genai>=1.0.0` (nowy SDK z thinking mode) +- **Inicjalizacja:** `app.py:286` - `gemini_service.init_gemini_service(model='3-flash')` +- **Silnik chatu:** `nordabiz_chat.py` używa globalnego `gemini_service` + +### Thinking Mode (NOWE!) +Użytkownicy mogą wybierać poziom rozumowania AI w UI chatu (dropdown obok badge "Gemini 3"). + +| Poziom | Opis | Zastosowanie | +|--------|------|--------------| +| **Błyskawiczny** (minimal) | Najszybsze odpowiedzi | Proste pytania: "kto?", "gdzie?" | +| **Szybki** (low) | Zrównoważony | Większość pytań o firmy i usługi | +| **Głęboki** (high) | Maksymalna analiza | Złożone pytania, rekomendacje, strategia | + +**Zmiana poziomu:** +- UI: Dropdown w headerze chatu +- API: `POST /api/chat/settings` z `{"thinking_level": "high"}` +- Zapisywane w sesji użytkownika + +### Dostępne modele w `gemini_service.py` +| Alias | Model ID | Opis | +|-------|----------|------| +| `flash` | `gemini-2.5-flash` | Ogólnego przeznaczenia | +| `flash-lite` | `gemini-2.5-flash-lite` | Ultra tani ($0.10/$0.40 per 1M) | +| `pro` | `gemini-2.5-pro` | Najlepszy reasoning | +| `flash-2.0` | `gemini-2.0-flash` | Poprzednia generacja (wycofywany 31.03.2026) | +| `3-flash` | `gemini-3-flash-preview` | **AKTUALNY** - 7x lepszy reasoning, thinking mode | +| `3-pro` | `gemini-3-pro-preview` | Premium - 2M context | + +### Zmiana modelu +```python +# W app.py linia ~286: +gemini_service.init_gemini_service(model='3-flash') # Zmień alias tutaj +``` + +### UI Badge +W `templates/chat.html` badge w headerze: `Gemini 3` + +## Prezentacja dla członków Izby (AKTYWNY PROJEKT) + +### Cel projektu +Stworzenie materiałów wideo prezentujących portal NordaBiz dla członków Izby NORDA. + +### Produkty końcowe +1. **Podcast NotebookLM** (2-3 min) - rozmowa AI o portalu +2. **Zajawka Remotion** (30s) - scenariusz "Problem → Rozwiązanie" +3. **Tutorial wideo** (2-3 min) - nagrania portalu + dialogi Zofia/Marek +4. **Integracja z portalem** - Akademia + widget na dashboardzie + +### Dokument źródłowy dla NotebookLM +`docs/notebooklm-source.md` - markdown do wgrania do notebooklm.google.com + +### Scenariusz zajawki 30s (Remotion) +``` +[0-8s] "Szukasz partnera do projektu?" +[8-16s] "Nie wiesz, kto w Izbie ma potrzebne kompetencje?" +[16-24s] "NordaGPT zna 150 firm i pomoże Ci znaleźć" +[24-30s] "nordabiznes.pl - Twoja sieć kontaktów" +``` + +### Głosy edge-tts +- Marek: `pl-PL-MarekNeural` (męski) +- Zofia: `pl-PL-ZofiaNeural` (żeński) + +### GIFy do nagrania (Chrome MCP) +1. Strona główna zalogowanego +2. Katalog firm + filtrowanie +3. Profil firmy +4. Chat NordaGPT - pytanie +5. Chat NordaGPT - odpowiedź +6. Forum +7. Kalendarz +8. Tablica B2B + +### Pliki Remotion +- Lokalizacja: `/Users/maciejpi/claude/projects/active/remotion/my-video/` +- Komponenty: `NordaBizZajawka.tsx`, `NordaBizTutorial.tsx` +- Audio: `public/audio/`, `public/voice/tutorial/` +- Nagrania: `public/recordings/*.gif` + +### Konto testowe (PROD) +- Email: `test@nordabiznes.pl` +- Hasło: `&Rc2LdbSw&jiGR0ek@Bz` + +Data projektu: 2026-01-29 diff --git a/app.py b/app.py index 3218237..93b5c06 100644 --- a/app.py +++ b/app.py @@ -4513,6 +4513,44 @@ def chat(): return render_template('chat.html') +@app.route('/api/chat/settings', methods=['GET', 'POST']) +@csrf.exempt +@login_required +def chat_settings(): + """Get or update chat settings (thinking level)""" + if request.method == 'GET': + # Get current thinking level from session or default + thinking_level = session.get('thinking_level', 'high') + return jsonify({ + 'success': True, + 'thinking_level': thinking_level + }) + + # POST - update settings + try: + data = request.get_json() + thinking_level = data.get('thinking_level', 'high') + + # Validate thinking level + valid_levels = ['minimal', 'low', 'medium', 'high'] + if thinking_level not in valid_levels: + thinking_level = 'high' + + # Store in session + session['thinking_level'] = thinking_level + + logger.info(f"User {current_user.id} set thinking_level to: {thinking_level}") + + return jsonify({ + 'success': True, + 'thinking_level': thinking_level + }) + + except Exception as e: + logger.error(f"Error updating chat settings: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + + @app.route('/api/chat/start', methods=['POST']) @csrf.exempt @login_required @@ -4564,11 +4602,15 @@ def chat_send_message(conversation_id): finally: db.close() + # Get thinking level from request or session + thinking_level = data.get('thinking_level') or session.get('thinking_level', 'high') + chat_engine = NordaBizChatEngine() response = chat_engine.send_message( conversation_id=conversation_id, user_message=message, - user_id=current_user.id + user_id=current_user.id, + thinking_level=thinking_level ) # Get free tier usage stats for today @@ -4586,21 +4628,23 @@ def chat_send_message(conversation_id): 'created_at': response.created_at.isoformat(), # Technical metadata 'tech_info': { - 'model': 'gemini-2.0-flash', - 'data_source': 'PostgreSQL (111 firm Norda Biznes)', - 'architecture': 'Full DB Context (wszystkie firmy w kontekście AI)', + 'model': gemini_service.get_gemini_service().model_name if gemini_service.get_gemini_service() else 'gemini-3-flash-preview', + 'thinking_level': thinking_level, + 'thinking_enabled': gemini_service.get_gemini_service().thinking_enabled if gemini_service.get_gemini_service() else True, + 'data_source': 'PostgreSQL (150 firm Norda Biznes)', + 'architecture': 'Full DB Context + Thinking Mode', 'tokens_input': tokens_in, 'tokens_output': tokens_out, 'tokens_total': tokens_in + tokens_out, 'latency_ms': response.latency_ms or 0, 'theoretical_cost_usd': round(theoretical_cost, 6), - 'actual_cost_usd': 0.0, # Free tier + 'actual_cost_usd': 0.0, # Paid tier but tracked 'free_tier': { - 'is_free': True, - 'daily_limit': 1500, # Gemini free tier: 1500 req/day + 'is_free': False, + 'daily_limit': 10000, # Gemini paid tier 'requests_today': free_tier_stats['requests_today'], 'tokens_today': free_tier_stats['tokens_today'], - 'remaining': max(0, 1500 - free_tier_stats['requests_today']) + 'remaining': max(0, 10000 - free_tier_stats['requests_today']) } } }) diff --git a/docs/notebooklm-source.md b/docs/notebooklm-source.md new file mode 100644 index 0000000..5e9dcf9 --- /dev/null +++ b/docs/notebooklm-source.md @@ -0,0 +1,163 @@ +# Norda Biznes Hub - Dokumentacja Platformy + +## O projekcie + +**Norda Biznes Hub** to nowoczesna platforma katalogowa i networkingowa dla członków **Izby Przedsiębiorców NORDA** z Wejherowa. Portal jest dostępny pod adresem https://nordabiznes.pl i działa produkcyjnie od 23 listopada 2025 roku. + +Platforma zrzesza **150 podmiotów gospodarczych** z 4 głównych kategorii branżowych i 17 podkategorii. To centralny hub dla przedsiębiorców z Pomorza - miejsce gdzie można znaleźć partnera biznesowego, sprawdzić kompetencje firm członkowskich i nawiązać współpracę. + +## Główne funkcje portalu + +### 1. Katalog firm (150 podmiotów) + +Pełny katalog wszystkich firm członkowskich Izby NORDA z podziałem na kategorie: +- **Usługi** - IT i technologie, księgowość, usługi finansowe, HR, marketing, doradztwo prawne +- **Budownictwo** - budownictwo ogólne, instalacje, wykończenia +- **Handel** - hurt, detal, automotive +- **Produkcja** - produkcja ogólna, meble, przemysł + +Każda firma ma szczegółowy profil zawierający: +- Dane kontaktowe (telefon, email, adres) +- Opis działalności i specjalizacji +- Usługi i kompetencje (tagi) +- Dane rejestrowe (NIP, REGON, KRS) +- Linki do social mediów (Facebook, LinkedIn, Instagram) +- Data przystąpienia do Izby NORDA +- Oceny Google (z audytu Google Business Profile) + +### 2. NordaGPT - Asystent AI + +NordaGPT to inteligentny chatbot oparty na **Google Gemini 3 Flash** - najnowszej generacji modelu AI z zaawansowanym rozumowaniem (thinking mode). Jest to flagowa funkcja platformy. + +**Co potrafi NordaGPT:** +- Znajdowanie firm po usługach, branży lub słowach kluczowych +- Odpowiadanie na pytania o członków Izby (np. "Kto jest prezesem PIXLAB?") +- Sprawdzanie kalendarza wydarzeń Norda Biznes +- Podawanie rekomendacji (np. "Która firma ma najlepsze opinie Google?") +- Informowanie o ogłoszeniach B2B na tablicy +- Wyszukiwanie osób powiązanych z firmami (zarząd, właściciele) + +**Przykładowe pytania:** +- "Kto oferuje usługi IT w Wejherowie?" +- "Kiedy następne spotkanie Norda Biznes?" +- "Poleć firmę budowlaną z dobrymi opiniami" +- "Szukam drukarni dla mojej firmy" + +NordaGPT ma dostęp do pełnej bazy danych 150 firm, kalendarza wydarzeń, tablicy B2B, forum i rekomendacji członków. Odpowiada w języku polskim i podaje bezpośrednie linki do profilów firm. + +### 3. Kalendarz wydarzeń + +Interaktywny kalendarz spotkań i wydarzeń organizowanych przez Izbę NORDA: +- Widok miesięczny z oznaczonymi wydarzeniami +- Szczegóły: data, godzina, miejsce, opis +- System zapisów (RSVP) - potwierdź udział jednym kliknięciem +- Lista uczestników - zobacz kto jeszcze będzie +- Banner na stronie głównej z najbliższym wydarzeniem + +### 4. Forum dyskusyjne + +Miejsce wymiany wiedzy i doświadczeń między członkami: +- Tematy z różnych kategorii (pytania, ogłoszenia, dyskusje) +- Możliwość odpowiadania i komentowania +- Załączniki (dokumenty, zdjęcia) +- Powiadomienia o nowych odpowiedziach + +### 5. Tablica ogłoszeń B2B + +Marketplace dla członków Izby: +- Ogłoszenia typu "Szukam" i "Oferuję" +- Kategorie: usługi, produkty, współpraca, zlecenia +- Kontakt bezpośredni z autorem ogłoszenia +- Automatyczne wygasanie po 30 dniach + +### 6. Prywatne wiadomości + +System komunikacji między członkami: +- Bezpośrednie wiadomości do innych użytkowników +- Historia rozmów +- Powiadomienia o nowych wiadomościach +- Możliwość blokowania kontaktów + +### 7. Raporty i statystyki + +Dostępne dla zalogowanych użytkowników: +- Staż członkostwa w Izbie (ranking od najdłużej) +- Pokrycie social mediów (analiza 6 platform) +- Struktura branżowa (rozkład firm wg kategorii) + +### 8. Aktualności + +Sekcja z ogłoszeniami i newsami: +- Informacje od zarządu Izby +- Okazje biznesowe +- Wydarzenia partnerskie +- Możliwość przypinania ważnych ogłoszeń + +## Korzyści dla członków + +### Networking +- Poznaj 150 firm z różnych branż +- Znajdź partnera do projektu +- Nawiąż współpracę B2B +- Wymieniaj się doświadczeniami na forum + +### Widoczność +- Profesjonalny profil firmy z pełnym opisem +- Pozycjonowanie w katalogu branżowym +- Linki do social mediów +- Prezentacja usług i kompetencji + +### Wygoda +- Jeden portal dla wszystkiego +- NordaGPT odpowie na pytania 24/7 +- Powiadomienia o wydarzeniach +- Mobilna wersja responsywna + +### Bezpieczeństwo +- Tylko zweryfikowani członkowie Izby +- Kontrola prywatności (ukrywanie danych kontaktowych) +- System blokowania niechcianych kontaktów +- Zgodność z RODO + +## Historia rozwoju + +### Listopad 2025 +- Premiera platformy nordabiznes.pl +- Uruchomienie NordaGPT z Gemini 2.0 +- Import danych 111 firm członkowskich + +### Grudzień 2025 +- Rozbudowa profili firm o dane KRS +- Dodanie audytów Google Business Profile +- Integracja z social mediami + +### Styczeń 2026 +- Upgrade NordaGPT do Gemini 3 Flash (7x lepsze rozumowanie) +- Dodanie historii konwersacji w chacie +- System prywatności i blokowania kontaktów +- Hierarchiczne kategorie firm (4 główne grupy) +- Panel raportów i statystyk +- Sekcja aktualności +- Integracja z CEIDG do pobierania danych JDG + +## Technologia + +Portal zbudowany jest na nowoczesnym stacku technologicznym: +- **Backend:** Python Flask z SQLAlchemy +- **Frontend:** HTML5, CSS3, JavaScript +- **Baza danych:** PostgreSQL +- **AI:** Google Gemini 3 Flash (1M tokenów kontekstu) +- **Hosting:** Serwery w Polsce (INPI) +- **SSL:** Certyfikat Let's Encrypt + +## Informacje kontaktowe + +- **Strona:** https://nordabiznes.pl +- **Organizator:** Izba Przedsiębiorców NORDA, Wejherowo +- **Kontakt:** Przez formularz na stronie lub poprzez profil firmy + +## Podsumowanie + +Norda Biznes Hub to nowoczesna platforma dla przedsiębiorców z Pomorza. Łączy katalog 150 firm z inteligentnym asystentem AI, kalendarzem wydarzeń i narzędziami do networkingu. Dzięki NordaGPT użytkownicy mogą szybko znaleźć odpowiedniego partnera biznesowego - wystarczy zadać pytanie w naturalnym języku. + +Platforma jest stale rozwijana. W 2026 roku planowane są: platforma edukacyjna z webinarami, rozbudowa funkcji rekomendacji między firmami oraz integracje z zewnętrznymi systemami. diff --git a/gemini_service.py b/gemini_service.py index 6b76a65..c121842 100644 --- a/gemini_service.py +++ b/gemini_service.py @@ -4,14 +4,14 @@ Google Gemini AI Service Reusable service for interacting with Google Gemini API. Features: -- Multiple model support (Flash, Pro, Flash-8B) +- Multiple model support (Flash, Pro, Gemini 3) +- Thinking Mode support for Gemini 3 models - Error handling and retries - Cost tracking -- Streaming responses - Safety settings configuration -Author: MTB Tracker Team -Created: 2025-10-18 +Author: NordaBiz Team +Updated: 2026-01-29 (Gemini 3 SDK migration) """ import os @@ -20,8 +20,10 @@ import hashlib import time from datetime import datetime from typing import Optional, Dict, Any, List -import google.generativeai as genai -from google.generativeai.types import HarmCategory, HarmBlockThreshold + +# New Gemini SDK (google-genai) with thinking mode support +from google import genai +from google.genai import types # Configure logging logger = logging.getLogger(__name__) @@ -44,26 +46,46 @@ GEMINI_MODELS = { '3-pro': 'gemini-3-pro-preview', # Gemini 3 Pro - najlepszy reasoning, 2M context } -# Pricing per 1M tokens (USD) - updated 2026-01-29 -GEMINI_PRICING = { - 'gemini-2.5-flash': {'input': 0.30, 'output': 2.50}, - 'gemini-2.5-flash-lite': {'input': 0.10, 'output': 0.40}, - 'gemini-2.5-pro': {'input': 1.25, 'output': 10.00}, - 'gemini-2.0-flash': {'input': 0.10, 'output': 0.40}, - 'gemini-3-flash-preview': {'input': 0.50, 'output': 3.00}, - 'gemini-3-pro-preview': {'input': 2.00, 'output': 12.00}, +# Models that support thinking mode +THINKING_MODELS = {'gemini-3-flash-preview', 'gemini-3-pro-preview'} + +# Available thinking levels for Gemini 3 Flash +THINKING_LEVELS = { + 'minimal': 'MINIMAL', # Lowest latency, minimal reasoning + 'low': 'LOW', # Fast, simple tasks + 'medium': 'MEDIUM', # Balanced (Gemini 3 Flash only) + 'high': 'HIGH', # Maximum reasoning depth (default) } -class GeminiService: - """Service class for Google Gemini API interactions.""" +# Pricing per 1M tokens (USD) - updated 2026-01-29 +GEMINI_PRICING = { + 'gemini-2.5-flash': {'input': 0.30, 'output': 2.50, 'thinking': 0}, + 'gemini-2.5-flash-lite': {'input': 0.10, 'output': 0.40, 'thinking': 0}, + 'gemini-2.5-pro': {'input': 1.25, 'output': 10.00, 'thinking': 0}, + 'gemini-2.0-flash': {'input': 0.10, 'output': 0.40, 'thinking': 0}, + 'gemini-3-flash-preview': {'input': 0.50, 'output': 3.00, 'thinking': 1.00}, + 'gemini-3-pro-preview': {'input': 2.00, 'output': 12.00, 'thinking': 4.00}, +} - def __init__(self, api_key: Optional[str] = None, model: str = 'flash'): + +class GeminiService: + """Service class for Google Gemini API interactions with Thinking Mode support.""" + + def __init__( + self, + api_key: Optional[str] = None, + model: str = 'flash', + thinking_level: str = 'high', + include_thoughts: bool = False + ): """ Initialize Gemini service. Args: api_key: Google AI API key (reads from GOOGLE_GEMINI_API_KEY env if not provided) - model: Model to use ('flash', 'flash-lite', 'pro', 'flash-2.0') + model: Model to use ('flash', 'flash-lite', 'pro', '3-flash', '3-pro') + thinking_level: Reasoning depth ('minimal', 'low', 'medium', 'high') + include_thoughts: Whether to include thinking process in response (for debugging) """ self.api_key = api_key or os.getenv('GOOGLE_GEMINI_API_KEY') @@ -79,31 +101,76 @@ class GeminiService: "Please add your API key to .env file." ) - # Configure Gemini - genai.configure(api_key=self.api_key) + # Initialize new Gemini client + self.client = genai.Client(api_key=self.api_key) # Set model self.model_name = GEMINI_MODELS.get(model, GEMINI_MODELS['flash']) - self.model = genai.GenerativeModel(self.model_name) - # Safety settings (disabled for testing - enable in production if needed) - # Note: Even BLOCK_ONLY_HIGH was blocking neutral prompts like "mountain biking" - # For production apps, consider using BLOCK_ONLY_HIGH or BLOCK_MEDIUM_AND_ABOVE - self.safety_settings = { - HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE, + # Thinking mode configuration + self.thinking_level = thinking_level + self.include_thoughts = include_thoughts + self._thinking_enabled = self.model_name in THINKING_MODELS + + # Safety settings + self.safety_settings = [ + types.SafetySetting( + category="HARM_CATEGORY_HATE_SPEECH", + threshold="BLOCK_NONE" + ), + types.SafetySetting( + category="HARM_CATEGORY_DANGEROUS_CONTENT", + threshold="BLOCK_NONE" + ), + types.SafetySetting( + category="HARM_CATEGORY_SEXUALLY_EXPLICIT", + threshold="BLOCK_NONE" + ), + types.SafetySetting( + category="HARM_CATEGORY_HARASSMENT", + threshold="BLOCK_NONE" + ), + ] + + logger.info( + f"Gemini service initialized: model={self.model_name}, " + f"thinking={self._thinking_enabled}, level={thinking_level}" + ) + + @property + def thinking_enabled(self) -> bool: + """Whether thinking mode is enabled for current model.""" + return self._thinking_enabled + + @property + def thinking_level_display(self) -> str: + """Human-readable thinking level for UI.""" + if not self._thinking_enabled: + return "Wyłączony" + return { + 'minimal': 'Minimalny', + 'low': 'Niski', + 'medium': 'Średni', + 'high': 'Wysoki' + }.get(self.thinking_level, self.thinking_level) + + def get_status(self) -> Dict[str, Any]: + """Get service status for UI display.""" + return { + 'model': self.model_name, + 'thinking_enabled': self._thinking_enabled, + 'thinking_level': self.thinking_level, + 'thinking_level_display': self.thinking_level_display, + 'include_thoughts': self.include_thoughts } - logger.info(f"Gemini service initialized with model: {self.model_name}") - def generate_text( self, prompt: str, temperature: float = 0.7, max_tokens: Optional[int] = None, stream: bool = False, + thinking_level: Optional[str] = None, feature: str = 'general', user_id: Optional[int] = None, company_id: Optional[int] = None, @@ -111,13 +178,14 @@ class GeminiService: related_entity_id: Optional[int] = None ) -> str: """ - Generate text using Gemini API with automatic cost tracking. + Generate text using Gemini API with automatic cost tracking and thinking mode. Args: prompt: Text prompt to send to the model temperature: Sampling temperature (0.0-1.0). Higher = more creative max_tokens: Maximum tokens to generate (None = model default) stream: Whether to stream the response + thinking_level: Override default thinking level for this call feature: Feature name for cost tracking ('chat', 'news_evaluation', etc.) user_id: Optional user ID for cost tracking company_id: Optional company ID for context @@ -133,65 +201,53 @@ class GeminiService: start_time = time.time() try: - # Use minimal configuration to avoid blocking issues with FREE tier - # Only set temperature if different from default - generation_config = None - if temperature != 0.7 or max_tokens: - generation_config = {'temperature': temperature} - if max_tokens: - generation_config['max_output_tokens'] = max_tokens + # Build generation config + config_params = { + 'temperature': temperature, + } + if max_tokens: + config_params['max_output_tokens'] = max_tokens - # Try passing safety_settings to reduce blocking for legitimate news content - # Note: FREE tier may still have built-in restrictions - if generation_config: - response = self.model.generate_content( - prompt, - generation_config=generation_config, - safety_settings=self.safety_settings - ) - else: - response = self.model.generate_content( - prompt, - safety_settings=self.safety_settings + # Add thinking config for Gemini 3 models + if self._thinking_enabled: + level = thinking_level or self.thinking_level + thinking_config = types.ThinkingConfig( + thinking_level=THINKING_LEVELS.get(level, 'HIGH'), + include_thoughts=self.include_thoughts ) + config_params['thinking_config'] = thinking_config + + # Build full config + generation_config = types.GenerateContentConfig( + **config_params, + safety_settings=self.safety_settings + ) + + # Call API + response = self.client.models.generate_content( + model=self.model_name, + contents=prompt, + config=generation_config + ) if stream: - # Return generator for streaming return response - # Check if response was blocked by safety filters - if not response.candidates: - raise Exception( - f"Response blocked. No candidates returned. " - f"This may be due to safety filters." - ) - - candidate = response.candidates[0] - - # Check finish reason - if candidate.finish_reason not in [1, 0]: # 1=STOP, 0=UNSPECIFIED - finish_reasons = { - 2: "SAFETY - Content blocked by safety filters", - 3: "RECITATION - Content blocked due to recitation", - 4: "OTHER - Other reason", - 5: "MAX_TOKENS - Reached max token limit" - } - reason = finish_reasons.get(candidate.finish_reason, f"Unknown ({candidate.finish_reason})") - raise Exception( - f"Response incomplete. Finish reason: {reason}. " - f"Try adjusting safety settings or prompt." - ) + # Extract response text + response_text = response.text # Count tokens and log cost - response_text = response.text latency_ms = int((time.time() - start_time) * 1000) - input_tokens = self.count_tokens(prompt) - output_tokens = self.count_tokens(response_text) + # Get token counts from response metadata + input_tokens = self._count_tokens_from_response(response, 'input') + output_tokens = self._count_tokens_from_response(response, 'output') + thinking_tokens = self._count_tokens_from_response(response, 'thinking') logger.info( f"Gemini API call successful. " - f"Tokens: {input_tokens}+{output_tokens}, " + f"Tokens: {input_tokens}+{output_tokens}" + f"{f'+{thinking_tokens}t' if thinking_tokens else ''}, " f"Latency: {latency_ms}ms, " f"Model: {self.model_name}" ) @@ -202,6 +258,7 @@ class GeminiService: response_text=response_text, input_tokens=input_tokens, output_tokens=output_tokens, + thinking_tokens=thinking_tokens, latency_ms=latency_ms, success=True, feature=feature, @@ -220,8 +277,9 @@ class GeminiService: self._log_api_cost( prompt=prompt, response_text='', - input_tokens=self.count_tokens(prompt), + input_tokens=self._estimate_tokens(prompt), output_tokens=0, + thinking_tokens=0, latency_ms=latency_ms, success=False, error_message=str(e), @@ -251,15 +309,33 @@ class GeminiService: Model's response to the last message """ try: - chat = self.model.start_chat(history=[]) + # Build contents from messages + contents = [] + for msg in messages: + role = 'user' if msg['role'] == 'user' else 'model' + contents.append(types.Content( + role=role, + parts=[types.Part(text=msg['content'])] + )) - # Add conversation history - for msg in messages[:-1]: # All except last - if msg['role'] == 'user': - chat.send_message(msg['content']) + # Build config with thinking if available + config_params = {'temperature': 0.7} + if self._thinking_enabled: + config_params['thinking_config'] = types.ThinkingConfig( + thinking_level=THINKING_LEVELS.get(self.thinking_level, 'HIGH'), + include_thoughts=self.include_thoughts + ) - # Send last message and get response - response = chat.send_message(messages[-1]['content']) + generation_config = types.GenerateContentConfig( + **config_params, + safety_settings=self.safety_settings + ) + + response = self.client.models.generate_content( + model=self.model_name, + contents=contents, + config=generation_config + ) return response.text @@ -283,9 +359,25 @@ class GeminiService: img = PIL.Image.open(image_path) - response = self.model.generate_content( - [prompt, img], - safety_settings=self.safety_settings + # Convert image to bytes + import io + img_bytes = io.BytesIO() + img.save(img_bytes, format=img.format or 'PNG') + img_bytes = img_bytes.getvalue() + + contents = [ + types.Part(text=prompt), + types.Part( + inline_data=types.Blob( + mime_type=f"image/{(img.format or 'png').lower()}", + data=img_bytes + ) + ) + ] + + response = self.client.models.generate_content( + model=self.model_name, + contents=contents ) return response.text @@ -305,12 +397,35 @@ class GeminiService: Number of tokens """ try: - result = self.model.count_tokens(text) + result = self.client.models.count_tokens( + model=self.model_name, + contents=text + ) return result.total_tokens except Exception as e: logger.warning(f"Token counting failed: {e}") - # Rough estimate: ~4 chars per token - return len(text) // 4 + return self._estimate_tokens(text) + + def _estimate_tokens(self, text: str) -> int: + """Estimate tokens when API counting fails (~4 chars per token).""" + return len(text) // 4 + + def _count_tokens_from_response(self, response, token_type: str) -> int: + """Extract token count from API response metadata.""" + try: + usage = response.usage_metadata + if not usage: + return 0 + if token_type == 'input': + return getattr(usage, 'prompt_token_count', 0) or 0 + elif token_type == 'output': + return getattr(usage, 'candidates_token_count', 0) or 0 + elif token_type == 'thinking': + # Gemini 3 reports thinking tokens separately + return getattr(usage, 'thinking_token_count', 0) or 0 + except Exception: + return 0 + return 0 def _log_api_cost( self, @@ -318,7 +433,8 @@ class GeminiService: response_text: str, input_tokens: int, output_tokens: int, - latency_ms: int, + thinking_tokens: int = 0, + latency_ms: int = 0, success: bool = True, error_message: Optional[str] = None, feature: str = 'general', @@ -328,13 +444,14 @@ class GeminiService: related_entity_id: Optional[int] = None ): """ - Log API call costs to database for monitoring + Log API call costs to database for monitoring. Args: prompt: Input prompt text response_text: Output response text input_tokens: Number of input tokens used output_tokens: Number of output tokens generated + thinking_tokens: Number of thinking tokens (Gemini 3) latency_ms: Response time in milliseconds success: Whether API call succeeded error_message: Error details if failed @@ -349,12 +466,13 @@ class GeminiService: try: # Calculate costs - pricing = GEMINI_PRICING.get(self.model_name, {'input': 0.075, 'output': 0.30}) + pricing = GEMINI_PRICING.get(self.model_name, {'input': 0.50, 'output': 3.00, 'thinking': 1.00}) input_cost = (input_tokens / 1_000_000) * pricing['input'] output_cost = (output_tokens / 1_000_000) * pricing['output'] - total_cost = input_cost + output_cost + thinking_cost = (thinking_tokens / 1_000_000) * pricing.get('thinking', 0) + total_cost = input_cost + output_cost + thinking_cost - # Cost in cents for AIUsageLog (more precise) + # Cost in cents for AIUsageLog cost_cents = total_cost * 100 # Create prompt hash (for debugging, not storing full prompt for privacy) @@ -371,10 +489,10 @@ class GeminiService: feature=feature, user_id=user_id, input_tokens=input_tokens, - output_tokens=output_tokens, - total_tokens=input_tokens + output_tokens, + output_tokens=output_tokens + thinking_tokens, # Combined for legacy + total_tokens=input_tokens + output_tokens + thinking_tokens, input_cost=input_cost, - output_cost=output_cost, + output_cost=output_cost + thinking_cost, # Combined for legacy total_cost=total_cost, success=success, error_message=error_message, @@ -383,12 +501,12 @@ class GeminiService: ) db.add(legacy_log) - # Log to new AIUsageLog table (with automatic daily aggregation via trigger) + # Log to new AIUsageLog table usage_log = AIUsageLog( request_type=feature, model=self.model_name, tokens_input=input_tokens, - tokens_output=output_tokens, + tokens_output=output_tokens + thinking_tokens, cost_cents=cost_cents, user_id=user_id, company_id=company_id, @@ -406,7 +524,8 @@ class GeminiService: logger.info( f"API cost logged: {feature} - ${total_cost:.6f} " - f"({input_tokens}+{output_tokens} tokens, {latency_ms}ms)" + f"({input_tokens}+{output_tokens}" + f"{f'+{thinking_tokens}t' if thinking_tokens else ''} tokens, {latency_ms}ms)" ) finally: db.close() @@ -417,7 +536,7 @@ class GeminiService: def generate_embedding( self, text: str, - task_type: str = 'retrieval_document', + task_type: str = 'RETRIEVAL_DOCUMENT', title: Optional[str] = None, user_id: Optional[int] = None, feature: str = 'embedding' @@ -428,19 +547,17 @@ class GeminiService: Args: text: Text to embed task_type: One of: - - 'retrieval_document': For documents to be retrieved - - 'retrieval_query': For search queries - - 'semantic_similarity': For comparing texts - - 'classification': For text classification - - 'clustering': For text clustering + - 'RETRIEVAL_DOCUMENT': For documents to be retrieved + - 'RETRIEVAL_QUERY': For search queries + - 'SEMANTIC_SIMILARITY': For comparing texts + - 'CLASSIFICATION': For text classification + - 'CLUSTERING': For text clustering title: Optional title for document (improves quality) user_id: User ID for cost tracking feature: Feature name for cost tracking Returns: 768-dimensional embedding vector or None on error - - Cost: ~$0.00001 per 1K tokens (very cheap) """ if not text or not text.strip(): logger.warning("Empty text provided for embedding") @@ -449,26 +566,29 @@ class GeminiService: start_time = time.time() try: - # Use text-embedding-004 model (768 dimensions) - # This is Google's recommended model for embeddings - result = genai.embed_content( - model='models/text-embedding-004', - content=text, - task_type=task_type, - title=title + # Build content with optional title + content_parts = [] + if title: + content_parts.append(types.Part(text=f"Title: {title}\n\n")) + content_parts.append(types.Part(text=text)) + + result = self.client.models.embed_content( + model='text-embedding-004', + contents=types.Content(parts=content_parts), + config=types.EmbedContentConfig( + task_type=task_type + ) ) - embedding = result.get('embedding') + embedding = result.embeddings[0].values if result.embeddings else None if not embedding: logger.error("No embedding returned from API") return None - # Log cost (embedding API is very cheap) + # Log cost latency_ms = int((time.time() - start_time) * 1000) - token_count = len(text) // 4 # Approximate - - # Embedding pricing: ~$0.00001 per 1K tokens + token_count = len(text) // 4 cost_usd = (token_count / 1000) * 0.00001 logger.debug( @@ -476,31 +596,7 @@ class GeminiService: f"{token_count} tokens, {latency_ms}ms, ${cost_usd:.8f}" ) - # Log to database (if cost tracking is important) - if DB_AVAILABLE and user_id: - try: - db = SessionLocal() - try: - usage_log = AIUsageLog( - request_type=feature, - model='text-embedding-004', - tokens_input=token_count, - tokens_output=0, - cost_cents=cost_usd * 100, - user_id=user_id, - prompt_length=len(text), - response_length=len(embedding) * 4, # 4 bytes per float - response_time_ms=latency_ms, - success=True - ) - db.add(usage_log) - db.commit() - finally: - db.close() - except Exception as e: - logger.error(f"Failed to log embedding cost: {e}") - - return embedding + return list(embedding) except Exception as e: logger.error(f"Embedding generation error: {e}") @@ -509,7 +605,7 @@ class GeminiService: def generate_embeddings_batch( self, texts: List[str], - task_type: str = 'retrieval_document', + task_type: str = 'RETRIEVAL_DOCUMENT', user_id: Optional[int] = None ) -> List[Optional[List[float]]]: """ @@ -539,18 +635,27 @@ class GeminiService: _gemini_service: Optional[GeminiService] = None -def init_gemini_service(api_key: Optional[str] = None, model: str = 'flash'): +def init_gemini_service( + api_key: Optional[str] = None, + model: str = 'flash', + thinking_level: str = 'high' +): """ Initialize global Gemini service instance. Call this in app.py during Flask app initialization. Args: api_key: Google AI API key (optional if set in env) - model: Model to use ('flash', 'flash-8b', 'pro') + model: Model to use ('flash', 'flash-lite', 'pro', '3-flash', '3-pro') + thinking_level: Reasoning depth for Gemini 3 models ('minimal', 'low', 'medium', 'high') """ global _gemini_service try: - _gemini_service = GeminiService(api_key=api_key, model=model) + _gemini_service = GeminiService( + api_key=api_key, + model=model, + thinking_level=thinking_level + ) logger.info("Global Gemini service initialized successfully") except Exception as e: logger.error(f"Failed to initialize Gemini service: {e}") diff --git a/nordabiz_chat.py b/nordabiz_chat.py index 76809b2..5485b92 100644 --- a/nordabiz_chat.py +++ b/nordabiz_chat.py @@ -98,7 +98,8 @@ class NordaBizChatEngine: if use_global_service: # Use global gemini_service for automatic cost tracking to ai_api_costs table self.gemini_service = gemini_service.get_gemini_service() - self.model_name = "gemini-2.5-flash" + # Get model name from global service (currently Gemini 3 Flash Preview) + self.model_name = self.gemini_service.model_name if self.gemini_service else "gemini-3-flash-preview" self.model = None # Initialize tokenizer for cost calculation (still needed for per-message tracking) @@ -167,7 +168,8 @@ class NordaBizChatEngine: self, conversation_id: int, user_message: str, - user_id: int + user_id: int, + thinking_level: str = 'high' ) -> AIChatMessage: """ Send message and get AI response @@ -179,6 +181,7 @@ class NordaBizChatEngine: conversation_id: Conversation ID user_message: User's message text user_id: User ID (required for ownership validation and cost tracking) + thinking_level: AI reasoning depth ('minimal', 'low', 'medium', 'high') Returns: AIChatMessage: AI response message @@ -240,7 +243,8 @@ class NordaBizChatEngine: response = self._query_ai( context, user_message, - user_id=user_id + user_id=user_id, + thinking_level=thinking_level ) # Calculate metrics for per-message tracking in AIChatMessage table @@ -828,7 +832,8 @@ class NordaBizChatEngine: self, context: Dict[str, Any], user_message: str, - user_id: Optional[int] = None + user_id: Optional[int] = None, + thinking_level: str = 'high' ) -> str: """ Query Gemini AI with full company database context @@ -837,6 +842,7 @@ class NordaBizChatEngine: context: Context dict with ALL companies user_message: User's message user_id: User ID for cost tracking + thinking_level: AI reasoning depth ('minimal', 'low', 'medium', 'high') Returns: AI response text @@ -1193,7 +1199,8 @@ BŁĘDNIE (NIE RÓB - resetuje numerację): prompt=full_prompt, feature='ai_chat', user_id=user_id, - temperature=0.7 + temperature=0.7, + thinking_level=thinking_level ) # Post-process to ensure links are added even if AI didn't format them return self._postprocess_links(response_text, context) diff --git a/requirements.txt b/requirements.txt index 7a99155..99825d7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,8 +14,8 @@ Flask-Limiter==3.5.0 SQLAlchemy==2.0.23 psycopg2-binary==2.9.9 -# Google Gemini AI -google-generativeai==0.3.2 +# Google Gemini AI (new SDK with thinking mode support) +google-genai>=1.0.0 # Google Maps/Places API googlemaps==4.10.0 diff --git a/templates/chat.html b/templates/chat.html index ab570f6..b309488 100755 --- a/templates/chat.html +++ b/templates/chat.html @@ -538,6 +538,151 @@ margin-right: var(--spacing-sm); } + /* ============================================ + Thinking Mode Toggle + ============================================ */ + .thinking-toggle { + position: relative; + margin-left: var(--spacing-sm); + } + + .thinking-btn { + display: flex; + align-items: center; + gap: 6px; + padding: 4px 10px; + background: rgba(255,255,255,0.2); + border: 1px solid rgba(255,255,255,0.3); + border-radius: var(--radius); + color: white; + font-size: var(--font-size-xs); + font-weight: 500; + cursor: pointer; + transition: var(--transition); + } + + .thinking-btn:hover { + background: rgba(255,255,255,0.3); + } + + .thinking-icon { + font-size: 14px; + } + + .thinking-arrow { + transition: transform 0.2s ease; + } + + .thinking-toggle.open .thinking-arrow { + transform: rotate(180deg); + } + + .thinking-dropdown { + display: none; + position: absolute; + top: calc(100% + 8px); + right: 0; + width: 280px; + background: white; + border-radius: var(--radius-lg); + box-shadow: 0 10px 40px rgba(0,0,0,0.2); + z-index: 100; + overflow: hidden; + animation: dropdownSlide 0.2s ease; + } + + @keyframes dropdownSlide { + from { opacity: 0; transform: translateY(-8px); } + to { opacity: 1; transform: translateY(0); } + } + + .thinking-toggle.open .thinking-dropdown { + display: block; + } + + .thinking-dropdown-header { + padding: var(--spacing-md); + background: #f5f3ff; + border-bottom: 1px solid #e5e7eb; + } + + .thinking-dropdown-header strong { + display: block; + color: #5b21b6; + font-size: var(--font-size-sm); + margin-bottom: 4px; + } + + .thinking-dropdown-header p { + color: var(--text-secondary); + font-size: var(--font-size-xs); + margin: 0; + } + + .thinking-option { + padding: var(--spacing-sm) var(--spacing-md); + cursor: pointer; + transition: var(--transition); + border-bottom: 1px solid #f3f4f6; + } + + .thinking-option:last-child { + border-bottom: none; + } + + .thinking-option:hover { + background: #f9fafb; + } + + .thinking-option.active { + background: #f5f3ff; + border-left: 3px solid #7c3aed; + } + + .thinking-option-header { + display: flex; + align-items: center; + gap: var(--spacing-sm); + margin-bottom: 4px; + } + + .thinking-option-icon { + font-size: 16px; + } + + .thinking-option-name { + font-weight: 600; + color: var(--text-primary); + font-size: var(--font-size-sm); + } + + .thinking-option-badge { + font-size: 10px; + padding: 2px 6px; + background: #7c3aed; + color: white; + border-radius: var(--radius-sm); + font-weight: 500; + } + + .thinking-option-desc { + color: var(--text-secondary); + font-size: var(--font-size-xs); + margin: 0; + line-height: 1.4; + } + + @media (max-width: 768px) { + .thinking-label { + display: none; + } + + .thinking-dropdown { + width: 260px; + right: -60px; + } + } + /* ============================================ Model Info Button & Modal ============================================ */ @@ -978,6 +1123,44 @@ 🤖
Określa głębokość analizy przed odpowiedzią
+Najszybsze odpowiedzi. Dla prostych pytań typu "kto?", "gdzie?".
+Zrównoważony. Dobre dla większości pytań o firmy i usługi.
+Maksymalna analiza. Dla złożonych pytań, rekomendacji, strategii.
+