- Zmiana nazwy: "Norda Biznes Hub" → "Norda Biznes Partner" - Aktualizacja modelu AI: Gemini 2.0 Flash → Gemini 3 Flash - Zachowano historyczne odniesienia w timeline i dokumentacji Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
517 lines
17 KiB
JavaScript
Executable File
517 lines
17 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
||
|
||
/**
 * Norda Biznes - Company Data Collector Agent
 *
 * Automatycznie zbiera dane o firmach członkowskich z różnych źródeł:
 * - ALEO.com (NIP, REGON, KRS, dane urzędowe)
 * - Google Search (znajdowanie stron WWW)
 * - Strony firmowe (kontakt, usługi, kompetencje)
 *
 * Autor: Norda Biznes Partner Development
 * Data: 2025-11-23
 */

const fs = require('fs');
|
||
const path = require('path');
|
||
|
||
// Configuration: file locations (all resolved relative to ../data next to
// this script) and request pacing settings used by the collector.
const CONFIG = {
  dataDir: path.join(__dirname, '..', 'data'),
  // Input list of companies to research.
  companiesBasicPath: path.join(__dirname, '..', 'data', 'companies-basic.json'),
  // Output file with the collected records.
  companiesFullPath: path.join(__dirname, '..', 'data', 'companies-full.json'),
  dataSourcesPath: path.join(__dirname, '..', 'data', 'data-sources.md'),
  // Markdown log written at the end of a run.
  collectionLogPath: path.join(__dirname, '..', 'data', 'collection-log.md'),
  delayBetweenRequests: 2000, // 2 seconds between requests (milliseconds)
  maxRetries: 3,
  timeout: 30000
};

// ANSI escape sequences used to colorize console output.
// `reset` must be appended after a colored message to restore the default.
const colors = {
  reset: '\x1b[0m',
  green: '\x1b[32m',
  yellow: '\x1b[33m',
  red: '\x1b[31m',
  blue: '\x1b[34m',
  cyan: '\x1b[36m'
};

/**
 * Company Data Collector Agent.
 *
 * Iterates over the companies listed in companies-basic.json, runs the
 * lookup steps for each one (ALEO registry, Google website search, website
 * scraping), stores the resulting record in companies-full.json and keeps
 * run statistics plus a per-company log.
 *
 * NOTE: searchAleo, findWebsite and scrapeWebsite are placeholders that
 * always resolve to null; with the current code every company therefore
 * ends up with 'basic' data quality.
 */
class CompanyDataCollector {
  /**
   * Initializes empty run statistics and an empty per-company log.
   */
  constructor() {
    this.stats = {
      total: 0,
      processed: 0,
      complete: 0,
      partial: 0,
      failed: 0,
      startTime: null,
      endTime: null
    };

    this.log = [];
  }

  /**
   * Main entry point: loads the company list, processes each company in
   * sequence and writes the report and log at the end.
   *
   * @param {Object} [options]
   * @param {number|null} [options.limit] - process only the first N companies
   * @param {boolean} [options.skipProcessed] - skip companies whose id is
   *   already present in companies-full.json
   * @returns {Promise<void>}
   * @throws rethrows any error raised while loading the input data
   */
  async run(options = {}) {
    console.log(colors.cyan + '\n🤖 Norda Biznes - Company Data Collector Agent' + colors.reset);
    console.log(colors.cyan + '================================================\n' + colors.reset);

    this.stats.startTime = new Date();

    try {
      // Load the companies to research.
      const companies = this.loadCompanies();
      this.stats.total = companies.length;

      console.log(colors.blue + `📊 Załadowano ${companies.length} firm do zbadania\n` + colors.reset);

      // Narrow the list down according to the options.
      let companiesToProcess = companies;
      if (options.limit) {
        companiesToProcess = companies.slice(0, options.limit);
        console.log(colors.yellow + `⚠️ Limit: przetwarzanie ${options.limit} firm\n` + colors.reset);
      }

      if (options.skipProcessed) {
        const fullData = this.loadFullData();
        const processedIds = new Set(fullData.companies.map(c => c.id));
        companiesToProcess = companiesToProcess.filter(c => !processedIds.has(c.id));
        console.log(colors.yellow + `⚠️ Pomijanie już przetworzonych: pozostało ${companiesToProcess.length} firm\n` + colors.reset);
      }

      // Process companies one at a time.
      for (const company of companiesToProcess) {
        await this.processCompany(company);
        this.stats.processed++;

        // Pause between companies, but not after the last one.
        if (this.stats.processed < companiesToProcess.length) {
          await this.delay(CONFIG.delayBetweenRequests);
        }
      }

      // Finish up and persist the results.
      this.stats.endTime = new Date();
      this.generateReport();
      this.saveLog();
      this.updateDataSources();

      console.log(colors.green + '\n✅ Agent zakończył pracę!\n' + colors.reset);

    } catch (error) {
      console.error(colors.red + '\n❌ Błąd agenta: ' + error.message + colors.reset);
      throw error;
    }
  }

  /**
   * Processes a single company: runs the lookup steps, rates the data
   * quality, saves the record and appends an entry to this.log.
   * Errors are caught, counted in stats.failed and recorded in the log
   * entry; they are not rethrown.
   *
   * @param {Object} company - entry from companies-basic.json; the fields
   *   id, name, category and description are read
   * @returns {Promise<void>}
   */
  async processCompany(company) {
    console.log(colors.cyan + `\n[${this.stats.processed + 1}/${this.stats.total}] Przetwarzanie: ${company.name}` + colors.reset);
    console.log(colors.blue + '─'.repeat(60) + colors.reset);

    const companyData = {
      id: company.id,
      name: company.name,
      category: company.category,
      description: {
        short: company.description,
        full: null
      },
      ...this.createEmptyCompanyStructure()
    };

    const logEntry = {
      timestamp: new Date().toISOString(),
      company: company.name,
      steps: [],
      success: false
    };

    try {
      // STEP 1: look the company up in ALEO.com (NIP, REGON, KRS).
      console.log(colors.yellow + '📡 KROK 1: Wyszukiwanie w ALEO.com...' + colors.reset);
      const aleoData = await this.searchAleo(company.name);
      if (aleoData) {
        Object.assign(companyData, aleoData);
        logEntry.steps.push({ step: 'ALEO', status: 'success', data: 'NIP, REGON, KRS' });
        console.log(colors.green + ` ✅ Znaleziono dane urzędowe` + colors.reset);
      } else {
        logEntry.steps.push({ step: 'ALEO', status: 'not_found' });
        console.log(colors.yellow + ` ⚠️ Nie znaleziono w ALEO` + colors.reset);
      }

      // STEP 2: find the company website through Google.
      console.log(colors.yellow + '🔍 KROK 2: Szukanie strony WWW...' + colors.reset);
      const websiteUrl = await this.findWebsite(company.name);
      if (websiteUrl) {
        companyData.website = websiteUrl;
        logEntry.steps.push({ step: 'Google', status: 'success', data: websiteUrl });
        console.log(colors.green + ` ✅ Znaleziono stronę: ${websiteUrl}` + colors.reset);

        // STEP 3: extract data from the website (only when one was found).
        console.log(colors.yellow + '🌐 KROK 3: Pobieranie danych ze strony...' + colors.reset);
        const websiteData = await this.scrapeWebsite(websiteUrl);
        if (websiteData) {
          this.mergeWebsiteData(companyData, websiteData);
          logEntry.steps.push({ step: 'Website', status: 'success', data: 'contact, services' });
          console.log(colors.green + ` ✅ Pobrano dane ze strony` + colors.reset);
        } else {
          logEntry.steps.push({ step: 'Website', status: 'failed' });
          console.log(colors.yellow + ` ⚠️ Nie udało się pobrać danych` + colors.reset);
        }
      } else {
        logEntry.steps.push({ step: 'Google', status: 'not_found' });
        console.log(colors.yellow + ` ⚠️ Nie znaleziono strony WWW` + colors.reset);
      }

      // Rate the collected data.
      companyData.dataQuality = this.assessDataQuality(companyData);
      companyData.lastUpdated = new Date().toISOString().split('T')[0];
      companyData.status = companyData.dataQuality === 'complete' ? 'complete' : 'partial';

      // Persist the record.
      this.saveCompanyData(companyData);

      // Update statistics: anything that is not 'complete' counts as partial.
      if (companyData.dataQuality === 'complete') {
        this.stats.complete++;
        console.log(colors.green + `✅ Pełne dane zebrane` + colors.reset);
      } else {
        this.stats.partial++;
        console.log(colors.yellow + `⚠️ Częściowe dane (${companyData.dataQuality})` + colors.reset);
      }

      logEntry.success = true;

    } catch (error) {
      this.stats.failed++;
      logEntry.error = error.message;
      console.error(colors.red + `❌ Błąd: ${error.message}` + colors.reset);
    }

    this.log.push(logEntry);
  }

  /**
   * Looks a company up in ALEO.com.
   * NOTE: placeholder - logs the query and always resolves to null.
   *
   * @param {string} companyName
   * @returns {Promise<Object|null>} registry data, or null when not found
   */
  async searchAleo(companyName) {
    // TODO: implement a real ALEO.com lookup. Possible approaches:
    // 1. Web scraping with puppeteer/playwright
    // 2. An API, if one is available
    // 3. Manual search and data import

    console.log(colors.yellow + ` ℹ️ ALEO search: "${companyName}" (symulacja)` + colors.reset);

    // Simulation: report "not found".
    return null;
  }

  /**
   * Finds the company website through Google.
   * NOTE: placeholder - logs the query and always resolves to null.
   *
   * @param {string} companyName
   * @returns {Promise<string|null>} website URL, or null when not found
   */
  async findWebsite(companyName) {
    // TODO: implement via Google Custom Search API or scraping
    // https://developers.google.com/custom-search/v1/overview

    console.log(colors.yellow + ` ℹ️ Google search: "${companyName} Wejherowo" (symulacja)` + colors.reset);

    // Simulation: report "not found".
    return null;
  }

  /**
   * Scrapes data from a company website.
   * NOTE: placeholder - logs the URL and always resolves to null.
   *
   * @param {string} url
   * @returns {Promise<Object|null>} scraped data, or null on failure
   */
  async scrapeWebsite(url) {
    // TODO: implement web scraping. Possible tools:
    // 1. puppeteer - for JavaScript-heavy pages
    // 2. cheerio - for static HTML pages
    // 3. playwright - more advanced

    console.log(colors.yellow + ` ℹ️ Scraping: ${url} (symulacja)` + colors.reset);

    // Simulation: report failure.
    return null;
  }

  /**
   * Copies the truthy fields of websiteData into companyData (mutates
   * companyData). `description` goes to companyData.description.full and
   * `social` is merged key by key over the existing social object.
   *
   * @param {Object} companyData - record being built; must have a
   *   `description` object when websiteData.description is set
   * @param {Object} websiteData - data scraped from the website
   * @returns {void}
   */
  mergeWebsiteData(companyData, websiteData) {
    if (websiteData.email) companyData.email = websiteData.email;
    if (websiteData.phone) companyData.phone = websiteData.phone;
    if (websiteData.address) companyData.address = websiteData.address;
    if (websiteData.services) companyData.services = websiteData.services;
    if (websiteData.competencies) companyData.competencies = websiteData.competencies;
    if (websiteData.description) companyData.description.full = websiteData.description;
    if (websiteData.social) companyData.social = { ...companyData.social, ...websiteData.social };
  }

  /**
   * Rates how complete a company record is.
   *
   * - 'complete': all required fields (nip, website, email, phone) and at
   *   least 3 of the high-priority fields (regon, krs, address.full,
   *   non-empty services)
   * - 'partial': all required fields, or at least 2 high-priority fields
   * - 'basic': anything else
   *
   * @param {Object} company
   * @returns {'complete'|'partial'|'basic'}
   */
  assessDataQuality(company) {
    const requiredFields = ['nip', 'website', 'email', 'phone'];
    const highPriorityFields = ['regon', 'krs', 'address', 'services'];

    const hasRequired = requiredFields.every(field => company[field]);
    const hasHighPriority = highPriorityFields.filter(field => {
      // address and services are containers; they count only when filled in.
      if (field === 'address') return company.address && company.address.full;
      if (field === 'services') return company.services && company.services.length > 0;
      return company[field];
    }).length;

    if (hasRequired && hasHighPriority >= 3) return 'complete';
    if (hasRequired || hasHighPriority >= 2) return 'partial';
    return 'basic';
  }

  /**
   * Builds a fresh, empty company record skeleton (new object and new
   * nested containers on every call).
   *
   * @returns {Object}
   */
  createEmptyCompanyStructure() {
    return {
      legalName: null,
      nip: null,
      regon: null,
      krs: null,
      website: null,
      email: null,
      phone: null,
      address: {
        street: null,
        city: null,
        postal: null,
        full: null
      },
      services: [],
      competencies: [],
      social: {
        facebook: null,
        linkedin: null,
        instagram: null
      },
      financialData: {},
      yearEstablished: null,
      certifications: [],
      awards: [],
      status: 'pending',
      dataQuality: 'basic',
      lastUpdated: null,
      sources: []
    };
  }

  /**
   * Loads the company list from companies-basic.json.
   *
   * @returns {Array<Object>} the `companies` array from the file
   * @throws when the file cannot be read or parsed, or has no `companies`
   *   array (the error is logged and rethrown)
   */
  loadCompanies() {
    try {
      const data = fs.readFileSync(CONFIG.companiesBasicPath, 'utf8');
      const json = JSON.parse(data);
      // Fail here with a clear message instead of a TypeError in the caller.
      if (!Array.isArray(json.companies)) {
        throw new Error('missing "companies" array');
      }
      return json.companies;
    } catch (error) {
      console.error(colors.red + '❌ Nie można wczytać companies-basic.json: ' + error.message + colors.reset);
      throw error;
    }
  }

  /**
   * Loads companies-full.json.
   *
   * Only a missing file (ENOENT) yields a fresh empty structure. Any other
   * read error, or invalid JSON, is thrown so that a damaged file is never
   * mistaken for "no data yet" and then overwritten by saveCompanyData.
   *
   * @returns {{meta: Object, companies: Array<Object>}}
   * @throws on read errors other than ENOENT and on JSON syntax errors
   */
  loadFullData() {
    let data;
    try {
      data = fs.readFileSync(CONFIG.companiesFullPath, 'utf8');
    } catch (error) {
      if (error.code !== 'ENOENT') {
        throw error;
      }
      // The file does not exist yet: start with a new structure.
      return {
        meta: {
          total: 0,
          researched: 0,
          complete: 0,
          partial: 0,
          pending: 0,
          lastUpdated: new Date().toISOString().split('T')[0],
          version: '2.0'
        },
        companies: []
      };
    }
    return JSON.parse(data);
  }

  /**
   * Inserts or replaces (matched by id) a company record in
   * companies-full.json and refreshes the file's metadata counters.
   * Best effort: failures are logged to the console and not rethrown.
   *
   * @param {Object} companyData - record to store; `id` and `dataQuality`
   *   are read
   * @returns {void}
   */
  saveCompanyData(companyData) {
    try {
      const fullData = this.loadFullData();

      // Replace the existing record for this id, otherwise append.
      const existingIndex = fullData.companies.findIndex(c => c.id === companyData.id);
      if (existingIndex >= 0) {
        fullData.companies[existingIndex] = companyData;
      } else {
        fullData.companies.push(companyData);
      }

      // Refresh metadata.
      fullData.meta.total = fullData.companies.length;
      fullData.meta.researched = fullData.companies.length;
      fullData.meta.complete = fullData.companies.filter(c => c.dataQuality === 'complete').length;
      fullData.meta.partial = fullData.companies.filter(c => c.dataQuality === 'partial').length;
      // The stored file may hold more records than the current input list
      // (or stats.total may still be 0); never report a negative count.
      fullData.meta.pending = Math.max(0, this.stats.total - fullData.meta.researched);
      fullData.meta.lastUpdated = new Date().toISOString().split('T')[0];

      // Write the file.
      fs.writeFileSync(CONFIG.companiesFullPath, JSON.stringify(fullData, null, 2));

    } catch (error) {
      console.error(colors.red + '❌ Błąd zapisu danych: ' + error.message + colors.reset);
    }
  }

  /**
   * Prints the final summary (counts, percentages of processed companies,
   * run time) to the console. When no company was processed the
   * percentages and the per-company average are reported as 0.
   *
   * @returns {void}
   */
  generateReport() {
    const { processed, total, complete, partial, failed } = this.stats;
    const duration = (this.stats.endTime - this.stats.startTime) / 1000 / 60; // minutes

    // Guard against division by zero when nothing was processed.
    const percent = count => (processed > 0 ? Math.round(count / processed * 100) : 0);
    const averagePerCompany = processed > 0 ? duration / processed : 0;

    console.log(colors.cyan + '\n\n' + '='.repeat(60) + colors.reset);
    console.log(colors.cyan + '📊 RAPORT KOŃCOWY' + colors.reset);
    console.log(colors.cyan + '='.repeat(60) + '\n' + colors.reset);

    console.log(colors.blue + `Przetworzono: ${processed}/${total} firm` + colors.reset);
    console.log(colors.green + `Pełne dane: ${complete} (${percent(complete)}%)` + colors.reset);
    console.log(colors.yellow + `Częściowe dane: ${partial} (${percent(partial)}%)` + colors.reset);
    console.log(colors.red + `Niepowodzenia: ${failed} (${percent(failed)}%)` + colors.reset);
    console.log(colors.blue + `\nCzas działania: ${duration.toFixed(2)} minut` + colors.reset);
    console.log(colors.blue + `Średnio na firmę: ${averagePerCompany.toFixed(2)} min` + colors.reset);

    console.log(colors.cyan + '\n' + '='.repeat(60) + '\n' + colors.reset);
  }

  /**
   * Writes the Markdown run log to CONFIG.collectionLogPath.
   * Best effort: failures are logged to the console and not rethrown.
   *
   * @returns {void}
   */
  saveLog() {
    try {
      const logContent = this.generateLogContent();
      fs.writeFileSync(CONFIG.collectionLogPath, logContent);
      console.log(colors.green + `✅ Log zapisany: ${CONFIG.collectionLogPath}` + colors.reset);
    } catch (error) {
      console.error(colors.red + '❌ Błąd zapisu logu: ' + error.message + colors.reset);
    }
  }

  /**
   * Renders this.stats and this.log as a Markdown document.
   *
   * @returns {string}
   */
  generateLogContent() {
    let content = `# Collection Log - Norda Biznes Companies\n\n`;
    content += `**Data:** ${new Date().toISOString()}\n`;
    content += `**Przetworzono:** ${this.stats.processed}/${this.stats.total} firm\n`;
    content += `**Pełne dane:** ${this.stats.complete}\n`;
    content += `**Częściowe dane:** ${this.stats.partial}\n`;
    content += `**Niepowodzenia:** ${this.stats.failed}\n\n`;
    content += `---\n\n`;

    content += `## Szczegóły przetwarzania\n\n`;

    for (const entry of this.log) {
      content += `### ${entry.company}\n`;
      content += `- **Czas:** ${entry.timestamp}\n`;
      content += `- **Sukces:** ${entry.success ? '✅' : '❌'}\n`;
      content += `- **Kroki:**\n`;

      for (const step of entry.steps) {
        const status = step.status === 'success' ? '✅' : step.status === 'not_found' ? '⚠️' : '❌';
        // Fall back to the raw status when the step recorded no data.
        content += ` - ${status} ${step.step}: ${step.data || step.status}\n`;
      }

      if (entry.error) {
        content += `- **Błąd:** ${entry.error}\n`;
      }

      content += `\n`;
    }

    return content;
  }

  /**
   * Placeholder for refreshing data-sources.md; currently only prints a
   * reminder that the file has to be updated by hand.
   *
   * @returns {void}
   */
  updateDataSources() {
    // TODO: update data-sources.md with the new statistics
    console.log(colors.yellow + '⚠️ data-sources.md: aktualizacja ręczna wymagana' + colors.reset);
  }

  /**
   * Resolves after the given number of milliseconds.
   *
   * @param {number} ms
   * @returns {Promise<void>}
   */
  delay(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}

// CLI interface: runs only when this file is executed directly, not when it
// is loaded with require().
if (require.main === module) {
  const args = process.argv.slice(2);
  const options = {
    limit: null,
    skipProcessed: false
  };

  // Parse command-line arguments. Unknown arguments are ignored.
  for (let i = 0; i < args.length; i++) {
    if (args[i] === '--limit' && args[i + 1]) {
      const limit = Number.parseInt(args[i + 1], 10);
      // Reject non-numeric or non-positive values instead of silently
      // running without a limit (and swallowing the next argument).
      if (!Number.isInteger(limit) || limit <= 0) {
        console.error(`Nieprawidłowa wartość --limit: "${args[i + 1]}" (oczekiwano dodatniej liczby całkowitej)`);
        process.exit(1);
      }
      options.limit = limit;
      i++; // skip the value that was just consumed
    } else if (args[i] === '--skip-processed') {
      options.skipProcessed = true;
    } else if (args[i] === '--help') {
      console.log([
        '',
        'Norda Biznes - Company Data Collector Agent',
        '',
        'Usage: node company-data-collector.js [options]',
        '',
        'Options:',
        '--limit N Przetwórz tylko pierwsze N firm',
        '--skip-processed Pomiń firmy już przetworzone',
        '--help Pokaż tę pomoc',
        '',
        'Examples:',
        'node company-data-collector.js',
        'node company-data-collector.js --limit 5',
        'node company-data-collector.js --skip-processed --limit 10',
        ''
      ].join('\n'));
      process.exit(0);
    }
  }

  // Start the agent; any error escaping run() ends the process with code 1.
  const agent = new CompanyDataCollector();
  agent.run(options).catch(error => {
    console.error('Fatal error:', error);
    process.exit(1);
  });
}

module.exports = CompanyDataCollector;