nordabiz/database/migrate-json-to-pg.js
2026-01-01 14:01:49 +01:00

608 lines
18 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
/**
* Norda Biznes - JSON to PostgreSQL Migration Script
*
* Migrates company data from JSON files to PostgreSQL database
*
* Usage:
* node database/migrate-json-to-pg.js [options]
*
* Options:
* --dry-run Show what would be migrated without actually doing it
* --verbose Show detailed progress
*/
const fs = require('fs');
const path = require('path');
// PostgreSQL client (will be required later)
let pg;
try {
pg = require('pg');
} catch (e) {
console.error('❌ PostgreSQL client not installed. Run: npm install pg');
process.exit(1);
}
const { Pool } = pg;
// ============================================================
// CONFIGURATION
// ============================================================
const CONFIG = {
// Database connection (update these for NORDABIZ-01)
db: {
host: process.env.DB_HOST || 'localhost',
port: process.env.DB_PORT || 5432,
database: process.env.DB_NAME || 'nordabiz',
user: process.env.DB_USER || 'nordabiz_app',
password: process.env.DB_PASSWORD || 'your_password_here'
},
// File paths
files: {
companiesBasic: path.join(__dirname, '../data/companies-basic.json'),
companiesFull: path.join(__dirname, '../data/companies-full.json'),
dataSources: path.join(__dirname, '../data/data-sources.md')
}
};
// ============================================================
// HELPER FUNCTIONS
// ============================================================
/**
* Create a URL-friendly slug from company name
*/
function slugify(text) {
return text
.toString()
.toLowerCase()
.trim()
.replace(/\s+/g, '-') // Replace spaces with -
.replace(/[^\w\-]+/g, '') // Remove all non-word chars
.replace(/\-\-+/g, '-') // Replace multiple - with single -
.replace(/^-+/, '') // Trim - from start of text
.replace(/-+$/, ''); // Trim - from end of text
}
/**
* Map category name to category ID
*/
const CATEGORY_MAP = {
'IT': 1,
'Produkcja': 2,
'Handel': 3,
'Usługi': 4,
'Budownictwo': 5,
'Doradztwo': 6,
'Media': 7,
'Finanse': 8,
'Inne': 9
};
/**
* Get category ID by name
*/
function getCategoryId(categoryName) {
return CATEGORY_MAP[categoryName] || 9; // Default to "Inne"
}
/**
* Map data quality from JSON to DB enum
*/
function mapDataQuality(quality) {
const map = {
'basic': 'basic',
'partial': 'partial',
'complete': 'complete',
'verified': 'verified'
};
return map[quality] || 'basic';
}
/**
* Map status from JSON to DB enum
*/
function mapStatus(status) {
const map = {
'pending': 'pending',
'pending_research': 'pending',
'partial': 'active',
'complete': 'active',
'active': 'active',
'inactive': 'inactive',
'archived': 'archived'
};
return map[status] || 'active';
}
// ============================================================
// DATABASE OPERATIONS
// ============================================================
class DatabaseMigrator {
constructor(config) {
this.config = config;
this.pool = null;
this.stats = {
companies: { inserted: 0, updated: 0, failed: 0 },
services: { inserted: 0, linked: 0 },
competencies: { inserted: 0, linked: 0 },
certifications: { inserted: 0 },
awards: { inserted: 0 },
financialData: { inserted: 0 },
socialMedia: { inserted: 0 }
};
}
/**
* Connect to database
*/
async connect() {
console.log('\n🔌 Łączenie z bazą danych...');
console.log(` Host: ${this.config.db.host}:${this.config.db.port}`);
console.log(` Database: ${this.config.db.database}`);
this.pool = new Pool(this.config.db);
try {
const client = await this.pool.connect();
const result = await client.query('SELECT NOW()');
client.release();
console.log(` ✅ Połączono: ${result.rows[0].now}`);
return true;
} catch (error) {
console.error(` ❌ Błąd połączenia: ${error.message}`);
throw error;
}
}
/**
* Disconnect from database
*/
async disconnect() {
if (this.pool) {
await this.pool.end();
console.log('\n🔌 Rozłączono z bazą danych');
}
}
/**
* Insert or update company
*/
async upsertCompany(companyData) {
const slug = slugify(companyData.name);
const categoryId = getCategoryId(companyData.category);
const dataQuality = mapDataQuality(companyData.dataQuality);
const status = mapStatus(companyData.status);
const query = `
INSERT INTO companies (
name, legal_name, slug, category_id,
description_short, description_full,
nip, regon, krs,
website, email, phone,
address_street, address_city, address_postal, address_full,
year_established, employees_count, capital_amount,
status, data_quality,
researched_at, last_updated
) VALUES (
$1, $2, $3, $4,
$5, $6,
$7, $8, $9,
$10, $11, $12,
$13, $14, $15, $16,
$17, $18, $19,
$20::company_status, $21::data_quality_level,
$22, $23
)
ON CONFLICT (slug) DO UPDATE SET
legal_name = EXCLUDED.legal_name,
category_id = EXCLUDED.category_id,
description_short = EXCLUDED.description_short,
description_full = EXCLUDED.description_full,
nip = COALESCE(EXCLUDED.nip, companies.nip),
regon = COALESCE(EXCLUDED.regon, companies.regon),
krs = COALESCE(EXCLUDED.krs, companies.krs),
website = COALESCE(EXCLUDED.website, companies.website),
email = COALESCE(EXCLUDED.email, companies.email),
phone = COALESCE(EXCLUDED.phone, companies.phone),
address_street = COALESCE(EXCLUDED.address_street, companies.address_street),
address_city = COALESCE(EXCLUDED.address_city, companies.address_city),
address_postal = COALESCE(EXCLUDED.address_postal, companies.address_postal),
address_full = COALESCE(EXCLUDED.address_full, companies.address_full),
year_established = COALESCE(EXCLUDED.year_established, companies.year_established),
employees_count = COALESCE(EXCLUDED.employees_count, companies.employees_count),
capital_amount = COALESCE(EXCLUDED.capital_amount, companies.capital_amount),
status = EXCLUDED.status,
data_quality = EXCLUDED.data_quality,
last_updated = EXCLUDED.last_updated
RETURNING id, name, (xmax = 0) AS inserted
`;
const values = [
companyData.name,
companyData.legalName || null,
slug,
categoryId,
companyData.description?.short || null,
companyData.description?.full || null,
companyData.nip || null,
companyData.regon || null,
companyData.krs || null,
companyData.website || null,
companyData.email || null,
companyData.phone || null,
companyData.address?.street || null,
companyData.address?.city || null,
companyData.address?.postal || null,
companyData.address?.full || null,
companyData.yearEstablished || null,
companyData.employeesCount || null,
companyData.capitalAmount || null,
status,
dataQuality,
companyData.lastUpdated || new Date().toISOString(),
new Date().toISOString()
];
try {
const result = await this.pool.query(query, values);
const row = result.rows[0];
if (row.inserted) {
this.stats.companies.inserted++;
console.log(` ✅ Dodano: ${row.name} (ID: ${row.id})`);
} else {
this.stats.companies.updated++;
console.log(` 🔄 Zaktualizowano: ${row.name} (ID: ${row.id})`);
}
return row.id;
} catch (error) {
this.stats.companies.failed++;
console.error(` ❌ Błąd dla ${companyData.name}: ${error.message}`);
return null;
}
}
/**
* Insert services for company
*/
async insertServices(companyId, services) {
if (!services || services.length === 0) return;
for (const serviceName of services) {
try {
// Insert service if not exists
const serviceResult = await this.pool.query(
`INSERT INTO services (name, slug)
VALUES ($1, $2)
ON CONFLICT (slug) DO UPDATE SET name = EXCLUDED.name
RETURNING id`,
[serviceName, slugify(serviceName)]
);
const serviceId = serviceResult.rows[0].id;
// Link to company
await this.pool.query(
`INSERT INTO company_services (company_id, service_id)
VALUES ($1, $2)
ON CONFLICT DO NOTHING`,
[companyId, serviceId]
);
this.stats.services.linked++;
} catch (error) {
console.error(` ⚠️ Service "${serviceName}": ${error.message}`);
}
}
}
/**
* Insert competencies for company
*/
async insertCompetencies(companyId, competencies) {
if (!competencies || competencies.length === 0) return;
for (const competencyName of competencies) {
try {
// Insert competency if not exists
const competencyResult = await this.pool.query(
`INSERT INTO competencies (name, slug)
VALUES ($1, $2)
ON CONFLICT (slug) DO UPDATE SET name = EXCLUDED.name
RETURNING id`,
[competencyName, slugify(competencyName)]
);
const competencyId = competencyResult.rows[0].id;
// Link to company
await this.pool.query(
`INSERT INTO company_competencies (company_id, competency_id)
VALUES ($1, $2)
ON CONFLICT DO NOTHING`,
[companyId, competencyId]
);
this.stats.competencies.linked++;
} catch (error) {
console.error(` ⚠️ Competency "${competencyName}": ${error.message}`);
}
}
}
/**
* Insert certifications for company
*/
async insertCertifications(companyId, certifications) {
if (!certifications || certifications.length === 0) return;
for (const cert of certifications) {
try {
await this.pool.query(
`INSERT INTO certifications (company_id, name, issuer, issue_date)
VALUES ($1, $2, $3, $4)`,
[companyId, cert.name || cert, cert.issuer || null, cert.date || null]
);
this.stats.certifications.inserted++;
} catch (error) {
console.error(` ⚠️ Certification error: ${error.message}`);
}
}
}
/**
* Insert awards for company
*/
async insertAwards(companyId, awards) {
if (!awards || awards.length === 0) return;
for (const award of awards) {
try {
await this.pool.query(
`INSERT INTO awards (company_id, name, issuer, year, description)
VALUES ($1, $2, $3, $4, $5)`,
[
companyId,
award.name || award,
award.issuer || null,
award.year || null,
award.description || null
]
);
this.stats.awards.inserted++;
} catch (error) {
console.error(` ⚠️ Award error: ${error.message}`);
}
}
}
/**
* Insert financial data for company
*/
async insertFinancialData(companyId, financialData) {
if (!financialData || Object.keys(financialData).length === 0) return;
try {
await this.pool.query(
`INSERT INTO financial_data (
company_id, year, revenue, profit, assets, employees
) VALUES ($1, $2, $3, $4, $5, $6)
ON CONFLICT (company_id, year) DO UPDATE SET
revenue = EXCLUDED.revenue,
profit = EXCLUDED.profit,
assets = EXCLUDED.assets,
employees = EXCLUDED.employees`,
[
companyId,
financialData.year || new Date().getFullYear(),
financialData.revenue || null,
financialData.profit || null,
financialData.assets || null,
financialData.employees || null
]
);
this.stats.financialData.inserted++;
} catch (error) {
console.error(` ⚠️ Financial data error: ${error.message}`);
}
}
/**
* Insert social media for company
*/
async insertSocialMedia(companyId, social) {
if (!social) return;
const platforms = ['facebook', 'linkedin', 'instagram', 'twitter'];
for (const platform of platforms) {
if (social[platform]) {
try {
await this.pool.query(
`INSERT INTO social_media (company_id, platform, url)
VALUES ($1, $2, $3)
ON CONFLICT (company_id, platform) DO UPDATE SET url = EXCLUDED.url`,
[companyId, platform, social[platform]]
);
this.stats.socialMedia.inserted++;
} catch (error) {
console.error(` ⚠️ Social media ${platform} error: ${error.message}`);
}
}
}
}
/**
* Link data sources to company
*/
async linkDataSources(companyId, sources) {
if (!sources || sources.length === 0) return;
for (const sourceName of sources) {
try {
// Get source ID
const sourceResult = await this.pool.query(
'SELECT id FROM data_sources WHERE name ILIKE $1 OR url ILIKE $1',
[`%${sourceName}%`]
);
if (sourceResult.rows.length > 0) {
const sourceId = sourceResult.rows[0].id;
await this.pool.query(
`INSERT INTO company_data_sources (company_id, source_id)
VALUES ($1, $2)
ON CONFLICT DO NOTHING`,
[companyId, sourceId]
);
}
} catch (error) {
console.error(` ⚠️ Source link error: ${error.message}`);
}
}
}
/**
* Migrate all companies from JSON
*/
async migrateCompanies(companiesData) {
console.log(`\n📦 Migracja ${companiesData.length} firm...`);
for (const company of companiesData) {
console.log(`\n[${company.id}] ${company.name}`);
// Insert main company record
const companyId = await this.upsertCompany(company);
if (companyId) {
// Insert related data
await this.insertServices(companyId, company.services);
await this.insertCompetencies(companyId, company.competencies);
await this.insertCertifications(companyId, company.certifications);
await this.insertAwards(companyId, company.awards);
await this.insertFinancialData(companyId, company.financialData);
await this.insertSocialMedia(companyId, company.social);
await this.linkDataSources(companyId, company.sources);
}
}
}
/**
* Print migration statistics
*/
printStats() {
console.log('\n' + '='.repeat(60));
console.log('📊 RAPORT MIGRACJI');
console.log('='.repeat(60));
console.log(`\n📋 Firmy:`);
console.log(` ✅ Dodane: ${this.stats.companies.inserted}`);
console.log(` 🔄 Zaktualizowane: ${this.stats.companies.updated}`);
console.log(` ❌ Błędy: ${this.stats.companies.failed}`);
console.log(`\n🔧 Usługi:`);
console.log(` 🔗 Połączone: ${this.stats.services.linked}`);
console.log(`\n💡 Kompetencje:`);
console.log(` 🔗 Połączone: ${this.stats.competencies.linked}`);
if (this.stats.certifications.inserted > 0) {
console.log(`\n🏆 Certyfikaty: ${this.stats.certifications.inserted}`);
}
if (this.stats.awards.inserted > 0) {
console.log(`🥇 Nagrody: ${this.stats.awards.inserted}`);
}
if (this.stats.financialData.inserted > 0) {
console.log(`💰 Dane finansowe: ${this.stats.financialData.inserted}`);
}
if (this.stats.socialMedia.inserted > 0) {
console.log(`📱 Social media: ${this.stats.socialMedia.inserted}`);
}
console.log('\n' + '='.repeat(60));
}
}
// ============================================================
// MAIN EXECUTION
// ============================================================
async function main() {
console.log('🤖 Norda Biznes - JSON → PostgreSQL Migration');
console.log('='.repeat(60));
// Parse arguments
const args = process.argv.slice(2);
const dryRun = args.includes('--dry-run');
const verbose = args.includes('--verbose');
if (dryRun) {
console.log('⚠️ DRY RUN MODE - Nie będzie zmian w bazie danych\n');
}
// Load JSON data
console.log('\n📂 Wczytywanie danych JSON...');
let companiesData;
try {
const fullData = JSON.parse(fs.readFileSync(CONFIG.files.companiesFull, 'utf8'));
companiesData = fullData.companies || [];
console.log(` ✅ Załadowano ${companiesData.length} firm z companies-full.json`);
} catch (error) {
console.error(` ❌ Błąd wczytywania: ${error.message}`);
process.exit(1);
}
if (companiesData.length === 0) {
console.log('\n⚠ Brak danych do migracji!');
process.exit(0);
}
if (dryRun) {
console.log('\n📋 Firmy do migracji:');
companiesData.forEach((c, i) => {
console.log(` ${i + 1}. ${c.name} (${c.category}) - ${c.dataQuality}`);
});
console.log('\n✅ Dry run zakończony');
return;
}
// Migrate to database
const migrator = new DatabaseMigrator(CONFIG);
try {
await migrator.connect();
await migrator.migrateCompanies(companiesData);
migrator.printStats();
} catch (error) {
console.error(`\n❌ Błąd migracji: ${error.message}`);
if (verbose) {
console.error(error.stack);
}
process.exit(1);
} finally {
await migrator.disconnect();
}
console.log('\n✅ Migracja zakończona pomyślnie!');
}
// Run if called directly
if (require.main === module) {
main().catch(error => {
console.error('Fatal error:', error);
process.exit(1);
});
}
module.exports = { DatabaseMigrator, CONFIG };