608 lines
18 KiB
JavaScript
608 lines
18 KiB
JavaScript
#!/usr/bin/env node
|
||
|
||
/**
|
||
* Norda Biznes - JSON to PostgreSQL Migration Script
|
||
*
|
||
* Migrates company data from JSON files to PostgreSQL database
|
||
*
|
||
* Usage:
|
||
* node database/migrate-json-to-pg.js [options]
|
||
*
|
||
* Options:
|
||
* --dry-run Show what would be migrated without actually doing it
|
||
* --verbose Show detailed progress
|
||
*/
|
||
|
||
const fs = require('fs');
|
||
const path = require('path');
|
||
|
||
// PostgreSQL client (will be required later)
|
||
let pg;
|
||
try {
|
||
pg = require('pg');
|
||
} catch (e) {
|
||
console.error('❌ PostgreSQL client not installed. Run: npm install pg');
|
||
process.exit(1);
|
||
}
|
||
|
||
const { Pool } = pg;
|
||
|
||
// ============================================================
|
||
// CONFIGURATION
|
||
// ============================================================
|
||
|
||
const CONFIG = {
|
||
// Database connection (update these for NORDABIZ-01)
|
||
db: {
|
||
host: process.env.DB_HOST || 'localhost',
|
||
port: process.env.DB_PORT || 5432,
|
||
database: process.env.DB_NAME || 'nordabiz',
|
||
user: process.env.DB_USER || 'nordabiz_app',
|
||
password: process.env.DB_PASSWORD || 'your_password_here'
|
||
},
|
||
|
||
// File paths
|
||
files: {
|
||
companiesBasic: path.join(__dirname, '../data/companies-basic.json'),
|
||
companiesFull: path.join(__dirname, '../data/companies-full.json'),
|
||
dataSources: path.join(__dirname, '../data/data-sources.md')
|
||
}
|
||
};
|
||
|
||
// ============================================================
|
||
// HELPER FUNCTIONS
|
||
// ============================================================
|
||
|
||
/**
|
||
* Create a URL-friendly slug from company name
|
||
*/
|
||
function slugify(text) {
|
||
return text
|
||
.toString()
|
||
.toLowerCase()
|
||
.trim()
|
||
.replace(/\s+/g, '-') // Replace spaces with -
|
||
.replace(/[^\w\-]+/g, '') // Remove all non-word chars
|
||
.replace(/\-\-+/g, '-') // Replace multiple - with single -
|
||
.replace(/^-+/, '') // Trim - from start of text
|
||
.replace(/-+$/, ''); // Trim - from end of text
|
||
}
|
||
|
||
/**
|
||
* Map category name to category ID
|
||
*/
|
||
const CATEGORY_MAP = {
|
||
'IT': 1,
|
||
'Produkcja': 2,
|
||
'Handel': 3,
|
||
'Usługi': 4,
|
||
'Budownictwo': 5,
|
||
'Doradztwo': 6,
|
||
'Media': 7,
|
||
'Finanse': 8,
|
||
'Inne': 9
|
||
};
|
||
|
||
/**
|
||
* Get category ID by name
|
||
*/
|
||
function getCategoryId(categoryName) {
|
||
return CATEGORY_MAP[categoryName] || 9; // Default to "Inne"
|
||
}
|
||
|
||
/**
|
||
* Map data quality from JSON to DB enum
|
||
*/
|
||
function mapDataQuality(quality) {
|
||
const map = {
|
||
'basic': 'basic',
|
||
'partial': 'partial',
|
||
'complete': 'complete',
|
||
'verified': 'verified'
|
||
};
|
||
return map[quality] || 'basic';
|
||
}
|
||
|
||
/**
|
||
* Map status from JSON to DB enum
|
||
*/
|
||
function mapStatus(status) {
|
||
const map = {
|
||
'pending': 'pending',
|
||
'pending_research': 'pending',
|
||
'partial': 'active',
|
||
'complete': 'active',
|
||
'active': 'active',
|
||
'inactive': 'inactive',
|
||
'archived': 'archived'
|
||
};
|
||
return map[status] || 'active';
|
||
}
|
||
|
||
// ============================================================
|
||
// DATABASE OPERATIONS
|
||
// ============================================================
|
||
|
||
class DatabaseMigrator {
|
||
constructor(config) {
|
||
this.config = config;
|
||
this.pool = null;
|
||
this.stats = {
|
||
companies: { inserted: 0, updated: 0, failed: 0 },
|
||
services: { inserted: 0, linked: 0 },
|
||
competencies: { inserted: 0, linked: 0 },
|
||
certifications: { inserted: 0 },
|
||
awards: { inserted: 0 },
|
||
financialData: { inserted: 0 },
|
||
socialMedia: { inserted: 0 }
|
||
};
|
||
}
|
||
|
||
/**
|
||
* Connect to database
|
||
*/
|
||
async connect() {
|
||
console.log('\n🔌 Łączenie z bazą danych...');
|
||
console.log(` Host: ${this.config.db.host}:${this.config.db.port}`);
|
||
console.log(` Database: ${this.config.db.database}`);
|
||
|
||
this.pool = new Pool(this.config.db);
|
||
|
||
try {
|
||
const client = await this.pool.connect();
|
||
const result = await client.query('SELECT NOW()');
|
||
client.release();
|
||
console.log(` ✅ Połączono: ${result.rows[0].now}`);
|
||
return true;
|
||
} catch (error) {
|
||
console.error(` ❌ Błąd połączenia: ${error.message}`);
|
||
throw error;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Disconnect from database
|
||
*/
|
||
async disconnect() {
|
||
if (this.pool) {
|
||
await this.pool.end();
|
||
console.log('\n🔌 Rozłączono z bazą danych');
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Insert or update company
|
||
*/
|
||
async upsertCompany(companyData) {
|
||
const slug = slugify(companyData.name);
|
||
const categoryId = getCategoryId(companyData.category);
|
||
const dataQuality = mapDataQuality(companyData.dataQuality);
|
||
const status = mapStatus(companyData.status);
|
||
|
||
const query = `
|
||
INSERT INTO companies (
|
||
name, legal_name, slug, category_id,
|
||
description_short, description_full,
|
||
nip, regon, krs,
|
||
website, email, phone,
|
||
address_street, address_city, address_postal, address_full,
|
||
year_established, employees_count, capital_amount,
|
||
status, data_quality,
|
||
researched_at, last_updated
|
||
) VALUES (
|
||
$1, $2, $3, $4,
|
||
$5, $6,
|
||
$7, $8, $9,
|
||
$10, $11, $12,
|
||
$13, $14, $15, $16,
|
||
$17, $18, $19,
|
||
$20::company_status, $21::data_quality_level,
|
||
$22, $23
|
||
)
|
||
ON CONFLICT (slug) DO UPDATE SET
|
||
legal_name = EXCLUDED.legal_name,
|
||
category_id = EXCLUDED.category_id,
|
||
description_short = EXCLUDED.description_short,
|
||
description_full = EXCLUDED.description_full,
|
||
nip = COALESCE(EXCLUDED.nip, companies.nip),
|
||
regon = COALESCE(EXCLUDED.regon, companies.regon),
|
||
krs = COALESCE(EXCLUDED.krs, companies.krs),
|
||
website = COALESCE(EXCLUDED.website, companies.website),
|
||
email = COALESCE(EXCLUDED.email, companies.email),
|
||
phone = COALESCE(EXCLUDED.phone, companies.phone),
|
||
address_street = COALESCE(EXCLUDED.address_street, companies.address_street),
|
||
address_city = COALESCE(EXCLUDED.address_city, companies.address_city),
|
||
address_postal = COALESCE(EXCLUDED.address_postal, companies.address_postal),
|
||
address_full = COALESCE(EXCLUDED.address_full, companies.address_full),
|
||
year_established = COALESCE(EXCLUDED.year_established, companies.year_established),
|
||
employees_count = COALESCE(EXCLUDED.employees_count, companies.employees_count),
|
||
capital_amount = COALESCE(EXCLUDED.capital_amount, companies.capital_amount),
|
||
status = EXCLUDED.status,
|
||
data_quality = EXCLUDED.data_quality,
|
||
last_updated = EXCLUDED.last_updated
|
||
RETURNING id, name, (xmax = 0) AS inserted
|
||
`;
|
||
|
||
const values = [
|
||
companyData.name,
|
||
companyData.legalName || null,
|
||
slug,
|
||
categoryId,
|
||
companyData.description?.short || null,
|
||
companyData.description?.full || null,
|
||
companyData.nip || null,
|
||
companyData.regon || null,
|
||
companyData.krs || null,
|
||
companyData.website || null,
|
||
companyData.email || null,
|
||
companyData.phone || null,
|
||
companyData.address?.street || null,
|
||
companyData.address?.city || null,
|
||
companyData.address?.postal || null,
|
||
companyData.address?.full || null,
|
||
companyData.yearEstablished || null,
|
||
companyData.employeesCount || null,
|
||
companyData.capitalAmount || null,
|
||
status,
|
||
dataQuality,
|
||
companyData.lastUpdated || new Date().toISOString(),
|
||
new Date().toISOString()
|
||
];
|
||
|
||
try {
|
||
const result = await this.pool.query(query, values);
|
||
const row = result.rows[0];
|
||
|
||
if (row.inserted) {
|
||
this.stats.companies.inserted++;
|
||
console.log(` ✅ Dodano: ${row.name} (ID: ${row.id})`);
|
||
} else {
|
||
this.stats.companies.updated++;
|
||
console.log(` 🔄 Zaktualizowano: ${row.name} (ID: ${row.id})`);
|
||
}
|
||
|
||
return row.id;
|
||
} catch (error) {
|
||
this.stats.companies.failed++;
|
||
console.error(` ❌ Błąd dla ${companyData.name}: ${error.message}`);
|
||
return null;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Insert services for company
|
||
*/
|
||
async insertServices(companyId, services) {
|
||
if (!services || services.length === 0) return;
|
||
|
||
for (const serviceName of services) {
|
||
try {
|
||
// Insert service if not exists
|
||
const serviceResult = await this.pool.query(
|
||
`INSERT INTO services (name, slug)
|
||
VALUES ($1, $2)
|
||
ON CONFLICT (slug) DO UPDATE SET name = EXCLUDED.name
|
||
RETURNING id`,
|
||
[serviceName, slugify(serviceName)]
|
||
);
|
||
|
||
const serviceId = serviceResult.rows[0].id;
|
||
|
||
// Link to company
|
||
await this.pool.query(
|
||
`INSERT INTO company_services (company_id, service_id)
|
||
VALUES ($1, $2)
|
||
ON CONFLICT DO NOTHING`,
|
||
[companyId, serviceId]
|
||
);
|
||
|
||
this.stats.services.linked++;
|
||
} catch (error) {
|
||
console.error(` ⚠️ Service "${serviceName}": ${error.message}`);
|
||
}
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Insert competencies for company
|
||
*/
|
||
async insertCompetencies(companyId, competencies) {
|
||
if (!competencies || competencies.length === 0) return;
|
||
|
||
for (const competencyName of competencies) {
|
||
try {
|
||
// Insert competency if not exists
|
||
const competencyResult = await this.pool.query(
|
||
`INSERT INTO competencies (name, slug)
|
||
VALUES ($1, $2)
|
||
ON CONFLICT (slug) DO UPDATE SET name = EXCLUDED.name
|
||
RETURNING id`,
|
||
[competencyName, slugify(competencyName)]
|
||
);
|
||
|
||
const competencyId = competencyResult.rows[0].id;
|
||
|
||
// Link to company
|
||
await this.pool.query(
|
||
`INSERT INTO company_competencies (company_id, competency_id)
|
||
VALUES ($1, $2)
|
||
ON CONFLICT DO NOTHING`,
|
||
[companyId, competencyId]
|
||
);
|
||
|
||
this.stats.competencies.linked++;
|
||
} catch (error) {
|
||
console.error(` ⚠️ Competency "${competencyName}": ${error.message}`);
|
||
}
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Insert certifications for company
|
||
*/
|
||
async insertCertifications(companyId, certifications) {
|
||
if (!certifications || certifications.length === 0) return;
|
||
|
||
for (const cert of certifications) {
|
||
try {
|
||
await this.pool.query(
|
||
`INSERT INTO certifications (company_id, name, issuer, issue_date)
|
||
VALUES ($1, $2, $3, $4)`,
|
||
[companyId, cert.name || cert, cert.issuer || null, cert.date || null]
|
||
);
|
||
this.stats.certifications.inserted++;
|
||
} catch (error) {
|
||
console.error(` ⚠️ Certification error: ${error.message}`);
|
||
}
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Insert awards for company
|
||
*/
|
||
async insertAwards(companyId, awards) {
|
||
if (!awards || awards.length === 0) return;
|
||
|
||
for (const award of awards) {
|
||
try {
|
||
await this.pool.query(
|
||
`INSERT INTO awards (company_id, name, issuer, year, description)
|
||
VALUES ($1, $2, $3, $4, $5)`,
|
||
[
|
||
companyId,
|
||
award.name || award,
|
||
award.issuer || null,
|
||
award.year || null,
|
||
award.description || null
|
||
]
|
||
);
|
||
this.stats.awards.inserted++;
|
||
} catch (error) {
|
||
console.error(` ⚠️ Award error: ${error.message}`);
|
||
}
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Insert financial data for company
|
||
*/
|
||
async insertFinancialData(companyId, financialData) {
|
||
if (!financialData || Object.keys(financialData).length === 0) return;
|
||
|
||
try {
|
||
await this.pool.query(
|
||
`INSERT INTO financial_data (
|
||
company_id, year, revenue, profit, assets, employees
|
||
) VALUES ($1, $2, $3, $4, $5, $6)
|
||
ON CONFLICT (company_id, year) DO UPDATE SET
|
||
revenue = EXCLUDED.revenue,
|
||
profit = EXCLUDED.profit,
|
||
assets = EXCLUDED.assets,
|
||
employees = EXCLUDED.employees`,
|
||
[
|
||
companyId,
|
||
financialData.year || new Date().getFullYear(),
|
||
financialData.revenue || null,
|
||
financialData.profit || null,
|
||
financialData.assets || null,
|
||
financialData.employees || null
|
||
]
|
||
);
|
||
this.stats.financialData.inserted++;
|
||
} catch (error) {
|
||
console.error(` ⚠️ Financial data error: ${error.message}`);
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Insert social media for company
|
||
*/
|
||
async insertSocialMedia(companyId, social) {
|
||
if (!social) return;
|
||
|
||
const platforms = ['facebook', 'linkedin', 'instagram', 'twitter'];
|
||
|
||
for (const platform of platforms) {
|
||
if (social[platform]) {
|
||
try {
|
||
await this.pool.query(
|
||
`INSERT INTO social_media (company_id, platform, url)
|
||
VALUES ($1, $2, $3)
|
||
ON CONFLICT (company_id, platform) DO UPDATE SET url = EXCLUDED.url`,
|
||
[companyId, platform, social[platform]]
|
||
);
|
||
this.stats.socialMedia.inserted++;
|
||
} catch (error) {
|
||
console.error(` ⚠️ Social media ${platform} error: ${error.message}`);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Link data sources to company
|
||
*/
|
||
async linkDataSources(companyId, sources) {
|
||
if (!sources || sources.length === 0) return;
|
||
|
||
for (const sourceName of sources) {
|
||
try {
|
||
// Get source ID
|
||
const sourceResult = await this.pool.query(
|
||
'SELECT id FROM data_sources WHERE name ILIKE $1 OR url ILIKE $1',
|
||
[`%${sourceName}%`]
|
||
);
|
||
|
||
if (sourceResult.rows.length > 0) {
|
||
const sourceId = sourceResult.rows[0].id;
|
||
|
||
await this.pool.query(
|
||
`INSERT INTO company_data_sources (company_id, source_id)
|
||
VALUES ($1, $2)
|
||
ON CONFLICT DO NOTHING`,
|
||
[companyId, sourceId]
|
||
);
|
||
}
|
||
} catch (error) {
|
||
console.error(` ⚠️ Source link error: ${error.message}`);
|
||
}
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Migrate all companies from JSON
|
||
*/
|
||
async migrateCompanies(companiesData) {
|
||
console.log(`\n📦 Migracja ${companiesData.length} firm...`);
|
||
|
||
for (const company of companiesData) {
|
||
console.log(`\n[${company.id}] ${company.name}`);
|
||
|
||
// Insert main company record
|
||
const companyId = await this.upsertCompany(company);
|
||
|
||
if (companyId) {
|
||
// Insert related data
|
||
await this.insertServices(companyId, company.services);
|
||
await this.insertCompetencies(companyId, company.competencies);
|
||
await this.insertCertifications(companyId, company.certifications);
|
||
await this.insertAwards(companyId, company.awards);
|
||
await this.insertFinancialData(companyId, company.financialData);
|
||
await this.insertSocialMedia(companyId, company.social);
|
||
await this.linkDataSources(companyId, company.sources);
|
||
}
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Print migration statistics
|
||
*/
|
||
printStats() {
|
||
console.log('\n' + '='.repeat(60));
|
||
console.log('📊 RAPORT MIGRACJI');
|
||
console.log('='.repeat(60));
|
||
console.log(`\n📋 Firmy:`);
|
||
console.log(` ✅ Dodane: ${this.stats.companies.inserted}`);
|
||
console.log(` 🔄 Zaktualizowane: ${this.stats.companies.updated}`);
|
||
console.log(` ❌ Błędy: ${this.stats.companies.failed}`);
|
||
|
||
console.log(`\n🔧 Usługi:`);
|
||
console.log(` 🔗 Połączone: ${this.stats.services.linked}`);
|
||
|
||
console.log(`\n💡 Kompetencje:`);
|
||
console.log(` 🔗 Połączone: ${this.stats.competencies.linked}`);
|
||
|
||
if (this.stats.certifications.inserted > 0) {
|
||
console.log(`\n🏆 Certyfikaty: ${this.stats.certifications.inserted}`);
|
||
}
|
||
|
||
if (this.stats.awards.inserted > 0) {
|
||
console.log(`🥇 Nagrody: ${this.stats.awards.inserted}`);
|
||
}
|
||
|
||
if (this.stats.financialData.inserted > 0) {
|
||
console.log(`💰 Dane finansowe: ${this.stats.financialData.inserted}`);
|
||
}
|
||
|
||
if (this.stats.socialMedia.inserted > 0) {
|
||
console.log(`📱 Social media: ${this.stats.socialMedia.inserted}`);
|
||
}
|
||
|
||
console.log('\n' + '='.repeat(60));
|
||
}
|
||
}
|
||
|
||
// ============================================================
|
||
// MAIN EXECUTION
|
||
// ============================================================
|
||
|
||
async function main() {
|
||
console.log('🤖 Norda Biznes - JSON → PostgreSQL Migration');
|
||
console.log('='.repeat(60));
|
||
|
||
// Parse arguments
|
||
const args = process.argv.slice(2);
|
||
const dryRun = args.includes('--dry-run');
|
||
const verbose = args.includes('--verbose');
|
||
|
||
if (dryRun) {
|
||
console.log('⚠️ DRY RUN MODE - Nie będzie zmian w bazie danych\n');
|
||
}
|
||
|
||
// Load JSON data
|
||
console.log('\n📂 Wczytywanie danych JSON...');
|
||
|
||
let companiesData;
|
||
try {
|
||
const fullData = JSON.parse(fs.readFileSync(CONFIG.files.companiesFull, 'utf8'));
|
||
companiesData = fullData.companies || [];
|
||
console.log(` ✅ Załadowano ${companiesData.length} firm z companies-full.json`);
|
||
} catch (error) {
|
||
console.error(` ❌ Błąd wczytywania: ${error.message}`);
|
||
process.exit(1);
|
||
}
|
||
|
||
if (companiesData.length === 0) {
|
||
console.log('\n⚠️ Brak danych do migracji!');
|
||
process.exit(0);
|
||
}
|
||
|
||
if (dryRun) {
|
||
console.log('\n📋 Firmy do migracji:');
|
||
companiesData.forEach((c, i) => {
|
||
console.log(` ${i + 1}. ${c.name} (${c.category}) - ${c.dataQuality}`);
|
||
});
|
||
console.log('\n✅ Dry run zakończony');
|
||
return;
|
||
}
|
||
|
||
// Migrate to database
|
||
const migrator = new DatabaseMigrator(CONFIG);
|
||
|
||
try {
|
||
await migrator.connect();
|
||
await migrator.migrateCompanies(companiesData);
|
||
migrator.printStats();
|
||
} catch (error) {
|
||
console.error(`\n❌ Błąd migracji: ${error.message}`);
|
||
if (verbose) {
|
||
console.error(error.stack);
|
||
}
|
||
process.exit(1);
|
||
} finally {
|
||
await migrator.disconnect();
|
||
}
|
||
|
||
console.log('\n✅ Migracja zakończona pomyślnie!');
|
||
}
|
||
|
||
// Run if called directly
|
||
if (require.main === module) {
|
||
main().catch(error => {
|
||
console.error('Fatal error:', error);
|
||
process.exit(1);
|
||
});
|
||
}
|
||
|
||
module.exports = { DatabaseMigrator, CONFIG };
|