const { Pool } = require('pg');
const { execSync } = require('child_process');
const fs = require('fs');
const readline = require('readline');
require('dotenv').config();

const DATA_URL = 'https://download.geonames.org/export/dump/cities5000.zip';
const ZIP_FILE = '/tmp/cities.zip';
const TXT_FILE = '/tmp/cities5000.txt';

// Rows per multi-row INSERT. Each row binds 5 parameters (see insertBatch).
const BATCH_SIZE = 500;
// Emit a progress line whenever the running total is a multiple of this.
const PROGRESS_INTERVAL = 5000;
// Lines with fewer tab-separated fields than this are skipped; the highest
// index read below is 14 (population).
const MIN_COLUMNS = 15;

/**
 * Parses one tab-separated line of the extracted dump into a city record.
 *
 * Reads fields 1 (name), 4 (latitude), 5 (longitude), 8 (country code) and
 * 14 (population).
 *
 * @param {string} line - One raw line from the text file.
 * @returns {{name: string, lat: number, lon: number, country: string, population: number} | null}
 *   The parsed record, or null when the line has too few fields or its
 *   coordinates are not finite numbers.
 */
function parseCityLine(line) {
  const parts = line.split('\t');
  if (parts.length < MIN_COLUMNS) {
    return null;
  }

  const lat = Number.parseFloat(parts[4]);
  const lon = Number.parseFloat(parts[5]);
  // A NaN coordinate would otherwise be sent to ST_MakePoint; drop the row instead.
  if (!Number.isFinite(lat) || !Number.isFinite(lon)) {
    return null;
  }

  return {
    name: parts[1],
    lat,
    lon,
    country: parts[8],
    // `|| 0` is intentional: an unparsable population (NaN) is stored as 0.
    population: Number.parseInt(parts[14], 10) || 0,
  };
}

/**
 * Streams TXT_FILE line by line and inserts the parsed cities in batches.
 *
 * @param {import('pg').PoolClient} client - Checked-out client to insert with.
 * @returns {Promise<{imported: number, skipped: number}>} Row counts.
 */
async function streamCities(client) {
  const fileStream = fs.createReadStream(TXT_FILE);
  const rl = readline.createInterface({ input: fileStream, crlfDelay: Infinity });

  let batch = [];
  let imported = 0;
  let skipped = 0;

  try {
    for await (const line of rl) {
      const city = parseCityLine(line);
      if (city === null) {
        skipped += 1;
        continue;
      }

      batch.push(city);
      if (batch.length >= BATCH_SIZE) {
        await insertBatch(client, batch);
        imported += batch.length;
        if (imported % PROGRESS_INTERVAL === 0) {
          console.log(`Imported ${imported} cities...`);
        }
        batch = [];
      }
    }

    // Flush the final partial batch.
    if (batch.length > 0) {
      await insertBatch(client, batch);
      imported += batch.length;
    }
  } finally {
    // Close the file handle even when an insert fails mid-stream.
    rl.close();
    fileStream.destroy();
  }

  return { imported, skipped };
}

/**
 * Best-effort removal of a temporary file; a failure is logged, not thrown,
 * so that it cannot prevent the pool from being shut down afterwards.
 *
 * @param {string} filePath - Path of the file to remove.
 */
function removeIfExists(filePath) {
  try {
    if (fs.existsSync(filePath)) {
      fs.unlinkSync(filePath);
    }
  } catch (err) {
    console.warn(`WARNING: could not remove ${filePath}:`, err);
  }
}

/**
 * Downloads the GeoNames cities5000 dump, extracts it, and replaces the
 * contents of the `cities` table with the parsed rows.
 *
 * The TRUNCATE and all inserts run in a single transaction, so a failure
 * part-way through rolls back and leaves the previous table contents intact.
 * Note that TRUNCATE holds an exclusive lock on the table until the
 * transaction ends.
 *
 * Errors are logged and reported through a non-zero process exit code; the
 * returned promise does not reject for import failures.
 *
 * @returns {Promise<void>}
 */
async function importGeoNames() {
  const pool = new Pool({
    // NOTE(review): the fallback embeds development credentials; confirm it
    // is never relied on outside local/docker environments.
    connectionString:
      process.env.DATABASE_URL ||
      'postgresql://line_of_sight:line_of_sight_pass@postgres:5432/line_of_sight',
  });

  let client;
  let inTransaction = false;

  try {
    console.log('Downloading GeoNames cities5000 (Pop > 5000)...');
    execSync(`wget -q ${DATA_URL} -O ${ZIP_FILE}`);

    console.log('Extracting data...');
    execSync(`unzip -o ${ZIP_FILE} -d /tmp`);

    console.log('Connecting to database...');
    client = await pool.connect();

    await client.query('BEGIN');
    inTransaction = true;

    // Ensure table is clean
    await client.query('TRUNCATE TABLE cities');

    console.log('Starting stream import...');
    const { imported, skipped } = await streamCities(client);

    await client.query('COMMIT');
    inTransaction = false;

    if (skipped > 0) {
      console.log(`Skipped ${skipped} malformed lines.`);
    }
    console.log(`SUCCESS: Imported ${imported} cities and towns.`);
  } catch (err) {
    if (client && inTransaction) {
      try {
        await client.query('ROLLBACK');
      } catch (rollbackErr) {
        console.error('ERROR during rollback:', rollbackErr);
      }
    }
    console.error('ERROR during import:', err);
    // Make the failure visible to callers such as CI or shell scripts.
    process.exitCode = 1;
  } finally {
    // Release on every path: pool.end() waits for checked-out clients.
    if (client) {
      client.release();
    }
    // Cleanup
    removeIfExists(ZIP_FILE);
    removeIfExists(TXT_FILE);
    await pool.end();
  }
}

/**
 * Inserts a batch of cities with a single multi-row parameterized INSERT.
 *
 * @param {import('pg').PoolClient} client - Client to run the query on.
 * @param {Array<{name: string, lat: number, lon: number, country: string, population: number}>} batch
 *   Cities to insert. An empty batch is a no-op.
 * @returns {Promise<void>}
 */
async function insertBatch(client, batch) {
  // An empty VALUES list is invalid SQL, so there is nothing to send.
  if (batch.length === 0) {
    return;
  }

  const queryParts = [];
  const values = [];

  batch.forEach((city, index) => {
    const base = index * 5;
    queryParts.push(
      `($${base + 1}, $${base + 2}, $${base + 3}, ST_SetSRID(ST_MakePoint($${base + 4}, $${base + 5}), 4326)::geography)`
    );
    // ST_MakePoint takes (x, y), i.e. longitude first, then latitude.
    values.push(city.name, city.population, city.country, city.lon, city.lat);
  });

  await client.query(
    `INSERT INTO cities (name, population, country, geom) VALUES ${queryParts.join(',')}`,
    values
  );
}

importGeoNames().catch((err) => {
  // Reached only if shutdown itself (e.g. pool.end()) fails.
  console.error('FATAL: unexpected failure:', err);
  process.exitCode = 1;
});