covid19/generate.js

599 lines
18 KiB
JavaScript
Raw Normal View History

2020-04-27 03:04:16 +00:00
#!/usr/bin/env node
const path = require('path');
const fs = require('fs');
const parseCsv = require('csv-parse/lib/sync');
const pug = require('pug');
2020-04-29 00:38:11 +00:00
const {execSync} = require('child_process');
2020-04-27 03:04:16 +00:00
const publicDir = path.join(__dirname, 'public');
const templatesDir = path.join(__dirname, 'tmpl');
2020-04-28 03:02:31 +00:00
const dataDir = path.join(__dirname, 'data');
2020-04-29 00:38:11 +00:00
const covidGitDir = path.join(__dirname, 'COVID-19');
const covidDataDir = path.join(covidGitDir, 'csse_covid_19_data');
2020-04-28 03:02:31 +00:00
const timeSeriesDir = path.join(covidDataDir, 'csse_covid_19_time_series');
2020-04-27 03:04:16 +00:00
/**
 * Adapts a Node-style callback API to a Promise.
 *
 * @param {Function} fn - called once with a single `(err, result)` callback.
 * @returns {Promise<*>} rejects with `err` when it is truthy, otherwise
 *   resolves with `result`.
 */
const promiseMe = (fn) => new Promise((resolve, reject) => {
  const settle = (err, result) => {
    if (!err) {
      resolve(result);
      return;
    }
    reject(err);
  };
  fn(settle);
});
// Create the output tree (public/countries) up front; `recursive` makes this
// a no-op when the directories already exist.
fs.mkdirSync(path.join(publicDir, 'countries'), {recursive: true});

// Front-end assets copied out of node_modules into the public directory.
// Each entry is [path segments below node_modules, file name in publicDir].
const vendoredAssets = [
  [['chart.js', 'dist', 'Chart.bundle.min.js'], 'Chart.bundle.js'],
  [['bootstrap', 'dist', 'css', 'bootstrap.css'], 'bootstrap.css'],
];
for (const [packageSegments, targetName] of vendoredAssets) {
  fs.copyFileSync(
    path.join(__dirname, 'node_modules', ...packageSegments),
    path.join(publicDir, targetName),
  );
}
// Time-series input files, all resolved relative to timeSeriesDir.
// Only the two deaths files are read in the code visible here; the
// confirmed-case paths are declared but not referenced.
const deathsGlobalCsv = path.join(timeSeriesDir, 'time_series_covid19_deaths_global.csv');
const confirmedGlobalCsv = path.join(timeSeriesDir, 'time_series_covid19_confirmed_global.csv');
const deathsUSCsv = path.join(timeSeriesDir, 'time_series_covid19_deaths_US.csv');
const confirmedUSCsv = path.join(timeSeriesDir, 'time_series_covid19_confirmed_US.csv');
2020-04-28 03:02:31 +00:00
const populationStatesCsv = path.join(dataDir, 'SCPRC-EST2019-18+POP-RES.csv');
const populationCountriesCsv = path.join(dataDir, 'population-world-wikipedia.tsv');
2020-04-27 03:04:16 +00:00
2020-04-29 00:38:11 +00:00
// Committer date of the most recent commit touching each deaths CSV, read
// from the data repository's git history. `%cI` (strict ISO 8601) is used
// rather than `%ci` so the value is a format `Date` is specified to parse.
const lastGlobalDeathsUpdate = execSync(`git -C "${covidGitDir}" log -n 1 --pretty=format:'%cI' "${deathsGlobalCsv}"`, {
  encoding: 'utf8',
});
const lastUSDeathsUpdate = execSync(`git -C "${covidGitDir}" log -n 1 --pretty=format:'%cI' "${deathsUSCsv}"`, {
  encoding: 'utf8',
});
// Compare instants, not text: two timestamps carrying different UTC offsets
// do not sort chronologically as strings. Output that does not parse (for
// example an empty string when a file has no history) is ignored so the
// other timestamp still wins; if neither parses the result is an Invalid Date.
const commitTimes = [lastGlobalDeathsUpdate, lastUSDeathsUpdate]
  .map((stamp) => Date.parse(stamp.trim()))
  .filter((time) => !Number.isNaN(time));
const lastUpdate = new Date(commitTimes.length > 0 ? Math.max(...commitTimes) : NaN);
2020-04-27 03:04:16 +00:00
/**
 * Renders a number as a string, prefixing a single "0" when it is below 10.
 *
 * @param {number} value
 * @returns {string}
 */
const zeroPad = (value) => {
  if (value < 10) {
    return `0${value}`;
  }
  return value.toString();
};
/**
 * Builds a file-name-safe slug: every character that is not an ASCII letter
 * becomes "-", then the result is lower-cased.
 *
 * @param {string} x
 * @returns {string}
 */
const toSafeName = (x) => {
  const lettersOnly = x.replace(/[^A-Za-z]/g, '-');
  return lettersOnly.toLowerCase();
};
const processGlobalDeaths = async () => {
const globalStart = Date.now();
let start = Date.now();
const timeSeriesGlobalRaw = fs.readFileSync(deathsGlobalCsv, {encoding: 'utf8'});
console.log(`read global deaths CSV in ${Date.now() - start}ms`);
start = Date.now();
const timeSeriesUSRaw = fs.readFileSync(deathsUSCsv, { encoding: 'utf8' });
console.log(`read US deaths CSV in ${Date.now() - start}ms`);
2020-04-27 03:04:16 +00:00
2020-04-28 03:02:31 +00:00
start = Date.now();
const populationUSRaw = fs.readFileSync(populationStatesCsv, {encoding: 'utf8'});
console.log(`read US states population CSV in ${Date.now() - start}ms`);
start = Date.now();
const populationCountriesRaw = fs.readFileSync(populationCountriesCsv, {encoding: 'utf8'});
console.log(`read countries population CSV in ${Date.now() - start}ms`);
2020-04-27 03:04:16 +00:00
start = Date.now();
let tsGlobalRecords = parseCsv(timeSeriesGlobalRaw, {
cast: true,
columns: true,
});
console.log(`parsed global deaths CSV in ${Date.now() - start}ms`);
start = Date.now();
let tsUSRecords = parseCsv(timeSeriesUSRaw, {
cast: true,
columns: true,
});
console.log(`parsed US deaths CSV in ${Date.now() - start}ms`);
2020-04-28 03:02:31 +00:00
start = Date.now();
let populationUSStateRecords = parseCsv(populationUSRaw, {
cast: true,
columns: true,
});
console.log(`parsed US states population CSV in ${Date.now() - start}ms`);
start = Date.now();
let populationCountriesRecords = parseCsv(populationCountriesRaw, {
cast: true,
columns: true,
delimiter: '\t',
ltrim: true,
rtrim: true,
});
console.log(`parsed countries population CSV in ${Date.now() - start}ms`);
const statePopulationMap = populationUSStateRecords
.sort((a, b) => a.NAME.localeCompare(b.NAME))
.reduce((map, item) => {
map[item.NAME] = Number(item.POPESTIMATE2019);
return map;
}, {});
// Maps a country name as spelled in the population TSV (key) to the spelling
// used in the time-series CSVs' `Country/Region` column (value), so that the
// population can be registered under both names.
const countryNameMap = {
  'Cape Verde': 'Cabo Verde',
  // Kinshasa is the capital of the Democratic Republic of the Congo and
  // Brazzaville the capital of the Republic of the Congo; these two entries
  // must not be crossed or each country receives the other's population.
  'DR Congo': 'Congo (Kinshasa)',
  'Congo': 'Congo (Brazzaville)',
  'Ivory Coast': 'Cote d\'Ivoire',
  'Czech Republic': 'Czechia',
  'Vatican City': 'Holy See',
  'South Korea': 'Korea, South',
  'Taiwan': 'Taiwan*',
  'East Timor': 'Timor-Leste',
  'Palestine': 'West Bank and Gaza',
  'Myanmar': 'Burma',
  'São Tomé and Príncipe': 'Sao Tome and Principe',
};
const countryPopulationMap = populationCountriesRecords
.sort((a, b) => a.Country.localeCompare(b.Country))
.reduce((map, item) => {
map[item.Country] = Number(item.Pop_2019.toString().replace(/,/g, ''));
const otherName = countryNameMap[item.Country];
if (otherName) {
map[otherName] = map[item.Country];
}
return map;
}, {});
countryPopulationMap['Diamond Princess'] = statePopulationMap['Diamond Princess'] = 3711;
countryPopulationMap['MS Zaandam'] = 1829;
countryPopulationMap['Kosovo'] = 1810463;
statePopulationMap['Grand Princess'] = 3533;
2020-04-27 03:04:16 +00:00
// tsGlobalRecords = tsGlobalRecords.filter((record) => {
2020-04-28 03:02:31 +00:00
// return record['Country/Region'] === 'US';
2020-04-27 03:04:16 +00:00
// });
2020-04-28 03:02:31 +00:00
/**
 * Average day-over-day growth rate of a record's daily series over its most
 * recent window of samples, computed as a compound rate:
 * `(last / first) ^ (1 / intervals) - 1`.
 *
 * The exponent uses the number of intervals between the first and last
 * sample of the window (samples - 1), so a series that doubles every day
 * yields exactly 1 (100%).
 *
 * @param {{timeSeriesDaily: Array<{value: number}>}} record - series in
 *   ascending date order.
 * @param {number} [windowSize=14] - maximum number of trailing samples used.
 * @returns {number} growth rate per step (0.05 means 5%); 0 when there are
 *   fewer than two usable samples or the window is all zeros.
 */
const getGrowthRate = (record, windowSize = 14) => {
  const ts = record.timeSeriesDaily;
  const len = ts.length;
  if (len < 2) {
    return 0;
  }

  const latest = len - 1;
  const earliest = Math.max(len - windowSize, 0);
  if (earliest >= latest) {
    // A window of one sample (or less) has no interval to measure.
    return 0;
  }

  const hi = ts[latest].value;
  // Clamp the starting value so a series that starts at zero does not
  // divide by zero.
  const lo = Math.max(ts[earliest].value, 0.5);
  if (hi === 0 && lo < 1) {
    return 0;
  }

  const intervals = latest - earliest;
  return Math.pow(hi / lo, 1 / intervals) - 1;
};
// state/county data is separated for the US and doesn't need to be rolled up.
// Each US row is copied into the global list with the global CSV's column
// names (Long, Province/State, Country/Region), flagged needsRollup: false,
// and stripped of the US-only identifier columns.
for (const usRecord of tsUSRecords) {
  const {
    UID,
    iso2,
    iso3,
    code3,
    FIPS,
    Combined_Key,
    Long_,
    Province_State,
    Country_Region,
    ...remainingColumns
  } = usRecord;

  tsGlobalRecords.push({
    ...remainingColumns,
    needsRollup: false,
    Long: Long_,
    'Province/State': Province_State,
    'Country/Region': Country_Region,
  });
}
2020-04-27 03:04:16 +00:00
start = Date.now();
tsGlobalRecords.forEach((record) => {
record.timeSeriesDaily = [];
record.timeSeriesMonthly = [];
// Column headers shaped like M/D/Y are the per-day samples. They are parsed
// explicitly into UTC midnight: the keys built from these dates are read back
// with the getUTC* accessors, so parsing them in local time (which is what
// `new Date('1/22/20')` does) would shift every day back by one on machines
// whose time zone is ahead of UTC.
const dateColumns = Object.keys(record)
  .map((key) => {
    const match = /^(\d+)\/(\d+)\/(\d+)$/.exec(key);
    if (!match) {
      return null;
    }
    const [month, day, year] = match.slice(1).map(Number);
    // Two-digit years in the headers are taken to mean 20xx.
    const fullYear = year < 100 ? 2000 + year : year;
    return {
      key,
      date: new Date(Date.UTC(fullYear, month - 1, day)),
    };
  })
  .filter((column) => column !== null)
  .sort((a, b) => a.date.getTime() - b.date.getTime());
const toSortableDate = date => [
date.getUTCFullYear(),
zeroPad(date.getUTCMonth() + 1),
zeroPad(date.getUTCDate()),
].join('-');
dateColumns.forEach((obj) => {
const value = Number(record[obj.key]) || 0;
const date = obj.date;
delete record[obj.key];
const sortableKey = toSortableDate(date);
const lastItem = record.timeSeriesDaily[record.timeSeriesDaily.length - 1];
record.timeSeriesDaily.push({
key: sortableKey,
value,
delta: lastItem ? value - lastItem.value : 0,
});
});
const monthlyTotals = [];
const monthlyMaxes = {};
record.timeSeriesDaily.forEach((item) => {
const key = item.key.replace(/-\d+$/, '');
if (!(key in monthlyMaxes)) {
monthlyMaxes[key] = {
date: item.key,
value: item.value,
};
} else if (item.key > monthlyMaxes[key].date) {
monthlyMaxes[key] = {
date: item.key,
value: item.value,
}
}
});
Object.keys(monthlyMaxes).forEach((key) => {
monthlyTotals.push({
key,
value: monthlyMaxes[key].value,
});
});
monthlyTotals.sort((a, b) => a.key.localeCompare(b.key));
monthlyTotals.forEach((item, i) => {
const prev = monthlyTotals[i - 1];
item.delta = prev ? item.value - prev.value : 0;
});
record.total = record.timeSeriesDaily.length ?
record.timeSeriesDaily[record.timeSeriesDaily.length - 1].value :
0;
record.timeSeriesMonthly = monthlyTotals;
record.state = record['Province/State'];
record.country = record['Country/Region'];
record.lat = record.Lat;
record.long = record.Long;
record.county = record.Admin2 || '';
record.population = record.Population || null;
if (record.country === 'US') {
record.country = 'United States';
}
2020-04-27 03:04:16 +00:00
record.countrySafeName = toSafeName(record.country);
record.stateSafeName = toSafeName(record.state);
record.countySafeName = toSafeName(record.county);
2020-04-27 03:04:16 +00:00
delete record['Province/State'];
delete record['Country/Region'];
delete record.Lat;
delete record.Long;
delete record.Admin2;
delete record.Population;
2020-04-28 03:02:31 +00:00
if (!record.population && !record.state && !record.county) {
const mappedPop = countryPopulationMap[record.country];
if (!mappedPop) {
console.log(require('util').inspect(countryPopulationMap, false, null, true));
throw new Error('found no population for ' + record.country);
}
record.population = mappedPop;
} else if (!record.population && record.state && !record.county && record.country === 'United States') {
// US territories and cruise ships
const mappedPop = statePopulationMap[record.state];
if (!mappedPop) {
console.log(require('util').inspect(statePopulationMap, false, null, true));
throw new Error('found no population for ' + record.state);
}
// console.log(`setting population to ${mappedPop} for ${record.state}`);
record.population = mappedPop;
}
record.deathsPerMillion = !!record.population ? record.total / record.population * 1000000 : 0;
record.deathGrowthRate = getGrowthRate(record);
2020-04-27 03:04:16 +00:00
});
tsGlobalRecords.sort((a, b) => {
if (a.country === b.country) {
return a.state.localeCompare(b.state);
}
return a.country.localeCompare(b.country);
});
const perCountryTotals = {};
const perStateTotals = {};
2020-04-27 03:04:16 +00:00
tsGlobalRecords.forEach((record) => {
perCountryTotals[record.country] = perCountryTotals[record.country] || {
total: 0,
2020-04-28 03:02:31 +00:00
population: 0,
2020-04-27 03:04:16 +00:00
timeSeriesDaily: {},
timeSeriesMonthly: {},
states: [],
safeName: record.countrySafeName,
};
const item = perCountryTotals[record.country];
2020-04-28 03:02:31 +00:00
if (!record.state && !record.county) {
// country population
item.population = record.population;
}
2020-04-27 03:04:16 +00:00
2020-04-29 00:58:53 +00:00
if (record.county) {
record.name = record.county;
record.safeName = toSafeName(record.county);
}
// roll up up state/county data
if (record.state && record.county) {
const stateItem = perStateTotals[record.state] = perStateTotals[record.state] || {
name: record.state,
safeName: record.stateSafeName,
country: record.country,
countrySafeName: record.countrySafeName,
total: 0,
2020-04-28 03:02:31 +00:00
population: 0,
deathsPerMillion: 0,
timeSeriesDaily: {},
timeSeriesMonthly: {},
counties: [],
};
2020-04-28 03:02:31 +00:00
if (!stateItem.population && record.country === 'United States') {
const population = statePopulationMap[record.state];
if (!population) {
console.log(require('util').inspect(statePopulationMap, false, null, true));
throw new Error('no population for ' + record.state);
}
stateItem.population = population;
}
stateItem.total += record.total;
record.timeSeriesDaily.forEach((ts) => {
stateItem.timeSeriesDaily[ts.key] = stateItem.timeSeriesDaily[ts.key] || {
value: 0,
delta: 0,
};
stateItem.timeSeriesDaily[ts.key].value += ts.value;
stateItem.timeSeriesDaily[ts.key].delta += ts.delta;
});
record.timeSeriesMonthly.forEach((ts) => {
stateItem.timeSeriesMonthly[ts.key] = stateItem.timeSeriesMonthly[ts.key] || {
value: 0,
delta: 0,
};
stateItem.timeSeriesMonthly[ts.key].value += ts.value;
stateItem.timeSeriesMonthly[ts.key].delta += ts.delta;
});
stateItem.counties.push(record);
} else {
item.states.push(record);
}
if (record.needsRollup === false) {
return;
}
item.total += record.total;
2020-04-27 03:04:16 +00:00
record.timeSeriesDaily.forEach((ts) => {
item.timeSeriesDaily[ts.key] = item.timeSeriesDaily[ts.key] || {
value: 0,
delta: 0,
};
item.timeSeriesDaily[ts.key].value += ts.value;
item.timeSeriesDaily[ts.key].delta += ts.delta;
});
record.timeSeriesMonthly.forEach((ts) => {
item.timeSeriesMonthly[ts.key] = item.timeSeriesMonthly[ts.key] || {
value: 0,
delta: 0,
};
item.timeSeriesMonthly[ts.key].value += ts.value;
item.timeSeriesMonthly[ts.key].delta += ts.delta;
});
});
Object.keys(perStateTotals).forEach((stateName) => {
const item = perStateTotals[stateName];
const stateItem = {
name: stateName,
safeName: item.safeName,
country: item.country,
countrySafeName: item.countrySafeName,
total: item.total,
counties: item.counties,
2020-04-28 03:02:31 +00:00
population: item.population,
deathsPerMillion: item.population > 0 ? item.total / item.population * 1000000 : 0,
timeSeriesDaily: Object.keys(item.timeSeriesDaily).sort().map((date) => {
return {
key: date,
value: item.timeSeriesDaily[date].value,
delta: item.timeSeriesDaily[date].delta,
};
}),
timeSeriesMonthly: Object.keys(item.timeSeriesMonthly).sort().map((date) => {
return {
key: date,
value: item.timeSeriesMonthly[date].value,
delta: item.timeSeriesMonthly[date].delta,
};
}),
};
2020-04-28 03:02:31 +00:00
stateItem.deathGrowthRate = getGrowthRate(stateItem);
// insert into states array for the country
perCountryTotals[item.country].states.push(stateItem);
});
2020-04-27 03:04:16 +00:00
const countryArr = Object.keys(perCountryTotals).map((countryName) => {
const item = perCountryTotals[countryName];
2020-04-28 03:02:31 +00:00
if (!item.population) {
// some countries don't have roll up (e.g. Canada, China, Australia)
// and only contain province data instead of data for the whole country.
// population doesn't get set in those cases.
item.population = countryPopulationMap[countryName];
}
const countryItem = {
2020-04-27 03:04:16 +00:00
name: countryName,
safeName: item.safeName,
total: item.total,
states: item.states,
2020-04-28 03:02:31 +00:00
population: item.population,
deathsPerMillion: item.population > 0 ? item.total / item.population * 1000000 : 0,
2020-04-27 03:04:16 +00:00
timeSeriesDaily: Object.keys(item.timeSeriesDaily).sort().map((date) => {
return {
key: date,
value: item.timeSeriesDaily[date].value,
delta: item.timeSeriesDaily[date].delta,
};
}),
timeSeriesMonthly: Object.keys(item.timeSeriesMonthly).sort().map((date) => {
return {
key: date,
value: item.timeSeriesMonthly[date].value,
delta: item.timeSeriesMonthly[date].delta,
};
}),
};
2020-04-28 03:02:31 +00:00
countryItem.deathGrowthRate = getGrowthRate(countryItem);
return countryItem;
2020-04-27 03:04:16 +00:00
});
2020-04-27 23:10:38 +00:00
const worldData = {
name: 'The World',
safeName: 'the-world',
total: 0,
countries: countryArr,
timeSeriesDaily: {},
timeSeriesMonthly: {},
};
countryArr.forEach((countryData) => {
worldData.total += countryData.total;
countryData.timeSeriesDaily.forEach((ts) => {
worldData.timeSeriesDaily[ts.key] = worldData.timeSeriesDaily[ts.key] || {
value: 0,
delta: 0,
};
worldData.timeSeriesDaily[ts.key].value += ts.value;
worldData.timeSeriesDaily[ts.key].delta += ts.delta;
});
countryData.timeSeriesMonthly.forEach((ts) => {
worldData.timeSeriesMonthly[ts.key] = worldData.timeSeriesMonthly[ts.key] || {
value: 0,
delta: 0,
};
worldData.timeSeriesMonthly[ts.key].value += ts.value;
worldData.timeSeriesMonthly[ts.key].delta += ts.delta;
});
});
worldData.timeSeriesDaily = Object.keys(worldData.timeSeriesDaily).sort().map((date) => {
return {
key: date,
value: worldData.timeSeriesDaily[date].value,
delta: worldData.timeSeriesDaily[date].delta,
};
});
worldData.timeSeriesMonthly = Object.keys(worldData.timeSeriesMonthly).sort().map((date) => {
return {
key: date,
value: worldData.timeSeriesMonthly[date].value,
delta: worldData.timeSeriesMonthly[date].delta,
};
});
2020-04-28 03:02:31 +00:00
worldData.deathGrowthRate = getGrowthRate(worldData);
2020-04-29 05:22:17 +00:00
worldData.population = 7781841000;
worldData.deathsPerMillion = worldData.total / worldData.population * 1000000;
2020-04-28 03:02:31 +00:00
2020-04-27 03:04:16 +00:00
console.log(`transformed data in ${Date.now() - start}ms`);
start = Date.now();
2020-04-27 23:10:38 +00:00
const worldTmpl = path.join(templatesDir, 'world.pug');
const worldHtml = pug.renderFile(worldTmpl, {
data: worldData,
2020-04-29 00:38:11 +00:00
$title: 'The World',
lastUpdate,
2020-04-27 03:04:16 +00:00
});
const targetFile = path.join(publicDir, 'index.html');
2020-04-27 23:10:38 +00:00
fs.writeFileSync(targetFile, worldHtml);
2020-04-27 03:04:16 +00:00
console.log(`wrote to ${targetFile} in ${Date.now() - start}ms`);
// fs.writeFileSync(path.join(publicDir, 'countries.json'), JSON.stringify(countryArr, null, ' '));
const singleCountryTmpl = path.join(templatesDir, 'country.pug');
const singleStateTmpl = path.join(templatesDir, 'state.pug');
2020-04-29 00:38:11 +00:00
const countryFn = pug.compileFile(singleCountryTmpl);
const stateFn = pug.compileFile(singleStateTmpl);
2020-04-27 03:04:16 +00:00
await Promise.all(countryArr.map(async (countryData) => {
const start = Date.now();
const targetFile = path.join(publicDir, 'countries', countryData.safeName + '.html');
2020-04-29 00:38:11 +00:00
const countryHtml = countryFn({
2020-04-27 03:04:16 +00:00
data: countryData,
$title: countryData.name,
2020-04-29 00:38:11 +00:00
lastUpdate,
2020-04-27 03:04:16 +00:00
});
console.log(`writing to ${targetFile}`);
await promiseMe(callback => fs.writeFile(targetFile, countryHtml, callback));
console.log(`wrote to ${targetFile} in ${Date.now() - start}ms`);
if (countryData.states.length) {
2020-04-29 00:38:11 +00:00
await Promise.all(countryData.states.map(async (stateData) => {
if (!stateData.name || !stateData.counties || !stateData.counties.length) {
2020-04-29 00:38:11 +00:00
return;
}
const start = Date.now();
const targetFile = path.join(publicDir, 'countries', countryData.safeName + '-' + stateData.safeName + '.html');
2020-04-29 00:38:11 +00:00
const stateHtml = stateFn({
data: stateData,
$title: stateData.name + ' - ' + countryData.name,
2020-04-29 00:38:11 +00:00
lastUpdate,
});
await promiseMe(callback => fs.writeFile(targetFile, stateHtml, callback));
console.log(`wrote to ${targetFile} in ${Date.now() - start}ms`);
2020-04-29 00:38:11 +00:00
}));
}
2020-04-27 03:04:16 +00:00
}));
console.log(`finished in ${((Date.now() - globalStart) / 1000).toFixed(2)}s`);
};
// Script entry point: run the generator, report success, and on any failure
// log the error and exit with status 1.
(async () => {
  try {
    await processGlobalDeaths();
    console.log('all done');
  } catch (err) {
    console.log(err);
    process.exit(1);
  }
})();