// populate-database.js
const fs = require("fs");
const Papa = require("papaparse");
const MongoClient = require("mongodb").MongoClient;
/**
 * Drop hidden entries (names beginning with ".") from a directory listing.
 *
 * @param {string[]} filelist - File names, e.g. the result of fs.readdirSync.
 * @returns {string[]} A new array holding only the names that do not start
 *   with a dot; the input array is left untouched.
 */
function removeHiddenFiles(filelist) {
  const visible = [];
  for (const name of filelist) {
    if (name.startsWith(".")) {
      continue;
    }
    visible.push(name);
  }
  return visible;
}
/**
 * Parse every CSV file found directly inside a directory.
 *
 * Hidden files are ignored and only names ending in "csv" are considered.
 * The key for a file is the part of its name after the first underscore, cut
 * at the next underscore or dot (e.g. "Kurudu_LULC.csv" -> "LULC"); a name
 * without an underscore makes this function throw.
 *
 * @param {string} datadir - Directory to scan.
 * @returns {Object<string, Array>} Map from key to the `data` array produced
 *   by Papa.parse(content, { header: true }).
 */
function readAllCSV(datadir) {
  const csvNames = removeHiddenFiles(fs.readdirSync(datadir)).filter(name =>
    name.endsWith("csv")
  );
  // Derive every key before touching file contents, so that a badly named
  // file is reported before any parsing happens.
  const keyedNames = csvNames.map(name => [
    name.split("_")[1].split(".")[0],
    name
  ]);

  const data = {};
  for (const [key, name] of keyedNames) {
    const raw = fs.readFileSync(`${datadir}/${name}`).toString();
    // The source files carry trailing whitespace; trim it before parsing.
    const parsed = Papa.parse(raw.trim(), { header: true });
    data[key] = parsed.data;
  }
  return data;
}
/**
 * Load every GeoJSON/JSON file found directly inside a directory.
 *
 * Hidden files are ignored and only names ending in "json" are considered.
 * The key for a file is the part of its name after the first underscore, cut
 * at the next underscore or dot (e.g. "Kurudu_LULC.json" -> "LULC"); a name
 * without an underscore makes this function throw.
 *
 * Files are read with fs and parsed with JSON.parse rather than require():
 * require() resolves relative paths against this module's directory while
 * fs.readdirSync resolves them against the current working directory, and
 * require() treats extensions other than ".json" (e.g. ".geojson") as
 * JavaScript source.
 *
 * @param {string} datadir - Directory to scan.
 * @returns {Object<string, *>} Map from key to the parsed JSON document.
 * @throws {SyntaxError} If a matching file does not contain valid JSON.
 */
function readAllGeoJson(datadir) {
  const geojsonfiles = removeHiddenFiles(fs.readdirSync(datadir)).filter(f =>
    f.endsWith("json")
  );
  const data = {};
  for (const file of geojsonfiles) {
    const key = file.split("_")[1].split(".")[0];
    const text = fs.readFileSync(`${datadir}/${file}`, "utf8");
    // JSON.parse rejects a leading byte-order mark, so strip it first.
    data[key] = JSON.parse(text.replace(/^\uFEFF/, ""));
  }
  return data;
}
/**
 * Compute a simple centroid for a GeoJSON-style coordinates array.
 *
 * The result is the arithmetic mean of the vertices of one position list
 * (not an area-weighted centroid). The position list is found by following
 * the first element of nested arrays until a list of [x, y] positions is
 * reached: a LineString-shaped input is used as is, a Polygon-shaped input
 * contributes its first ring, and deeper nesting contributes the first ring
 * of its first member. A bare [x, y] position is returned unchanged.
 *
 * @param {Array} coordinates - A position, a list of positions, or nested
 *   lists of positions.
 * @returns {{type: string, coordinates: number[]}} A GeoJSON Point.
 * @throws {TypeError} If coordinates is not a non-empty array.
 */
function calculateCentroid(coordinates) {
  if (!Array.isArray(coordinates) || coordinates.length === 0) {
    throw new TypeError("calculateCentroid: coordinates must be a non-empty array");
  }

  // A single position is its own centroid.
  if (typeof coordinates[0] === "number") {
    return { "type": "Point", "coordinates": [coordinates[0], coordinates[1]] };
  }

  // Walk down through the first elements until `ring` is a flat list of
  // positions (i.e. its first element is an array of numbers).
  let ring = coordinates;
  while (Array.isArray(ring[0]) && Array.isArray(ring[0][0])) {
    ring = ring[0];
  }

  let sumX = 0.0;
  let sumY = 0.0;
  for (const position of ring) {
    sumX += position[0];
    sumY += position[1];
  }
  const centroid = [sumX / ring.length, sumY / ring.length];
  return { "type": "Point", "coordinates": centroid };
}
/**
 * Pair each CSV row with the geometry at the same index.
 *
 * For every key of allCSV, row i is combined with
 * allGeoJson[key].geometries[i] into a record of the form
 * { location, centroid, metadata }, where centroid comes from
 * calculateCentroid(geometry.coordinates). Each key is intended to become one
 * Mongo collection and each record one document in it.
 *
 * @param {Object<string, Array>} allCSV - Rows per key.
 * @param {Object<string, {geometries: Array}>} allGeoJson - Geometry
 *   collections per key; must provide a geometry for every CSV row.
 * @returns {Object<string, Array<{location: *, centroid: *, metadata: *}>>}
 *   Combined records per key.
 */
function combineCSVandGeoJson(allCSV, allGeoJson) {
  const combinedData = {};
  Object.keys(allCSV).forEach(collectionName => {
    combinedData[collectionName] = allCSV[collectionName].map((row, idx) => {
      const geometry = allGeoJson[collectionName].geometries[idx];
      return {
        location: geometry,
        centroid: calculateCentroid(geometry.coordinates),
        metadata: row
      };
    });
  });
  return combinedData;
}
/**
 * Replace one Mongo collection per key of combinedData with the given records.
 *
 * For each key the existing collection is dropped (a missing collection is
 * not an error) and the records are inserted with insertMany. Keys whose
 * record list is empty are dropped but not inserted, because insertMany
 * rejects an empty batch. The connection is always closed, including when an
 * insert fails.
 *
 * @param {Object<string, Array<Object>>} combinedData - Records per collection name.
 * @param {string} dbUrl - MongoDB connection string.
 * @param {string} dbName - Database name.
 * @returns {Promise<void>} Resolves when all inserts are done, or immediately
 *   after logging if the connection could not be opened.
 * @throws Propagates any error raised by insertMany (after closing the connection).
 */
async function insertCombinedDataToMongo(combinedData, dbUrl, dbName) {
  let conn;
  try {
    conn = await MongoClient.connect(dbUrl);
  } catch (e) {
    console.log("error connecting", e);
    return;
  }

  try {
    const db = conn.db(dbName);
    for (const coll of Object.keys(combinedData)) {
      process.stdout.write(`Dropping collection ${coll} ... `);
      try {
        await db.dropCollection(coll);
        process.stdout.write(" DONE \n");
      } catch (e) {
        // Server error 26 (NamespaceNotFound) just means there was nothing to
        // drop; anything else is reported but does not abort the import.
        if (e && (e.code === 26 || e.codeName === "NamespaceNotFound")) {
          process.stdout.write(" SKIPPED (collection does not exist) \n");
        } else {
          process.stdout.write(" FAILED \n");
          console.log(`error dropping collection ${coll}`, e);
        }
      }

      const items = combinedData[coll];
      if (items.length === 0) {
        process.stdout.write(`Nothing to insert to ${coll} \n`);
        continue;
      }
      process.stdout.write(`Inserting ${items.length} items to ${coll} ... `);
      await db.collection(coll).insertMany(items);
      process.stdout.write(" DONE \n");
    }
  } finally {
    await conn.close();
  }
  console.log("All entries made successfully");
}
/**
 * Create "School" and "Hospital" collections from randomly chosen settlements.
 *
 * Settlements are the LULC records whose metadata.lc_code is "BURV" or
 * "BURH". A random number of them (at least 1, never more than the number of
 * settlements) is picked independently for schools and for hospitals. Each
 * picked record is copied with its `location` replaced by its `centroid`, so
 * the records in lulcData are not modified. Both collections are dropped
 * first (if present), refilled, and given a 2dsphere index on `location`.
 *
 * @param {Array<{location: *, centroid: *, metadata: Object}>} lulcData - LULC records.
 * @param {string} dbUrl - MongoDB connection string.
 * @param {string} dbName - Database name.
 * @returns {Promise<void>} Resolves when both collections are written, or
 *   early (after logging) if there are no settlements or the connection fails.
 * @throws Propagates insert/index errors (after closing the connection).
 */
async function markRandomSchoolsAndHospitals(lulcData, dbUrl, dbName) {
  const villages = lulcData.filter(x => {
    const code = x['metadata']['lc_code'];
    return code === 'BURV' || code === 'BURH';
  });
  if (villages.length === 0) {
    console.log("No BURV/BURH settlements found; no schools or hospitals made");
    return;
  }

  // Whole number in [1, villages.length]; same formula as before, but floored
  // explicitly instead of relying on slice() truncating a fractional count.
  const randomCount = () =>
    Math.floor((Math.abs(0.5 - Math.random()) * 10) % villages.length) + 1;

  // Fisher-Yates shuffle of a copy, then take `count` entries. Each entry is
  // copied so that overriding `location` does not touch the caller's record.
  const pickSites = count => {
    const pool = villages.slice();
    for (let i = pool.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      [pool[i], pool[j]] = [pool[j], pool[i]];
    }
    return pool
      .slice(0, count)
      .map(site => Object.assign({}, site, { location: site.centroid }));
  };

  const schoolsHere = pickSites(randomCount());
  const hospitalsHere = pickSites(randomCount());

  let conn;
  try {
    conn = await MongoClient.connect(dbUrl);
  } catch (e) {
    console.log("error connecting", e);
    return;
  }

  try {
    const db = conn.db(dbName);

    // Drop each collection on its own so that a missing 'School' collection
    // does not prevent 'Hospital' from being dropped.
    for (const name of ['School', 'Hospital']) {
      try {
        await db.dropCollection(name);
      } catch (e) {
        // Server error 26 (NamespaceNotFound): nothing to drop.
        if (!(e && (e.code === 26 || e.codeName === "NamespaceNotFound"))) {
          console.log(`error dropping collection ${name}`, e);
        }
      }
    }

    process.stdout.write(`Making schools at ${schoolsHere.length} out of ${villages.length} settlements ... `);
    await db.collection('School').insertMany(schoolsHere);
    await db.collection('School').createIndex({location:"2dsphere"});
    process.stdout.write('DONE \n');

    process.stdout.write(`Making hospitals at ${hospitalsHere.length} out of ${villages.length} settlements ... `);
    await db.collection('Hospital').insertMany(hospitalsHere);
    await db.collection('Hospital').createIndex({location:"2dsphere"});
    process.stdout.write('DONE \n');
  } finally {
    // Close only after index creation has finished.
    await conn.close();
  }
}
// Public API of this module: the file readers, the CSV/GeoJSON combiner and
// the Mongo bulk loader. markRandomSchoolsAndHospitals is not exported.
module.exports = {
readAllCSV,
readAllGeoJson,
combineCSVandGeoJson,
insertCombinedDataToMongo
};
// Script entry point: when this file is run directly, load the Kurudu
// dataset, write it to the local 'sih' database, then create the School and
// Hospital collections.
if (require.main === module) {
  const main = async () => {
    // Anchor the dataset paths to this file's directory so the script does
    // not depend on the directory it is launched from.
    const allCSV = readAllCSV(`${__dirname}/../dataset/Kurudu-csv`);
    const allGeoJson = readAllGeoJson(`${__dirname}/../dataset/Kurudu-geojson`);
    const combinedData = combineCSVandGeoJson(allCSV, allGeoJson);
    console.log(combinedData["LULC"][0]);
    console.log("hello world ")
    const dbUrl = 'mongodb://localhost:27017/';
    const dbName = 'sih';
    // Run the two database steps one after the other. The bulk load goes
    // first so the LULC collection is written before any settlement records
    // are picked for schools and hospitals; running them concurrently made
    // the stored data depend on timing.
    await insertCombinedDataToMongo(combinedData, dbUrl, dbName);
    await markRandomSchoolsAndHospitals(combinedData['LULC'], dbUrl, dbName);
  };

  main().catch(err => {
    console.error(err);
    process.exitCode = 1;
  });
}