Skip to content

Commit

Permalink
major refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
glynnbird committed Jul 16, 2015
1 parent 470a09d commit e1bd49b
Show file tree
Hide file tree
Showing 15 changed files with 377 additions and 206 deletions.
7 changes: 7 additions & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
The Apache License, version 2.0

licensed under the apache license, version 2.0 (the "license"); you may not use this file except in compliance with the license. you may obtain a copy of the license at

http://www.apache.org/licenses/LICENSE-2.0

unless required by applicable law or agreed to in writing, software distributed under the license is distributed on an "as is" basis, without warranties or conditions of any kind, either express or implied. see the license for the specific language governing permissions and limitations under the license.
54 changes: 53 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

## Introduction

When populating CouchDB databases, often the source of the data is initially a CSV or TSV file. CouchImport is designed to assist you with importing flat data into CouchDB efficiently.
When populating CouchDB databases, often the source of the data is initially a CSV or TSV file. CouchImport is designed to assist you with importing flat data into CouchDB efficiently.
It can be used either as command-line utilities `couchimport` and `couchexport` or the underlying functions can be used programmatically:

* simply pipe the data file to 'couchimport' on the command line
* handles tab or comma separated data
Expand Down Expand Up @@ -191,3 +192,54 @@ N.B.
* COUCH_DELIMITER or --delimiter can be used to provide a custom column delimiter
* if your document values contain carriage returns or the column delimiter, then this may not be the tool for you

## Using programmatically

In your project, add `couchimport` into the dependencies of your package.json or run `npm install couchimport`. In your code, require
the library with

```
var couchimport = require('couchimport');
```

and your options are set in an object whose keys are the same as the COUCH_* environment variables:

e.g.

```
var opts = { COUCH_DELIMITER: ",", COUCH_URL: "http://localhost:5984", COUCH_DATABASE: "mydb" };
```

To import data from a readable stream (rs):

```
var rs = process.stdin;
couchimport.importStream(rs, opts, function(err,data) {
console.log("done");
});
```

To import data from a named file:

```
couchimport.importFile("input.txt", opts, function(err,data) {
console.log("done",err,data);
});
```

To export data to a writable stream (ws):

```
var ws = process.stdout;
couchimport.exportStream(ws, opts, function(err, data) {
console.log("done",err,data);
});
```


To export data to a named file:

```
couchimport.exportFile("output.txt", opts, function(err, data) {
console.log("done",err,data);
});
```
165 changes: 165 additions & 0 deletions app.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
var fs = require('fs'),
async = require('async'),
debugimport = require('debug')('couchimport'),
debugexport = require('debug')('couchexport'),
defaults = require('./includes/defaults.js');


// import a file stream into CouchDB
// rs - readable stream
// opts - an options object, or null for defaults
// callback - called when complete
// import a file stream into CouchDB
// rs - readable stream
// opts - an options object, or null for defaults
// callback - called exactly once when complete, with (err, data)
var importStream = function(rs, opts, callback) {

  // sort the parameters - allow (rs, callback) with default options
  if (typeof callback === "undefined" && typeof opts === "function") {
    callback = opts;
    opts = null;
  }

  // FIX: guard so the callback fires at most once - previously a stream
  // 'error' after the writer's 'finish' (or vice versa) called back twice
  var finished = false;
  var done = function(err, data) {
    if (!finished) {
      finished = true;
      callback(err, data);
    }
  };

  // merge default options
  opts = defaults.merge(opts);

  // load dependencies
  var writer = require('./includes/writer.js')(opts.COUCH_URL, opts.COUCH_DATABASE, opts.COUCH_BUFFER_SIZE),
    transformer = require('./includes/transformer.js')(opts.COUCH_TRANSFORM, opts.COUCH_META);

  // if this is a JSON stream
  if (opts.COUCH_FILETYPE === "json") {

    // a JSON path is mandatory so the parser knows which array to stream
    if (!opts.COUCH_JSON_PATH) {
      var msg = "ERROR: you must specify a JSON path using --jsonpath or COUCH_JSON_PATH";
      debugimport(msg);
      return done(msg, null);
    }
    // pipe the file to a streaming JSON parser
    var JSONStream = require('JSONStream');
    rs.pipe(JSONStream.parse(opts.COUCH_JSON_PATH))
      .pipe(transformer)  // process each object
      .pipe(writer);      // write the data

  } else {

    // load the CSV parser
    var parse = require('csv-parse'),
      objectifier = parse({delimiter: opts.COUCH_DELIMITER, columns: true, skip_empty_lines: true, relax: true});

    // pipe the input to the output, via transformation functions
    rs.pipe(objectifier)  // turn each line into an object
      .pipe(transformer)  // process each object
      .pipe(writer);      // write the data
  }

  // success once every buffered write has been flushed
  writer.on('finish', function() {
    done(null, null);
  });

  // propagate read errors to the caller
  rs.on('error', function(e) {
    done(e, null);
  });

};

// import a named file into CouchDB
// filename - name of the file stream
// opts - an options object, or null for defaults
// callback - called when complete
// import the contents of a named file into CouchDB
// filename - path of the file to read
// opts - an options object, or null for defaults
// callback - called when complete
var importFile = function(filename, opts, callback) {
  var rs = fs.createReadStream(filename);
  importStream(rs, opts, callback);
};

// export to a writable stream
// ws - writable stream
// opts - an options object, or null for defaults
// callback - called when complete
// export a CouchDB database to a writable stream as delimited text
// ws - writable stream
// opts - an options object, or null for defaults
// callback - called when complete, with (err, data)
var exportStream = function (ws, opts, callback) {

  // set to the error when the output stream fails; halts the fetch loop
  var escape = null;

  // sort the parameters - allow (ws, callback) with default options
  if (typeof callback === "undefined" && typeof opts === "function") {
    callback = opts;
    opts = null;
  }

  // merge default options
  opts = defaults.merge(opts);

  var total = 0,
    headings = [],
    lastsize = 0,
    reader = require('./includes/reader.js')(opts.COUCH_URL, opts.COUCH_DATABASE, opts.COUCH_BUFFER_SIZE);

  // write one document as a delimited row; the first document's keys
  // become the column headings.
  // N.B. values containing the delimiter or newlines are NOT escaped
  var exportAsCSV = function(row) {

    // ignore design docs
    if (row._id.match(/^_design/)) {
      return;
    }

    // if we are extracting headings
    if (headings.length === 0) {
      headings = Object.keys(row);
      ws.write(headings.join(opts.COUCH_DELIMITER) + "\n");
    }

    // output columns
    // FIX: test for undefined before null - the original loose "v == null"
    // also matched undefined, making the empty-cell branch unreachable, so
    // missing values printed "null" instead of ""
    var cols = [];
    for (var i = 0; i < headings.length; i++) {
      var v = row[headings[i]];
      if (typeof v === "undefined") {
        cols.push("");
      } else if (v === null) {
        cols.push("null");
      } else if (typeof v === "string") {
        cols.push(v);
      } else {
        cols.push(v.toString());
      }
    }
    ws.write(cols.join(opts.COUCH_DELIMITER) + "\n");
  }

  // repeatedly fetch batches of documents until an empty batch arrives
  // or the output stream has errored
  async.doUntil(function(callback){
    reader(function(err, data) {
      if(err) {
        return callback(true);
      }
      lastsize = data.length;
      total += lastsize;
      for (var i = 0; i < data.length; i++) {
        exportAsCSV(data[i]);
      }
      debugexport("Output", data.length, "[" + total + "]");
      callback(null);
    });
  },
  function() {
    return (lastsize === 0 || escape);
  },
  function(err){
    debugexport("Output complete");
    callback(escape, null);
  });

  // remember a write error so the loop stops on its next iteration
  ws.on("error", function(err) {
    escape = err;
  });

};

// export to a named file
// filename - name of the file stream
// opts - an options object, or null for defaults
// callback - called when complete
// export a CouchDB database to a named file
// filename - path of the file to write
// opts - an options object, or null for defaults
// callback - called when complete
var exportFile = function(filename, opts, callback) {
  var ws = fs.createWriteStream(filename);
  exportStream(ws, opts, callback);
};

// public API - the four entry points used by the command-line tools
// and by programmatic consumers of this package
module.exports = {
  importStream: importStream,
  importFile: importFile,
  exportStream: exportStream,
  exportFile: exportFile
}
6 changes: 5 additions & 1 deletion bin/couchexport.bin.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
#!/usr/bin/env node
require('../couchexport.js');
// enable debug output for the "couchexport" namespace; must be set
// before app.js loads the debug module
process.env.DEBUG = (process.env.DEBUG) ? process.env.DEBUG + ",couchexport" : "couchexport";
var couchimport = require('../app.js');
var config = require('../includes/config.js');
// stream the database to stdout
// FIX: the error was previously swallowed silently; report it and
// exit non-zero so shell pipelines can detect failure
couchimport.exportStream(process.stdout, config, function(err, data) {
  if (err) {
    console.error(err);
    process.exit(1);
  }
});

7 changes: 6 additions & 1 deletion bin/couchimport.bin.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#!/usr/bin/env node
require('../couchimport.js');
// enable debug output for the "couchimport" namespace; must be set
// before app.js loads the debug module
process.env.DEBUG = (process.env.DEBUG) ? process.env.DEBUG + ",couchimport" : "couchimport";
var couchimport = require('../app.js'),
  config = require('../includes/config.js');
// stream stdin into the database
// FIX: the error was previously swallowed silently; report it and
// exit non-zero so shell pipelines can detect failure
couchimport.importStream(process.stdin, config, function(err, data) {
  if (err) {
    console.error(err);
    process.exit(1);
  }
});


60 changes: 0 additions & 60 deletions couchexport.js

This file was deleted.

24 changes: 0 additions & 24 deletions couchimport.js

This file was deleted.

46 changes: 0 additions & 46 deletions includes/cloudant.js

This file was deleted.

Loading

0 comments on commit e1bd49b

Please sign in to comment.