Skip to content

Commit

Permalink
added preview mode 0.4.0
Browse files Browse the repository at this point in the history
  • Loading branch information
glynnbird committed Jul 1, 2016
1 parent b5b67ef commit d53bc0a
Show file tree
Hide file tree
Showing 10 changed files with 186 additions and 37 deletions.
25 changes: 22 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,22 @@ Written 500 ( 2000 )

The configuration, whether default or overridden from environment variables, is shown, followed by a line of output for each block of 500 documents written, plus a cumulative total.

## Preview mode

If you want to see a preview of the JSON that would be created from your csv/tsv files then add `--preview true` to your command-line:

```
> cat text.txt | couchimport --preview true
Detected a TAB column delimiter
{ product_id: '1',
brand: 'Gibson',
type: 'Electric',
range: 'ES 330',
sold: 'FALSE' }
```

As well as showing a JSON preview, preview mode also attempts to detect the column delimiter character for you.

## Importing large JSON documents

If your source document is a GeoJSON text file, `couchimport` can be used. Let's say your JSON looks like this:
Expand All @@ -138,6 +154,7 @@ and we need to import each feature object into CouchDB as separate documents, th
* COUCH_BUFFER_SIZE - the number of records written to CouchDB per bulk write (defaults to 500, not required)
* COUCH_FILETYPE - the type of file being imported, either "text" or "json" (defaults to "text", not required)
* COUCH_JSON_PATH - the path into the incoming JSON document (only required for COUCH_FILETYPE=json imports)
* COUCH_PREVIEW - if set (to any value), runs in preview mode instead of importing (not required)


## Command-line parameters
Expand All @@ -152,6 +169,7 @@ You can now optionally override the environment variables by passing in command-
* --buffer - the number of records written to CouchDB per bulk write (defaults to 500, not required)
* --type - the type of file being imported, either "text" or "json" (defaults to "text", not required)
* --jsonpath - the path into the incoming JSON document (only required for type=json imports)
* --preview - if 'true', runs in preview mode

e.g.

Expand Down Expand Up @@ -193,6 +211,7 @@ N.B.
* COUCH_DELIMITER or --delimiter can be used to provide a custom column delimiter
* if your document values contain carriage returns or the column delimiter, then this may not be the tool for you


## Using programmatically

In your project, add `couchimport` into the dependencies of your package.json or run `npm install couchimport`. In your code, require
Expand Down Expand Up @@ -248,16 +267,16 @@ To export data to a named file:
To preview a file:

```
couchimport.previewCSVFile('./hp.csv', opts, function(err, data) {
console.log("done", err, data);
couchimport.previewCSVFile('./hp.csv', opts, function(err, data, delimiter) {
console.log("done", err, data, delimiter);
});
```

To preview a CSV/TSV on a URL:

```
couchimport.previewURL('https://myhosting.com/hp.csv', opts, function(err, data, delimiter) {
console.log("done", err, data);
console.log("done", err, data, delimiter);
});
```

Expand Down
9 changes: 8 additions & 1 deletion app.js
Original file line number Diff line number Diff line change
Expand Up @@ -172,12 +172,19 @@ var previewCSVFile = preview.file;
// callback - called when complete
var previewURL = preview.url;


// load the first 10k of a readable stream and parse the first 3 lines
// stream - the readable stream to preview (e.g. process.stdin)
// opts - an options object, or null for defaults
// callback - called when complete with (err, data, delimiter)
var previewStream = preview.stream;

module.exports = {
importStream: importStream,
importFile: importFile,
exportStream: exportStream,
exportFile: exportFile,
previewCSVFile: previewCSVFile,
previewURL: previewURL
previewURL: previewURL,
previewStream: previewStream
}
33 changes: 23 additions & 10 deletions bin/couchimport.bin.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,27 @@ process.env.DEBUG=(process.env.DEBUG)?process.env.DEBUG+",couchimport":"couchimp
var debug = require('debug')('couchimport'),
couchimport = require('../app.js'),
config = require('../includes/config.js');

// import data from a stdin
couchimport.importStream(process.stdin, config, function(err,data) {
debug("Import complete");
}).on("written", function(data) {
debug("Written ok:" + data.documents + " - failed: " + data.failed + " - (" + data.total + ")");
}).on("writeerror", function(err) {
debug("ERROR", err);
});


// Entry point: in preview mode, inspect the start of stdin and print the
// detected column delimiter plus the first parsed row; otherwise bulk-import
// everything arriving on stdin.
if (config.COUCH_PREVIEW) {
  couchimport.previewStream(process.stdin, config, function(err, data, delimiter) {
    // a failed read has nothing to preview - report it and signal failure
    // instead of claiming an "unknown column delimiter"
    if (err) {
      console.error("ERROR", err);
      process.exitCode = 1;
      return;
    }
    switch (delimiter) {
      case ',': console.log("Detected a COMMA column delimiter"); break;
      case '\t': console.log("Detected a TAB column delimiter"); break;
      default: console.log("Detected an unknown column delimiter"); break;
    }
    // only the first parsed row is shown as the preview
    if (data && data.length > 0) {
      console.log(data[0]);
    }
  });
} else {
  // import data from stdin
  couchimport.importStream(process.stdin, config, function(err, data) {
    debug("Import complete");
  }).on("written", function(data) {
    debug("Written ok:" + data.documents + " - failed: " + data.failed + " - (" + data.total + ")");
  }).on("writeerror", function(err) {
    debug("ERROR", err);
  });
}


8 changes: 8 additions & 0 deletions includes/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ if( typeof process.env.COUCH_PARALLELISM != "undefined") {
theconfig.COUCH_PARALLELISM = parseInt(process.env.COUCH_PARALLELISM);
}

// preview mode is switched on by the mere presence of the COUCH_PREVIEW
// environment variable; its value is not inspected
if (process.env.COUCH_PREVIEW !== undefined) {
  theconfig.COUCH_PREVIEW = true;
}

// override with command-line parameters
if(argv.url) {
theconfig.COUCH_URL = argv.url;
Expand Down Expand Up @@ -80,6 +85,9 @@ if(argv.jsonpath) {
if(argv.parallelism) {
theconfig.COUCH_PARALLELISM = parseInt(argv.parallelism);
}
// --preview switches on preview mode. A command-line value can arrive as a
// string, and the string 'false' is truthy, so it is rejected explicitly;
// otherwise `--preview false` would enable the mode it asks to disable.
if (argv.preview && String(argv.preview).toLowerCase() !== 'false') {
  theconfig.COUCH_PREVIEW = true;
}


debug("******************");
Expand Down
3 changes: 2 additions & 1 deletion includes/defaults.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ var defaults = {
COUCH_JSON_PATH: null,
COUCH_TRANSFORM: null,
COUCH_META: null,
COUCH_PARALLELISM: 1
COUCH_PARALLELISM: 1,
COUCH_PREVIEW: false
};

var get = function() {
Expand Down
62 changes: 45 additions & 17 deletions includes/preview.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,35 @@ var parse = require('csv-parse');
var fs = require('fs');
var defaults = require('./defaults.js');

// Guess the column delimiter of a chunk of text and parse a short preview of it.
// str - the text to analyse; only its first 4 lines are parsed (the parser is
//       run with columns: true, so the first line supplies the column names)
// callback - called with (err, data, delimiter):
//   err       - always null; parse failures simply count as "no match"
//   data      - the parsed rows, or '' when neither delimiter matched
//   delimiter - ',' or '\t' when detected, '?' when unknown
var analyseString = function(str, callback) {
  // tolerate null/undefined/Buffer input rather than throwing on .split
  var text = (typeof str === 'string') ? str : ((str === null || str === undefined) ? '' : String(str));
  var sample = text.split("\n").slice(0, 4).join("\n") + "\n";

  var optionsFor = function(delimiter) {
    return {delimiter: delimiter, columns: true, skip_empty_lines: true, relax: true};
  };

  // a parse only counts as a match if it succeeded and split the first row
  // into more than one column
  var looksDelimited = function(err, rows) {
    return !err && !!rows && rows.length > 0 && Object.keys(rows[0]).length > 1;
  };

  parse(sample, optionsFor(','), function(err1, csvdata) {
    parse(sample, optionsFor('\t'), function(err2, tsvdata) {
      // comma is tried first, then tab
      if (looksDelimited(err1, csvdata)) {
        return callback(null, csvdata, ',');
      }
      if (looksDelimited(err2, tsvdata)) {
        return callback(null, tsvdata, '\t');
      }
      // not sure what type of data it is
      return callback(null, '', '?');
    });
  });
};


// preview a URL
var first10kURL = function(u, callback) {
Expand Down Expand Up @@ -52,33 +81,32 @@ var url = function(u, opts, callback) {
if (err) {
return callback(err, null);
}
var lines = data.split("\n");
str = lines.splice(0,4).join("\n") + "\n";
parse(str, {delimiter: opts.COUCH_DELIMITER, columns: true, skip_empty_lines: true, relax: true}, callback)
analyseString(data, callback);
})
};

var file = function(filename, opts, callback) {

// merge default options
opts = defaults.merge(opts);

fs.open(filename, 'r', function(status, fd) {
if (status) {
return callback(status.message, null);
var rs = fs.createReadStream(filename, { encoding: 'utf8'});
stream(rs, opts, callback);
};

var stream = function(rs, opts, callback) {
var str = '';
rs.on('readable', function() {
str = rs.read(10000).toString('utf8');
rs.destroy(rs);
analyseString(str, callback);
}).on('error', function(e) {
if (!str) {
callback(e, null, '?');
}
var buffer = new Buffer(10000);
fs.read(fd, buffer, 0, 10000, 0, function(err, num) {
var str = buffer.toString('utf-8', 0, num);
var lines = str.split("\n");
str = lines.splice(0,4).join("\n") + "\n";
fs.close(fd);
parse(str, {delimiter: opts.COUCH_DELIMITER, columns: true, skip_empty_lines: true, relax: true}, callback)
});
});
};

module.exports = {
file: file,
url: url
url: url,
stream: stream
}
10 changes: 5 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "couchimport",
"version": "0.3.4",
"version": "0.4.0",
"description": "CouchImport - command-line helper to bulk import/export data from CSV/TSV",
"repository": "https://github.com/glynnbird/couchimport.git",
"keywords": [
Expand All @@ -11,19 +11,19 @@
"couchimport",
"couchexport"
],
"author": "Glynn Bird",
"author": "Glynn Bird <[email protected]>",
"license": "Apache-2.0",
"dependencies": {
"JSONStream": "1.1.1",
"async": "1.5.2",
"JSONStream": "1.1.3",
"async": "2.0.0-rc.6",
"cloudant": "1.4.2",
"csv-parse": "1.1.1",
"debug": "2.2.0",
"minimist": "1.2.0"
},
"devDependencies": {
"mocha": "2.5.3",
"should": "9.0.0",
"should": "9.0.2",
"nock": "^8.0.0"
},
"main": "./app.js",
Expand Down
Loading

0 comments on commit d53bc0a

Please sign in to comment.