Skip to content

Commit

Permalink
fixes #60, removing unnecessary call to preprocess content
Browse files: browse the repository at this point in the history
  • Loading branch information
dbashford committed Nov 23, 2015
1 parent 936b890 commit c28ec1f
Show file tree
Hide file tree
Showing 7 changed files with 4 additions and 31 deletions.
2 changes: 1 addition & 1 deletion lib/extractors/docx.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ var _calculateExtractedText = function( inText ) {
text += localText + "\n";
});

return util.replaceTextChars( text );
return text;
};

var extractText = function( filePath, options, cb ) {
Expand Down
4 changes: 1 addition & 3 deletions lib/extractors/odt.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,8 @@ var extractText = function( filePath, options, cb ) {
for (var i = 0; i < nodes.length; i++) {
nodeTexts.push($(nodes[i]).text());
}
var contentWithBreaks = nodeTexts.join("\n");
contentWithBreaks = util.replaceTextChars(contentWithBreaks);

cb( null, contentWithBreaks );
cb( null, nodeTexts.join("\n") );
});
}
});
Expand Down
14 changes: 0 additions & 14 deletions lib/util.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
var exec = require('child_process').exec;

var SINGLE_QUOTES = /[\u2018|\u2019]/g
, DOUBLE_QUOTES = /[\u201C|\u201D]/g
, MULTI_SPACES = /[^\S\r\n]{2,}/g
, NON_ASCII_CHARS = /[^\x00-\x7F\x80-\xFF]/g
;

var yauzlError = function( err, cb ) {
var msg = err.message;
if ( msg === "end of central directory record signature not found" ) {
Expand Down Expand Up @@ -37,13 +31,6 @@ var unzipCheck = function(type, cb) {
);
};

var replaceTextChars = function(text) {
return text.trim()
.replace( SINGLE_QUOTES, "'" )
.replace( DOUBLE_QUOTES, '"' )
.replace( MULTI_SPACES, ' ' );
};

var getTextFromZipFile = function( zipfile, entry, cb ) {
zipfile.openReadStream( entry, function( err, readStream ) {
if ( err ) {
Expand Down Expand Up @@ -74,7 +61,6 @@ var getTextFromZipFile = function( zipfile, entry, cb ) {
module.exports = {
createExecOptions: createExecOptions,
unzipCheck: unzipCheck,
replaceTextChars: replaceTextChars,
getTextFromZipFile: getTextFromZipFile,
yauzlError: yauzlError
};
1 change: 0 additions & 1 deletion test/cli_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ describe("cli", function(){
it("will extract text", function(done) {
exec( cliPath + " " + testFilePath,
function( error, stdout, stderr ) {
console.log(stdout)
expect(stdout).to.eql(".foo {color:red}\n");
done();
}
Expand Down
2 changes: 1 addition & 1 deletion test/extract_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ describe('textract', function() {
fromFileWithPath(filePath, function( error, text ) {
expect(error).to.be.null;
expect(text).to.be.a('string');
expect(text).to.eql( "this is a test document that won't be extracted properly." );
expect(text).to.eql( "this is a test document that won't be extracted properly. " );
done();
});
});
Expand Down
1 change: 1 addition & 0 deletions test/url_test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
var path = require("path");

describe("fromUrl tests", function() {
this.timeout(3000);

var test = function(ext, name, _text) {
it('will ' + ext + ' files', function(done) {
Expand Down
11 changes: 0 additions & 11 deletions test/util_test.js

This file was deleted.

0 comments on commit c28ec1f

Please sign in to comment.