-
Notifications
You must be signed in to change notification settings - Fork 38
/
Copy pathexcel-as-json.coffee
221 lines (193 loc) · 7.73 KB
/
excel-as-json.coffee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# Create a list of json objects; 1 object per excel sheet row
#
# Assume: Excel spreadsheet is a rectangle of data, where the first row is
# object keys and remaining rows are object values and the desired json
# is a list of objects. Alternatively, data may be column oriented with
# col 0 containing key names.
#
# Dotted notation: Key row (0) containing firstName, lastName, address.street,
# address.city, address.state, address.zip would produce, per row, a doc with
# first and last names and an embedded doc named address, with the address.
#
# Arrays: may be indexed (phones[0].number) or flat (aliases[]). Indexed
# arrays imply a list of objects. Flat arrays imply a semicolon delimited list.
#
# USE:
# From a shell
# coffee src/excel-as-json.coffee
#
fs = require 'fs'
path = require 'path'
excel = require 'excel'
BOOLTEXT = ['true', 'false']
BOOLVALS = {'true': true, 'false': false}
isArray = (obj) ->
Object.prototype.toString.call(obj) is '[object Array]'
# Extract key name and array index from names[1] or names[]
# return [keyIsList, keyName, index]
# for names[1] return [true, keyName, index]
# for names[] return [true, keyName, undefined]
# for names return [false, keyName, undefined]
parseKeyName = (key) ->
index = key.match(/\[(\d+)\]$/)
switch
when index then [true, key.split('[')[0], Number(index[1])]
when key[-2..] is '[]' then [true, key[...-2], undefined]
else [false, key, undefined]
# Convert a list of values to a list of more native forms
convertValueList = (list, options) ->
(convertValue(item, options) for item in list)
# Convert values to native types
# Note: all values from the excel module are text
convertValue = (value, options) ->
# isFinite returns true for empty or blank strings, check for those first
if value.length == 0 || !/\S/.test(value)
value
else if isFinite(value)
if options.convertTextToNumber
Number(value)
else
value
else
testVal = value.toLowerCase()
if testVal in BOOLTEXT
BOOLVALS[testVal]
else
value
# Assign a value to a dotted property key - set values on sub-objects
assign = (obj, key, value, options) ->
# On first call, a key is a string. Recursed calls, a key is an array
key = key.split '.' unless typeof key is 'object'
# Array element accessors look like phones[0].type or aliases[]
[keyIsList, keyName, index] = parseKeyName key.shift()
if key.length
if keyIsList
# if our object is already an array, ensure an object exists for this index
if isArray obj[keyName]
unless obj[keyName][index]
obj[keyName].push({}) for i in [obj[keyName].length..index]
# else set this value to an array large enough to contain this index
else
obj[keyName] = ({} for i in [0..index])
assign obj[keyName][index], key, value, options
else
obj[keyName] ?= {}
assign obj[keyName], key, value, options
else
if keyIsList and index?
console.error "WARNING: Unexpected key path terminal containing an indexed list for <#{keyName}>"
console.error "WARNING: Indexed arrays indicate a list of objects and should not be the last element in a key path"
console.error "WARNING: The last element of a key path should be a key name or flat array. E.g. alias, aliases[]"
if (keyIsList and not index?)
if value != ''
obj[keyName] = convertValueList(value.split(';'), options)
else if !options.omitEmptyFields
obj[keyName] = []
else
if !(options.omitEmptyFields && value == '')
obj[keyName] = convertValue(value, options)
# Transpose a 2D array
transpose = (matrix) ->
(t[i] for t in matrix) for i in [0...matrix[0].length]
# Convert 2D array to nested objects. If row oriented data, row 0 is dotted key names.
# Column oriented data is transposed
convert = (data, options) ->
data = transpose data if options.isColOriented
keys = data[0]
rows = data[1..]
result = []
for row in rows
item = {}
assign(item, keys[index], value, options) for value, index in row
result.push item
return result
# Write JSON encoded data to file
# call back is callback(err)
write = (data, dst, callback) ->
# Create the target directory if it does not exist
dir = path.dirname(dst)
fs.mkdirSync dir if !fs.existsSync(dir)
fs.writeFile dst, JSON.stringify(data, null, 2), (err) ->
if err then callback "Error writing file #{dst}: #{err}"
else callback undefined
# src: xlsx file that we will read sheet 0 of
# dst: file path to write json to. If null, simply return the result
# options: see below
# callback(err, data): callback for completion notification
#
# options:
# sheet: string; 1: numeric, 1-based index of target sheet
# isColOriented: boolean: false; are objects stored in excel columns; key names in col A
# omitEmptyFields: boolean: false: do not include keys with empty values in json output. empty values are stored as ''
# TODO: this is probably better named omitKeysWithEmptyValues
# convertTextToNumber boolean: true; if text looks like a number, convert it to a number
#
# convertExcel(src, dst) <br/>
# will write a row oriented xlsx sheet 1 to `dst` as JSON with no notification
# convertExcel(src, dst, {isColOriented: true}) <br/>
# will write a col oriented xlsx sheet 1 to file with no notification
# convertExcel(src, dst, {isColOriented: true}, callback) <br/>
# will write a col oriented xlsx to file and notify with errors and parsed data
# convertExcel(src, null, null, callback) <br/>
# will parse a row oriented xslx using default options and return errors and the parsed data in the callback
#
_DEFAULT_OPTIONS =
sheet: '1'
isColOriented: false
omitEmptyFields: false
convertTextToNumber: true
# Ensure options sane, provide defaults as appropriate
_validateOptions = (options) ->
if !options
options = _DEFAULT_OPTIONS
else
if !options.hasOwnProperty('sheet')
options.sheet = '1'
else
# ensure sheet is a text representation of a number
if !isNaN(parseFloat(options.sheet)) && isFinite(options.sheet)
if options.sheet < 1
options.sheet = '1'
else
# could be 3 or '3'; force to be '3'
options.sheet = '' + options.sheet
else
# something bizarre like true, [Function: isNaN], etc
options.sheet = '1'
if !options.hasOwnProperty('isColOriented')
options.isColOriented = false
if !options.hasOwnProperty('omitEmptyFields')
options.omitEmptyFields = false
if !options.hasOwnProperty('convertTextToNumber')
options.convertTextToNumber = true
options
processFile = (src, dst, options=_DEFAULT_OPTIONS, callback=undefined) ->
options = _validateOptions(options)
# provide a callback if the user did not
if !callback then callback = (err, data) ->
# NOTE: 'excel' does not properly bubble file not found and prints
# an ugly error we can't trap, so look for this common error first
if not fs.existsSync src
callback "Cannot find src file #{src}"
else
excel src, options.sheet, (err, data) ->
if err
callback "Error reading #{src}: #{err}"
else
result = convert data, options
if dst
write result, dst, (err) ->
if err then callback err
else callback undefined, result
else
callback undefined, result
# This is the single expected module entry point
exports.processFile = processFile
# Unsupported use
# Exposing remaining functionality for unexpected use cases, testing, etc.
exports.assign = assign
exports.convert = convert
exports.convertValue = convertValue
exports.parseKeyName = parseKeyName
exports._validateOptions = _validateOptions
exports.transpose = transpose