-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathflic
executable file
·447 lines (366 loc) · 16.2 KB
/
flic
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
#!/usr/bin/python -B
# -*- coding: utf-8 -*-
###############################################################################
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
###############################################################################
"""
flic - Format LICences.

Usage:
  flic --input [input file] --template [template file] > [output file]

Options:
  --template     Name of the template to use (in flic_templates dir)
  -i, --input    License information JSON file (output of slic)
                 (Can be specified more than once)
  -d, --debug    Output debugging info to flic.log
  -V, --version  Show version number and exit

This script is quite specific to the needs of generating the license info
from the B2G codebase for Firefox OS, but could be adapted. It does
copyright holder canonicalization and amalgamation, for example, which
not everyone may want.
"""
# Version of this script as a tuple; main() joins the parts with "." for the
# --version output.
_version_ = (1, 0, 0)
import logging
# Log output goes to a file named flic.log rather than to the console.
# main() raises the "flic" logger's level to DEBUG when --debug is given.
logging.basicConfig(filename="flic.log")
log = logging.getLogger("flic")
import sys
import re
import json
import argparse
from os.path import dirname, exists, join, basename, split
from jinja2 import Environment, PackageLoader
import textwrap
import cgi
from slic_results import SlicResults
import utils
# Helper exposed to the templates (passed into the render context by main())
def template_exists(template):
    """Return True if *template* exists in the flic_templates directory.

    The directory is looked up next to the script, i.e. relative to the
    directory part of sys.argv[0].
    """
    log.debug("Looking for template: %s" % template)
    script_dir = dirname(sys.argv[0])
    candidate = join(script_dir, "flic_templates", template)
    return exists(candidate)
# Maps a known variant spelling of a copyright holder (key) to the single
# spelling we want to report (value), so that one holder is not listed
# "twice" merely because their name appears in several forms.
canonical_holders = {
    'Silicon Graphics':
        'Silicon Graphics Computer Systems, Inc.',

    'The Regents of the University of California. All rights reserved.':
        'Regents of the University of California. All rights reserved.',

    'Mozilla Foundation.':
        'Mozilla Foundation',

    'Android Open Source Project':
        'The Android Open Source Project',

    'The Android Open Source Project All rights reserved.':
        'The Android Open Source Project. All rights reserved.',

    ('Student Information Processing Board of the '
     'Massachusetts Institute of Technology.'):
        ('by the Student Information Processing Board of the '
         'Massachusetts Institute of Technology'),

    # The key is a truncated form of the value (it stops after "All")
    ('World Wide Web Consortium, (Massachusetts Institute of Technology, '
     'Institut National de Recherche en Informatique et en Automatique, '
     'Keio University). All'):
        ('World Wide Web Consortium, (Massachusetts Institute of Technology, '
         'Institut National de Recherche en Informatique et en Automatique, '
         'Keio University). All Rights Reserved.'),

    'Renesas Technology.':
        'Renesas Technology',
}
def tidy_holder(holder):
    """Return the canonical spelling of a copyright holder's name.

    Names listed in canonical_holders are replaced by their mapped value;
    any other name is returned unchanged.
    """
    # Ick. Prevent "obvious" duplications of copyright holders
    return canonical_holders.get(holder, holder)
# Take an array of integer years and turn it into a comma-separated string
# list of years and ranges
def _join_years(years):
if not years:
return ""
# uniq
set = {}
map(set.__setitem__, years, [])
years = set.keys()
# sort
years.sort()
years_as_str = []
range_start = None
for i in range(len(years) - 1):
if years[i + 1] == years[i] + 1:
if not range_start:
range_start = years[i]
elif range_start:
# No range continuation, pending value; finish range
years_as_str.append("%i-%i" % (range_start, years[i]))
range_start = None
else:
# No range continuation, no pending value
years_as_str.append(str(years[i]))
# Final year
if range_start:
# Pending value; finish range
years_as_str.append("%i-%i" % (range_start, years[-1]))
else:
# No pending value
years_as_str.append(str(years[-1]))
return ", ".join(years_as_str)
# Take a string list of years and ranges and turn it into an array of integer
# years
def _split_years(string):
    """Expand a textual list of years and ranges into a list of integer years.

    *string* is split on commas. Each piece may be:
      - a range "start-end", expanded to every year from start to end
        inclusive; ranges starting before 1970 or ending after 2030 are
        discarded, and a range with a blank end contributes only its start;
      - several whitespace-separated years;
      - a single 2- or 4-character year.
    Years are normalised with _canonicalize_year(). Pieces of any other
    length are logged and skipped, as are pieces whose text cannot be
    converted to integers (e.g. "2001 2002-2004").

    Returns [] when *string* is empty or None. The result is neither sorted
    nor de-duplicated.
    """
    if not string:
        return []

    years = []
    for piece in string.split(','):
        if re.search(r"^\s*$", piece):
            # Blank piece (e.g. from a trailing comma)
            continue

        # presumably a whitespace-collapsed copy of the piece; it is only
        # used for the length checks and log messages below -- confirm
        # against utils.collapse
        cw_piece = utils.collapse(piece)

        try:
            if re.search("-", piece):
                # Range
                rng = piece.split('-')
                if not rng[0] or not rng[1]:
                    continue
                if re.search(r"^\s*$", rng[0]):
                    continue
                if re.search(r"^\s*$", rng[1]):
                    # Nothing usable after the dash; keep the start year
                    years.append(_canonicalize_year(rng[0]))
                    continue

                start = _canonicalize_year(rng[0])
                end = _canonicalize_year(rng[1])
                if start < 1970 or end > 2030:
                    continue

                years.extend(range(start, end + 1))
            elif len(cw_piece) > 5:
                # Space-separated years? 5 rather than 4 to try and deal with
                # foolish typos such as "20010".
                sp_years = [_canonicalize_year(year)
                            for year in piece.split()]
                years.extend(sp_years)
            elif len(cw_piece) == 4 or len(cw_piece) == 2:
                # Single year
                years.append(_canonicalize_year(piece))
            else:
                log.warning("Year with strange length: '%s'" % cw_piece)
        except ValueError:
            # int() could not parse part of this piece. Nothing from the
            # piece has been added to years at this point, so just report
            # it and carry on instead of aborting the whole run.
            log.warning("Unparseable year(s): '%s'" % cw_piece)

    return years
# Make string year an integer, and expand from 2 digits to 4 if necessary
def _canonicalize_year(year):
assert year != ''
year = int(year)
if year > 100 and year < 1970:
log.warning("Strange year: '%s'" % year)
if year < 100:
if year > 70:
year = 1900 + year
else:
year = 2000 + year
return year
###############################################################################
# main()
###############################################################################
def main(argv):
    """Collate slic license data and render it through a flic template.

    *argv* is the full argument vector with the program name first (as in
    sys.argv); everything after the first element is parsed as options.

    Steps, in order:
      1. Load every --input JSON file into a SlicResults collection.
      2. Amalgamate the licenses that have one fixed text into single entries.
      3. Build an HTML version of each license text.
      4. Canonicalize copyright lines and merge them per holder.
      5. Remove files covered by the Bison exception from plain GPL entries.
      6. Check that every "see file FOO" (fileref) license can be matched to
         a license file found in the same or a higher directory.
      7. If --template was given, render it and print the result as UTF-8.

    Returns None. Exits through argparse with a usage error when no --input
    file is given.
    """
    env = Environment(loader=PackageLoader('__main__', 'flic_templates'),
                      trim_blocks=True,
                      extensions=['jinja2.ext.do'])

    parser = argparse.ArgumentParser(
        description='Collate and process license information.')
    parser.add_argument('--template', metavar="<template>",
                        help='render the given template name')
    parser.add_argument('-i', '--input', metavar="<input file>",
                        action="append",
                        help='JSON output of "slic" program')
    parser.add_argument('-V', '--version', action="store_true",
                        help='show version number and exit')
    parser.add_argument('-d', '--debug', action="store_true",
                        help='output debugging info to flic.log')
    # Parse the vector we were handed (element 0 is the program name) rather
    # than silently re-reading sys.argv.
    args = parser.parse_args(argv[1:])

    if args.version:
        ver = '.'.join([str(part) for part in _version_])
        # A parenthesised single-argument print behaves the same as the
        # print statement on Python 2 and is also valid Python 3 syntax.
        print("flic %s" % ver)
        return

    if args.debug:
        log.setLevel(logging.DEBUG)

    # With action="append" and no --input at all, args.input is None; report
    # that as a usage error instead of failing with a TypeError below.
    if not args.input:
        parser.error("at least one --input file is required")

    # Load occurrence data file
    bytag = SlicResults()
    for filename in args.input:
        bytag.load_json(filename)

    # For some licenses, we have a specific set text and so even if small
    # variants are found, we choose to ignore them and amalgamate all the
    # files and copyright holders into a single entry
    single_entry_licenses = ['BSD-4-Clause_RTFM',
                             'BSDProtection',
                             'FTL_fulltext',
                             'IJG_urlref',
                             r'BSD\|GPL-2.0',
                             'MirOS',
                             'BSD-3-Clause_urlref',
                             'W3C_urlref',
                             'MIT_urlref',
                             'BSL-1.0_urlref',
                             ]
    # NOTE(review): pop_by_re/unify/update are SlicResults methods defined
    # outside this file; their exact semantics should be confirmed there.
    s_e_ls = bytag.pop_by_re(single_entry_licenses)
    s_e_ls.unify()
    bytag.update(s_e_ls)

    # Convert text to HTML, including inserting 2x<br> at para breaks for
    # formatting on small screens
    for data in bytag.itervalues():
        if 'text' not in data:
            continue

        html = "\n".join(data['text'])
        html = cgi.escape(html)

        # Empty line before bullets
        html = re.sub(r"\n\s*([0-9]\.|\*|-)", r"\n\n \1", html)

        # Empty line above acknowledgement text
        html = re.sub(r"(acknowledge?ment:\n)", r"\1\n", html)

        # While we're at it...
        html = re.sub("``", '“', html)
        html = re.sub("''", '”', html)

        # Replace all empty lines by double <br>s
        html = re.sub(r"(\n){2,}", '<br><br>', html)

        data['html'] = html

    # Post-process and amalgamate copyright lines
    # \xa9 is the copyright symbol
    copy_re = re.compile(r"""Copyright(s|ed)?:?\s*
                             (\(C\)|\xa9)?\s*
                             (?P<years>[-\d,\s]*)\s
                             (?P<holder>.*)$""",
                         re.IGNORECASE | re.VERBOSE)

    for data in bytag.itervalues():
        if 'copyrights' not in data:
            continue

        # Amalgamate years: canonical holder name -> list of integer years
        holders = {}
        for line in data['copyrights']:
            match = copy_re.search(line)
            if not match:
                log.warning("(C) line doesn't match re: %s" % line)
                continue

            hits = match.groupdict()
            log.info("Hits: %r" % hits)

            holder = tidy_holder(hits['holder'])
            years = _split_years(hits['years'])
            holders.setdefault(holder, []).extend(years)

        # Rebuild copyright lines, one per holder
        clean_copyrights = []
        for holder in holders:
            log.debug("Years: %r" % holders[holder])
            years = _join_years(holders[holder])
            spacer = " " if years else ""
            clean_line = u"Copyright \xa9 %s%s%s" % (years, spacer, holder)
            log.debug("Clean C line: %s" % clean_line)
            clean_copyrights.append(clean_line)

        data['copyrights'] = clean_copyrights

    # Reconcile Bison exception, which can be in a different comment to the GPL
    # license and so is noted as a different "license" :-|
    def resolve_bison_exception(bisonfile):
        """Remove *bisonfile* from the first plain-GPL entry that lists it.

        Returns True if the file was found (and removed), False otherwise.
        """
        log.debug("Trying to resolve bisonexception for %s" % bisonfile)

        for tag in bytag:
            if not tag.startswith("GPL"):
                continue
            if tag.startswith("GPL-1.0+-with-bison-exception"):
                continue

            for data in bytag[tag]:
                gplfiles = data['files']
                if bisonfile not in gplfiles:
                    continue

                log.info("Resolved bisonexception for %s" % bisonfile)
                gplfiles.remove(bisonfile)

                # Make sure GPL goes away if there are no GPLed files
                # left
                if not gplfiles:
                    bytag[tag].remove(data)
                    if not bytag[tag]:
                        del bytag[tag]

                # We return straight away, so mutating the containers being
                # iterated above is safe.
                return True

        log.warning("Unable to resolve bisonexception for %s" % bisonfile)
        return False

    if 'GPL-1.0+-with-bison-exception' in bytag:
        for data in bytag['GPL-1.0+-with-bison-exception']:
            for bisonfile in data['files']:
                resolve_bison_exception(bisonfile)

    # Sometimes a file header says "see file FOO for the license". We give all
    # such licenses in slic a tag which includes the string 'fileref'. We must
    # now go through all files so tagged and make sure that a corresponding
    # license file has been found and included.

    # Make a hash lookup table of all files found which match any of the special
    # filenames. It's always possible that the Android people will have renamed
    # the file to "NOTICE"...
    fileref_names = {
        'COPYING_fileref': ['COPYING', 'NOTICE'],
        'COPYING_fileref2': ['COPYING', 'NOTICE'],
        'COPYRIGHT_fileref': ['COPYRIGHT', 'NOTICE'],
        'LICENSE_fileref': ['LICENSE', 'NOTICE'],
        'bzip2-1.0.6_fileref': ['LICENSE', 'NOTICE'],
        'BSD-3-Clause_fileref_xiph': ['COPYING', 'NOTICE'],
        'BSD_fileref': ['LICENSE', 'NOTICE', 'README'],
        'BSD_fileref_2': ['LICENSE.txt'],  # sic
        'BSD_fileref_3': ['README'],
        'MIT_fileref': ['COPYING', 'NOTICE'],
        'MIT|GPL-2.0_fileref': ['MIT-LICENSE.txt', 'NOTICE'],
        'FTL_fileref': ['LICENSE.txt', 'docs/FTL.TXT', 'NOTICE'],
        'ISC_fileref': ['LICENSE', 'NOTICE'],
        'IJG_fileref': ['README'],
        'jsimdext_fileref': ['jsimdext.inc'],
        # .po files have some "same license as" boilerplate
        'po_fileref': ['COPYING', 'LICENSE', 'NOTICE'],
        'Zlib_fileref': ['zlib.h'],
        'Libpng_fileref': ['png.h'],
        'LICENSE.txt_fileref': ['LICENSE.txt']
    }

    # Unique
    unique_filenames = set()
    for names in fileref_names.values():
        unique_filenames.update(names)

    # Path of each license-type file found -> tag it was recorded under.
    license_files = {}
    # The names are literal filenames, so escape them: otherwise the "." in
    # e.g. "zlib.h" would match any character. This stays a prefix match on
    # the basename.
    license_files_re = re.compile(
        "^(" + "|".join(re.escape(name)
                        for name in sorted(unique_filenames)) + ")")

    # Gather list of "LICENSE"-type files
    for data in bytag.itervalues():
        for path in data['files']:
            if license_files_re.match(basename(path)):
                license_files[path] = data['tag']

    # For each file marked as having a "fileref" license, see if an appropriate
    # file in a higher directory is present and has been included somewhere.
    # Assuming it has, we can ignore the fileref file itself because the
    # license has been noted when we included the referred-to file.
    def find_license_file_for(path, license_file_names):
        """Return True if one of *license_file_names* was found for *path*.

        For each candidate name, the directory of *path* and then each of
        its parents is checked against license_files, stopping at ".", ""
        or "./gecko".
        """
        log.debug("File path: %s" % path)

        for license_file_name in license_file_names:
            log.debug("Trying to find license file: %s" % license_file_name)
            directory = dirname(path)
            log.debug("Starting directory: %s" % directory)

            while directory not in (".", "", "./gecko"):
                candidate = join(directory, license_file_name)
                log.debug("Looking for %s" % candidate)
                if candidate in license_files:
                    log.debug("Found license %s in dir: %s" %
                              (license_file_name, directory))
                    return True

                # Up one level. Stripping a trailing "/" first also turns
                # "/" into "", which ends the walk at the filesystem root.
                directory = re.sub("/$", "", directory)
                directory = dirname(directory)
                log.debug("Moving up to dir: %s" % directory)

        log.debug("Found no license file for %s" % path)
        return False

    fileref_problem_files = []
    fileref_problem_dirs = {}

    for tag in bytag:
        if not re.search("fileref", tag):
            continue

        if tag not in fileref_names:
            log.warning("No license file info for fileref tag '%s'" % tag)
            continue

        license_file_names = fileref_names[tag]

        for data in bytag[tag]:
            log.debug("Checking filerefs for tag %s" % tag)
            for path in data['files']:
                # We ignore some copies of header files which are also found
                # elsewhere
                log.info(path)
                if re.search("xulrunner-sdk", path):
                    continue

                if not find_license_file_for(path, license_file_names):
                    fileref_problem_files.append(path)
                    fileref_problem_dirs[split(path)[0]] = 1

    # Render output
    if args.template:
        template = env.get_template(args.template)
        log.info("Rendering")
        print(template.render({
            'licenses': bytag,
            'template_exists': template_exists,
            'fileref_problem_files': fileref_problem_files,
            'fileref_problem_dirs': fileref_problem_dirs,
            'license_files': license_files
        }).encode('utf-8'))
# Script entry point: run main() on the real command line and use whatever
# it returns as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)