-
Notifications
You must be signed in to change notification settings - Fork 58
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add new generateLicenseXml algo and script to beautify the xml
- Loading branch information
Showing
8 changed files
with
526 additions
and
104 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,256 @@ | ||
#!/usr/bin/env python3 | ||
# | ||
# quick-n-dirty formatter for SPDX licenses in XML format | ||
# | ||
# Copyright (c) 2017 Alexios Zavras | ||
# SPDX-License-Identifier: MIT | ||
# | ||
|
||
#----------------------------------------------------------------- | ||
# configuration parameters, self-explanatory :-) | ||
# they are simply defaults; they can be overridden by command-line options
|
||
# default number of spaces per nesting level and default maximum line width
INDENT = 2
LINE_LENGTH = 80

# which tags are inline and which appear on their own lines
TAGS_inline = [
    'alt',
    'b',
    'br',
    'copyright',
    'url',

    'crossRef',
    'bullet',
]
# each tag is listed exactly once; the lists are only used for membership tests
TAGS_block = [
    'body',
    'header',
    'li',
    'license',
    'list',
    'notes',
    'optional',
    'p',
    'SPDX',
    'title',
    'urls',

    'SPDXLicenseCollection',
    'crossRefs',
    'standardLicenseHeader',
    'titleText',
    'item',
    'copyrightText',
    'text',
]

# attributes for tags, in the order we want them to appear
ATTRS_SEQ = {
    'SPDXLicenseCollection': [
        'xmlns',
    ],
    'license': [
        'isOsiApproved',
        'licenseId',
        'name',
        'listVersionAdded',
    ],
    'alt': [
        'name',
        'match',
    ],
}

# namespace for all tags
NAMESPACE_URL = 'http://www.spdx.org/license'
# the same namespace in the "{uri}" form that ElementTree prefixes to tag
# names; derived from NAMESPACE_URL so the two can never get out of sync
NAMESPACE = '{' + NAMESPACE_URL + '}'
|
||
#----------------------------------------------------------------- | ||
|
||
# version string reported by the -V/--version command-line option
VERSION = '1.0'

import argparse
import datetime
import logging
import re
import shutil
import sys
import xml.etree.ElementTree as et

# line separator used while building the intermediate serialization
NL = '\n'
# XML declaration written as the first line of every output file
XML_PROLOG = """<?xml version="1.0" encoding="UTF-8"?>"""

# log to the file "error.log"; each record shows level, timestamp and message
logging.basicConfig(filename="error.log", format="%(levelname)s : %(asctime)s : %(message)s")
# the root logger, shared by all functions in this script
logger = logging.getLogger()
|
||
def process(fname):
    """Reformat the XML file *fname* in place.

    The file is parsed, a lowercase ``spdx`` root element is renamed to
    ``SPDX`` (and the change is logged), the ``xmlns`` attribute of the root
    is set to NAMESPACE_URL, and the tree is serialized with pretty() and
    fmt().  The result, preceded by XML_PROLOG, overwrites the input file.

    Raises xml.etree.ElementTree.ParseError when the file is not
    well-formed XML, and OSError when it cannot be read or written.
    """
    tree = et.parse(fname)
    root = tree.getroot()
    if root.tag == 'spdx':
        root.tag = 'SPDX'
        logger.error('changing root element to SPDX (capital letters)')
    root.set('xmlns', NAMESPACE_URL)
    # serialize completely before opening the file for writing, so that a
    # failure while formatting does not truncate the input
    blocks = pretty(root, 0)
    ser = fmt(blocks)

    # the prolog declares UTF-8, so write UTF-8 regardless of the locale's
    # default encoding
    with open(fname, 'w', encoding='utf-8') as f:
        f.write(XML_PROLOG + "\n")
        f.write(ser)
|
||
|
||
|
||
def _xml_escape(value, quote=False):
    """Return *value* with XML markup characters replaced by entities.

    With ``quote=True`` double quotes are escaped as well, which makes the
    result safe inside a double-quoted attribute value.
    """
    # '&' must be replaced first so the entities added next are not re-escaped
    value = value.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;')
    if quote:
        value = value.replace('"', '&quot;')
    return value


def pretty(node, level):
    """Serialize *node* and its descendants into the intermediate block format.

    The returned string consists of newline-separated paragraphs; a paragraph
    that starts a new output line is prefixed with ``<level>#``, which fmt()
    later turns into indentation.  Tags listed in ``config['block']`` get
    their start and end tags on separate lines with the children one level
    deeper; tags in ``config['inline']`` stay in the running text; any other
    tag is logged and emitted without line breaks.

    Attributes are written in the order given by ATTRS_SEQ for the tag.
    Attributes not listed there are logged and appended afterwards under
    their ElementTree names.  The node itself is not modified.

    node  -- an xml.etree.ElementTree element
    level -- nesting depth of *node* (0 for the root)
    """
    tag = node.tag
    if tag.startswith(NAMESPACE):
        tag = tag[len(NAMESPACE):]
    text = singlespaceline(node.text)
    tail = singlespaceline(node.tail)

    # work on a copy so that the caller's tree keeps its attributes
    remaining = dict(node.attrib)
    attrs = []
    # tags without an entry in ATTRS_SEQ simply have no preferred order
    for name in ATTRS_SEQ.get(tag, ()):
        if name in remaining:
            attrs.append((name, remaining.pop(name)))
    if remaining:
        logger.error('more attrs remaining in {}: {}'.format(tag, remaining.keys()))
        # keep unexpected attributes instead of silently losing them
        # NOTE: namespace-qualified names are emitted as ElementTree reports them
        attrs.extend(remaining.items())
    start_tag = "<" + tag
    for name, value in attrs:
        start_tag += ' {}="{}"'.format(name, _xml_escape(value, quote=True))
    start_tag += ">"
    end_tag = "</" + tag + ">"

    if tag in config['block']:
        child_level = level + 1
        before = '{0}{1}#{2}{0}{3}#'.format(NL, level, start_tag, child_level)
        after = '{0}{1}#{2}{0}'.format(NL, level, end_tag)
    elif tag in config['inline']:
        child_level = level
        before = start_tag
        after = '{1}{0}{2}#'.format(NL, end_tag, level)
    else:
        logger.error('Tag "{}" neither block nor inline!'.format(tag))
        child_level = level
        before = start_tag
        after = end_tag

    parts = [before]
    if text:
        parts.append(_xml_escape(text))
    parts.extend(pretty(child, child_level) for child in node)
    parts.append(after)
    if tail:
        # tail text is character data too and needs the same escaping
        parts.append(_xml_escape(tail))
    # adjacent block boundaries produce empty lines; collapse them
    return ''.join(parts).replace('\n\n', '\n')
|
||
def fmt(blocks):
    """Turn the intermediate block format produced by pretty() into text.

    Each non-empty line of *blocks* may start with ``<level>#``.  The rest of
    the line is a paragraph that is wrapped with to_lines() and indented by
    ``level * config['lvl_indent']`` spaces, so that no output line exceeds
    ``config['max_width']`` where the words allow it.  A line without a level
    marker is logged and formatted at the level of the preceding line (level
    0 if there is none).

    Returns the formatted text; every line ends with a newline.
    """
    bregexp = re.compile(r'((?P<level>\d+)#)?(?P<paragraph>.*)')
    out = []
    # defined up front so a first line without a marker cannot hit an
    # unbound variable
    level = 0
    for line in blocks.split('\n'):
        if line == '':
            continue
        m = bregexp.match(line)
        if m.group('level'):
            level = int(m.group('level'))
        else:
            logger.error('Block without level: "{}"'.format(line))
        par = m.group('paragraph')
        if par == '':
            continue
        indent = level * config['lvl_indent']
        width = config['max_width'] - indent
        out.extend(indent * ' ' + fmtline + '\n'
                   for fmtline in to_lines(par, width))
    return ''.join(out)
|
||
|
||
def to_lines(text, width):
    """Split *text* into lines of at most *width* characters.

    The text is broken at whitespace.  Among all possible sets of break
    points the one minimizing the sum over all lines of
    ``(width - line length) ** 2`` is chosen, which gives evenly filled lines.
    A word that is longer than *width* cannot be broken; it is placed on a
    line of its own and the rest of the text is still wrapped normally.

    text  -- the paragraph to wrap
    width -- maximum line length in characters

    Returns a list of lines without trailing newlines; the list is empty
    when *text* contains no words.
    """
    words = text.split()
    count = len(words)
    # offsets[k] is the combined length of the first k words, without spaces
    offsets = [0]
    for word in words:
        offsets.append(offsets[-1] + len(word))

    # cost[j] is the minimal penalty for laying out the first j words;
    # breaks[j] is the index of the first word on the last of those lines
    cost = [0] + [10 ** 20] * count
    breaks = [0] * (count + 1)
    for i in range(count):
        for j in range(i + 1, count + 1):
            # length of words[i:j] joined by single spaces
            length = offsets[j] - offsets[i] + j - i - 1
            if length <= width:
                penalty = cost[i] + (width - length) ** 2
            elif j == i + 1:
                # a single overlong word: it has to stand alone whatever the
                # layout, so it adds nothing to the comparison
                penalty = cost[i]
            else:
                break
            if penalty < cost[j]:
                cost[j] = penalty
                breaks[j] = i

    # follow the recorded break points backwards from the last word
    lines = []
    last = count
    while last > 0:
        first = breaks[last]
        lines.append(' '.join(words[first:last]))
        last = first
    lines.reverse()
    return lines
|
||
|
||
def singlespaceline(txt):
    """Normalize the whitespace of *txt*.

    Leading and trailing whitespace is removed and every internal run of
    whitespace becomes a single space.  A false value (None or the empty
    string) is returned unchanged.
    """
    if not txt:
        return txt
    return re.sub(r'\s+', ' ', txt.strip())
|
||
|
||
# main program | ||
|
||
# namespace-qualified variants of the tag tables, in the "{uri}tag" form
# that ElementTree uses for element names
if NAMESPACE:
    full_TAGS_inline = [NAMESPACE + e for e in TAGS_inline]
    full_TAGS_block = [NAMESPACE + e for e in TAGS_block]
    full_ATTRS_SEQ = {NAMESPACE + k: v for k, v in ATTRS_SEQ.items()}

if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description='Indent XML file(s)')
    parser.add_argument('filename', nargs='+',
                        help='the XML files to process')
    parser.add_argument('-w', '--width', action='store', type=int,
                        default=LINE_LENGTH,
                        help='the maximum width of the lines in output')
    parser.add_argument('-i', '--indent', action='store', type=int,
                        default=INDENT,
                        help='the number of spaces each level is indented')
    parser.add_argument('--inline-tags', action='store',
                        help='space-separated list of tags to be rendered inline')
    parser.add_argument('--block-tags', action='store',
                        help='space-separated list of tags to be rendered as blocks')
    parser.add_argument('-V', '--version', action='version',
                        version='%(prog)s ' + VERSION,
                        help='print the program version')

    args = parser.parse_args()

    # module-level settings read by pretty() and fmt(); the built-in tag
    # tables are used unless replaced on the command line
    config = dict()
    config['inline'] = TAGS_inline
    config['block'] = TAGS_block
    config['max_width'] = args.width
    config['lvl_indent'] = args.indent
    if args.inline_tags:
        config['inline'] = args.inline_tags.split()
    if args.block_tags:
        config['block'] = args.block_tags.split()

    # a problem with one file is reported and the remaining files are
    # still processed
    for fname in args.filename:
        try:
            process(fname)
        except et.ParseError as e:
            logger.error('XML Parse Error: ' + str(e))
            print('XML Parse Error: ' + str(e))
        except OSError as e:
            # missing or unreadable/unwritable file
            logger.error('I/O Error: ' + str(e))
            print('I/O Error: ' + str(e))
Oops, something went wrong.