Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new generateLicenseXml algo and script to beautify the xml #132

Merged
merged 1 commit into from
Aug 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
256 changes: 256 additions & 0 deletions src/app/formatxml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
#!/usr/bin/env python3
#
# quick-n-dirty formatter for SPDX licenses in XML format
#
# Copyright (c) 2017 Alexios Zavras
# SPDX-License-Identifier: MIT
#

#-----------------------------------------------------------------
# configuration parameters, self-explanatory :-)
# INDENT, LINE_LENGTH and the two tag lists are simply defaults;
# they can be overridden by command-line options

INDENT = 2          # spaces added per nesting level
LINE_LENGTH = 80    # maximum width of an output line

# Tags that are rendered inline, i.e. inside the text flow of their parent.
TAGS_inline = [
    'alt',
    'b',
    'br',
    'copyright',
    'url',

    'crossRef',
    'bullet',
]

# Tags whose start and end tags are placed on lines of their own.
# 'license' and 'notes' are listed twice; the list is only used for
# membership tests, so the repetition has no effect.
TAGS_block = [
    'body',
    'header',
    'li',
    'license',
    'list',
    'notes',
    'optional',
    'p',
    'SPDX',
    'title',
    'urls',

    'SPDXLicenseCollection',
    'license',
    'crossRefs',
    'standardLicenseHeader',
    'notes',
    'titleText',
    'item',
    'copyrightText',
    'text',
]

# Per-tag attribute names, in the order they are written to the start tag.
ATTRS_SEQ = {
    'SPDXLicenseCollection': ['xmlns'],
    'license': ['isOsiApproved', 'licenseId', 'name', 'listVersionAdded'],
    'alt': ['name', 'match'],
}

# Namespace of all tags: the plain URL, and the "{url}" prefix form that
# is stripped from element tags before they are written out.
NAMESPACE_URL = 'http://www.spdx.org/license'
NAMESPACE = '{' + NAMESPACE_URL + '}'

#-----------------------------------------------------------------

# program version, reported by the -V/--version command-line option
VERSION = '1.0'

import argparse
import datetime
import logging
import re
import shutil
import sys
import xml.etree.ElementTree as et

# line separator used in the intermediate "level#text" representation
# produced by pretty() and consumed by fmt()
NL = '\n'
# first line written to every formatted file
XML_PROLOG = """<?xml version="1.0" encoding="UTF-8"?>"""

# NOTE: this runs at import time; log records go to the file "error.log"
# (a relative path), formatted as "LEVEL : timestamp : message"
logging.basicConfig(filename="error.log", format="%(levelname)s : %(asctime)s : %(message)s")
# the root logger, shared by all functions in this file
logger = logging.getLogger()

def process(fname):
    """Re-format the XML file fname in place.

    The file is parsed, a lower-case 'spdx' root tag is renamed to 'SPDX',
    the 'xmlns' attribute of the root is set to NAMESPACE_URL, and the tree
    is serialised through pretty() and fmt().  The result, preceded by
    XML_PROLOG, overwrites the original file.

    Raises xml.etree.ElementTree.ParseError if the file is not well-formed
    XML; in that case the file is left untouched because it is only opened
    for writing after formatting has succeeded.
    """
    tree = et.parse(fname)
    root = tree.getroot()
    if root.tag == 'spdx':
        root.tag = 'SPDX'
        logger.error('changing root element to SPDX (capital letters)')
    root.set('xmlns', NAMESPACE_URL)
    blocks = pretty(root, 0)
    ser = fmt(blocks)

    # XML_PROLOG declares UTF-8, so the file must be written as UTF-8 no
    # matter what the platform's default text encoding is.
    with open(fname, 'w', encoding='utf-8') as f:
        f.write(XML_PROLOG + "\n")
        f.write(ser)



def pretty(node, level):
    """Serialise node and its subtree into the intermediate format read by fmt().

    The result is a string of newline-separated chunks; a chunk that starts
    a new output line is prefixed with "<level>#", where <level> is the
    nesting depth used later for indentation.

    node  -- an ElementTree element
    level -- nesting depth of node (0 for the root)

    Tags listed in config['block'] get their start and end tags on lines of
    their own and indent their content one level deeper.  Tags listed in
    config['inline'] stay in the running text.  Any other tag is logged as
    an error and written without line breaks.

    Only the attributes named in ATTRS_SEQ for the tag are written, in that
    order; any other attribute is reported through the logger and left out.
    The element passed in is not modified.
    """
    tag = node.tag
    if tag.startswith(NAMESPACE):
        tag = tag[len(NAMESPACE):]
    text = singlespaceline(node.text)
    tail = singlespaceline(node.tail)

    start_tag = "<" + tag
    if node.attrib:
        # work on a copy so the caller's tree keeps its attributes
        remaining = dict(node.attrib)
        # a tag without an ATTRS_SEQ entry simply has no known attributes
        for a in ATTRS_SEQ.get(tag, ()):
            if a in remaining:
                value = _xml_escape(remaining.pop(a), quote=True)
                start_tag += ' {}="{}"'.format(a, value)
        if remaining:
            logger.error('more attrs remaining in {}: {}'.format(tag, remaining.keys()))
    start_tag += ">"
    end_tag = "</" + tag + ">"

    if tag in config['block']:
        child_level = level + 1
        before = '{0}{1}#{2}{0}{3}#'.format(NL, level, start_tag, child_level)
        after = '{0}{1}#{2}{0}'.format(NL, level, end_tag)
    elif tag in config['inline']:
        child_level = level
        before = start_tag
        after = '{1}{0}{2}#'.format(NL, end_tag, level)
    else:
        logger.error('Tag "{}" neither block nor inline!'.format(tag))
        child_level = level
        before = start_tag
        after = end_tag

    parts = [before]
    if text:
        parts.append(_xml_escape(text))
    for child in node:
        parts.append(pretty(child, child_level))
    parts.append(after)
    if tail:
        # the parser has already decoded entities in the tail, exactly as it
        # did for the text, so it needs the same escaping on the way out
        parts.append(_xml_escape(tail))
    # adjacent block boundaries produce empty lines; collapse them
    return ''.join(parts).replace('\n\n', '\n')


def _xml_escape(value, quote=False):
    """Replace XML special characters in value by their entities.

    With quote=True the double quote is escaped as well, which is needed
    for attribute values written between double quotes.
    """
    value = value.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;')
    if quote:
        value = value.replace('"', '&quot;')
    return value

def fmt(blocks):
    """Turn the intermediate output of pretty() into indented, wrapped text.

    blocks -- newline-separated chunks, each optionally prefixed with
              "<level>#" giving its nesting depth

    Every non-empty chunk is wrapped with to_lines() to at most
    config['max_width'] columns, including an indentation of
    level * config['lvl_indent'] spaces.  A chunk without a level prefix is
    logged as an error and formatted at the level of the preceding chunk
    (level 0 if there is none).

    Returns the formatted text; every line ends with a newline.
    """
    bregexp = re.compile(r'((?P<level>\d+)#)?(?P<paragraph>.*)')
    out = []
    # start at level 0 so that a first chunk without a level prefix is
    # still formatted instead of hitting an unbound variable
    level = 0
    for line in blocks.split('\n'):
        if line == '':
            continue
        m = bregexp.match(line)
        if m.group('level'):
            level = int(m.group('level'))
        else:
            logger.error('Block without level: "{}"'.format(line))
        par = m.group('paragraph')
        if par == '':
            continue
        indent = level * config['lvl_indent']
        width = config['max_width'] - indent
        for fmtline in to_lines(par, width):
            out.append(indent * ' ' + fmtline + '\n')
    return ''.join(out)


def to_lines(text, width):
    """Split text into lines of at most width characters, as evenly as possible.

    text  -- the paragraph; it is split into words on whitespace
    width -- the maximum line length

    The line breaks are chosen by dynamic programming so that the sum over
    all lines of (width - line length) squared is minimal.  A single word
    longer than width cannot be broken; it is placed on a line of its own
    and the rest of the paragraph is still wrapped normally.

    Returns the list of lines (without trailing newlines); an empty or
    whitespace-only text yields an empty list.
    """
    words = text.split()
    count = len(words)

    # offsets[k] is the total length of the first k words, spaces excluded
    offsets = [0]
    for word in words:
        offsets.append(offsets[-1] + len(word))

    unreachable = 10 ** 20
    # cost[k]: best penalty for laying out the first k words;
    # breaks[k]: index of the first word of the last line in that layout
    cost = [0] + [unreachable] * count
    breaks = [0] * (count + 1)
    for i in range(count):
        for j in range(i + 1, count + 1):
            # words[i:j] on one line: their lengths plus single spaces
            w = offsets[j] - offsets[i] + j - i - 1
            if w > width:
                if j == i + 1 and cost[i] < cost[j]:
                    # a lone overlong word: accept it on its own line,
                    # otherwise no position after it would be reachable and
                    # the whole paragraph would come out as a single line
                    cost[j] = cost[i]
                    breaks[j] = i
                break
            penalty = cost[i] + (width - w) ** 2
            if penalty < cost[j]:
                cost[j] = penalty
                breaks[j] = i

    # walk the break points back from the end to recover the lines
    lines = []
    last = count
    while last > 0:
        first = breaks[last]
        lines.append(' '.join(words[first:last]))
        last = first
    lines.reverse()
    return lines


def singlespaceline(txt):
    """Return txt on a single line with normalised whitespace.

    Leading and trailing whitespace is removed and every internal run of
    whitespace (including newlines) becomes one space.  A falsy value
    (None or the empty string) is returned unchanged.
    """
    if not txt:
        return txt
    return re.sub(r'\s+', ' ', txt.strip())


# main program

if NAMESPACE:
    # namespace-qualified ("{url}name") counterparts of the tag lists and
    # of the keys of the attribute-order table
    full_TAGS_inline = [NAMESPACE + name for name in TAGS_inline]
    full_TAGS_block = [NAMESPACE + name for name in TAGS_block]
    full_ATTRS_SEQ = {NAMESPACE + tag: attrs for tag, attrs in ATTRS_SEQ.items()}

# Formatting settings read by pretty() and fmt().  They are defined at
# module level, with the defaults from the top of the file, so that
# process() also works when this file is imported instead of being run as
# a script; main() overrides them from the command line.
config = {
    'inline': TAGS_inline,
    'block': TAGS_block,
    'max_width': LINE_LENGTH,
    'lvl_indent': INDENT,
}


def main(argv=None):
    """Command-line entry point: format every XML file named in argv.

    argv -- list of command-line arguments; None means sys.argv[1:]

    The -w/--width, -i/--indent, --inline-tags and --block-tags options
    replace the corresponding entries of the module-level config.  A file
    that is not well-formed XML is reported in the log and on standard
    output, and processing continues with the next file.
    """
    parser = argparse.ArgumentParser(
        description='Indent XML file(s)')
    parser.add_argument('filename', nargs='+',
                        help='the XML files to process')
    parser.add_argument('-w', '--width', action='store', type=int,
                        default=LINE_LENGTH,
                        help='the maximum width of the lines in output')
    parser.add_argument('-i', '--indent', action='store', type=int,
                        default=INDENT,
                        help='the number of spaces each level is indented')
    parser.add_argument('--inline-tags', action='store',
                        help='space-separated list of tags to be rendered inline')
    parser.add_argument('--block-tags', action='store',
                        help='space-separated list of tags to be rendered as blocks')
    parser.add_argument('-V', '--version', action='version',
                        version='%(prog)s ' + VERSION,
                        help='print the program version')

    args = parser.parse_args(argv)

    config['inline'] = TAGS_inline
    config['block'] = TAGS_block
    config['max_width'] = args.width
    config['lvl_indent'] = args.indent
    if args.inline_tags:
        config['inline'] = args.inline_tags.split()
    if args.block_tags:
        config['block'] = args.block_tags.split()

    for fname in args.filename:
        try:
            process(fname)
        except et.ParseError as e:
            logger.error('XML Parse Error: ' + str(e))
            print('XML Parse Error: ' + str(e))


if __name__ == '__main__':
    main()
Loading