Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: strict mode for parsing #55

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ Parsing::
doc = Document.parse(samplexml)
print(doc.trade.agreement.seller.name)

``Document.parse()`` takes a boolean parameter ``strict``, which defaults to ``True``. In strict mode, the parser raises an error if it encounters any unknown element. If you set it to ``False``, the parser will not raise an error for unknown elements and will parse whatever it can.

Generating::

import os
Expand Down Expand Up @@ -130,6 +132,7 @@ Generating::
with open("output.pdf", "wb") as f:
f.write(new_pdf_bytes)

``Document.serialize()`` will validate the generated XML against the specified schema and raise an error if it is not valid. If you want to avoid validation, you can set the ``schema`` parameter to ``None``.

Development
-----------
Expand Down
12 changes: 6 additions & 6 deletions drafthorse/models/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ def get_tag(self):
def empty_element(self):
return self.child_type()

def add_from_etree(self, root):
def add_from_etree(self, root, strict=True):
childel = self.empty_element()
childel.from_etree(root)
childel.from_etree(root, strict)
self.add(childel)


Expand All @@ -46,7 +46,7 @@ def append_to(self, node):
self.set_element(el, child)
el.append_to(node)

def add_from_etree(self, root):
def add_from_etree(self, root, strict=True):
self.add(root.text)


Expand All @@ -60,7 +60,7 @@ def set_element(self, el, child):
el._amount = child[0]
el._currency = child[1]

def add_from_etree(self, root):
def add_from_etree(self, root, strict=True):
self.add((root.text, root.attrib.get("currencyID")))


Expand All @@ -74,7 +74,7 @@ def set_element(self, el, child):
el._text = child[1]
el._scheme_id = child[0]

def add_from_etree(self, root):
def add_from_etree(self, root, strict=True):
self.add((root.attrib["schemeID"], root.text))


Expand All @@ -87,5 +87,5 @@ def empty_element(self):
def set_element(self, el, child):
el._text = child

def add_from_etree(self, root):
def add_from_etree(self, root, strict=True):
self.add(root.text)
41 changes: 22 additions & 19 deletions drafthorse/models/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def __setattr__(self, key, value):
)
return super().__setattr__(key, value)

def from_etree(self, root):
def from_etree(self, root, strict=True):
if (
hasattr(self, "Meta")
and hasattr(self.Meta, "namespace")
Expand All @@ -109,19 +109,19 @@ def from_etree(self, root):
if child.tag in field_index:
name, _childel = field_index[child.tag]
if isinstance(getattr(self, name), Container):
getattr(self, name).add_from_etree(child)
getattr(self, name).add_from_etree(child, strict)
else:
getattr(self, name).from_etree(child)
else:
getattr(self, name).from_etree(child, strict)
elif strict:
raise TypeError("Unknown element {}".format(child.tag))
return self

@classmethod
def parse(cls, xmlinput):
def parse(cls, xmlinput, strict=True):
from lxml import etree

root = etree.fromstring(xmlinput)
return cls().from_etree(root)
return cls().from_etree(root, strict)


class StringElement(Element):
Expand Down Expand Up @@ -149,7 +149,7 @@ def to_etree(self):
node.text = self._text
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._text = root.text
self._set_on_input = True
return self
Expand All @@ -168,7 +168,7 @@ def to_etree(self):
def __str__(self):
return self._value

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._value = Decimal(root.text)
self._set_on_input = True
return self
Expand All @@ -189,7 +189,7 @@ def to_etree(self):
def __str__(self):
return "{} {}".format(self._amount, self._unit_code)

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._amount = Decimal(root.text)
self._unit_code = root.attrib["unitCode"]
self._set_on_input = True
Expand All @@ -211,7 +211,7 @@ def to_etree(self):
del node.attrib["currencyID"]
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._amount = Decimal(root.text)
self._currency = root.attrib.get("currencyID") or None
self._set_on_input = True
Expand All @@ -235,7 +235,7 @@ def to_etree(self):
node.attrib["listVersionID"] = self._list_version_id
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._text = Decimal(root.text)
self._list_id = root.attrib["listID"]
self._list_version_id = root.attrib["listVersionID"]
Expand All @@ -260,7 +260,7 @@ def to_etree(self):
node.text = self._text
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._mime_code = root.attrib["mimeCode"]
self._filename = root.attrib["filename"]
self._text = root.text
Expand All @@ -283,7 +283,7 @@ def to_etree(self):
node.attrib["schemeAgencyID"] = self._scheme_id
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._text = root.text
self._scheme_id = root.attrib["schemeAgencyID"]
self._set_on_input = True
Expand All @@ -306,7 +306,7 @@ def to_etree(self):
node.attrib["schemeID"] = self._scheme_id
return node

def from_etree(self, root):
def from_etree(self, root, strict=True):
self._text = root.text
try:
self._scheme_id = root.attrib["schemeID"]
Expand Down Expand Up @@ -346,11 +346,14 @@ def to_etree(self):
t.append(node)
return t

def from_etree(self, root):
def from_etree(self, root, strict=True):
if len(root) != 1:
raise TypeError("Date containers should have one child")
if root[0].tag != "{%s}%s" % (self._date_time_namespace, "DateTimeString"):
raise TypeError("Tag %s not recognized" % root[0].tag)
if strict:
raise TypeError("Tag %s not recognized" % root[0].tag)
else:
return self
self._format = root[0].attrib["format"]
if self._format == "102":
self._value = datetime.strptime(root[0].text, "%Y%m%d").date()
Expand All @@ -362,7 +365,7 @@ def from_etree(self, root):
self._value = w.monday()
else:
self._value = datetime.strptime(root[0].text + "1", "%G%V%u").date()
else:
elif strict:
raise TypeError(
"Date format %s cannot be parsed" % root[0].attrib["format"]
)
Expand All @@ -384,7 +387,7 @@ def to_etree(self):
t.text = self._value.strftime("%Y-%m-%dT%H:%M:%S")
return t

def from_etree(self, root):
def from_etree(self, root, strict=True):
try:
self._value = datetime.strptime(root.text, "%Y-%m-%dT%H:%M:%S").date()
except Exception:
Expand Down Expand Up @@ -416,7 +419,7 @@ def to_etree(self):
def __str__(self):
return "{}".format(self._value)

def from_etree(self, root):
def from_etree(self, root, strict=True):
if len(root) != 1:
raise TypeError("Indicator containers should have one child")
if root[0].tag != "{%s}%s" % (NS_UDT, "Indicator"):
Expand Down