Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Addressing proposed changes #18

Merged
merged 3 commits into from
Nov 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions py_mmd_tools/log_util.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
"""
Tool for converting metadata from MMD format to ISO format using a specific xslt.
Utility methods to create a logger.
License:
This file is part of the S-ENDA-Prototype repository (https://github.com/metno/py-mmd-tools).
S-ENDA-Prototype is licensed under GPL-3.0 (https://github.com/metno/py-mmd-tools/blob/master/LICENSE)
This file is part of the py-mmd-tools repository (https://github.com/metno/py-mmd-tools).
licensed under the Apache License 2.0 (https://www.apache.org/licenses/LICENSE-2.0)
"""

import logging
import pathlib
import os

def get_logpath(logdirpath):
"""
Args:
logdirpath ([str]): [path to a directory where to store logs]
Returns:
[bool]: [return True if an existing and writable path to directory is provided]
"""
if not pathlib.Path(logdirpath).exists():
try:
pathlib.Path(logdirpath).mkdir(parents=True, exist_ok=True)
Expand All @@ -36,11 +42,10 @@ def setup_log(name, logdirpath, logtype='stream'):
fh.setLevel(logging.DEBUG)
fh.setFormatter(log_format)
logger.addHandler(fh)
return logger
if logtype=='stream':
# streamhandler
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(log_format)
logger.addHandler(ch)
return logger
return logger
57 changes: 0 additions & 57 deletions py_mmd_tools/xml2xml.py

This file was deleted.

71 changes: 54 additions & 17 deletions py_mmd_tools/xml_util.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
"""
Utility tool to work on mmd xml files.
License:
This file is part of the S-ENDA-Prototype repository (https://github.com/metno/py-mmd-tools).
S-ENDA-Prototype is licensed under GPL-3.0 (https://github.com/metno/py-mmd-tools/blob/master/LICENSE)
This file is part of py-mmd-tools, licensed under the Apache License 2.0 (https://www.apache.org/licenses/LICENSE-2.0)
"""

import pathlib
import time
import errno
import os
import lxml.etree as ET
from datetime import datetime
from lxml.etree import XMLSyntaxError
import errno
import os


def xml_check(xml_file):
Expand All @@ -27,11 +27,7 @@ def xml_check(xml_file):
xml = ET.parse(xml_file)
return True
except XMLSyntaxError:
#try:
# xml = ET.XML(bytes(bytearray(xml_file, encoding="utf-8")))
# return True
#except XMLSyntaxError:
raise # return False
raise

else:
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), xml_file)
Expand All @@ -40,19 +36,60 @@ def xml_check(xml_file):
def xsd_check(xml_file, xsd_schema=None):
"""[validate xml file from filepath]
Args:
xmlfile ([str]): [filepath to an mmd xml file]
xmlfile ([str]): [filepath to an xml file]
xsd_schema ([str]): [filepath to an xsd schema file]
Returns:
[bool]: [return True if a valid xml filepath is provided,
return False if the xmlfile is invalid, empty, or doesn't exist ]
"""
if not pathlib.Path(xsd_schema).is_file() and os.path.getsize(xsd_schema) != 0:
xmlschema_mmd = ET.XMLSchema(ET.parse(xsd_schema))
xml_doc = ET.ElementTree(file=xml_file)
if not xmlschema_mmd.validate(xml_doc):
passing=False
else:
passing=True
return passing

def xml_translate(
xml_file,
outputfile,
xslt,
xsd_validation=False,
xsd_schema=None,
):
"""[Transform XML file using xslt]
Args:
xml_file ([str]): [filepath to an xml file]
xslt ([str]): [filepath to a xsl transformation file]
xsd_validation ([bool]): [if true, performs validation on the provided xml file - requires an xsd schema]
xsd_schema ([str]): [xsd schema file used if xsd_validation is True]
outputfile ([str]): [filepath to output iso xml file]
Returns:
[bool]: [return True if the XML file at the provided filepath is successfully converted using the given XSLT,
return False if the xmlfile is invalid, doesn't exist, or the xsl transformation failed]
"""
if not pathlib.Path(xml_file).exists():
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), xml_file)

if xsd_validation:
if xsd_schema is None:
raise TypeError
if not pathlib.Path(xsd_schema).exists():
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), xsd_schema)
else:
if not xsd_check(xml_file, xsd_schema=xsd_schema):
raise
else:
if pathlib.Path(xsd_schema).is_file() and xml_check(xsd_schema):
xmlschema_mmd = ET.XMLSchema(ET.parse(xsd_schema))
xml_doc = ET.ElementTree(file=xml_file)
if not xmlschema_mmd.validate(xml_doc):
return False
else:
return True
if not pathlib.Path(xslt).exists():
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), xslt)
try:
xml_doc = ET.ElementTree(file=xml_file)
transform = ET.XSLT(ET.parse(xslt))
new_doc = transform(xml_doc)
except OSError:
result=False
xml_as_string = ET.tostring(new_doc, pretty_print=True, encoding="unicode")
with open(outputfile, "w") as output:
output.write(xml_as_string)
result=True
return result
35 changes: 25 additions & 10 deletions script/xmlconverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import sys
import parmap
from lxml.etree import XMLSyntaxError
from xml2xml import xml2xml
from xml_translate import xml_translate

"""
Script to run the xml conversion tools
Expand Down Expand Up @@ -40,14 +40,14 @@ def filelist(directory):
xml_files.append(file_path)
return xml_files

def writerecord(xml_file, xslt, outdir="/tmp"):
def translate_and_write(xml_file, xslt, outdir="/tmp"):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A few things on this method. I don't like that you create a tmp folder without deleting it afterwards. It clutters up the user's system. Actually, it shouldn't even be possible to create /tmp on anything else than a docker container. The software should be agnostic about the platform it's running on.

I don't think you need the check in line 45 either, since the function will eventually fail anyway.

pathlib.Path(outdir).mkdir(parents=True, exist_ok=True)
if not os.path.isfile(xslt):
raise Exception("XSLT file is missing: %s" % xslt)
outputfile = pathlib.PurePosixPath(outdir).joinpath(
pathlib.PurePosixPath(xml_file).name
)
xml2xml(
xml_translate(
xml_file=xml_file,
outputfile=outputfile,
xslt=xslt,
Expand All @@ -58,12 +58,12 @@ def main(metadata, xslt, outdir, recover=False, parallel=False):
if not recover:
xmlfiles = filelist(metadata)
else:
xmlfiles = recover_task(metadata, outdir, parallel)
xmlfiles = recover_task(sourcedir=metadata, outdir=outdir, parallel=parallel)
print(f"Sprocessing {len(xmlfiles)} files")
if parallel is True:
print(f'parallel: {parallel}')
parmap.map(
writerecord,
translate_and_write,
xmlfiles,
xslt=xslt,
outdir=outdir,
Expand All @@ -72,18 +72,33 @@ def main(metadata, xslt, outdir, recover=False, parallel=False):
else:
for i in xmlfiles:
try:
writerecord(xml_file=i, xslt=xslt, outdir=outdir)
except XMLSyntaxError:
print(f"failed on: {i}")
translate_and_write(xml_file=i, xslt=xslt, outdir=outdir)
except XMLSyntaxError as e:
print(f"failed on: {i} - {e.message}")


def check_record(record, tobedone):
"""[Returns a filepath string if the file name is present in the input list]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where is this method used?

Args:
record ([str]): [filepath]
tobedone ([list]): [list of filenames]
Returns:
[str]: [filepath]
"""
if pathlib.Path(record).stem in tobedone:
return record


def recover_task(metadata, outdir, parallel=False):
total = filelist(metadata)
def recover_task(sourcedir, outdir, parallel=False):
"""[Return a list of filenames which are present in the sourcedir tree but not in outdir.]
Args:
sourcedir ([str]): [filepath to directory]
outdir ([str]): [filepath to directory]
parallel ([bool]): [True to perform the operation using multicore parallel processing]
Returns:
[list]: [list of strings]
"""
total = filelist(sourcedir)
already_done = filelist(outdir)
total_stem = [pathlib.Path(i).stem for i in total]
already_done_stem = [pathlib.Path(i).stem for i in already_done]
Expand Down
69 changes: 69 additions & 0 deletions tests/data/mmd-to-csw.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
<?xml version="1.0" encoding="utf-8"?>

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:str="http://exslt.org/strings"
xmlns:gco="http://www.isotc211.org/2005/gco"
xmlns:gmd="http://www.isotc211.org/2005/gmd"
xmlns:gml="http://www.opengis.net/gml"
xmlns:mmd="http://www.met.no/schema/mmd"
xmlns:dif="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/"
xmlns:mapping="http://www.met.no/schema/mmd/iso2mmd"
version="1.0">
<xsl:include href="mmd-to-iso.xsl" />
<xsl:output method="xml" encoding="UTF-8" indent="yes" />
<xsl:template match="mmd:data_access">
<!--
Convert one mmd:data_access element into a gmd:onLine element that wraps a
gmd:CI_OnlineResource with four children, in this order: gmd:name,
gmd:description, gmd:protocol and gmd:linkage.
All output elements are built with xsl:element, so their names are taken
from the name attributes below.
-->

<xsl:element name="gmd:onLine">
<xsl:element name="gmd:CI_OnlineResource">

<!-- gmd:name: string value of mmd:name, copied unchanged. -->
<xsl:element name="gmd:name">
<xsl:element name="gco:CharacterString">
<xsl:value-of select="mmd:name" />
</xsl:element>
</xsl:element>

<!--
gmd:description: the value of mmd:type immediately followed by the
description text; no separator is inserted between the two values.
For the 'OGC WMS' type every comma in the description is replaced by a
colon using the EXSLT str:replace extension function.
The test compares the raw string value of mmd:type, so a type with
leading or trailing whitespace takes the otherwise branch.
-->
<xsl:element name="gmd:description">
<xsl:element name="gco:CharacterString">
<xsl:choose>
<xsl:when test="mmd:type = 'OGC WMS'">
<xsl:value-of select="mmd:type" />
<xsl:value-of select="str:replace(mmd:description, ',', ':')" />
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="mmd:type" />
<xsl:value-of select="mmd:description" />
</xsl:otherwise>
</xsl:choose>
</xsl:element>
</xsl:element>

<!--
gmd:protocol: the whitespace-normalized mmd:type is looked up in the
mapping:data_access_type elements that are direct children of this
stylesheet's root element; document('') addresses the stylesheet document
itself. The iso attribute of the matching entry is written out. When no
entry has a matching mmd attribute the selected node-set is empty and the
element content is the empty string.
-->
<xsl:element name="gmd:protocol">
<xsl:element name="gco:CharacterString">
<!--xsl:value-of select="mmd:type" / -->
<xsl:variable name="mmd_da_type" select="normalize-space(./mmd:type)" />
<xsl:variable name="mmd_da_mapping" select="document('')/*/mapping:data_access_type[@mmd=$mmd_da_type]/@iso" />
<xsl:value-of select="$mmd_da_mapping" />
</xsl:element>
</xsl:element>

<!-- gmd:linkage: string value of mmd:resource, written inside gmd:URL. -->
<xsl:element name="gmd:linkage">
<xsl:element name="gmd:URL">
<xsl:value-of select="mmd:resource" />
</xsl:element>
</xsl:element>

</xsl:element>
</xsl:element>

</xsl:template>

<!--
Mappings for data_access type specification.
The mmd attribute is the (whitespace-normalized) value of mmd:type; the iso
attribute is the protocol identifier written to gmd:protocol by the
mmd:data_access template.
The WWW identifiers use an ASCII double hyphen before their last component;
a typographic en dash in that position does not match the identifiers that
catalogue software compares against.
-->
<mapping:data_access_type iso="OGC:WMS" mmd="OGC WMS" />
<mapping:data_access_type iso="OGC:WCS" mmd="OGC WCS" />
<mapping:data_access_type iso="OGC:WFS" mmd="OGC WFS" />
<mapping:data_access_type iso="WWW:DOWNLOAD-1.0-ftp--download" mmd="FTP" />
<mapping:data_access_type iso="WWW:DOWNLOAD-1.0-http--download" mmd="HTTP" />
<mapping:data_access_type iso="WWW:LINK-1.0-http--opendap" mmd="OPeNDAP" />
</xsl:stylesheet>

Loading