diff --git a/singer/schema.py b/singer/schema.py index f54f97a..b4da4ac 100644 --- a/singer/schema.py +++ b/singer/schema.py @@ -19,6 +19,7 @@ 'type', 'additionalProperties', 'anyOf', + 'patternProperties', ] @@ -35,7 +36,7 @@ def __init__(self, type=None, format=None, properties=None, items=None, selected=None, inclusion=None, description=None, minimum=None, maximum=None, exclusiveMinimum=None, exclusiveMaximum=None, multipleOf=None, maxLength=None, minLength=None, additionalProperties=None, - anyOf=None): + anyOf=None, patternProperties=None): self.type = type self.properties = properties @@ -53,6 +54,7 @@ def __init__(self, type=None, format=None, properties=None, items=None, self.anyOf = anyOf self.format = format self.additionalProperties = additionalProperties + self.patternProperties = patternProperties def __str__(self): return json.dumps(self.to_dict()) @@ -79,8 +81,9 @@ def to_dict(self): if self.items is not None: result['items'] = self.items.to_dict() # pylint: disable=no-member + for key in STANDARD_KEYS: - if self.__dict__[key] is not None: + if self.__dict__.get(key) is not None: result[key] = self.__dict__[key] return result diff --git a/singer/transform.py b/singer/transform.py index fcb2f83..1a1bd85 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -1,4 +1,5 @@ import datetime +import re from jsonschema import RefResolver import singer.metadata @@ -162,7 +163,7 @@ def _transform_anyof(self, data, schema, path): self.errors.append(Error(path, data, schema)) return False, None - def _transform_object(self, data, schema, path): + def _transform_object(self, data, schema, path, pattern_properties): # We do not necessarily have a dict to transform here. The schema's # type could contain multiple possible values. Eg: # ["null", "object", "string"] @@ -170,14 +171,19 @@ def _transform_object(self, data, schema, path): return False, data # Don't touch an empty schema - if schema == {}: + if schema == {} and not pattern_properties: return True, data result = {} successes = [] for key, value in data.items(): - if key in schema: - success, subdata = self.transform_recur(value, schema[key], path + [key]) + # patternProperties are a map of {"pattern": { schema...}} + pattern_schemas = [schema for pattern, schema + in (pattern_properties or {}).items() + if re.match(pattern, key)] + if key in schema or pattern_schemas: + sub_schema = schema.get(key, {'anyOf': pattern_schemas}) + success, subdata = self.transform_recur(value, sub_schema, path + [key]) successes.append(success) result[key] = subdata else: @@ -238,7 +244,10 @@ def _transform(self, data, typ, schema, path): elif typ == "object": # Objects do not necessarily specify properties - return self._transform_object(data, schema.get("properties", {}), path) + return self._transform_object(data, + schema.get("properties", {}), + path, + schema.get(SchemaKey.pattern_properties)) elif typ == "array": return self._transform_array(data, schema["items"], path) diff --git a/tests/test_transform.py b/tests/test_transform.py index be25a1f..3ba57fa 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -360,3 +360,19 @@ def test_refs_resolve_preserves_existing_fields(self): result = resolve_schema_references(schema, refs) self.assertEqual(result['properties']['name']['type'], "string") self.assertEqual(result['properties']['name']['still_here'], "yep") + +class TestPatternProperties(unittest.TestCase): + def test_pattern_properties_match(self): + schema = {"type": "object", + "patternProperties": { ".+": {"type": "string"}}} + dict_value = {"name": "chicken", "unit_cost": '1.45', "SKU": '123456'} + expected = dict(dict_value) + self.assertEqual(expected, transform(dict_value, schema)) + + def test_pattern_properties_match_multiple(self): + schema = {"type": "object", + "patternProperties": { ".+?cost": {"type": "number"}, + ".+(?