Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for custom decimal string formatter #125

Merged
merged 8 commits into from
Mar 3, 2021
19 changes: 19 additions & 0 deletions singer/transform.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import decimal
import logging
import re
from jsonschema import RefResolver
Expand Down Expand Up @@ -262,6 +263,8 @@ def _transform(self, data, typ, schema, path):
if typ == "null":
if data is None or data == "":
return True, None
elif isinstance(data, decimal.Decimal) and data.is_nan():
dmosorast marked this conversation as resolved.
Show resolved Hide resolved
return True, None
else:
return False, None

Expand All @@ -271,7 +274,23 @@ def _transform(self, data, typ, schema, path):
return False, None

return True, data
elif schema.get("format") == "singer.decimal":
if data is None:
return False, None

if isinstance(data, (str, float, int)):
try:
return True, str(decimal.Decimal(str(data)).normalize())
except:
return False, None
elif isinstance(data, decimal.Decimal):
# NB: This treats all NaN values as "null"
if data.is_nan() or data.is_snan() or data.is_qnan():
dmosorast marked this conversation as resolved.
Show resolved Hide resolved
return True, None
else:
return True, str(data.normalize())

return False, None
elif typ == "object":
# Objects do not necessarily specify properties
return self._transform_object(data,
Expand Down
62 changes: 62 additions & 0 deletions tests/test_transform.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import unittest
import math
import decimal
from singer import transform
from singer.transform import *

Expand Down Expand Up @@ -49,6 +51,7 @@ def test_null_transform(self):
self.assertEqual(None, transform(None, {'type': [ 'string', 'null']}))
self.assertEqual(None, transform('', {'type': ['null']}))
self.assertEqual(None, transform(None, {'type': ['null']}))
self.assertEqual(None, transform(decimal.Decimal('NaN'), {'type': ['null']}))

def test_datetime_transform(self):
schema = {"type": "string", "format": "date-time"}
Expand Down Expand Up @@ -252,6 +255,65 @@ def test_null_object_transform(self):
empty_data = {'addrs': {}}
self.assertDictEqual(empty_data, transform(empty_data, schema))

def test_decimal_types_transform(self):
    """Verify transform() output for fields using the "singer.decimal" format.

    Every case runs against an object schema with one nullable string
    property, ``percentage``. The cases cover special values, numeric
    strings, floats, ints and ``decimal.Decimal`` instances, and finally
    check that a non-numeric string raises ``SchemaMismatch``.
    """
    schema = {"type": "object",
              "properties": {"percentage": {"type": ["null", "string"],
                                            "format": "singer.decimal"}}}

    # Special values: infinities are passed through as strings, while
    # Decimal NaN (quiet or signaling) and None are expected to become None.
    inf = {'percentage': 'Infinity'}
    negative_inf = {'percentage': '-Infinity'}
    root2 = {'percentage': math.sqrt(2)}
    nan = {'percentage': decimal.Decimal('NaN')}
    snan = {'percentage': decimal.Decimal('sNaN')}
    null = {'percentage': None}
    self.assertEqual(inf, transform(inf, schema))
    self.assertEqual(negative_inf, transform(negative_inf, schema))
    self.assertEqual({'percentage': str(math.sqrt(2))}, transform(root2, schema))
    self.assertEqual({'percentage': None}, transform(nan, schema))
    self.assertEqual({'percentage': None}, transform(snan, schema))
    self.assertEqual({'percentage': None}, transform(null, schema))

    # Numeric strings: the expected output is the normalized form, so
    # trailing zeros collapse into an exponent (e.g. '100' -> '1E+2').
    str1 = {'percentage': '0.1'}
    str2 = {'percentage': '0.0000000000001'}
    str3 = {'percentage': '1E+13'}
    str4 = {'percentage': '100'}
    str5 = {'percentage': '-100'}
    self.assertEqual(str1, transform(str1, schema))
    self.assertEqual({'percentage': '1E-13'}, transform(str2, schema))
    self.assertEqual({'percentage': '1E+13'}, transform(str3, schema))
    self.assertEqual({'percentage': '1E+2'}, transform(str4, schema))
    self.assertEqual({'percentage': '-1E+2'}, transform(str5, schema))

    # Floats: float1 carries more digits than a float can represent, so
    # the literal is simply 12.0 and the expected result is '12'.
    float1 = {'percentage': 12.0000000000000000000000000001234556}
    float2 = {'percentage': 0.0123}
    float3 = {'percentage': 100.0123}
    float4 = {'percentage': -100.0123}
    self.assertEqual({'percentage': '12'}, transform(float1, schema))
    self.assertEqual({'percentage': '0.0123'}, transform(float2, schema))
    self.assertEqual({'percentage': '100.0123'}, transform(float3, schema))
    self.assertEqual({'percentage': '-100.0123'}, transform(float4, schema))

    # Ints: same normalized representation as the string cases above.
    int1 = {'percentage': 123}
    int2 = {'percentage': 0}
    int3 = {'percentage': -1000}
    self.assertEqual({'percentage': '123'}, transform(int1, schema))
    self.assertEqual({'percentage': '0'}, transform(int2, schema))
    self.assertEqual({'percentage': '-1E+3'}, transform(int3, schema))

    # Decimal instances: all significant digits must survive untouched.
    dec1 = {'percentage': decimal.Decimal('1.1010101')}
    dec2 = {'percentage': decimal.Decimal('.111111111111111111111111')}
    dec3 = {'percentage': decimal.Decimal('-.111111111111111111111111')}
    dec4 = {'percentage': decimal.Decimal('100')}
    self.assertEqual({'percentage': '1.1010101'}, transform(dec1, schema))
    self.assertEqual({'percentage': '0.111111111111111111111111'}, transform(dec2, schema))
    self.assertEqual({'percentage': '-0.111111111111111111111111'}, transform(dec3, schema))
    self.assertEqual({'percentage': '1E+2'}, transform(dec4, schema))

    # A string that is not a number matches neither "null" nor the
    # decimal format, so the transform must fail loudly.
    bad1 = {'percentage': 'fsdkjl'}
    with self.assertRaises(SchemaMismatch):
        transform(bad1, schema)


class TestTransformsWithMetadata(unittest.TestCase):

def test_drops_no_data_when_not_dict(self):
Expand Down