Skip to content

Commit

Permalink
Python: Add all Iceberg types (#3234)
Browse files Browse the repository at this point in the history
  • Loading branch information
jun-he authored Oct 21, 2021
1 parent 7f687d5 commit 9f47e15
Show file tree
Hide file tree
Showing 2 changed files with 229 additions and 0 deletions.
144 changes: 144 additions & 0 deletions python/src/iceberg/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

class Type:
    """Base class for every Iceberg type.

    A type carries two fixed strings, one returned by ``str()`` and one
    returned by ``repr()``, plus a flag recording whether it is primitive.

    Args:
        type_string: Text returned by ``str()``.
        repr_string: Text returned by ``repr()``.
        is_primitive: Flag exposed through :attr:`is_primitive`; defaults
            to ``False``.
    """

    def __init__(self, type_string: str, repr_string: str, is_primitive=False):
        self._is_primitive = is_primitive
        self._repr_string = repr_string
        self._type_string = type_string

    @property
    def is_primitive(self) -> bool:
        """Return the primitive flag given at construction time."""
        return self._is_primitive

    def __str__(self):
        return self._type_string

    def __repr__(self):
        return self._repr_string


class FixedType(Type):
    """Primitive type parameterized by a length.

    ``str()`` gives ``fixed[<length>]`` and ``repr()`` gives
    ``FixedType(<length>)``.
    """

    def __init__(self, length: int):
        type_string = f"fixed[{length}]"
        repr_string = f"FixedType({length})"
        super().__init__(type_string, repr_string, is_primitive=True)
        self._length = length

    @property
    def length(self) -> int:
        """Return the length this type was constructed with."""
        return self._length


class DecimalType(Type):
    """Primitive type parameterized by a precision and a scale.

    ``str()`` gives ``decimal(<precision>, <scale>)`` and ``repr()`` gives
    ``DecimalType(<precision>, <scale>)``.
    """

    def __init__(self, precision: int, scale: int):
        type_string = f"decimal({precision}, {scale})"
        repr_string = f"DecimalType({precision}, {scale})"
        super().__init__(type_string, repr_string, is_primitive=True)
        self._scale = scale
        self._precision = precision

    @property
    def precision(self) -> int:
        """Return the precision this type was constructed with."""
        return self._precision

    @property
    def scale(self) -> int:
        """Return the scale this type was constructed with."""
        return self._scale


class NestedField:
    """A named, numbered field holding a type, used inside nested types.

    Args:
        is_optional: ``True`` for an optional field, ``False`` for a
            required one.
        field_id: Identifier of the field.
        name: Name of the field.
        field_type: Type of the field's value.
        doc: Optional documentation text; ``None`` when there is none.
    """

    def __init__(self, is_optional: bool, field_id: int, name: str, field_type: "Type", doc=None):
        self._is_optional = is_optional
        self._id = field_id
        self._name = name
        self._type = field_type
        self._doc = doc

    @property
    def is_optional(self) -> bool:
        """Return ``True`` when the field is optional."""
        return self._is_optional

    @property
    def is_required(self) -> bool:
        """Return ``True`` when the field is not optional."""
        return not self._is_optional

    @property
    def field_id(self) -> int:
        """Return the identifier of the field."""
        return self._id

    @property
    def name(self) -> str:
        """Return the name of the field."""
        return self._name

    @property
    def type(self) -> "Type":
        """Return the type of the field's value."""
        return self._type

    @property
    def doc(self):
        """Return the documentation text, or ``None`` when none was given."""
        return self._doc

    def __repr__(self):
        return (f"NestedField({self._is_optional}, {self._id}, "
                f"{self._name!r}, {self._type!r}, {self._doc!r})")

    def __str__(self):
        requiredness = "optional" if self._is_optional else "required"
        description = f"{self._id}: {self._name}: {requiredness} {self._type}"
        # The doc suffix is appended to the description rather than chosen
        # instead of it, so a documented field still prints its id, name
        # and type.
        if self._doc is None:
            return description
        return f"{description} ({self._doc})"


class StructType(Type):
    """Non-primitive type built from a list of fields.

    ``str()`` gives ``struct<...>`` with the ``str()`` of every field joined
    by ``", "``; ``repr()`` gives ``StructType(<repr of the list>)``. Both
    strings are computed once, in the constructor.
    """

    def __init__(self, fields: list):
        joined_fields = ", ".join(str(field) for field in fields)
        type_string = f"struct<{joined_fields}>"
        repr_string = f"StructType({fields!r})"
        super().__init__(type_string, repr_string)
        self._fields = fields

    @property
    def fields(self) -> list:
        """Return the list of fields passed to the constructor."""
        return self._fields


class ListType(Type):
    """Non-primitive type described by a single element field.

    ``str()`` gives ``list<<element type>>``; ``repr()`` gives
    ``ListType(<repr of the element field>)``.
    """

    def __init__(self, element_field: NestedField):
        type_string = f"list<{element_field.type}>"
        repr_string = f"ListType({element_field!r})"
        super().__init__(type_string, repr_string)
        self._element_field = element_field

    @property
    def element(self) -> NestedField:
        """Return the element field passed to the constructor."""
        return self._element_field


class MapType(Type):
    """Non-primitive type described by a key field and a value field.

    ``str()`` gives ``map<<key type>, <value type>>``; ``repr()`` gives
    ``MapType(<repr of key field>, <repr of value field>)``.
    """

    def __init__(self, key_field: NestedField, value_field: NestedField):
        type_string = f"map<{key_field.type}, {value_field.type}>"
        repr_string = f"MapType({key_field!r}, {value_field!r})"
        super().__init__(type_string, repr_string)
        self._value_field = value_field
        self._key_field = key_field

    @property
    def key(self) -> NestedField:
        """Return the key field passed to the constructor."""
        return self._key_field

    @property
    def value(self) -> NestedField:
        """Return the value field passed to the constructor."""
        return self._value_field


# Module-level instances for the primitive types that take no parameters.
# Each one's repr() is its own variable name.

# Boolean and numeric types.
BooleanType = Type("boolean", "BooleanType", is_primitive=True)
IntegerType = Type("int", "IntegerType", is_primitive=True)
LongType = Type("long", "LongType", is_primitive=True)
FloatType = Type("float", "FloatType", is_primitive=True)
DoubleType = Type("double", "DoubleType", is_primitive=True)

# Date and time types.
DateType = Type("date", "DateType", is_primitive=True)
TimeType = Type("time", "TimeType", is_primitive=True)
TimestampType = Type("timestamp", "TimestampType", is_primitive=True)
TimestamptzType = Type("timestamptz", "TimestamptzType", is_primitive=True)

# String, identifier and binary types.
StringType = Type("string", "StringType", is_primitive=True)
UUIDType = Type("uuid", "UUIDType", is_primitive=True)
BinaryType = Type("binary", "BinaryType", is_primitive=True)
85 changes: 85 additions & 0 deletions python/tests/test_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from iceberg.types import (BinaryType, BooleanType, DateType, DecimalType, DoubleType, FixedType,
FloatType, IntegerType, ListType, LongType, MapType, NestedField, StringType,
StructType, TimestampType, TimestamptzType, TimeType, UUIDType)
import pytest


@pytest.mark.parametrize(
    "input_type",
    [
        BooleanType,
        IntegerType,
        LongType,
        FloatType,
        DoubleType,
        DateType,
        TimeType,
        TimestampType,
        TimestamptzType,
        StringType,
        UUIDType,
        BinaryType,
    ],
)
def test_repr_primitive_types(input_type):
    """Evaluating the repr of a primitive type yields an equal object."""
    round_tripped = eval(repr(input_type))
    assert input_type == round_tripped


def test_fixed_type():
    """FixedType exposes its length and its repr evaluates to an equivalent type."""
    fixed = FixedType(5)
    assert fixed.length == 5
    rebuilt = eval(repr(fixed))
    assert str(fixed) == str(rebuilt)


def test_decimal_type():
    """DecimalType exposes precision and scale and its repr round-trips."""
    decimal = DecimalType(9, 2)
    assert decimal.precision == 9
    assert decimal.scale == 2
    rebuilt = eval(repr(decimal))
    assert str(decimal) == str(rebuilt)


def test_struct_type():
    """A struct with a nested struct keeps its fields and its repr round-trips."""
    inner_struct = StructType([
        NestedField(True, 4, "optional_field", DecimalType(8, 2)),
        NestedField(False, 5, "required_field", LongType),
    ])
    struct = StructType([
        NestedField(True, 1, "optional_field", IntegerType),
        NestedField(False, 2, "required_field", FixedType(5)),
        NestedField(False, 3, "required_field", inner_struct),
    ])
    assert len(struct.fields) == 3
    rebuilt = eval(repr(struct))
    assert str(struct) == str(rebuilt)


def test_list_type():
    """A list of structs exposes its element field and its repr round-trips."""
    element_struct = StructType([
        NestedField(True, 2, "optional_field", DecimalType(8, 2)),
        NestedField(False, 3, "required_field", LongType),
    ])
    list_type = ListType(NestedField(False, 1, "required_field", element_struct))
    assert isinstance(list_type.element.type, StructType)
    assert len(list_type.element.type.fields) == 2
    assert list_type.element.field_id == 1
    rebuilt = eval(repr(list_type))
    assert str(list_type) == str(rebuilt)


def test_map_type():
    """A map exposes its key and value fields and its repr round-trips."""
    key_field = NestedField(True, 1, "optional_field", DoubleType)
    value_field = NestedField(False, 2, "required_field", UUIDType)
    map_type = MapType(key_field, value_field)
    assert map_type.key.type == DoubleType
    assert map_type.key.field_id == 1
    assert map_type.value.type == UUIDType
    assert map_type.value.field_id == 2
    rebuilt = eval(repr(map_type))
    assert str(map_type) == str(rebuilt)


def test_nested_field():
    """An optional field wrapping a struct reports its attributes and its repr round-trips."""
    list_type = ListType(NestedField(False, 3, "required_field3", DoubleType))
    map_type = MapType(
        NestedField(True, 5, "optional_field5", TimeType),
        NestedField(False, 6, "required_field6", UUIDType),
    )
    struct = StructType([
        NestedField(True, 2, "optional_field2", list_type),
        NestedField(False, 4, "required_field4", map_type),
    ])
    field = NestedField(True, 1, "optional_field1", struct)
    assert field.is_optional
    assert not field.is_required
    assert field.field_id == 1
    assert isinstance(field.type, StructType)
    rebuilt = eval(repr(field))
    assert str(field) == str(rebuilt)

0 comments on commit 9f47e15

Please sign in to comment.