Skip to content

Commit

Permalink
Now the types need to pass through Numba. (#31)
Browse files Browse the repository at this point in the history
The types pass through Numba: `NumpyArray` takes a literal `Type` (known at compile-time, because deriving it is complicated) and all other arrays take a runtime `Type` (because `RegularType`'s `size` needs to change in response to some slice lengths, known only at runtime).

I think I forgot to set the dynamic `RegularType.size` (after all that effort!). At least it will be easy to add.

If an array `isbare`, its Type is not propagated through Numba, so that `isbare` is preserved when the array comes back out. This is only a streamlining: all current tests also pass without it.

* [WIP] Now the types need to pass through Numba.

* Move util.py to _util.py; it's not for users.

* Start the types-in-numba task.

* Add namespace qualifier to all Awkward types in Numba.

* Type wrapper can be very simple...

* RecordArray and RecordType have constructors based on an explicit lookup/reverselookup, so they can be round-tripped through Pickle.

* Working on pickleability of Types.

* Still working on pickleability of Types.

* All types are pickleable except for DressedType.

* All types are pickleable; boxing ArrayType works.

* Punt on Python 2.7 for pickling in pybind11 (and hence passing Types through Numba).

* Types in repr print-out.

* Working on unfinished PR028 issue: RegularType in NumpyArray.

* NumpyArray::type is now correct.

* Maybe fixed all the Windows bugs.

* Types pass through Numba.

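* Added placeholders for all Type::none() insertions, just as we previously did for C++. Next: add a property that is [inline code lost in extraction; likely `true`] for bare types and use that instead of [inline code lost in extraction; likely `Type::none()`].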

* Fixed bug in RegularArray/List*Array::innertype.

* NumpyArray,RegularArray,List*Array::box are all type-aware, RecordArray and Record are next.

* All *Array::box methods are type-aware.

* Fix test to be dict-order independent.

* Types will have to become runtime objects in Numba because of RegularType.

* Passing through types will require more thought.

* Both literal and by-value Types work; next, use literal for NumpyArray and by-value for everything else.

* All but RecordArray works.

* All Types in Numba work, for all array types.

* This should fix old tests.

* Only propagate Types through Numba if necessary. Also, reinstate 32-bit Python 3.7 because Numba isn't tested in Python 3.8.
  • Loading branch information
jpivarski authored Dec 11, 2019
1 parent 95a0512 commit 97ffcc4
Show file tree
Hide file tree
Showing 30 changed files with 1,100 additions and 220 deletions.
4 changes: 4 additions & 0 deletions .ci/azure-buildtest-awkward.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ jobs:
python.version: "3.6"
python.architecture: "x64"
numpy.version: "latest"
"py37-32bit":
python.version: "3.7"
python.architecture: "x86"
numpy.version: "latest"
"py37-64bit":
python.version: "3.7"
python.architecture: "x64"
Expand Down
2 changes: 1 addition & 1 deletion VERSION_INFO
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.28
0.1.31
1 change: 1 addition & 0 deletions awkward1/_numba/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import awkward1._numba.libawkward
import awkward1._numba.util
import awkward1._numba.identity
import awkward1._numba.types
import awkward1._numba.content
import awkward1._numba.iterator
import awkward1._numba.fillable
Expand Down
29 changes: 21 additions & 8 deletions awkward1/_numba/array/emptyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@

@numba.extending.typeof_impl.register(awkward1.layout.EmptyArray)
def typeof(val, c):
return EmptyArrayType(numba.typeof(val.id))
return EmptyArrayType(numba.typeof(val.id), numba.none if val.isbare else numba.typeof(val.type))

class EmptyArrayType(content.ContentType):
def __init__(self, idtpe):
super(EmptyArrayType, self).__init__(name="EmptyArrayType(id={})".format(idtpe.name))
def __init__(self, idtpe, typetpe):
super(EmptyArrayType, self).__init__(name="ak::EmptyArrayType(id={0}, type={1})".format(idtpe.name, typetpe.name))
self.idtpe = idtpe
self.typetpe = typetpe

@property
def ndim(self):
Expand Down Expand Up @@ -73,28 +74,40 @@ def __init__(self, dmm, fe_type):
members = []
if fe_type.idtpe != numba.none:
members.append(("id", fe_type.idtpe))
if fe_type.typetpe != numba.none:
members.append(("type", fe_type.typetpe))
super(EmptyArrayModel, self).__init__(dmm, fe_type, members)

@numba.extending.unbox(EmptyArrayType)
def unbox(tpe, obj, c):
proxyout = numba.cgutils.create_struct_proxy(tpe)(c.context, c.builder)
if tpe.idtpe != numba.none:
id_obj = c.pyapi.obj_getattr_string(obj, "id")
id_obj = c.pyapi.object_getattr_string(obj, "id")
proxyout.id = c.pyapi.to_native_value(tpe.idtpe, id_obj).value
c.pyapi.decref(id_obj)
if tpe.typetpe != numba.none:
type_obj = c.pyapi.object_getattr_string(obj, "type")
proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type_obj).value
c.pyapi.decref(type_obj)
is_error = numba.cgutils.is_not_null(c.builder, c.pyapi.err_occurred())
return numba.extending.NativeValue(proxyout._getvalue(), is_error)

@numba.extending.box(EmptyArrayType)
def box(tpe, val, c):
EmptyArray_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.EmptyArray))
proxyin = numba.cgutils.create_struct_proxy(tpe)(c.context, c.builder, value=val)
args = []
if tpe.idtpe != numba.none:
id_obj = c.pyapi.from_native_value(tpe.idtpe, proxyin.id, c.env_manager)
out = c.pyapi.call_function_objargs(EmptyArray_obj, (id_obj,))
c.pyapi.decref(id_obj)
args.append(c.pyapi.from_native_value(tpe.idtpe, proxyin.id, c.env_manager))
else:
args.append(c.pyapi.make_none())
if tpe.typetpe != numba.none:
args.append(c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager))
else:
out = c.pyapi.call_function_objargs(EmptyArray_obj, ())
args.append(c.pyapi.make_none())
out = c.pyapi.call_function_objargs(EmptyArray_obj, args)
for x in args:
c.pyapi.decref(x)
c.pyapi.decref(EmptyArray_obj)
return out

Expand Down
68 changes: 40 additions & 28 deletions awkward1/_numba/array/listarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,16 @@
@numba.extending.typeof_impl.register(awkward1.layout.ListArrayU32)
@numba.extending.typeof_impl.register(awkward1.layout.ListArray64)
def typeof(val, c):
return ListArrayType(numba.typeof(numpy.asarray(val.starts)), numba.typeof(numpy.asarray(val.stops)), numba.typeof(val.content), numba.typeof(val.id))
return ListArrayType(numba.typeof(numpy.asarray(val.starts)), numba.typeof(numpy.asarray(val.stops)), numba.typeof(val.content), numba.typeof(val.id), numba.none if val.isbare else numba.typeof(val.type))

class ListArrayType(content.ContentType):
def __init__(self, startstpe, stopstpe, contenttpe, idtpe):
def __init__(self, startstpe, stopstpe, contenttpe, idtpe, typetpe):
assert startstpe == stopstpe
super(ListArrayType, self).__init__(name="ListArray{}{}Type({}, id={})".format("" if startstpe.dtype.signed else "U", startstpe.dtype.bitwidth, contenttpe.name, idtpe.name))
super(ListArrayType, self).__init__(name="ak::ListArray{0}{1}Type({2}, id={3}, type={4})".format("" if startstpe.dtype.signed else "U", startstpe.dtype.bitwidth, contenttpe.name, idtpe.name, typetpe.name))
self.startstpe = startstpe
self.contenttpe = contenttpe
self.idtpe = idtpe
self.typetpe = typetpe

@property
def stopstpe(self):
Expand All @@ -47,10 +48,10 @@ def getitem_range(self):
return self

def getitem_str(self, key):
return ListArrayType(self.startstpe, self.stopstpe, self.contenttpe.getitem_str(key), self.idtpe)
return ListArrayType(self.startstpe, self.stopstpe, self.contenttpe.getitem_str(key), self.idtpe, numba.none)

def getitem_tuple(self, wheretpe):
nexttpe = ListArrayType(util.index64tpe, util.index64tpe, self, numba.none)
nexttpe = ListArrayType(util.index64tpe, util.index64tpe, self, numba.none, numba.none)
outtpe = nexttpe.getitem_next(wheretpe, False)
return outtpe.getitem_int()

Expand All @@ -66,7 +67,7 @@ def getitem_next(self, wheretpe, isadvanced):

elif isinstance(headtpe, numba.types.SliceType):
contenttpe = self.contenttpe.carry().getitem_next(tailtpe, isadvanced)
return awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(self.indexname), contenttpe, self.idtpe)
return awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(self.indexname), contenttpe, self.idtpe, self.typetpe)

elif isinstance(headtpe, numba.types.StringLiteral):
return self.getitem_str(headtpe.literal_value).getitem_next(tailtpe, isadvanced)
Expand All @@ -82,7 +83,7 @@ def getitem_next(self, wheretpe, isadvanced):
raise NotImplementedError("array.ndim != 1")
contenttpe = self.contenttpe.carry().getitem_next(tailtpe, True)
if not isadvanced:
return awkward1._numba.array.regulararray.RegularArrayType(contenttpe, self.idtpe)
return awkward1._numba.array.regulararray.RegularArrayType(contenttpe, self.idtpe, self.typetpe)
else:
return contenttpe

Expand Down Expand Up @@ -128,6 +129,8 @@ def __init__(self, dmm, fe_type):
("content", fe_type.contenttpe)]
if fe_type.idtpe != numba.none:
members.append(("id", fe_type.idtpe))
if fe_type.typetpe != numba.none:
members.append(("type", fe_type.typetpe))
super(ListArrayModel, self).__init__(dmm, fe_type, members)

@numba.extending.unbox(ListArrayType)
Expand All @@ -152,6 +155,12 @@ def unbox(tpe, obj, c):
id_obj = c.pyapi.object_getattr_string(obj, "id")
proxyout.id = c.pyapi.to_native_value(tpe.idtpe, id_obj).value
c.pyapi.decref(id_obj)
if tpe.typetpe != numba.none:
type1_obj = c.pyapi.object_getattr_string(obj, "type")
type2_obj = c.pyapi.object_getattr_string(type1_obj, "type")
proxyout.type = c.pyapi.to_native_value(tpe.typetpe, type2_obj).value
c.pyapi.decref(type1_obj)
c.pyapi.decref(type2_obj)
is_error = numba.cgutils.is_not_null(c.builder, c.pyapi.err_occurred())
return numba.extending.NativeValue(proxyout._getvalue(), is_error)

Expand All @@ -167,7 +176,7 @@ def box(tpe, val, c):
Index_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.IndexU32))
ListArray_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.ListArrayU32))
else:
raise AssertionError("unrecognized index type: {}".format(tpe.indexname))
raise AssertionError("unrecognized index type: {0}".format(tpe.indexname))
proxyin = numba.cgutils.create_struct_proxy(tpe)(c.context, c.builder, value=val)
startsarray_obj = c.pyapi.from_native_value(tpe.startstpe, proxyin.starts, c.env_manager)
stopsarray_obj = c.pyapi.from_native_value(tpe.stopstpe, proxyin.stops, c.env_manager)
Expand All @@ -177,16 +186,19 @@ def box(tpe, val, c):
c.pyapi.decref(Index_obj)
c.pyapi.decref(startsarray_obj)
c.pyapi.decref(stopsarray_obj)
args = [starts_obj, stops_obj, content_obj]
if tpe.idtpe != numba.none:
id_obj = c.pyapi.from_native_value(tpe.idtpe, proxyin.id, c.env_manager)
out = c.pyapi.call_function_objargs(ListArray_obj, (starts_obj, stops_obj, content_obj, id_obj))
c.pyapi.decref(id_obj)
args.append(c.pyapi.from_native_value(tpe.idtpe, proxyin.id, c.env_manager))
else:
args.append(c.pyapi.make_none())
if tpe.typetpe != numba.none:
args.append(c.pyapi.from_native_value(tpe.typetpe, proxyin.type, c.env_manager))
else:
out = c.pyapi.call_function_objargs(ListArray_obj, (starts_obj, stops_obj, content_obj))
args.append(c.pyapi.make_none())
out = c.pyapi.call_function_objargs(ListArray_obj, args)
for x in args:
c.pyapi.decref(x)
c.pyapi.decref(ListArray_obj)
c.pyapi.decref(starts_obj)
c.pyapi.decref(stops_obj)
c.pyapi.decref(content_obj)
return out

@numba.extending.lower_builtin(len, ListArrayType)
Expand Down Expand Up @@ -293,7 +305,7 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
elif arraytpe.indexname == "U32":
kernel = cpu.kernels.awkward_listarrayU32_getitem_next_at_64
else:
raise AssertionError("unrecognized index type: {}".format(arraytpe.indexname))
raise AssertionError("unrecognized index type: {0}".format(arraytpe.indexname))

nextcarry = util.newindex64(context, builder, numba.int64, lenstarts)
util.call(context, builder, kernel,
Expand All @@ -304,7 +316,7 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
context.get_constant(numba.int64, 0),
context.get_constant(numba.int64, 0),
util.cast(context, builder, headtpe, numba.int64, headval)),
"in {}, indexing error".format(arraytpe.shortname))
"in {0}, indexing error".format(arraytpe.shortname))
nextcontenttpe = arraytpe.contenttpe.carry()
nextcontentval = arraytpe.contenttpe.lower_carry(context, builder, arraytpe.contenttpe, util.index64tpe, proxyin.content, nextcarry)
return nextcontenttpe.lower_getitem_next(context, builder, nextcontenttpe, tailtpe, nextcontentval, tailval, advanced)
Expand All @@ -328,7 +340,7 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
determine_total = cpu.kernels.awkward_listarrayU32_getitem_next_range_counts_64
fill_nextadvanced = cpu.kernels.awkward_listarrayU32_getitem_next_range_spreadadvanced_64
else:
raise AssertionError("unrecognized index type: {}".format(arraytpe.indexname))
raise AssertionError("unrecognized index type: {0}".format(arraytpe.indexname))

carrylength = numba.cgutils.alloca_once(builder, context.get_value_type(numba.int64))
util.call(context, builder, determine_carrylength,
Expand All @@ -341,7 +353,7 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
util.cast(context, builder, numba.intp, numba.int64, proxyslicein.start),
util.cast(context, builder, numba.intp, numba.int64, proxyslicein.stop),
util.cast(context, builder, numba.intp, numba.int64, proxyslicein.step)),
"in {}, indexing error".format(arraytpe.shortname))
"in {0}, indexing error".format(arraytpe.shortname))

nextoffsets = util.newindex(arraytpe.indexname, context, builder, numba.int64, builder.add(lenstarts, context.get_constant(numba.int64, 1)))
nextcarry = util.newindex64(context, builder, numba.int64, builder.load(carrylength))
Expand All @@ -356,7 +368,7 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
util.cast(context, builder, numba.intp, numba.int64, proxyslicein.start),
util.cast(context, builder, numba.intp, numba.int64, proxyslicein.stop),
util.cast(context, builder, numba.intp, numba.int64, proxyslicein.step)),
"in {}, indexing error".format(arraytpe.shortname))
"in {0}, indexing error".format(arraytpe.shortname))

nextcontenttpe = arraytpe.contenttpe.carry()
nextcontentval = arraytpe.contenttpe.lower_carry(context, builder, arraytpe.contenttpe, util.index64tpe, proxyin.content, nextcarry)
Expand All @@ -371,20 +383,20 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
(total,
util.arrayptr(context, builder, util.indextpe(arraytpe.indexname), nextoffsets),
lenstarts),
"in {}, indexing error".format(arraytpe.shortname))
"in {0}, indexing error".format(arraytpe.shortname))

nextadvanced = util.newindex64(context, builder, numba.int64, builder.load(total))
util.call(context, builder, fill_nextadvanced,
(util.arrayptr(context, builder, util.index64tpe, nextadvanced),
util.arrayptr(context, builder, util.index64tpe, advanced),
util.arrayptr(context, builder, util.indextpe(arraytpe.indexname), nextoffsets),
lenstarts),
"in {}, indexing error".format(arraytpe.shortname))
"in {0}, indexing error".format(arraytpe.shortname))

outcontenttpe = nextcontenttpe.getitem_next(tailtpe, True)
outcontentval = nextcontenttpe.lower_getitem_next(context, builder, nextcontenttpe, tailtpe, nextcontentval, tailval, nextadvanced)

outtpe = awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(arraytpe.indexname), outcontenttpe, arraytpe.idtpe)
outtpe = awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(arraytpe.indexname), outcontenttpe, arraytpe.idtpe, arraytpe.typetpe)
proxyout = numba.cgutils.create_struct_proxy(outtpe)(context, builder)
proxyout.offsets = nextoffsets
proxyout.content = outcontentval
Expand Down Expand Up @@ -418,7 +430,7 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
elif arraytpe.indexname == "U32":
kernel = cpu.kernels.awkward_listarrayU32_getitem_next_array_64
else:
raise AssertionError("unrecognized index type: {}".format(arraytpe.indexname))
raise AssertionError("unrecognized index type: {0}".format(arraytpe.indexname))

lencarry = builder.mul(lenstarts, lenflathead)
lenoffsets = builder.add(lenstarts, context.get_constant(numba.int64, 1))
Expand All @@ -436,15 +448,15 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
lenstarts,
lenflathead,
lencontent),
"in {}, indexing error".format(arraytpe.shortname))
"in {0}, indexing error".format(arraytpe.shortname))

nexttpe = arraytpe.contenttpe.carry()
nextval = arraytpe.contenttpe.lower_carry(context, builder, arraytpe.contenttpe, util.index64tpe, proxyin.content, nextcarry)

contenttpe = nexttpe.getitem_next(tailtpe, True)
contentval = nexttpe.lower_getitem_next(context, builder, nexttpe, tailtpe, nextval, tailval, nextadvanced)

outtpe = awkward1._numba.array.regulararray.RegularArrayType(contenttpe, arraytpe.idtpe)
outtpe = awkward1._numba.array.regulararray.RegularArrayType(contenttpe, arraytpe.idtpe, arraytpe.typetpe)
proxyout = numba.cgutils.create_struct_proxy(outtpe)(context, builder)
proxyout.content = contentval
proxyout.size = lenflathead
Expand All @@ -460,7 +472,7 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
elif arraytpe.indexname == "U32":
kernel = cpu.kernels.awkward_listarrayU32_getitem_next_array_advanced_64
else:
raise AssertionError("unrecognized index type: {}".format(arraytpe.indexname))
raise AssertionError("unrecognized index type: {0}".format(arraytpe.indexname))

nextcarry = util.newindex64(context, builder, numba.int64, lenstarts)
nextadvanced = util.newindex64(context, builder, numba.int64, lenstarts)
Expand All @@ -476,7 +488,7 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
lenstarts,
lenflathead,
lencontent),
"in {}, indexing error".format(arraytpe.shortname))
"in {0}, indexing error".format(arraytpe.shortname))

nexttpe = arraytpe.contenttpe.carry()
nextval = arraytpe.contenttpe.lower_carry(context, builder, arraytpe.contenttpe, util.index64tpe, proxyin.content, nextcarry)
Expand Down
Loading

0 comments on commit 97ffcc4

Please sign in to comment.