Skip to content
This repository has been archived by the owner on Jun 21, 2022. It is now read-only.

Add combinations #1

Closed
wants to merge 11 commits into from
12 changes: 12 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"python.unitTest.unittestArgs": [
"-v",
"-s",
"./tests",
"-p",
"test_*.py"
],
"python.unitTest.pyTestEnabled": false,
"python.unitTest.nosetestsEnabled": false,
"python.unitTest.unittestEnabled": true
}
143 changes: 143 additions & 0 deletions awkward/array/jagged.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ def parents(self):
out[starts[i]:stops[i]] = i
i += 1
return out


def __len__(self): # length is determined by starts
return len(self._starts) # data can grow by appending contents and stops before starts
Expand Down Expand Up @@ -406,6 +407,78 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
return None
else:
return JaggedArray(starts, stops, result)


def argproduct(self, other):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a small bit of documentation about what this function does.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check it out. I have added simple docstrings describing the usage

'''
Performs product (combinations) of current JaggedArray with JaggedArray `other`. Return the indices of the product.

Inputs: other; a JaggedArray instance.
Output: JaggedArray containing the two indices as an awkward.array.table.Table()

Example usage:
>>> arr1 = JaggedArray([0,1,4,4],[1,4,4,8],content=[0,1,2,3,4,5,6,7])
>>> arr2 = JaggedArray([0,1,1,4],[1,1,4,5],content=['z', 'a','b','c','d'])
>>> result = arr1.argproduct(arr2)
'''
import awkward.array.table
if not isinstance(other, JaggedArray):
raise ValueError("array given isn't instance of JaggedArray; need JaggedArrays to proceed")

if (len(self._starts) != len(other)):
raise ValueError("Number of events in each array must be equal")

starts1 = self._starts
stops1 = self._stops
counts1 = stops1 - starts1

starts2 = other.starts
stops2 = other.stops
counts2 = stops2 - starts2

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we check that counts1.shape == counts2.shape?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That won't be necessary, as we are already checking that the two starts arrays have the same length. The counts arrays have the same shape as the starts arrays.


pairs_counts = numpy.zeros(len(starts1)+1, dtype=self.INDEXTYPE)
pairs_counts[1:] = numpy.cumsum(counts1*counts2, dtype=self.INDEXTYPE)

def parents_from_offsets(offsets):
out = numpy.full(offsets[-1], -1, dtype=self.INDEXTYPE)
lenstarts = len(offsets)-1
i = 0
while i < lenstarts:
out[offsets[i]:offsets[i+1]] = i
i += 1
return out

pairs_indices = numpy.arange(pairs_counts[-1], dtype=self.INDEXTYPE)
pairs_parents = parents_from_offsets(pairs_counts)
pairs_parents = pairs_parents.astype(self.INDEXTYPE)

left = numpy.empty_like(pairs_indices)
right = numpy.empty_like(pairs_indices)

left[pairs_indices] = starts1[pairs_parents[pairs_indices]] + numpy.floor((pairs_indices - pairs_counts[pairs_parents[pairs_indices]])/counts2[pairs_parents[pairs_indices]]).astype(self.INDEXTYPE)
right[pairs_indices] = starts2[pairs_parents[pairs_indices]] + (pairs_indices - pairs_counts[pairs_parents[pairs_indices]]) - counts2[pairs_parents[pairs_indices]] * numpy.floor((pairs_indices - pairs_counts[pairs_parents[pairs_indices]])/counts2[pairs_parents[pairs_indices]])

return JaggedArray(pairs_counts[:-1], pairs_counts[1:], awkward.array.table.Table(pairs_indices[-1], left, right), writeable=self._writeable)

def product(self, other):
    '''
    Pair up the contents of this JaggedArray with those of `other`, event by event.

    The index pairs are obtained from `self.argproduct(other)` and then used
    to pick the matching elements out of both contents.

    Inputs: other; a JaggedArray instance with the same number of events.
    Output: JaggedArray whose content is an awkward.array.table.Table() with
            two columns: the selected elements of this array's content and
            the selected elements of `other`'s content.

    Example usage:
        >>> arr1 = JaggedArray([0,1,4,4],[1,4,4,8],content=[0,1,2,3,4,5,6,7])
        >>> arr2 = JaggedArray([0,1,1,4],[1,1,4,5],content=['z', 'a','b','c','d'])
        >>> result = arr1.product(arr2)
    '''
    import awkward.array.table

    index_pairs = self.argproduct(other)

    # NOTE(review): this reads the Table's private column mapping and assumes
    # it yields the columns in (left, right) order -- confirm against Table.
    columns = list(index_pairs._content._content.values())
    left_index = columns[0]
    right_index = columns[1]

    starts = index_pairs.starts
    stops = index_pairs.stops
    paired = awkward.array.table.Table(
        len(left_index),
        self._content[left_index],
        other.content[right_index],
    )
    return JaggedArray(starts, stops, paired)



class ByteJaggedArray(JaggedArray):
@classmethod
Expand Down Expand Up @@ -567,3 +640,73 @@ def tojagged(self, starts=None, stops=None, copy=True, writeable=True):
i += 1

return JaggedArray(starts, stops, content, writeable=writeable)

def argproduct(self, other):
    '''
    Compute the per-event cartesian product of this JaggedArray with `other`
    and return the *indices* (into each array's content) of every pair.

    Within an event the pairs are ordered with the index into this array
    varying slowest and the index into `other` varying fastest.

    Inputs: other; a JaggedArray instance with the same number of events.
    Output: JaggedArray whose content is an awkward.array.table.Table() with
            two columns: indices into this array's content and indices into
            `other`'s content.
    Raises: ValueError if `other` is not a JaggedArray or if the two arrays
            do not have the same number of events.

    Example usage:
        >>> arr1 = JaggedArray([0,1,4,4],[1,4,4,8],content=[0,1,2,3,4,5,6,7])
        >>> arr2 = JaggedArray([0,1,1,4],[1,1,4,5],content=['z', 'a','b','c','d'])
        >>> result = arr1.argproduct(arr2)
        # index columns: [0, 4, 5, 6, 7] and [0, 4, 4, 4, 4]
    '''
    import awkward.array.table

    if not isinstance(other, JaggedArray):
        raise ValueError("array given isn't instance of JaggedArray; need JaggedArrays to proceed")

    if len(self._starts) != len(other):
        raise ValueError("Number of events in each array must be equal")

    starts1 = self._starts
    counts1 = self._stops - starts1

    starts2 = other.starts
    counts2 = other.stops - starts2

    # Offsets of each event's block of pairs: event i owns
    # pairs_counts[i]:pairs_counts[i+1] in the flattened output.
    pairs_per_event = counts1 * counts2
    pairs_counts = numpy.zeros(len(starts1) + 1, dtype=self.INDEXTYPE)
    pairs_counts[1:] = numpy.cumsum(pairs_per_event, dtype=self.INDEXTYPE)
    npairs = int(pairs_counts[-1])

    # Event number of every pair; events that produce no pairs never appear.
    pairs_parents = numpy.repeat(numpy.arange(len(starts1), dtype=self.INDEXTYPE), numpy.diff(pairs_counts))

    # Position of each pair inside its own event's block, and the width of
    # that block's rows (the number of elements `other` has in the event).
    # The width is never zero here because only events with
    # counts1*counts2 > 0 contribute parents.
    local = numpy.arange(npairs, dtype=self.INDEXTYPE) - pairs_counts[pairs_parents]
    width = counts2[pairs_parents]

    # Integer floor-division / modulo instead of float division + floor, so
    # large indices are not rounded through float64.
    left = (starts1[pairs_parents] + local // width).astype(self.INDEXTYPE)
    right = (starts2[pairs_parents] + local % width).astype(self.INDEXTYPE)

    # The Table length is the number of pairs (not the last index, which is
    # one short and does not exist at all when there are no pairs).
    return JaggedArray(pairs_counts[:-1], pairs_counts[1:], awkward.array.table.Table(npairs, left, right), writeable=self._writeable)

def product(self, other):
    '''
    Pair up the contents of this JaggedArray with those of `other`, event by event.

    The index pairs are obtained from `self.argproduct(other)` and then used
    to pick the matching elements out of both contents.

    Inputs: other; a JaggedArray instance with the same number of events.
    Output: JaggedArray whose content is an awkward.array.table.Table() with
            two columns: the selected elements of this array's content and
            the selected elements of `other`'s content.

    Example usage:
        >>> arr1 = JaggedArray([0,1,4,4],[1,4,4,8],content=[0,1,2,3,4,5,6,7])
        >>> arr2 = JaggedArray([0,1,1,4],[1,1,4,5],content=['z', 'a','b','c','d'])
        >>> result = arr1.product(arr2)
    '''
    import awkward.array.table

    index_pairs = self.argproduct(other)

    # NOTE(review): this reads the Table's private column mapping and assumes
    # it yields the columns in (left, right) order -- confirm against Table.
    columns = list(index_pairs._content._content.values())
    left_index = columns[0]
    right_index = columns[1]

    starts = index_pairs.starts
    stops = index_pairs.stops
    paired = awkward.array.table.Table(
        len(left_index),
        self._content[left_index],
        other.content[right_index],
    )
    return JaggedArray(starts, stops, paired)
19 changes: 19 additions & 0 deletions tests/test_jagged.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,3 +198,22 @@ def test_bytejagged_set(self):
a[:] = JaggedArray.fromiter([[3, 2, 1], [], [5, 4]])
self.assertEqual(a.content.tobytes(), b"\xff\x00\x00\x00\x00\x03\x00\x00\x00\x02\x00\x00\x00\x01\x00\x00\x00\xff\xff\x05\x00\x00\x00\x04\x00\x00\x00\xff")
self.assertEqual([a[i].tolist() for i in range(len(a))], [[3, 2, 1], [], [5, 4]])

def test_jagged_argproduct(self):
    # Elements per event: arr1 -> [1, 3, 0, 4], arr2 -> [1, 0, 3, 1].
    # Only events 0 and 3 have elements on both sides, giving 1 + 4 pairs.
    starts1 = [0, 1, 4, 4]
    stops1 = [1, 4, 4, 8]

    starts2 = [0, 1, 1, 4]
    stops2 = [1, 1, 4, 5]

    arr1 = JaggedArray(starts1, stops1, content=[0, 1, 2, 3, 4, 5, 6, 7])
    arr2 = JaggedArray(starts2, stops2, content=['z', 'a', 'b', 'c', 'd'])

    # assertEqual on plain lists reports a readable diff and fails cleanly on
    # a length mismatch, unlike `(array == list).all()`.
    product_columns = list(arr1.product(arr2)._content._content.values())
    self.assertEqual(list(product_columns[0]), [0, 4, 5, 6, 7])
    self.assertEqual(list(product_columns[1]), ['z', 'd', 'd', 'd', 'd'])

    argproduct_columns = list(arr1.argproduct(arr2)._content._content.values())
    self.assertEqual(list(argproduct_columns[0]), [0, 4, 5, 6, 7])
    self.assertEqual(list(argproduct_columns[1]), [0, 4, 4, 4, 4])