From 9876fd675642db717733166f24f85180bd3a6c92 Mon Sep 17 00:00:00 2001 From: Jaydeep Nandi <34567389+Jayd-1234@users.noreply.github.com> Date: Thu, 28 Jun 2018 18:54:35 +0530 Subject: [PATCH 1/9] Add combinations Added preliminary version of combinations --- awkward/array/jagged.py | 63 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 6c494896..1fab9bb8 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -35,6 +35,7 @@ import awkward.array.base import awkward.util +from awkward import Table class JaggedArray(awkward.array.base.AwkwardArray): @classmethod @@ -406,6 +407,68 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return None else: return JaggedArray(starts, stops, result) + + def vectorized_parents(self, offsets, content): + if offsets[-1] != len(content): + raise ValueError("Given offsets isn't compatible with content length") + + index = numpy.arange(len(content), dtype=int) + below = numpy.zeros(len(content), dtype=int) + above = numpy.ones(len(content), dtype=int) * (len(offsets) - 1) + while True: + middle = (below + above) // 2 + + change_below = offsets[middle + 1] <= index + change_above = offsets[middle] > index + + if not numpy.bitwise_or(change_below, change_above).any(): + break + else: + below = numpy.where(change_below, middle + 1, below) + above = numpy.where(change_above, middle - 1, above) + return middle + + def combinations(self, Jagged_arr1, Jagged_arr2,return_indices=False, writeable=True): + if Jagged_arr1 is None: + raise ValueError("Only one array is provided, need two to proceed") + if Jagged_arr2 is None: + raise ValueError("Only one array is provided, need two to proceed") + if not (isinstance(Jagged_arr1, JaggedArray) or isinstance(Jagged_arr2, JaggedArray)): + raise ValueError("arrays given aren't instances of JaggedArray; need JaggedArrays to proceed") + + if (len(Jagged_arr1) != len(Jagged_arr2)): + raise ValueError("Number of events in each array must be equal") + + # Probably unnecessary. Can be generated on the fly + starts1 = Jagged_arr1.starts + stops1 = Jagged_arr1.stops + counts1 = stops1 - starts1 + + starts2 = Jagged_arr2.starts + stops2 = Jagged_arr2.stops + counts2 = stops2 - starts2 + + NUMEVENTS = len(Jagged_arr1) + pairs_counts = numpy.zeros(NUMEVENTS+1, dtype=numpy.int) + pairs_counts[1:] = numpy.cumsum(counts1*counts2) + pairs_counts = pairs_counts.astype(numpy.int) + + pairs_indices = numpy.arange(pairs_counts[-1]).astype(numpy.int) + pairs_parents = self.vectorized_parents(pairs_counts, pairs_indices) + pairs_parents = pairs_parents.astype(numpy.int) + + left = numpy.empty_like(pairs_indices) + right = numpy.empty_like(pairs_indices) + + left[pairs_indices] = starts1[pairs_parents[pairs_indices]] + numpy.floor((pairs_indices-pairs_counts[pairs_parents[pairs_indices]])/counts2[pairs_parents[pairs_indices]]).astype(numpy.int) + right[pairs_indices] = starts2[pairs_parents[pairs_indices]]+(pairs_indices-pairs_counts[pairs_parents[pairs_indices]])-counts2[pairs_parents[pairs_indices]]*numpy.floor((pairs_indices-pairs_counts[pairs_parents[pairs_indices]])/counts2[pairs_parents[pairs_indices]]) + + if return_indices: + return JaggedArray(pairs_counts[:-1],pairs_counts[1:], Table(pairs_indices[-1], left, right)) + else: + return JaggedArray(pairs_counts[:-1],pairs_counts[1:], Table(pairs_indices[-1],Jagged_arr1.content[left], Jagged_arr2.content[right])) + + class ByteJaggedArray(JaggedArray): @classmethod From abe229e295fd57fec3960272b797ea4d5ecdf764 Mon Sep 17 00:00:00 2001 From: Jaydeep Nandi <34567389+Jayd-1234@users.noreply.github.com> Date: Thu, 28 Jun 2018 19:12:46 +0530 Subject: [PATCH 2/9] Update jagged.py --- awkward/array/jagged.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 1fab9bb8..3704e2fa 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -35,7 +35,7 @@ import awkward.array.base import awkward.util -from awkward import Table +from awkward.array.table import Table class JaggedArray(awkward.array.base.AwkwardArray): @classmethod From c76957148d030518ad3843c0910cb346f8b98047 Mon Sep 17 00:00:00 2001 From: Jaydeep Nandi <34567389+Jayd-1234@users.noreply.github.com> Date: Thu, 28 Jun 2018 19:24:18 +0530 Subject: [PATCH 3/9] Change import --- awkward/array/jagged.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 1fab9bb8..3704e2fa 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -35,7 +35,7 @@ import awkward.array.base import awkward.util -from awkward import Table +from awkward.array.table import Table class JaggedArray(awkward.array.base.AwkwardArray): @classmethod From 9f199a6539af4b1a8393cb39458b576a85ceb13b Mon Sep 17 00:00:00 2001 From: Jaydeep Nandi <34567389+Jayd-1234@users.noreply.github.com> Date: Thu, 28 Jun 2018 20:19:55 +0530 Subject: [PATCH 4/9] Changes done --- awkward/array/jagged.py | 98 +++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 43 deletions(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 3704e2fa..898fc2b6 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -35,7 +35,6 @@ import awkward.array.base import awkward.util -from awkward.array.table import Table class JaggedArray(awkward.array.base.AwkwardArray): @classmethod @@ -209,6 +208,7 @@ def parents(self): out[starts[i]:stops[i]] = i i += 1 return out + def __len__(self): # length is determined by starts return len(self._starts) # data can grow by appending contents and stops before starts @@ -408,66 +408,78 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): else: return JaggedArray(starts, stops, result) - def vectorized_parents(self, offsets, content): - if offsets[-1] != len(content): - raise ValueError("Given offsets isn't compatible with content length") + def parents_from_offsets(self, offsets, content): + out = numpy.full(len(content), -1, dtype=self.INDEXTYPE) + lenstarts = len(offsets)-1 + i = 0 + while i < lenstarts: + out[offsets[i]:offsets[i+1]] = i + i += 1 + return out + + def argproduct(self, other): + import awkward.array.table + if not isinstance(other, JaggedArray): + raise ValueError("array given isn't instance of JaggedArray; need JaggedArrays to proceed") - index = numpy.arange(len(content), dtype=int) - below = numpy.zeros(len(content), dtype=int) - above = numpy.ones(len(content), dtype=int) * (len(offsets) - 1) - while True: - middle = (below + above) // 2 + if (len(self._starts) != len(other)): + raise ValueError("Number of events in each array must be equal") + + starts1 = self._starts + stops1 = self._stops + counts1 = stops1 - starts1 - change_below = offsets[middle + 1] <= index - change_above = offsets[middle] > index + starts2 = other.starts + stops2 = other.stops + counts2 = stops2 - starts2 - if not numpy.bitwise_or(change_below, change_above).any(): - break - else: - below = numpy.where(change_below, middle + 1, below) - above = numpy.where(change_above, middle - 1, above) - return middle + pairs_counts = numpy.zeros(len(starts1)+1, dtype=self.INDEXTYPE) + pairs_counts[1:] = numpy.cumsum(counts1*counts2) + pairs_counts = pairs_counts.astype(self.INDEXTYPE) + + pairs_indices = numpy.arange(pairs_counts[-1]).astype(self.INDEXTYPE) + pairs_parents = self.parents_from_offsets(pairs_counts, pairs_indices) + pairs_parents = pairs_parents.astype(self.INDEXTYPE) + + left = numpy.empty_like(pairs_indices) + right = numpy.empty_like(pairs_indices) + + left[pairs_indices] = starts1[pairs_parents[pairs_indices]] + numpy.floor((pairs_indices - pairs_counts[pairs_parents[pairs_indices]])/counts2[pairs_parents[pairs_indices]]).astype(self.INDEXTYPE) + right[pairs_indices] = starts2[pairs_parents[pairs_indices]] + (pairs_indices - pairs_counts[pairs_parents[pairs_indices]]) - counts2[pairs_parents[pairs_indices]] * numpy.floor((pairs_indices - pairs_counts[pairs_parents[pairs_indices]])/counts2[pairs_parents[pairs_indices]]) + + return JaggedArray(pairs_counts[:-1], pairs_counts[1:], awkward.array.table.Table(pairs_indices[-1], left, right), writeable=self._writeable) - def combinations(self, Jagged_arr1, Jagged_arr2,return_indices=False, writeable=True): - if Jagged_arr1 is None: - raise ValueError("Only one array is provided, need two to proceed") - if Jagged_arr2 is None: - raise ValueError("Only one array is provided, need two to proceed") - if not (isinstance(Jagged_arr1, JaggedArray) or isinstance(Jagged_arr2, JaggedArray)): - raise ValueError("arrays given aren't instances of JaggedArray; need JaggedArrays to proceed") + def product(self, other): + import awkward.array.table + if not isinstance(other, JaggedArray): + raise ValueError("array given isn't instance of JaggedArray; need JaggedArrays to proceed") - if (len(Jagged_arr1) != len(Jagged_arr2)): + if (len(self._starts) != len(other)): raise ValueError("Number of events in each array must be equal") - # Probably unnecessary. Can be generated on the fly - starts1 = Jagged_arr1.starts - stops1 = Jagged_arr1.stops + starts1 = self._starts + stops1 = self._stops counts1 = stops1 - starts1 - starts2 = Jagged_arr2.starts - stops2 = Jagged_arr2.stops + starts2 = other.starts + stops2 = other.stops counts2 = stops2 - starts2 - NUMEVENTS = len(Jagged_arr1) - pairs_counts = numpy.zeros(NUMEVENTS+1, dtype=numpy.int) + pairs_counts = numpy.zeros(len(starts1)+1, dtype=self.INDEXTYPE) pairs_counts[1:] = numpy.cumsum(counts1*counts2) - pairs_counts = pairs_counts.astype(numpy.int) + pairs_counts = pairs_counts.astype(self.INDEXTYPE) - pairs_indices = numpy.arange(pairs_counts[-1]).astype(numpy.int) - pairs_parents = self.vectorized_parents(pairs_counts, pairs_indices) - pairs_parents = pairs_parents.astype(numpy.int) + pairs_indices = numpy.arange(pairs_counts[-1]).astype(self.INDEXTYPE) + pairs_parents = self.parents_from_offsets(pairs_counts, pairs_indices) + pairs_parents = pairs_parents.astype(self.INDEXTYPE) left = numpy.empty_like(pairs_indices) right = numpy.empty_like(pairs_indices) - left[pairs_indices] = starts1[pairs_parents[pairs_indices]] + numpy.floor((pairs_indices-pairs_counts[pairs_parents[pairs_indices]])/counts2[pairs_parents[pairs_indices]]).astype(numpy.int) - right[pairs_indices] = starts2[pairs_parents[pairs_indices]]+(pairs_indices-pairs_counts[pairs_parents[pairs_indices]])-counts2[pairs_parents[pairs_indices]]*numpy.floor((pairs_indices-pairs_counts[pairs_parents[pairs_indices]])/counts2[pairs_parents[pairs_indices]]) - - if return_indices: - return JaggedArray(pairs_counts[:-1],pairs_counts[1:], Table(pairs_indices[-1], left, right)) - else: - return JaggedArray(pairs_counts[:-1],pairs_counts[1:], Table(pairs_indices[-1],Jagged_arr1.content[left], Jagged_arr2.content[right])) + left[pairs_indices] = starts1[pairs_parents[pairs_indices]] + numpy.floor((pairs_indices - pairs_counts[pairs_parents[pairs_indices]])/counts2[pairs_parents[pairs_indices]]).astype(self.INDEXTYPE) + right[pairs_indices] = starts2[pairs_parents[pairs_indices]] + (pairs_indices - pairs_counts[pairs_parents[pairs_indices]]) - counts2[pairs_parents[pairs_indices]] * numpy.floor((pairs_indices - pairs_counts[pairs_parents[pairs_indices]])/counts2[pairs_parents[pairs_indices]]) + return JaggedArray(pairs_counts[:-1], pairs_counts[1:], awkward.array.table.Table(pairs_indices[-1], self._content[left], other.content[right]), writeable=self._writeable) class ByteJaggedArray(JaggedArray): From 9de54a32ef7bed9629baf6eb5afafa9a8296b8c4 Mon Sep 17 00:00:00 2001 From: Jaydeep Nandi <34567389+Jayd-1234@users.noreply.github.com> Date: Thu, 28 Jun 2018 20:29:22 +0530 Subject: [PATCH 5/9] Update jagged.py --- awkward/array/jagged.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 3b43f344..898fc2b6 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -35,10 +35,6 @@ import awkward.array.base import awkward.util -<<<<<<< HEAD -======= -from awkward.array.table import Table ->>>>>>> abe229e295fd57fec3960272b797ea4d5ecdf764 class JaggedArray(awkward.array.base.AwkwardArray): @classmethod From 4707c2c45e348765cfd9a524856e0858175581c0 Mon Sep 17 00:00:00 2001 From: Jaydeep Nandi <34567389+Jayd-1234@users.noreply.github.com> Date: Thu, 28 Jun 2018 21:41:59 +0530 Subject: [PATCH 6/9] More refined.. Use product with argproduct --- awkward/array/jagged.py | 63 ++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 42 deletions(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 3b43f344..346e1b39 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -412,14 +412,6 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): else: return JaggedArray(starts, stops, result) - def parents_from_offsets(self, offsets, content): - out = numpy.full(len(content), -1, dtype=self.INDEXTYPE) - lenstarts = len(offsets)-1 - i = 0 - while i < lenstarts: - out[offsets[i]:offsets[i+1]] = i - i += 1 - return out def argproduct(self, other): import awkward.array.table @@ -436,13 +428,22 @@ def argproduct(self, other): starts2 = other.starts stops2 = other.stops counts2 = stops2 - starts2 + pairs_counts = numpy.zeros(len(starts1)+1, dtype=self.INDEXTYPE) - pairs_counts[1:] = numpy.cumsum(counts1*counts2) - pairs_counts = pairs_counts.astype(self.INDEXTYPE) + pairs_counts[1:] = numpy.cumsum(counts1*counts2, dtype=self.INDEXTYPE) - pairs_indices = numpy.arange(pairs_counts[-1]).astype(self.INDEXTYPE) - pairs_parents = self.parents_from_offsets(pairs_counts, pairs_indices) + def parents_from_offsets(offsets, content): + out = numpy.full(len(content), -1, dtype=self.INDEXTYPE) + lenstarts = len(offsets)-1 + i = 0 + while i < lenstarts: + out[offsets[i]:offsets[i+1]] = i + i += 1 + return out + + pairs_indices = numpy.arange(pairs_counts[-1], dtype=self.INDEXTYPE) + pairs_parents = parents_from_offsets(pairs_counts, pairs_indices) pairs_parents = pairs_parents.astype(self.INDEXTYPE) left = numpy.empty_like(pairs_indices) @@ -454,36 +455,14 @@ def argproduct(self, other): return JaggedArray(pairs_counts[:-1], pairs_counts[1:], awkward.array.table.Table(pairs_indices[-1], left, right), writeable=self._writeable) def product(self, other): - import awkward.array.table - if not isinstance(other, JaggedArray): - raise ValueError("array given isn't instance of JaggedArray; need JaggedArrays to proceed") - - if (len(self._starts) != len(other)): - raise ValueError("Number of events in each array must be equal") - - starts1 = self._starts - stops1 = self._stops - counts1 = stops1 - starts1 - - starts2 = other.starts - stops2 = other.stops - counts2 = stops2 - starts2 - - pairs_counts = numpy.zeros(len(starts1)+1, dtype=self.INDEXTYPE) - pairs_counts[1:] = numpy.cumsum(counts1*counts2) - pairs_counts = pairs_counts.astype(self.INDEXTYPE) - - pairs_indices = numpy.arange(pairs_counts[-1]).astype(self.INDEXTYPE) - pairs_parents = self.parents_from_offsets(pairs_counts, pairs_indices) - pairs_parents = pairs_parents.astype(self.INDEXTYPE) - - left = numpy.empty_like(pairs_indices) - right = numpy.empty_like(pairs_indices) - - left[pairs_indices] = starts1[pairs_parents[pairs_indices]] + numpy.floor((pairs_indices - pairs_counts[pairs_parents[pairs_indices]])/counts2[pairs_parents[pairs_indices]]).astype(self.INDEXTYPE) - right[pairs_indices] = starts2[pairs_parents[pairs_indices]] + (pairs_indices - pairs_counts[pairs_parents[pairs_indices]]) - counts2[pairs_parents[pairs_indices]] * numpy.floor((pairs_indices - pairs_counts[pairs_parents[pairs_indices]])/counts2[pairs_parents[pairs_indices]]) - - return JaggedArray(pairs_counts[:-1], pairs_counts[1:], awkward.array.table.Table(pairs_indices[-1], self._content[left], other.content[right]), writeable=self._writeable) + import awkward.array.table + product_indexes = self.argproduct(other) + dictvals = product_indexes._content._content.values() + arr_list = [] + for v in dictvals: + arr_list.append(v) + + return JaggedArray(product_indexes.starts, product_indexes.stops, awkward.array.table.Table( len(arr_list[0], self._content[arr_list[0]], other.content[arr_list[1]]))) class ByteJaggedArray(JaggedArray): From 87873765d6ff2df156ce4455ff705995530f7fd0 Mon Sep 17 00:00:00 2001 From: Jaydeep Nandi <34567389+Jayd-1234@users.noreply.github.com> Date: Fri, 29 Jun 2018 12:44:44 +0530 Subject: [PATCH 7/9] Fixed a bug in product. Added product to ByteJaggedArray --- awkward/array/jagged.py | 55 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 920a79cd..62ecea8a 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -453,12 +453,9 @@ def parents_from_offsets(offsets, content): def product(self, other): import awkward.array.table product_indexes = self.argproduct(other) - dictvals = product_indexes._content._content.values() - arr_list = [] - for v in dictvals: - arr_list.append(v) + arr_list = list(product_indexes._content._content.values()) + return JaggedArray(product_indexes.starts, product_indexes.stops, awkward.array.table.Table(len(arr_list[0]), self._content[arr_list[0]], other.content[arr_list[1]])) - return JaggedArray(product_indexes.starts, product_indexes.stops, awkward.array.table.Table( len(arr_list[0], self._content[arr_list[0]], other.content[arr_list[1]]))) class ByteJaggedArray(JaggedArray): @@ -621,3 +618,51 @@ def tojagged(self, starts=None, stops=None, copy=True, writeable=True): i += 1 return JaggedArray(starts, stops, content, writeable=writeable) + + def argproduct(self, other): + import awkward.array.table + if not isinstance(other, JaggedArray): + raise ValueError("array given isn't instance of JaggedArray; need JaggedArrays to proceed") + + if (len(self._starts) != len(other)): + raise ValueError("Number of events in each array must be equal") + + starts1 = self._starts + stops1 = self._stops + counts1 = stops1 - starts1 + + starts2 = other.starts + stops2 = other.stops + counts2 = stops2 - starts2 + + + pairs_counts = numpy.zeros(len(starts1)+1, dtype=self.INDEXTYPE) + pairs_counts[1:] = numpy.cumsum(counts1*counts2, dtype=self.INDEXTYPE) + + def parents_from_offsets(offsets, content): + out = numpy.full(len(content), -1, dtype=self.INDEXTYPE) + lenstarts = len(offsets)-1 + i = 0 + while i < lenstarts: + out[offsets[i]:offsets[i+1]] = i + i += 1 + return out + + pairs_indices = numpy.arange(pairs_counts[-1], dtype=self.INDEXTYPE) + pairs_parents = parents_from_offsets(pairs_counts, pairs_indices) + pairs_parents = pairs_parents.astype(self.INDEXTYPE) + + left = numpy.empty_like(pairs_indices) + right = numpy.empty_like(pairs_indices) + + left[pairs_indices] = starts1[pairs_parents[pairs_indices]] + numpy.floor((pairs_indices - pairs_counts[pairs_parents[pairs_indices]])/counts2[pairs_parents[pairs_indices]]).astype(self.INDEXTYPE) + right[pairs_indices] = starts2[pairs_parents[pairs_indices]] + (pairs_indices - pairs_counts[pairs_parents[pairs_indices]]) - counts2[pairs_parents[pairs_indices]] * numpy.floor((pairs_indices - pairs_counts[pairs_parents[pairs_indices]])/counts2[pairs_parents[pairs_indices]]) + + return JaggedArray(pairs_counts[:-1], pairs_counts[1:], awkward.array.table.Table(pairs_indices[-1], left, right), writeable=self._writeable) + + def product(self, other): + import awkward.array.table + product_indexes = self.argproduct(other) + arr_list = list(product_indexes._content._content.values()) + + return JaggedArray(product_indexes.starts, product_indexes.stops, awkward.array.table.Table(len(arr_list[0]), self._content[arr_list[0]], other.content[arr_list[1]])) From c7987f67663813c3e927d64731d5fbfe9e3910c5 Mon Sep 17 00:00:00 2001 From: Jaydeep Nandi <34567389+Jayd-1234@users.noreply.github.com> Date: Fri, 29 Jun 2018 13:05:57 +0530 Subject: [PATCH 8/9] add one test for product --- .vscode/settings.json | 12 ++++++++++++ tests/test_jagged.py | 19 +++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..46b06c2e --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,12 @@ +{ + "python.unitTest.unittestArgs": [ + "-v", + "-s", + "./tests", + "-p", + "test_*.py" + ], + "python.unitTest.pyTestEnabled": false, + "python.unitTest.nosetestsEnabled": false, + "python.unitTest.unittestEnabled": true +} \ No newline at end of file diff --git a/tests/test_jagged.py b/tests/test_jagged.py index cffaafb9..91fa03aa 100644 --- a/tests/test_jagged.py +++ b/tests/test_jagged.py @@ -198,3 +198,22 @@ def test_bytejagged_set(self): a[:] = JaggedArray.fromiter([[3, 2, 1], [], [5, 4]]) self.assertEqual(a.content.tobytes(), b"\xff\x00\x00\x00\x00\x03\x00\x00\x00\x02\x00\x00\x00\x01\x00\x00\x00\xff\xff\x05\x00\x00\x00\x04\x00\x00\x00\xff") self.assertEqual([a[i].tolist() for i in range(len(a))], [[3, 2, 1], [], [5, 4]]) + + def test_jagged_argproduct(self): + starts1 = [0,1,4,4] + stops1 = [1,4,4,8] + + starts2 = [0,1,1,4] + stops2 = [1,1,4,5] + + arr1 = JaggedArray(starts1, stops1,content=[0,1,2,3,4,5,6,7]) + arr2 = JaggedArray(starts2, stops2,content=['z', 'a','b','c','d']) + + arr_product = arr1.product(arr2) + self.assertTrue((list(arr_product._content._content.values())[0]==list([0,4,5,6,7])).all()) + + arr_argproduct = arr1.argproduct(arr2) + self.assertTrue((list(arr_argproduct._content._content.values())[0]==list([0,4,5,6,7])).all()) + self.assertTrue((list(arr_argproduct._content._content.values())[1]==list([0,4,4,4,4])).all()) + + From 8aa61a695f03582a1bb0949ff5f6a5c411e2029e Mon Sep 17 00:00:00 2001 From: Jaydeep Nandi <34567389+Jayd-1234@users.noreply.github.com> Date: Sat, 30 Jun 2018 11:10:15 +0530 Subject: [PATCH 9/9] Implemented David's suggestions - Added parents without using content - Added simple docstrings regarding usage. --- awkward/array/jagged.py | 56 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py index 62ecea8a..3301bb22 100644 --- a/awkward/array/jagged.py +++ b/awkward/array/jagged.py @@ -410,6 +410,17 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): def argproduct(self, other): + ''' + Performs product (combinations) of current JaggedArray with JaggedArray `other`. Return the indices of the product. + + Inputs: other; a JaggedArray instance. + Output: JaggedArray containing thw two indices as an awkward.array.table.Table() + + Example usage: + >>> arr1 = JaggedArray([0,1,4,4],[1,4,4,8],content=[0,1,2,3,4,5,6,7]) + >>> arr2 = JaggedArray([0,1,1,4],[1,1,4,5],content=['z', 'a','b','c','d']) + >>> result = arr1.argproduct(arr2) + ''' import awkward.array.table if not isinstance(other, JaggedArray): raise ValueError("array given isn't instance of JaggedArray; need JaggedArrays to proceed") @@ -429,8 +440,8 @@ def argproduct(self, other): pairs_counts = numpy.zeros(len(starts1)+1, dtype=self.INDEXTYPE) pairs_counts[1:] = numpy.cumsum(counts1*counts2, dtype=self.INDEXTYPE) - def parents_from_offsets(offsets, content): - out = numpy.full(len(content), -1, dtype=self.INDEXTYPE) + def parents_from_offsets(offsets): + out = numpy.full(offsets[-1], -1, dtype=self.INDEXTYPE) lenstarts = len(offsets)-1 i = 0 while i < lenstarts: @@ -439,7 +450,7 @@ def parents_from_offsets(offsets, content): return out pairs_indices = numpy.arange(pairs_counts[-1], dtype=self.INDEXTYPE) - pairs_parents = parents_from_offsets(pairs_counts, pairs_indices) + pairs_parents = parents_from_offsets(pairs_counts) pairs_parents = pairs_parents.astype(self.INDEXTYPE) left = numpy.empty_like(pairs_indices) @@ -451,6 +462,17 @@ def parents_from_offsets(offsets, content): return JaggedArray(pairs_counts[:-1], pairs_counts[1:], awkward.array.table.Table(pairs_indices[-1], left, right), writeable=self._writeable) def product(self, other): + ''' + Performs product ( combinations) between two JaggedArrays and returns the resulting combined content as a JaggedArray() + + Inputs: other; a JaggedArray instance + Output: JaggedArray containing thw two indices as an awkward.array.table.Table() + + Example usage: + >>> arr1 = JaggedArray([0,1,4,4],[1,4,4,8],content=[0,1,2,3,4,5,6,7]) + >>> arr2 = JaggedArray([0,1,1,4],[1,1,4,5],content=['z', 'a','b','c','d']) + >>> result = arr1.product(arr2) + ''' import awkward.array.table product_indexes = self.argproduct(other) arr_list = list(product_indexes._content._content.values()) @@ -620,6 +642,17 @@ def tojagged(self, starts=None, stops=None, copy=True, writeable=True): return JaggedArray(starts, stops, content, writeable=writeable) def argproduct(self, other): + ''' + Performs product (combinations) of current JaggedArray with JaggedArray `other`. Return the indices of the product. + + Inputs: other; a JaggedArray instance. + Output: JaggedArray containing thw two indices as an awkward.array.table.Table() + + Example usage: + >>> arr1 = JaggedArray([0,1,4,4],[1,4,4,8],content=[0,1,2,3,4,5,6,7]) + >>> arr2 = JaggedArray([0,1,1,4],[1,1,4,5],content=['z', 'a','b','c','d']) + >>> result = arr1.argproduct(arr2) + ''' import awkward.array.table if not isinstance(other, JaggedArray): raise ValueError("array given isn't instance of JaggedArray; need JaggedArrays to proceed") @@ -639,8 +672,8 @@ def argproduct(self, other): pairs_counts = numpy.zeros(len(starts1)+1, dtype=self.INDEXTYPE) pairs_counts[1:] = numpy.cumsum(counts1*counts2, dtype=self.INDEXTYPE) - def parents_from_offsets(offsets, content): - out = numpy.full(len(content), -1, dtype=self.INDEXTYPE) + def parents_from_offsets(offsets): + out = numpy.full(offsets[-1], -1, dtype=self.INDEXTYPE) lenstarts = len(offsets)-1 i = 0 while i < lenstarts: @@ -649,7 +682,7 @@ def parents_from_offsets(offsets, content): return out pairs_indices = numpy.arange(pairs_counts[-1], dtype=self.INDEXTYPE) - pairs_parents = parents_from_offsets(pairs_counts, pairs_indices) + pairs_parents = parents_from_offsets(pairs_counts) pairs_parents = pairs_parents.astype(self.INDEXTYPE) left = numpy.empty_like(pairs_indices) @@ -661,6 +694,17 @@ def parents_from_offsets(offsets, content): return JaggedArray(pairs_counts[:-1], pairs_counts[1:], awkward.array.table.Table(pairs_indices[-1], left, right), writeable=self._writeable) def product(self, other): + ''' + Performs product ( combinations) between two JaggedArrays and returns the resulting combined content as a JaggedArray() + + Inputs: other; a JaggedArray instance + Output: JaggedArray containing thw two indices as an awkward.array.table.Table() + + Example usage: + >>> arr1 = JaggedArray([0,1,4,4],[1,4,4,8],content=[0,1,2,3,4,5,6,7]) + >>> arr2 = JaggedArray([0,1,1,4],[1,1,4,5],content=['z', 'a','b','c','d']) + >>> result = arr1.product(arr2) + ''' import awkward.array.table product_indexes = self.argproduct(other) arr_list = list(product_indexes._content._content.values())