diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 00000000..46b06c2e
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,12 @@
+{
+    "python.unitTest.unittestArgs": [
+        "-v",
+        "-s",
+        "./tests",
+        "-p",
+        "test_*.py"
+    ],
+    "python.unitTest.pyTestEnabled": false,
+    "python.unitTest.nosetestsEnabled": false,
+    "python.unitTest.unittestEnabled": true
+}
\ No newline at end of file
diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py
index 6c494896..3301bb22 100644
--- a/awkward/array/jagged.py
+++ b/awkward/array/jagged.py
@@ -208,6 +208,7 @@ def parents(self):
             out[starts[i]:stops[i]] = i
             i += 1
         return out
+
 
     def __len__(self):                 # length is determined by starts
         return len(self._starts)       # data can grow by appending contents and stops before starts
@@ -406,6 +407,80 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
             return None
         else:
             return JaggedArray(starts, stops, result)
 
+    def _argproduct_columns(self, other):
+        """Compute flat content indices for the per-event product with `other`.
+
+        Every element of event i in this array is paired with every element of
+        event i in `other`; pairs are ordered with the index into `other`
+        varying fastest.
+
+        Inputs: other; a JaggedArray instance with the same number of events.
+        Output: (offsets, left, right). offsets[i]:offsets[i+1] is the range of
+                pairs belonging to event i; left and right are the indices of each
+                pair into self.content and other.content.
+        Raises: ValueError if `other` is not a JaggedArray or its number of
+                events differs.
+        """
+        if not isinstance(other, JaggedArray):
+            raise ValueError("array given isn't instance of JaggedArray; need JaggedArrays to proceed")
+
+        if len(self._starts) != len(other):
+            raise ValueError("Number of events in each array must be equal")
+
+        starts1 = self._starts
+        starts2 = other.starts
+        counts1 = self._stops - starts1
+        counts2 = other.stops - starts2
+
+        # offsets[i] is where the pairs of event i begin in the flat output
+        offsets = numpy.zeros(len(starts1) + 1, dtype=self.INDEXTYPE)
+        offsets[1:] = numpy.cumsum(counts1 * counts2, dtype=self.INDEXTYPE)
+
+        # event number of each pair; an event without pairs never appears here,
+        # so counts2[parents] below is never zero
+        parents = numpy.repeat(numpy.arange(len(starts1), dtype=self.INDEXTYPE), numpy.diff(offsets))
+
+        # rank of each pair inside its own event
+        local = numpy.arange(offsets[-1], dtype=self.INDEXTYPE) - offsets[parents]
+        width = counts2[parents]
+
+        # integer floor-division and modulo keep the arithmetic exact
+        left = (starts1[parents] + local // width).astype(self.INDEXTYPE)
+        right = (starts2[parents] + local % width).astype(self.INDEXTYPE)
+        return offsets, left, right
+
+    def argproduct(self, other):
+        """Perform the product (all pairings) of this JaggedArray with JaggedArray `other` and return the indices of the product.
+
+        Inputs: other; a JaggedArray instance.
+        Output: JaggedArray containing the two indices as an awkward.array.table.Table()
+
+        Example usage:
+            >>> arr1 = JaggedArray([0,1,4,4],[1,4,4,8],content=[0,1,2,3,4,5,6,7])
+            >>> arr2 = JaggedArray([0,1,1,4],[1,1,4,5],content=['z', 'a','b','c','d'])
+            >>> result = arr1.argproduct(arr2)
+        """
+        import awkward.array.table
+        offsets, left, right = self._argproduct_columns(other)
+        # the Table length is the number of pairs, not the index of the last pair
+        return JaggedArray(offsets[:-1], offsets[1:], awkward.array.table.Table(len(left), left, right), writeable=self._writeable)
+
+    def product(self, other):
+        """Perform the product (all pairings) between two JaggedArrays and return the resulting combined content as a JaggedArray.
+
+        Inputs: other; a JaggedArray instance.
+        Output: JaggedArray containing the paired elements of the two contents as an awkward.array.table.Table()
+
+        Example usage:
+            >>> arr1 = JaggedArray([0,1,4,4],[1,4,4,8],content=[0,1,2,3,4,5,6,7])
+            >>> arr2 = JaggedArray([0,1,1,4],[1,1,4,5],content=['z', 'a','b','c','d'])
+            >>> result = arr1.product(arr2)
+        """
+        import awkward.array.table
+        offsets, left, right = self._argproduct_columns(other)
+        return JaggedArray(offsets[:-1], offsets[1:], awkward.array.table.Table(len(left), self._content[left], other.content[right]))
+
+
 class ByteJaggedArray(JaggedArray):
     @classmethod
diff --git a/tests/test_jagged.py b/tests/test_jagged.py
index cffaafb9..91fa03aa 100644
--- a/tests/test_jagged.py
+++ b/tests/test_jagged.py
@@ -198,3 +198,23 @@ def test_bytejagged_set(self):
         a[:] = JaggedArray.fromiter([[3, 2, 1], [], [5, 4]])
         self.assertEqual(a.content.tobytes(), b"\xff\x00\x00\x00\x00\x03\x00\x00\x00\x02\x00\x00\x00\x01\x00\x00\x00\xff\xff\x05\x00\x00\x00\x04\x00\x00\x00\xff")
         self.assertEqual([a[i].tolist() for i in range(len(a))], [[3, 2, 1], [], [5, 4]])
+
+    def test_jagged_argproduct(self):
+        arr1 = JaggedArray([0, 1, 4, 4], [1, 4, 4, 8], content=[0, 1, 2, 3, 4, 5, 6, 7])
+        arr2 = JaggedArray([0, 1, 1, 4], [1, 1, 4, 5], content=['z', 'a', 'b', 'c', 'd'])
+
+        arr_argproduct = arr1.argproduct(arr2)
+        columns = list(arr_argproduct._content._content.values())
+        self.assertEqual(columns[0].tolist(), [0, 4, 5, 6, 7])
+        self.assertEqual(columns[1].tolist(), [0, 4, 4, 4, 4])
+        self.assertEqual(list(arr_argproduct.starts), [0, 1, 1, 1])
+        self.assertEqual(list(arr_argproduct.stops), [1, 1, 1, 5])
+
+    def test_jagged_product(self):
+        arr1 = JaggedArray([0, 1, 4, 4], [1, 4, 4, 8], content=[0, 1, 2, 3, 4, 5, 6, 7])
+        arr2 = JaggedArray([0, 1, 1, 4], [1, 1, 4, 5], content=['z', 'a', 'b', 'c', 'd'])
+
+        arr_product = arr1.product(arr2)
+        columns = list(arr_product._content._content.values())
+        self.assertEqual(columns[0].tolist(), [0, 4, 5, 6, 7])
+        self.assertEqual(columns[1].tolist(), ['z', 'd', 'd', 'd', 'd'])