From b9c4fe6c3bad8009281b9439eea65bdebe246e2c Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Mon, 5 Feb 2024 14:07:05 +0100 Subject: [PATCH 01/19] feat: add variable length kernels --- dev/generate-kernel-signatures.py | 2 + dev/generate-tests.py | 2 + kernel-test-data.json | 208 ++++++++++++++++++ src/awkward/_connect/cuda/__init__.py | 2 + ...kward_IndexedArray_ranges_carry_next_64.cu | 67 ++++++ .../awkward_IndexedArray_ranges_next_64.cu | 74 +++++++ 6 files changed, 355 insertions(+) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_ranges_carry_next_64.cu create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_ranges_next_64.cu diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index 53e141726d..560d851a6f 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -76,6 +76,8 @@ "awkward_IndexedArray_getitem_nextcarry", "awkward_IndexedArray_getitem_nextcarry_outindex", "awkward_IndexedArray_index_of_nulls", + "awkward_IndexedArray_ranges_next_64", + "awkward_IndexedArray_ranges_carry_next_64", "awkward_IndexedArray_reduce_next_64", "awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64", "awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index 0360b31f48..fa8f3f0906 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -740,6 +740,8 @@ def gencpuunittests(specdict): "awkward_IndexedArray_getitem_nextcarry", "awkward_IndexedArray_getitem_nextcarry_outindex", "awkward_IndexedArray_index_of_nulls", + "awkward_IndexedArray_ranges_next_64", + "awkward_IndexedArray_ranges_carry_next_64", "awkward_IndexedArray_reduce_next_64", "awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64", "awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64", diff --git a/kernel-test-data.json b/kernel-test-data.json index 052edda1d4..8b4824eecf 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -2320,6 +2320,214 @@ } ] }, + { + "name": "awkward_IndexedArray_ranges_next_64", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "index": [], + "fromstarts": [], + "fromstops": [], + "length": 0 + }, + "outputs": { + "tostarts": [], + "tostops": [], + "tolength": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "index": [-1], + "fromstarts": [0], + "fromstops": [1], + "length": 1 + }, + "outputs": { + "tostarts": [0], + "tostops": [0], + "tolength": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "index": [0, 1], + "fromstarts": [0], + "fromstops": [2], + "length": 1 + }, + "outputs": { + "tostarts": [0], + "tostops": [2], + "tolength": [2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "index": [0, 1, 2], + "fromstarts": [0, 2], + "fromstops": [2, 3], + "length": 2 + }, + "outputs": { + "tostarts": [0, 2], + "tostops": [2, 3], + "tolength": [3] + } + }, + { + "error": false, + "message": "", + "inputs": { + "index": [-1, -1, -1], + "fromstarts": [0, 2], + "fromstops": [2, 3], + "length": 2 + }, + "outputs": { + "tostarts": [0, 0], + "tostops": [0, 0], + "tolength": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "index": [0, -1, 1, -1, 2], + "fromstarts": [0, 2, 3], + "fromstops": [2, 3, 5], + "length": 3 + }, + "outputs": { + "tostarts": [0, 1, 2], + "tostops": [1, 2, 3], + "tolength": [3] + } + }, + { + "error": false, + "message": "", + "inputs": { + "index": [3, -1 ,-1, 2, 1, 0, -1], + "fromstarts": [0, 1, 2, 3, 5], + "fromstops": [1, 2, 3, 5, 7], + "length": 5 + }, + "outputs": { + "tostarts": [0, 1, 1, 1, 3], + "tostops": [1, 1, 1, 3, 4], + "tolength": [4] + } + } + ] + }, + { + "name": "awkward_IndexedArray_ranges_carry_next_64", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "index": [], + "fromstarts": [], + "fromstops": [], + "length": 0 + }, + "outputs": { + "tocarry": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "index": [1], + "fromstarts": [0], + "fromstops": [1], + "length": 1 + }, + "outputs": { + "tocarry": [1] + } + }, + { + "error": false, + "message": "", + "inputs": { + "index": [-1], + "fromstarts": [0], + "fromstops": [1], + "length": 1 + }, + "outputs": { + "tocarry": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "index": [-1, -1, -1], + "fromstarts": [0, 2], + "fromstops": [2, 3], + "length": 2 + }, + "outputs": { + "tocarry": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "index": [0, 1, 2], + "fromstarts": [0, 2], + "fromstops": [2, 3], + "length": 2 + }, + "outputs": { + "tocarry": [0, 1, 2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "index": [0, -1, 1, -1, 2], + "fromstarts": [0, 2, 3], + "fromstops": [2, 3, 5], + "length": 3 + }, + "outputs": { + "tocarry": [0, 1, 2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "index": [3, -1 ,-1, 2, 1, 0, -1], + "fromstarts": [0, 1, 2, 3, 5], + "fromstops": [1, 2, 3, 5, 7], + "length": 5 + }, + "outputs": { + "tocarry": [3, 2, 1, 0] + } + } + ] + }, { "name": "awkward_ListArray_broadcast_tooffsets", "status": false, diff --git a/src/awkward/_connect/cuda/__init__.py b/src/awkward/_connect/cuda/__init__.py index df44325b91..8bdea49d6d 100644 --- a/src/awkward/_connect/cuda/__init__.py +++ b/src/awkward/_connect/cuda/__init__.py @@ -86,6 +86,8 @@ def fetch_template_specializations(kernel_dict): "awkward_IndexedArray_getitem_nextcarry_outindex", "awkward_ListArray_getitem_next_range_counts", "awkward_IndexedArray_index_of_nulls", + "awkward_IndexedArray_ranges_next_64", + "awkward_IndexedArray_ranges_carry_next_64", "awkward_IndexedArray_reduce_next_64", "awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64", "awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_ranges_carry_next_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_ranges_carry_next_64.cu new file mode 100644 index 0000000000..2410e88ca0 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_ranges_carry_next_64.cu @@ -0,0 +1,67 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (index, fromstarts, fromstops, length, tocarry, invocation_index, err_code) = args +// scan_in_array = cupy.empty_like(index, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_ranges_carry_next_64_a", index.dtype, fromstarts.dtype, fromstops.dtype, tocarry.dtype]))(grid, block, (index, fromstarts, fromstops, length, tocarry, scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_ranges_carry_next_64_b", index.dtype, fromstarts.dtype, fromstops.dtype, tocarry.dtype]))(grid, block, (index, fromstarts, fromstops, length, tocarry, scan_in_array, invocation_index, err_code)) +// out["awkward_IndexedArray_ranges_carry_next_64_a", {dtype_specializations}] = None +// out["awkward_IndexedArray_ranges_carry_next_64_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_IndexedArray_ranges_carry_next_64_a( + T* index, + const C* fromstarts, + const U* fromstops, + int64_t length, + V* tocarry, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t stride = 0; + + if (thread_id < length) { + stride = fromstops[thread_id] - fromstarts[thread_id]; + for (int64_t j = 0; j < stride; j++) { + if (!(index[fromstarts[thread_id] + j] < 0)) { + scan_in_array[fromstarts[thread_id] + j] = 1; + } + else { + scan_in_array[fromstarts[thread_id] + j] = 0; + } + } + } + } +} + +template +__global__ void +awkward_IndexedArray_ranges_carry_next_64_b( + T* index, + const C* fromstarts, + const U* fromstops, + int64_t length, + V* tocarry, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t stride = 0; + + if (thread_id < length) { + stride = fromstops[thread_id] - fromstarts[thread_id]; + for (int64_t j = 0; j < stride; j++) { + if (!(index[fromstarts[thread_id] + j] < 0)) { + tocarry[scan_in_array[fromstarts[thread_id] + j] - 1] = index[fromstarts[thread_id] + j]; + } + } + } + } +} diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_ranges_next_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_ranges_next_64.cu new file mode 100644 index 0000000000..a420f845e0 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_IndexedArray_ranges_next_64.cu @@ -0,0 +1,74 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (index, fromstarts, fromstops, length, tostarts, tostops, tolength, invocation_index, err_code) = args +// scan_in_array = cupy.empty_like(index, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_ranges_next_64_a", index.dtype, fromstarts.dtype, fromstops.dtype, tostarts.dtype, tostops.dtype, tolength.dtype]))(grid, block, (index, fromstarts, fromstops, length, tostarts, tostops, tolength, scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_IndexedArray_ranges_next_64_b", index.dtype, fromstarts.dtype, fromstops.dtype, tostarts.dtype, tostops.dtype, tolength.dtype]))(grid, block, (index, fromstarts, fromstops, length, tostarts, tostops, tolength, scan_in_array, invocation_index, err_code)) +// out["awkward_IndexedArray_ranges_next_64_a", {dtype_specializations}] = None +// out["awkward_IndexedArray_ranges_next_64_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_IndexedArray_ranges_next_64_a( + const T* index, + const C* fromstarts, + const U* fromstops, + int64_t length, + V* tostarts, + W* tostops, + X* tolength, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t stride = 0; + + if (thread_id < length) { + stride = fromstops[thread_id] - fromstarts[thread_id]; + for (int64_t j = 0; j < stride; j++) { + if (!(index[fromstarts[thread_id] + j] < 0)) { + scan_in_array[fromstarts[thread_id] + j] = 1; + } + else { + scan_in_array[fromstarts[thread_id] + j] = 0; + } + } + } + } +} + +template +__global__ void +awkward_IndexedArray_ranges_next_64_b( + const T* index, + const C* fromstarts, + const U* fromstops, + int64_t length, + V* tostarts, + W* tostops, + X* tolength, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t stride = 0; + + *tolength = length > 0 ? scan_in_array[fromstops[length - 1] - 1] : 0; + + if (thread_id < length) { + stride = fromstops[thread_id] - fromstarts[thread_id]; + tostarts[thread_id] = scan_in_array[fromstarts[thread_id] - 1]; + for (int64_t j = 0; j < stride; j++) { + if (!(index[fromstarts[thread_id] + j] < 0)) { + } + } + tostops[thread_id] = scan_in_array[fromstops[thread_id] - 1]; + } + } +} From 8c7e066d7667330ebaeb1525193054266e9da731 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Mon, 5 Feb 2024 14:24:47 +0100 Subject: [PATCH 02/19] fix: spec kernel errors --- kernel-specification.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel-specification.yml b/kernel-specification.yml index 4fbc49edb8..6a74b8009a 100644 --- a/kernel-specification.yml +++ b/kernel-specification.yml @@ -1001,10 +1001,10 @@ kernels: stride = fromstops[i] - fromstarts[i] tostarts[i] = k for j in range(stride): - if index[fromstarts[i] + j] > 0: + if index[fromstarts[i] + j] >= 0: k = k + 1 tostops[i] = k - tolength = k + tolength[0] = k automatic-tests: false @@ -1040,7 +1040,7 @@ kernels: for i in range(length): stride = fromstops[i] - fromstarts[i] for j in range(stride): - if index[fromstarts[i] + j] > 0: + if index[fromstarts[i] + j] >= 0: tocarry[k] = index[fromstarts[i] + j] k = k + 1 automatic-tests: false From aa653c506ad0653002628bcb34524d8a563ebe2c Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Mon, 5 Feb 2024 16:58:23 +0100 Subject: [PATCH 03/19] feat: add awkward_ListArray_broadcast_tooffsets --- dev/generate-kernel-signatures.py | 1 + dev/generate-tests.py | 1 + kernel-test-data.json | 72 +++++++++++++- src/awkward/_connect/cuda/__init__.py | 1 + .../awkward_ListArray_broadcast_tooffsets.cu | 95 +++++++++++++++++++ 5 files changed, 169 insertions(+), 1 deletion(-) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_broadcast_tooffsets.cu diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index 560d851a6f..f6b8a0aab4 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -16,6 +16,7 @@ "awkward_ListArray_min_range", "awkward_ListArray_validity", "awkward_BitMaskedArray_to_ByteMaskedArray", + "awkward_ListArray_broadcast_tooffsets", "awkward_ListArray_compact_offsets", "awkward_ListOffsetArray_flatten_offsets", "awkward_IndexedArray_overlay_mask", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index fa8f3f0906..ee454d7811 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -680,6 +680,7 @@ def gencpuunittests(specdict): "awkward_ListArray_min_range", "awkward_ListArray_validity", "awkward_BitMaskedArray_to_ByteMaskedArray", + "awkward_ListArray_broadcast_tooffsets", "awkward_ListArray_compact_offsets", "awkward_ListOffsetArray_flatten_offsets", "awkward_IndexedArray_overlay_mask", diff --git a/kernel-test-data.json b/kernel-test-data.json index 8b4824eecf..7ec5cd264e 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -2530,8 +2530,78 @@ }, { "name": "awkward_ListArray_broadcast_tooffsets", - "status": false, + "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromoffsets": [], + "fromstarts": [], + "fromstops": [], + "lencontent": 0, + "offsetslength": 0 + }, + "outputs": { + "tocarry": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromoffsets": [0, 1], + "fromstarts": [0], + "fromstops": [1], + "lencontent": 1, + "offsetslength": 2 + }, + "outputs": { + "tocarry": [0] + } + }, + { + "error": true, + "message": "stops[i] > len(content)", + "inputs": { + "fromoffsets": [0, 1], + "fromstarts": [0], + "fromstops": [2], + "lencontent": 1, + "offsetslength": 2 + }, + "outputs": { + "tocarry": [] + } + }, + { + "error": true, + "message": "broadcast's offsets must be monotonically increasing", + "inputs": { + "fromoffsets": [2, 1], + "fromstarts": [0], + "fromstops": [1], + "lencontent": 1, + "offsetslength": 2 + }, + "outputs": { + "tocarry": [] + } + }, + { + "error": true, + "message": "cannot broadcast nested list", + "inputs": { + "fromoffsets": [0, 2], + "fromstarts": [0], + "fromstops": [1], + "lencontent": 1, + "offsetslength": 2 + }, + "outputs": { + "tocarry": [] + } + }, { "error": false, "message": "", diff --git a/src/awkward/_connect/cuda/__init__.py b/src/awkward/_connect/cuda/__init__.py index 8bdea49d6d..8b3c77835d 100644 --- a/src/awkward/_connect/cuda/__init__.py +++ b/src/awkward/_connect/cuda/__init__.py @@ -92,6 +92,7 @@ def fetch_template_specializations(kernel_dict): "awkward_IndexedArray_reduce_next_nonlocal_nextshifts_64", "awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64", "awkward_IndexedOptionArray_rpad_and_clip_mask_axis1", + "awkward_ListArray_broadcast_tooffsets", "awkward_ListArray_compact_offsets", "awkward_ListArray_getitem_jagged_carrylen", "awkward_ListArray_min_range", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_broadcast_tooffsets.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_broadcast_tooffsets.cu new file mode 100644 index 0000000000..d6c7f7289c --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_broadcast_tooffsets.cu @@ -0,0 +1,95 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (tocarry, fromoffsets, offsetslength, fromstarts, fromstops, lencontent, invocation_index, err_code) = args +// if offsetslength > 0: +// len_array = int(fromoffsets[offsetslength - 1]) +// else: +// len_array = 0 +// scan_in_array = cupy.empty(len_array, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_broadcast_tooffsets_a", tocarry.dtype, fromoffsets.dtype, fromstarts.dtype, fromstops.dtype]))(grid, block, (tocarry, fromoffsets, offsetslength, fromstarts, fromstops, lencontent, scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_broadcast_tooffsets_b", tocarry.dtype, fromoffsets.dtype, fromstarts.dtype, fromstops.dtype]))(grid, block, (tocarry, fromoffsets, offsetslength, fromstarts, fromstops, lencontent, scan_in_array, invocation_index, err_code)) +// out["awkward_ListArray_broadcast_tooffsets_a", {dtype_specializations}] = None +// out["awkward_ListArray_broadcast_tooffsets_b", {dtype_specializations}] = None +// END PYTHON + +enum class LISTARRAY_BROADCAST_TOOFFSETS_ERRORS { + STOP_GET_LEN, // message: "stops[i] > len(content)" + OFF_DEC, // message: "broadcast's offsets must be monotonically increasing" + NESTED_ERR, // message: "cannot broadcast nested list" +}; + +template +__global__ void +awkward_ListArray_broadcast_tooffsets_a( + T* tocarry, + const C* fromoffsets, + int64_t offsetslength, + const U* fromstarts, + const V* fromstops, + int64_t lencontent, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < offsetslength - 1) { + int64_t start = (int64_t)fromstarts[thread_id]; + int64_t stop = (int64_t)fromstops[thread_id]; + + if (start != stop && stop > lencontent) { + RAISE_ERROR(LISTARRAY_BROADCAST_TOOFFSETS_ERRORS::STOP_GET_LEN) + } + int64_t count = (fromoffsets[thread_id + 1] - fromoffsets[thread_id]); + if (count < 0) { + RAISE_ERROR(LISTARRAY_BROADCAST_TOOFFSETS_ERRORS::OFF_DEC) + } + if (stop - start != count) { + RAISE_ERROR(LISTARRAY_BROADCAST_TOOFFSETS_ERRORS::NESTED_ERR) + } + for (int64_t j = start; j < stop; j++) { + scan_in_array[fromoffsets[thread_id] + j - start] = 1; + } + } + } +} + +template +__global__ void +awkward_ListArray_broadcast_tooffsets_b( + T* tocarry, + const C* fromoffsets, + int64_t offsetslength, + const U* fromstarts, + const V* fromstops, + int64_t lencontent, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < offsetslength - 1) { + int64_t start = (int64_t)fromstarts[thread_id]; + int64_t stop = (int64_t)fromstops[thread_id]; + + if (start != stop && stop > lencontent) { + RAISE_ERROR(LISTARRAY_BROADCAST_TOOFFSETS_ERRORS::STOP_GET_LEN) + } + int64_t count = (int64_t)(fromoffsets[thread_id + 1] - fromoffsets[thread_id]); + if (count < 0) { + RAISE_ERROR(LISTARRAY_BROADCAST_TOOFFSETS_ERRORS::OFF_DEC) + } + if (stop - start != count) { + RAISE_ERROR(LISTARRAY_BROADCAST_TOOFFSETS_ERRORS::NESTED_ERR) + } + + for (int64_t j = start; j < stop; j++) { + tocarry[scan_in_array[fromoffsets[thread_id] + j - start] - 1] = (T)j; + } + } + } +} From 061952502c583726b93d9cd569da954a61a84f7e Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Mon, 5 Feb 2024 17:25:09 +0100 Subject: [PATCH 04/19] fix: awkward_ListArray_compact_offsets kernel --- kernel-test-data.json | 26 ++++++++++++++- .../awkward_ListArray_compact_offsets.cu | 33 +++++++++++++++++-- 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/kernel-test-data.json b/kernel-test-data.json index 7ec5cd264e..dab25f8ff3 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -3922,8 +3922,32 @@ }, { "name": "awkward_ListArray_compact_offsets", - "status": false, + "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromstarts": [], + "fromstops": [], + "length": 0 + }, + "outputs": { + "tooffsets": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromstarts": [1], + "fromstops": [1], + "length": 1 + }, + "outputs": { + "tooffsets": [0, 0] + } + }, { "error": true, "message": "stops[i] < starts[i]", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_compact_offsets.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_compact_offsets.cu index feec5ecd4c..73d14b5b52 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_compact_offsets.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_compact_offsets.cu @@ -7,9 +7,12 @@ enum class LISTARRAY_COMPACT_OFFSETS_ERRORS { // BEGIN PYTHON // def f(grid, block, args): // (tooffsets, fromstarts, fromstops, length, invocation_index, err_code) = args -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_compact_offsets_a", tooffsets.dtype, fromstarts.dtype, fromstops.dtype]))(grid, block, (tooffsets, fromstarts, fromstops, length, invocation_index, err_code)) -// tooffsets = inclusive_scan(grid, block, (tooffsets, invocation_index, err_code)) +// scan_in_array = cupy.empty(length, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_compact_offsets_a", tooffsets.dtype, fromstarts.dtype, fromstops.dtype]))(grid, block, (tooffsets, fromstarts, fromstops, length, scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_compact_offsets_b", tooffsets.dtype, fromstarts.dtype, fromstops.dtype]))(grid, block, (tooffsets, fromstarts, fromstops, length, scan_in_array, invocation_index, err_code)) // out["awkward_ListArray_compact_offsets_a", {dtype_specializations}] = None +// out["awkward_ListArray_compact_offsets_b", {dtype_specializations}] = None // END PYTHON template @@ -19,6 +22,30 @@ awkward_ListArray_compact_offsets_a( const C* fromstarts, const U* fromstops, int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < length) { + C start = fromstarts[thread_id]; + U stop = fromstops[thread_id]; + if (stop < start) { + RAISE_ERROR(LISTARRAY_COMPACT_OFFSETS_ERRORS::ERROR_START_STOP) + } + scan_in_array[thread_id] = (stop - start); + } + } +} + +template +__global__ void +awkward_ListArray_compact_offsets_b( + T* tooffsets, + const C* fromstarts, + const U* fromstops, + int64_t length, + int64_t* scan_in_array, uint64_t invocation_index, uint64_t* err_code) { if (err_code[0] == NO_ERROR) { @@ -30,7 +57,7 @@ awkward_ListArray_compact_offsets_a( if (stop < start) { RAISE_ERROR(LISTARRAY_COMPACT_OFFSETS_ERRORS::ERROR_START_STOP) } - tooffsets[thread_id + 1] = (stop - start); + tooffsets[thread_id + 1] = scan_in_array[thread_id]; } } } From 6701dc45ab67c0aea363f0e00f674e5a41a17e13 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Mon, 5 Feb 2024 17:25:35 +0100 Subject: [PATCH 05/19] test: remove XFAIL --- tests-cuda/test_1276_cuda_transfers.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests-cuda/test_1276_cuda_transfers.py b/tests-cuda/test_1276_cuda_transfers.py index 8886fd7b70..6a9e44316a 100644 --- a/tests-cuda/test_1276_cuda_transfers.py +++ b/tests-cuda/test_1276_cuda_transfers.py @@ -115,7 +115,6 @@ def test_tocuda_unimplementedkernels5(): assert ak.to_list(copyback_regulararray) == ak.to_list(regulararray) -@pytest.mark.xfail(reason="awkward_ListArray_broadcast_tooffsets is not implemented") def test_tocuda_unimplementedkernels6(): content = ak.contents.NumpyArray( np.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.10]) @@ -155,7 +154,6 @@ def test_tocuda_unimplementedkernels7(): assert ak.to_list(copyback_recordarray) == ak.to_list(recordarray) -@pytest.mark.xfail(reason="awkward_ListArray_broadcast_tooffsets is not implemented") def test_tocuda_unimplementedkernels8(): content0 = ak.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]]).layout content1 = ak.Array( @@ -201,7 +199,6 @@ def test_tocuda_unimplementedkernels10(): assert ak.to_list(copyback_bytemaskedarray) == ak.to_list(bytemaskedarray) -@pytest.mark.xfail(reason="awkward_ListArray_broadcast_tooffsets is not implemented") def test_tocuda_unimplementedkernels11(): content = ak.contents.NumpyArray( np.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.10]) From 5f6f699690ccdab9867482f13d262837e9728d9b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 5 Feb 2024 16:30:11 +0000 Subject: [PATCH 06/19] style: pre-commit fixes --- .../cuda_kernels/awkward_ListArray_broadcast_tooffsets.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_broadcast_tooffsets.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_broadcast_tooffsets.cu index d6c7f7289c..1b393145af 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_broadcast_tooffsets.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_broadcast_tooffsets.cu @@ -39,7 +39,7 @@ awkward_ListArray_broadcast_tooffsets_a( if (thread_id < offsetslength - 1) { int64_t start = (int64_t)fromstarts[thread_id]; int64_t stop = (int64_t)fromstops[thread_id]; - + if (start != stop && stop > lencontent) { RAISE_ERROR(LISTARRAY_BROADCAST_TOOFFSETS_ERRORS::STOP_GET_LEN) } @@ -75,7 +75,7 @@ awkward_ListArray_broadcast_tooffsets_b( if (thread_id < offsetslength - 1) { int64_t start = (int64_t)fromstarts[thread_id]; int64_t stop = (int64_t)fromstops[thread_id]; - + if (start != stop && stop > lencontent) { RAISE_ERROR(LISTARRAY_BROADCAST_TOOFFSETS_ERRORS::STOP_GET_LEN) } From e6dc15bd01b36bf26460b491abe48c31b34e4489 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Tue, 6 Feb 2024 13:43:26 +0100 Subject: [PATCH 07/19] feat: add awkward_ListArray_getitem_jagged_descend.cu --- dev/generate-kernel-signatures.py | 1 + dev/generate-tests.py | 1 + kernel-test-data.json | 44 ++++++++++- src/awkward/_connect/cuda/__init__.py | 1 + .../awkward_ListArray_compact_offsets.cu | 8 +- ...wkward_ListArray_getitem_jagged_descend.cu | 78 +++++++++++++++++++ 6 files changed, 128 insertions(+), 5 deletions(-) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_descend.cu diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index f6b8a0aab4..7b4f8a53f6 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -53,6 +53,7 @@ "awkward_RegularArray_reduce_nonlocal_preparenext", "awkward_missing_repeat", "awkward_RegularArray_getitem_jagged_expand", + "awkward_ListArray_getitem_jagged_descend", "awkward_ListArray_getitem_jagged_expand", "awkward_ListArray_getitem_jagged_carrylen", "awkward_ListArray_getitem_next_array_advanced", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index ee454d7811..639800e9a4 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -717,6 +717,7 @@ def gencpuunittests(specdict): "awkward_RegularArray_reduce_nonlocal_preparenext", "awkward_missing_repeat", "awkward_RegularArray_getitem_jagged_expand", + "awkward_ListArray_getitem_jagged_descend", "awkward_ListArray_getitem_jagged_expand", "awkward_ListArray_getitem_jagged_carrylen", "awkward_ListArray_getitem_next_array_advanced", diff --git a/kernel-test-data.json b/kernel-test-data.json index dab25f8ff3..b38ab5c2c1 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -12818,8 +12818,50 @@ }, { "name": "awkward_ListArray_getitem_jagged_descend", - "status": false, + "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromstarts": [], + "fromstops": [], + "sliceouterlen": 0, + "slicestarts": [], + "slicestops": [] + }, + "outputs": { + "tooffsets": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromstarts": [0], + "fromstops": [2], + "sliceouterlen": 1, + "slicestarts": [0], + "slicestops": [2] + }, + "outputs": { + "tooffsets": [0, 2] + } + }, + { + "error": true, + "message": "jagged slice inner length differs from array inner length", + "inputs": { + "fromstarts": [0, 2], + "fromstops": [2, 4], + "sliceouterlen": 2, + "slicestarts": [0, 2], + "slicestops": [3, 4] + }, + "outputs": { + "tooffsets": [] + } + }, { "error": false, "message": "", diff --git a/src/awkward/_connect/cuda/__init__.py b/src/awkward/_connect/cuda/__init__.py index 8b3c77835d..fc996e1af7 100644 --- a/src/awkward/_connect/cuda/__init__.py +++ b/src/awkward/_connect/cuda/__init__.py @@ -95,6 +95,7 @@ def fetch_template_specializations(kernel_dict): "awkward_ListArray_broadcast_tooffsets", "awkward_ListArray_compact_offsets", "awkward_ListArray_getitem_jagged_carrylen", + "awkward_ListArray_getitem_jagged_descend", "awkward_ListArray_min_range", "awkward_ListArray_rpad_and_clip_length_axis1", "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_compact_offsets.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_compact_offsets.cu index 73d14b5b52..cdb7a82cb4 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_compact_offsets.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_compact_offsets.cu @@ -1,9 +1,5 @@ // BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE -enum class LISTARRAY_COMPACT_OFFSETS_ERRORS { - ERROR_START_STOP, // message: "stops[i] < starts[i]" -}; - // BEGIN PYTHON // def f(grid, block, args): // (tooffsets, fromstarts, fromstops, length, invocation_index, err_code) = args @@ -15,6 +11,10 @@ enum class LISTARRAY_COMPACT_OFFSETS_ERRORS { // out["awkward_ListArray_compact_offsets_b", {dtype_specializations}] = None // END PYTHON +enum class LISTARRAY_COMPACT_OFFSETS_ERRORS { + ERROR_START_STOP, // message: "stops[i] < starts[i]" +}; + template __global__ void awkward_ListArray_compact_offsets_a( diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_descend.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_descend.cu new file mode 100644 index 0000000000..eb2b6aa0a4 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_descend.cu @@ -0,0 +1,78 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (tooffsets, slicestarts, slicestops, sliceouterlen, fromstarts, fromstops, invocation_index, err_code) = args +// scan_in_array = cupy.empty(sliceouterlen, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_jagged_descend_a", tooffsets.dtype, slicestarts.dtype, slicestops.dtype, fromstarts.dtype, fromstops.dtype]))(grid, block, (tooffsets, slicestarts, slicestops, sliceouterlen, fromstarts, fromstops, scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_jagged_descend_b", tooffsets.dtype, slicestarts.dtype, slicestops.dtype, fromstarts.dtype, fromstops.dtype]))(grid, block, (tooffsets, slicestarts, slicestops, sliceouterlen, fromstarts, fromstops, scan_in_array, invocation_index, err_code)) +// out["awkward_ListArray_getitem_jagged_descend_a", {dtype_specializations}] = None +// out["awkward_ListArray_getitem_jagged_descend_b", {dtype_specializations}] = None +// END PYTHON + +enum class LISTARRAY_GETITEM_JAGGED_DESCEND { + INN_LEN_ERR, // message: "jagged slice inner length differs from array inner length" +}; + +template +__global__ void +awkward_ListArray_getitem_jagged_descend_a( + T* tooffsets, + const C* slicestarts, + const U* slicestops, + int64_t sliceouterlen, + const V* fromstarts, + const W* fromstops, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < sliceouterlen) { + int64_t slicecount = (int64_t)(slicestops[thread_id] - + slicestarts[thread_id]); + int64_t count = (int64_t)(fromstops[thread_id] - + fromstarts[thread_id]); + if (slicecount != count) { + RAISE_ERROR(LISTARRAY_GETITEM_JAGGED_DESCEND::INN_LEN_ERR) + } + scan_in_array[thread_id] = (T)count; + } + } +} + +template +__global__ void +awkward_ListArray_getitem_jagged_descend_b( + T* tooffsets, + const C* slicestarts, + const U* slicestops, + int64_t sliceouterlen, + const V* fromstarts, + const W* fromstops, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { +if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (sliceouterlen == 0) { + tooffsets[0] = 0; + } + else { + tooffsets[0] = slicestarts[0]; + } + + if (thread_id < sliceouterlen) { + int64_t slicecount = (int64_t)(slicestops[thread_id] - + slicestarts[thread_id]); + int64_t count = (int64_t)(fromstops[thread_id] - + fromstarts[thread_id]); + if (slicecount != count) { + RAISE_ERROR(LISTARRAY_GETITEM_JAGGED_DESCEND::INN_LEN_ERR) + } + tooffsets[thread_id + 1] = tooffsets[0] + scan_in_array[thread_id]; + } +} +} From a5671fd57dbd4660b105f5643f679e29cbf6ec3f Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Tue, 6 Feb 2024 14:47:41 +0100 Subject: [PATCH 08/19] feat: add awkward_ListArray_getitem_jagged_numvalid --- dev/generate-kernel-signatures.py | 3 +- dev/generate-tests.py | 3 +- kernel-specification.yml | 3 +- kernel-test-data.json | 90 +++++++++++++++++++ src/awkward/_connect/cuda/__init__.py | 1 + ...wkward_ListArray_getitem_jagged_descend.cu | 6 +- ...kward_ListArray_getitem_jagged_numvalid.cu | 68 ++++++++++++++ 7 files changed, 168 insertions(+), 6 deletions(-) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_numvalid.cu diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index 7b4f8a53f6..d29a0cd7d6 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -53,9 +53,10 @@ "awkward_RegularArray_reduce_nonlocal_preparenext", "awkward_missing_repeat", "awkward_RegularArray_getitem_jagged_expand", + "awkward_ListArray_getitem_jagged_carrylen", "awkward_ListArray_getitem_jagged_descend", "awkward_ListArray_getitem_jagged_expand", - "awkward_ListArray_getitem_jagged_carrylen", + "awkward_ListArray_getitem_jagged_numvalid", "awkward_ListArray_getitem_next_array_advanced", "awkward_ListArray_getitem_next_array", "awkward_ListArray_getitem_next_at", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index 639800e9a4..a1ee3a09dc 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -717,9 +717,10 @@ def gencpuunittests(specdict): "awkward_RegularArray_reduce_nonlocal_preparenext", "awkward_missing_repeat", "awkward_RegularArray_getitem_jagged_expand", + "awkward_ListArray_getitem_jagged_carrylen", "awkward_ListArray_getitem_jagged_descend", "awkward_ListArray_getitem_jagged_expand", - "awkward_ListArray_getitem_jagged_carrylen", + "awkward_ListArray_getitem_jagged_numvalid", "awkward_ListArray_getitem_next_array_advanced", "awkward_ListArray_getitem_next_array", "awkward_ListArray_getitem_next_at", diff --git a/kernel-specification.yml b/kernel-specification.yml index 6a74b8009a..9b6bc649c4 100644 --- a/kernel-specification.yml +++ b/kernel-specification.yml @@ -1515,7 +1515,8 @@ kernels: if slicestop > missinglength: raise ValueError("jagged slice's offsets extend beyond its content") for j in range(slicestart, slicestop): - numvalid[0] = numvalid[0] + 1 if missing[j] >= 0 else 0 + if missing[j] >= 0: + numvalid[0] = numvalid[0] + 1 automatic-tests: false - name: awkward_ListArray_getitem_jagged_shrink diff --git a/kernel-test-data.json b/kernel-test-data.json index b38ab5c2c1..53dc58eda0 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -20433,6 +20433,96 @@ } ] }, + { + "name": "awkward_ListArray_getitem_jagged_numvalid", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "missing": [], + "missinglength": 0, + "slicestarts": [], + "slicestops": [] + }, + "outputs": { + "numvalid": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 4, + "missing": [0, 0, 0, 0], + "missinglength": 4, + "slicestarts": [0, 2, 3, 3], + "slicestops": [2, 3, 3, 4] + }, + "outputs": { + "numvalid": [4] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 4, + "missing": [0, -1, 0, -1, 0, -1, 0], + "missinglength": 7, + "slicestarts": [0, 2, 3, 5], + "slicestops": [2, 3, 5, 7] + }, + "outputs": { + "numvalid": [4] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 4, + "missing": [-1, -1, -1, -1], + "missinglength": 4, + "slicestarts": [0, 2, 3, 3], + "slicestops": [2, 3, 3, 4] + }, + "outputs": { + "numvalid": [0] + } + }, + { + "error": true, + "message": "jagged slice's stops[i] < starts[i]", + "inputs": { + "length": 2, + "missing": [0, 0], + "missinglength": 2, + "slicestarts": [4, 2], + "slicestops": [2, 4] + }, + "outputs": { + "numvalid": [] + } + }, + { + "error": true, + "message": "jagged slice's offsets extend beyond its content", + "inputs": { + "length": 2, + "missing": [0], + "missinglength": 1, + "slicestarts": [0, 2], + "slicestops": [2, 3] + }, + "outputs": { + "numvalid": [] + } + } + ] + }, { "name": "awkward_ListArray_getitem_jagged_shrink", "status": false, diff --git a/src/awkward/_connect/cuda/__init__.py b/src/awkward/_connect/cuda/__init__.py index fc996e1af7..8f4a650d18 100644 --- a/src/awkward/_connect/cuda/__init__.py +++ b/src/awkward/_connect/cuda/__init__.py @@ -96,6 +96,7 @@ def fetch_template_specializations(kernel_dict): "awkward_ListArray_compact_offsets", "awkward_ListArray_getitem_jagged_carrylen", "awkward_ListArray_getitem_jagged_descend", + "awkward_ListArray_getitem_jagged_numvalid", "awkward_ListArray_min_range", "awkward_ListArray_rpad_and_clip_length_axis1", "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_descend.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_descend.cu index eb2b6aa0a4..4a595c4060 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_descend.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_descend.cu @@ -11,7 +11,7 @@ // out["awkward_ListArray_getitem_jagged_descend_b", {dtype_specializations}] = None // END PYTHON -enum class LISTARRAY_GETITEM_JAGGED_DESCEND { +enum class LISTARRAY_GETITEM_JAGGED_DESCEND_ERRORS { INN_LEN_ERR, // message: "jagged slice inner length differs from array inner length" }; @@ -35,7 +35,7 @@ awkward_ListArray_getitem_jagged_descend_a( int64_t count = (int64_t)(fromstops[thread_id] - fromstarts[thread_id]); if (slicecount != count) { - RAISE_ERROR(LISTARRAY_GETITEM_JAGGED_DESCEND::INN_LEN_ERR) + RAISE_ERROR(LISTARRAY_GETITEM_JAGGED_DESCEND_ERRORS::INN_LEN_ERR) } scan_in_array[thread_id] = (T)count; } @@ -70,7 +70,7 @@ if (err_code[0] == NO_ERROR) { int64_t count = (int64_t)(fromstops[thread_id] - fromstarts[thread_id]); if (slicecount != count) { - RAISE_ERROR(LISTARRAY_GETITEM_JAGGED_DESCEND::INN_LEN_ERR) + RAISE_ERROR(LISTARRAY_GETITEM_JAGGED_DESCEND_ERRORS::INN_LEN_ERR) } tooffsets[thread_id + 1] = tooffsets[0] + scan_in_array[thread_id]; } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_numvalid.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_numvalid.cu new file mode 100644 index 0000000000..4fa04a78c5 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_numvalid.cu @@ -0,0 +1,68 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (numvalid, slicestarts, slicestops, length, missing, missinglength, invocation_index, err_code) = args +// scan_in_array = cupy.empty(missinglength, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_jagged_numvalid_a", numvalid.dtype, slicestarts.dtype, slicestops.dtype, missing.dtype]))(grid, block, (numvalid, slicestarts, slicestops, length, missing, missinglength, scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListArray_getitem_jagged_numvalid_b", numvalid.dtype, slicestarts.dtype, slicestops.dtype, missing.dtype]))(grid, block, (numvalid, slicestarts, slicestops, length, missing, missinglength, scan_in_array, invocation_index, err_code)) +// out["awkward_ListArray_getitem_jagged_numvalid_a", {dtype_specializations}] = None +// out["awkward_ListArray_getitem_jagged_numvalid_b", {dtype_specializations}] = None +// END PYTHON + +enum class LISTARRAY_GETITEM_JAGGED_NUMVALID_ERRORS { + STOP_LT_START, // message: "jagged slice's stops[i] < starts[i]" + OFF_GET_CON, // message: "jagged slice's offsets extend beyond its content" +}; + +template +__global__ void +awkward_ListArray_getitem_jagged_numvalid_a( + T* numvalid, + const C* slicestarts, + const U* slicestops, + int64_t length, + const V* missing, + int64_t missinglength, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < length) { + C slicestart = slicestarts[thread_id]; + U slicestop = slicestops[thread_id]; + + if (slicestart != slicestop) { + if (slicestop < slicestart) { + RAISE_ERROR(LISTARRAY_GETITEM_JAGGED_NUMVALID_ERRORS::STOP_LT_START) + } + if (slicestop > missinglength) { + RAISE_ERROR(LISTARRAY_GETITEM_JAGGED_NUMVALID_ERRORS::OFF_GET_CON) + } + for (int64_t j = slicestart; j < slicestop; j++) { + scan_in_array[slicestarts[thread_id] + j - slicestart] = missing[j] >= 0 ? 1 : 0; + } + } + } + } +} + +template +__global__ void +awkward_ListArray_getitem_jagged_numvalid_b( + T* numvalid, + const C* slicestarts, + const U* slicestops, + int64_t length, + const V* missing, + int64_t missinglength, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + *numvalid = length > 0 ? scan_in_array[missinglength - 1] : 0; + } +} From 9430c98d66d08338de5d77f62919d3cc30f1eb64 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Tue, 6 Feb 2024 14:55:35 +0100 Subject: [PATCH 09/19] feat: add awkward_ListArray_getitem_next_range_spreadadvanced --- dev/generate-kernel-signatures.py | 1 + dev/generate-tests.py | 1 + kernel-test-data.json | 2 +- ...Array_getitem_next_range_spreadadvanced.cu | 22 +++++++++++++++++++ 4 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_spreadadvanced.cu diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index d29a0cd7d6..7976c8d8b0 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -63,6 +63,7 @@ "awkward_ListArray_getitem_next_range_counts", "awkward_ListArray_rpad_and_clip_length_axis1", "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", + "awkward_ListArray_getitem_next_range_spreadadvanced", "awkward_NumpyArray_reduce_adjust_starts_64", "awkward_NumpyArray_reduce_adjust_starts_shifts_64", "awkward_RegularArray_getitem_next_at", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index a1ee3a09dc..f2f8938e40 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -727,6 +727,7 @@ def gencpuunittests(specdict): "awkward_ListArray_getitem_next_range_counts", "awkward_ListArray_rpad_and_clip_length_axis1", "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", + "awkward_ListArray_getitem_next_range_spreadadvanced", "awkward_NumpyArray_reduce_adjust_starts_64", "awkward_NumpyArray_reduce_adjust_starts_shifts_64", "awkward_RegularArray_getitem_next_at", diff --git a/kernel-test-data.json b/kernel-test-data.json index 53dc58eda0..ab7fc21c14 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -9675,7 +9675,7 @@ "message": "", "inputs": { "fromadvanced": [], - "fromoffsets": [0], + "fromoffsets": [], "lenstarts": 0 }, "outputs": { diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_spreadadvanced.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_spreadadvanced.cu new file mode 100644 index 0000000000..94e3ec5d07 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_spreadadvanced.cu @@ -0,0 +1,22 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +template +__global__ void +awkward_ListArray_getitem_next_range_spreadadvanced( + T* toadvanced, + const C* fromadvanced, + const U* fromoffsets, + int64_t lenstarts, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < lenstarts) { + C count = fromoffsets[thread_id + 1] - fromoffsets[thread_id]; + int64_t j = threadIdx.x; + for (j = 0; j < count; j++) { + toadvanced[fromoffsets[thread_id] + j] = fromadvanced[thread_id]; + } + } + } +} From 01da944d3a08a0b4a22f61f08461c9cdc4ebcbcc Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Tue, 6 Feb 2024 15:44:04 +0100 Subject: [PATCH 10/19] feat: add awkward_ListOffsetArray_rpad_length_axis1 --- dev/generate-kernel-signatures.py | 1 + dev/generate-tests.py | 1 + kernel-test-data.json | 58 +++++++++++++++++++ src/awkward/_connect/cuda/__init__.py | 1 + ...wkward_ListArray_getitem_jagged_descend.cu | 52 ++++++++--------- ...kward_ListOffsetArray_rpad_length_axis1.cu | 56 ++++++++++++++++++ 6 files changed, 143 insertions(+), 26 deletions(-) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_rpad_length_axis1.cu diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index 7976c8d8b0..eca7a316de 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -87,6 +87,7 @@ "awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64", "awkward_IndexedOptionArray_rpad_and_clip_mask_axis1", "awkward_ListOffsetArray_rpad_and_clip_axis1", + "awkward_ListOffsetArray_rpad_length_axis1", # "awkward_ListOffsetArray_rpad_axis1", "awkward_MaskedArray_getitem_next_jagged_project", "awkward_UnionArray_project", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index f2f8938e40..60a3667c89 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -751,6 +751,7 @@ def gencpuunittests(specdict): "awkward_IndexedArray_reduce_next_nonlocal_nextshifts_fromshifts_64", "awkward_IndexedOptionArray_rpad_and_clip_mask_axis1", "awkward_ListOffsetArray_rpad_and_clip_axis1", + "awkward_ListOffsetArray_rpad_length_axis1", # "awkward_ListOffsetArray_rpad_axis1", "awkward_MaskedArray_getitem_next_jagged_project", "awkward_UnionArray_project", diff --git a/kernel-test-data.json b/kernel-test-data.json index ab7fc21c14..be3ec54156 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -10217,6 +10217,64 @@ } ] }, + { + "name": "awkward_ListOffsetArray_rpad_length_axis1", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromoffsets": [], + "fromlength": 0, + "target": 0 + }, + "outputs": { + "tolength": [0], + "tooffsets": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromoffsets": [1, 3], + "fromlength": 1, + "target": 3 + }, + "outputs": { + "tolength": [3], + "tooffsets": [0, 3] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromoffsets": [0, 1, 2, 3, 5, 7, 11], + "fromlength": 6, + "target": 3 + }, + "outputs": { + "tolength": [19], + "tooffsets": [0, 3, 6, 9, 12, 15, 19] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromoffsets": [0, 1, 2, 3, 4], + "fromlength": 4, + "target": 0 + }, + "outputs": { + "tolength": [4], + "tooffsets": [0, 1, 2, 3, 4] + } + } + ] + }, { "name": "awkward_MaskedArray_getitem_next_jagged_project", "status": true, diff --git a/src/awkward/_connect/cuda/__init__.py b/src/awkward/_connect/cuda/__init__.py index 8f4a650d18..5c9b1806af 100644 --- a/src/awkward/_connect/cuda/__init__.py +++ b/src/awkward/_connect/cuda/__init__.py @@ -100,6 +100,7 @@ def fetch_template_specializations(kernel_dict): "awkward_ListArray_min_range", "awkward_ListArray_rpad_and_clip_length_axis1", "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", + "awkward_ListOffsetArray_rpad_length_axis1", "awkward_MaskedArray_getitem_next_jagged_project", "awkward_RegularArray_getitem_next_array_regularize", "awkward_RegularArray_reduce_local_nextparents", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_descend.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_descend.cu index 4a595c4060..a79b0f07f4 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_descend.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_descend.cu @@ -45,34 +45,34 @@ awkward_ListArray_getitem_jagged_descend_a( template __global__ void awkward_ListArray_getitem_jagged_descend_b( - T* tooffsets, - const C* slicestarts, - const U* slicestops, - int64_t sliceouterlen, - const V* fromstarts, - const W* fromstops, - int64_t* scan_in_array, - uint64_t invocation_index, - uint64_t* err_code) { -if (err_code[0] == NO_ERROR) { - int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + T* tooffsets, + const C* slicestarts, + const U* slicestops, + int64_t sliceouterlen, + const V* fromstarts, + const W* fromstops, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; - if (sliceouterlen == 0) { - tooffsets[0] = 0; - } - else { - tooffsets[0] = slicestarts[0]; - } + if (sliceouterlen == 0) { + tooffsets[0] = 0; + } + else { + tooffsets[0] = slicestarts[0]; + } - if (thread_id < sliceouterlen) { - int64_t slicecount = (int64_t)(slicestops[thread_id] - - slicestarts[thread_id]); - int64_t count = (int64_t)(fromstops[thread_id] - - fromstarts[thread_id]); - if (slicecount != count) { - RAISE_ERROR(LISTARRAY_GETITEM_JAGGED_DESCEND_ERRORS::INN_LEN_ERR) + if (thread_id < sliceouterlen) { + int64_t slicecount = (int64_t)(slicestops[thread_id] - + slicestarts[thread_id]); + int64_t count = (int64_t)(fromstops[thread_id] - + fromstarts[thread_id]); + if (slicecount != count) { + RAISE_ERROR(LISTARRAY_GETITEM_JAGGED_DESCEND_ERRORS::INN_LEN_ERR) + } + tooffsets[thread_id + 1] = tooffsets[0] + scan_in_array[thread_id]; } - tooffsets[thread_id + 1] = tooffsets[0] + scan_in_array[thread_id]; } } -} diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_rpad_length_axis1.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_rpad_length_axis1.cu new file mode 100644 index 0000000000..81c7aa3360 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_rpad_length_axis1.cu @@ -0,0 +1,56 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (tooffsets, fromoffsets, fromlength, target, tolength, invocation_index, err_code) = args +// scan_in_array = cupy.empty(fromlength, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListOffsetArray_rpad_length_axis1_a", tooffsets.dtype, fromoffsets.dtype, tolength.dtype]))(grid, block, (tooffsets, fromoffsets, fromlength, target, tolength, scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListOffsetArray_rpad_length_axis1_b", tooffsets.dtype, fromoffsets.dtype, tolength.dtype]))(grid, block, (tooffsets, fromoffsets, fromlength, target, tolength, scan_in_array, invocation_index, err_code)) +// out["awkward_ListOffsetArray_rpad_length_axis1_a", {dtype_specializations}] = None +// out["awkward_ListOffsetArray_rpad_length_axis1_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_ListOffsetArray_rpad_length_axis1_a( + T* tooffsets, + const C* fromoffsets, + int64_t fromlength, + int64_t target, + U* tolength, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < fromlength) { + int64_t rangeval = fromoffsets[thread_id + 1] - fromoffsets[thread_id]; + int64_t longer = (target < rangeval) ? rangeval : target; + + scan_in_array[thread_id] = longer; + } + } +} + +template +__global__ void +awkward_ListOffsetArray_rpad_length_axis1_b( + T* tooffsets, + const C* fromoffsets, + int64_t fromlength, + int64_t target, + U* tolength, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + tooffsets[0] = 0; + + *tolength = fromlength > 0 ? scan_in_array[fromlength - 1] : 0; + if (thread_id < fromlength) { + tooffsets[thread_id + 1] = (T)scan_in_array[thread_id]; + } + } +} From 00a01e859ca8b99c8599f98391312980d00dfdbf Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Wed, 7 Feb 2024 10:19:38 +0100 Subject: [PATCH 11/19] feat: add awkward_ListOffsetArray_toRegularArray.cpp --- ...awkward_ListOffsetArray_toRegularArray.cpp | 2 +- dev/generate-kernel-signatures.py | 1 + dev/generate-tests.py | 1 + kernel-test-data.json | 57 ++++++++++++++++++- ...kward_ListArray_getitem_jagged_numvalid.cu | 4 +- .../awkward_ListOffsetArray_toRegularArray.cu | 33 +++++++++++ 6 files changed, 94 insertions(+), 4 deletions(-) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_toRegularArray.cu diff --git a/awkward-cpp/src/cpu-kernels/awkward_ListOffsetArray_toRegularArray.cpp b/awkward-cpp/src/cpu-kernels/awkward_ListOffsetArray_toRegularArray.cpp index ecaf34f16f..1d39fe00ad 100644 --- a/awkward-cpp/src/cpu-kernels/awkward_ListOffsetArray_toRegularArray.cpp +++ b/awkward-cpp/src/cpu-kernels/awkward_ListOffsetArray_toRegularArray.cpp @@ -19,7 +19,7 @@ ERROR awkward_ListOffsetArray_toRegularArray( *size = count; } else if (*size != count) { - return failure("cannot convert to RegularArray because subarray lengths are not " "regular", i, kSliceNone, FILENAME(__LINE__)); + return failure("cannot convert to RegularArray because subarray lengths are not regular", i, kSliceNone, FILENAME(__LINE__)); } } if (*size == -1) { diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index eca7a316de..b59f121929 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -88,6 +88,7 @@ "awkward_IndexedOptionArray_rpad_and_clip_mask_axis1", "awkward_ListOffsetArray_rpad_and_clip_axis1", "awkward_ListOffsetArray_rpad_length_axis1", + "awkward_ListOffsetArray_toRegularArray", # "awkward_ListOffsetArray_rpad_axis1", "awkward_MaskedArray_getitem_next_jagged_project", "awkward_UnionArray_project", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index 60a3667c89..54986aee98 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -752,6 +752,7 @@ def gencpuunittests(specdict): "awkward_IndexedOptionArray_rpad_and_clip_mask_axis1", "awkward_ListOffsetArray_rpad_and_clip_axis1", "awkward_ListOffsetArray_rpad_length_axis1", + "awkward_ListOffsetArray_toRegularArray", # "awkward_ListOffsetArray_rpad_axis1", "awkward_MaskedArray_getitem_next_jagged_project", "awkward_UnionArray_project", diff --git a/kernel-test-data.json b/kernel-test-data.json index be3ec54156..1a1b216d4d 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -10158,8 +10158,63 @@ }, { "name": "awkward_ListOffsetArray_toRegularArray", - "status": false, + "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromoffsets": [], + "offsetslength": 0 + }, + "outputs": { + "size": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromoffsets": [0], + "offsetslength": 1 + }, + "outputs": { + "size": [0] + } + }, + { + "error": true, + "message": "offsets must be monotonically increasing", + "inputs": { + "fromoffsets": [0, -1, -2], + "offsetslength": 3 + }, + "outputs": { + "size": [-1] + } + }, + { + "error": true, + "message": "cannot convert to RegularArray because subarray lengths are not regular", + "inputs": { + "fromoffsets": [0, 2, 5], + "offsetslength": 3 + }, + "outputs": { + "size": [-1] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromoffsets": [0, 0, 0, 0], + "offsetslength": 4 + }, + "outputs": { + "size": [0] + } + }, { "error": false, "message": "", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_numvalid.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_numvalid.cu index 4fa04a78c5..c11f2ae3e9 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_numvalid.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_jagged_numvalid.cu @@ -12,8 +12,8 @@ // END PYTHON enum class LISTARRAY_GETITEM_JAGGED_NUMVALID_ERRORS { - STOP_LT_START, // message: "jagged slice's stops[i] < starts[i]" - OFF_GET_CON, // message: "jagged slice's offsets extend beyond its content" + STOP_LT_START, // message: "jagged slice's stops[i] < starts[i]" + OFF_GET_CON, // message: "jagged slice's offsets extend beyond its content" }; template diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_toRegularArray.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_toRegularArray.cu new file mode 100644 index 0000000000..d4423a6df3 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_toRegularArray.cu @@ -0,0 +1,33 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +enum class LISTOFFSETARRAY_TOREGULARARRAY_ERRORS { + OFF_DEC, // message: "offsets must be monotonically increasing" + LEN_NOT_REG, // message: "cannot convert to RegularArray because subarray lengths are not regular" +}; + +template +__global__ void +awkward_ListOffsetArray_toRegularArray( + T* size, + const C* fromoffsets, + int64_t offsetslength, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + *size = offsetslength > 1 ? (int64_t)fromoffsets[1] - (int64_t)fromoffsets[0] : -1; + + if (thread_id < offsetslength - 1) { + int64_t count = (int64_t)fromoffsets[thread_id + 1] - (int64_t)fromoffsets[thread_id]; + if (count < 0) { + RAISE_ERROR(LISTOFFSETARRAY_TOREGULARARRAY_ERRORS::OFF_DEC) + } + else if (*size != count) { + RAISE_ERROR(LISTOFFSETARRAY_TOREGULARARRAY_ERRORS::LEN_NOT_REG) + } + } + if (*size == -1) { + *size = 0; + } + } +} From 843463fb4e0134784db08e03704f9ff9c71a0290 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Wed, 7 Feb 2024 11:07:40 +0100 Subject: [PATCH 12/19] feat: add awkward_ListArray_localindex --- dev/generate-kernel-signatures.py | 1 + dev/generate-tests.py | 1 + kernel-test-data.json | 13 +++++++++++- ...Array_getitem_next_range_spreadadvanced.cu | 3 +-- .../awkward_ListArray_localindex.cu | 21 +++++++++++++++++++ 5 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_localindex.cu diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index b59f121929..5e712f2c26 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -64,6 +64,7 @@ "awkward_ListArray_rpad_and_clip_length_axis1", "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", "awkward_ListArray_getitem_next_range_spreadadvanced", + "awkward_ListArray_localindex", "awkward_NumpyArray_reduce_adjust_starts_64", "awkward_NumpyArray_reduce_adjust_starts_shifts_64", "awkward_RegularArray_getitem_next_at", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index 54986aee98..e8879fdf3b 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -728,6 +728,7 @@ def gencpuunittests(specdict): "awkward_ListArray_rpad_and_clip_length_axis1", "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", "awkward_ListArray_getitem_next_range_spreadadvanced", + "awkward_ListArray_localindex", "awkward_NumpyArray_reduce_adjust_starts_64", "awkward_NumpyArray_reduce_adjust_starts_shifts_64", "awkward_RegularArray_getitem_next_at", diff --git a/kernel-test-data.json b/kernel-test-data.json index 1a1b216d4d..0782834c01 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -17565,8 +17565,19 @@ }, { "name": "awkward_ListArray_localindex", - "status": false, + "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "offsets": [0] + }, + "outputs": { + "toindex": [] + } + }, { "error": false, "message": "", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_spreadadvanced.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_spreadadvanced.cu index 94e3ec5d07..e43586dc00 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_spreadadvanced.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_getitem_next_range_spreadadvanced.cu @@ -13,8 +13,7 @@ awkward_ListArray_getitem_next_range_spreadadvanced( int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < lenstarts) { C count = fromoffsets[thread_id + 1] - fromoffsets[thread_id]; - int64_t j = threadIdx.x; - for (j = 0; j < count; j++) { + for (int64_t j = 0; j < count; j++) { toadvanced[fromoffsets[thread_id] + j] = fromadvanced[thread_id]; } } diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_localindex.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_localindex.cu new file mode 100644 index 0000000000..7ee9dabf00 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListArray_localindex.cu @@ -0,0 +1,21 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +template +__global__ void +awkward_ListArray_localindex( + T* toindex, + const C* offsets, + int64_t length, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < length) { + int64_t start = (int64_t)offsets[thread_id]; + int64_t stop = (int64_t)offsets[thread_id + 1]; + for (int64_t j = start; j < stop; j++) { + toindex[j] = j - start; + } + } + } +} From 28188e292ffb1c0708261cdbac65f9e10c8e07cc Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Thu, 8 Feb 2024 13:02:36 +0100 Subject: [PATCH 13/19] feat: add awkward_ListOffsetArray_reduce_local_nextparents_64.cu --- dev/generate-kernel-signatures.py | 1 + dev/generate-tests.py | 13 +++- kernel-test-data.json | 74 ++++++++++++++++++- ...OffsetArray_reduce_local_nextparents_64.cu | 23 ++++++ .../awkward_UnionArray_project.cu | 2 + .../cuda_kernels/awkward_reduce_count_64.cu | 17 +++-- 6 files changed, 118 insertions(+), 12 deletions(-) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_local_nextparents_64.cu diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index 5e712f2c26..3eb7de8664 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -93,6 +93,7 @@ # "awkward_ListOffsetArray_rpad_axis1", "awkward_MaskedArray_getitem_next_jagged_project", "awkward_UnionArray_project", + "awkward_ListOffsetArray_reduce_local_nextparents_64", "awkward_UnionArray_simplify", "awkward_UnionArray_simplify_one", "awkward_reduce_argmax", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index e8879fdf3b..30e9f66425 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -228,8 +228,10 @@ def getdtypes(args): if "List" in typename: count = typename.count("List") typename = gettypename(typename) - if typename == "bool" or typename == "float": + if typename == "bool": typename = typename + "_" + if typename == "float": + typename = typename + "32" if count == 1: dtypes.append("cupy." + typename) elif count == 2: @@ -757,6 +759,7 @@ def gencpuunittests(specdict): # "awkward_ListOffsetArray_rpad_axis1", "awkward_MaskedArray_getitem_next_jagged_project", "awkward_UnionArray_project", + "awkward_ListOffsetArray_reduce_local_nextparents_64", "awkward_UnionArray_simplify", "awkward_UnionArray_simplify_one", "awkward_reduce_argmax", @@ -850,8 +853,10 @@ def gencudakerneltests(specdict): if "List" in typename: count = typename.count("List") typename = gettypename(typename) - if typename == "bool" or typename == "float": + if typename == "bool": typename = typename + "_" + if typename == "float": + typename = typename + "32" if count == 1: f.write( " " * 4 @@ -991,8 +996,10 @@ def gencudaunittests(specdict): if "List" in typename: count = typename.count("List") typename = gettypename(typename) - if typename == "bool" or typename == "float": + if typename == "bool": typename = typename + "_" + if typename == "float": + typename = typename + "32" if count == 1: if i < num_outputs: f.write( diff --git a/kernel-test-data.json b/kernel-test-data.json index 0782834c01..d340806928 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -18739,6 +18739,78 @@ } ] }, + { + "name": "awkward_ListOffsetArray_reduce_local_nextparents_64", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "offsets": [] + }, + "outputs": { + "nextparents": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 1, + "offsets": [0, 1] + }, + "outputs": { + "nextparents": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 18, + "offsets": [0, 0, 1, 3, 3, 6, 8, 9, 9, 9, 10, 10, 12, 15, 15, 17, 18, 18, 18] + }, + "outputs": { + "nextparents": [1, 2, 2, 4, 4, 4, 5, 5, 6, 9, 11, 11, 12, 12, 12, 14, 14, 15] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 4, + "offsets": [0, 1, 3, 5, 5] + }, + "outputs": { + "nextparents": [0, 1, 1, 2, 2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 5, + "offsets": [0, 1, 1, 3, 5, 7] + }, + "outputs": { + "nextparents": [0, 2, 2, 3, 3, 4, 4] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 5, + "offsets": [0, 0, 1, 1, 2, 2] + }, + "outputs": { + "nextparents": [1, 3] + } + } + ] + }, { "name": "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", "status": true, @@ -22248,7 +22320,7 @@ }, { "name": "awkward_reduce_sum", - "status": false, + "status": true, "tests": [ { "error": false, diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_local_nextparents_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_local_nextparents_64.cu new file mode 100644 index 0000000000..03de70aea8 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_local_nextparents_64.cu @@ -0,0 +1,23 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +template +__global__ void +awkward_ListOffsetArray_reduce_local_nextparents_64( + T* nextparents, + const C* offsets, + int64_t length, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + + if (thread_id < length) { + int64_t initialoffset = (int64_t)(offsets[0]); + for (int64_t j = (int64_t)(offsets[thread_id]) - initialoffset; + j < offsets[thread_id + 1] - initialoffset; + j++) { + nextparents[j] = thread_id; + } + } + } +} diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu index b758010d14..fcb0be4fb0 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_project.cu @@ -1,3 +1,5 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + // BEGIN PYTHON // def f(grid, block, args): // (lenout, tocarry, fromtags, fromindex, length, which, invocation_index, err_code) = args diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_count_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_count_64.cu index 0870da2ff7..311f04012b 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_count_64.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_count_64.cu @@ -33,14 +33,15 @@ awkward_reduce_count_64_a( template __global__ void -awkward_reduce_count_64_b(T* toptr, - const bool* fromptr, - const U* parents, - int64_t lenparents, - int64_t outlength, - uint64_t* atomicAdd_toptr, - uint64_t invocation_index, - uint64_t* err_code) { +awkward_reduce_count_64_b( + T* toptr, + const bool* fromptr, + const U* parents, + int64_t lenparents, + int64_t outlength, + uint64_t* atomicAdd_toptr, + uint64_t invocation_index, + uint64_t* err_code) { if (err_code[0] == NO_ERROR) { int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < lenparents) { From ef3c6ed7ee76abece86a96d73f08bd410892e93c Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Thu, 8 Feb 2024 13:37:02 +0100 Subject: [PATCH 14/19] feat: add awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64.cu --- dev/generate-kernel-signatures.py | 1 + dev/generate-tests.py | 1 + kernel-test-data.json | 28 +++++++++- src/awkward/_connect/cuda/__init__.py | 1 + ...reduce_nonlocal_maxcount_offsetscopy_64.cu | 53 +++++++++++++++++++ 5 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64.cu diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index 3eb7de8664..a3ff427302 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -94,6 +94,7 @@ "awkward_MaskedArray_getitem_next_jagged_project", "awkward_UnionArray_project", "awkward_ListOffsetArray_reduce_local_nextparents_64", + "awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64", "awkward_UnionArray_simplify", "awkward_UnionArray_simplify_one", "awkward_reduce_argmax", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index 30e9f66425..fe51c5ae81 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -760,6 +760,7 @@ def gencpuunittests(specdict): "awkward_MaskedArray_getitem_next_jagged_project", "awkward_UnionArray_project", "awkward_ListOffsetArray_reduce_local_nextparents_64", + "awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64", "awkward_UnionArray_simplify", "awkward_UnionArray_simplify_one", "awkward_reduce_argmax", diff --git a/kernel-test-data.json b/kernel-test-data.json index d340806928..b1ca661cce 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -10408,8 +10408,32 @@ }, { "name": "awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64", - "status": false, + "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "offsets": [0] + }, + "outputs": { + "maxcount": [0], + "offsetscopy": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 1, + "offsets": [0, 2] + }, + "outputs": { + "maxcount": [2], + "offsetscopy": [0, 2] + } + }, { "error": false, "message": "", @@ -18748,7 +18772,7 @@ "message": "", "inputs": { "length": 0, - "offsets": [] + "offsets": [0] }, "outputs": { "nextparents": [] diff --git a/src/awkward/_connect/cuda/__init__.py b/src/awkward/_connect/cuda/__init__.py index 5c9b1806af..8f196e992d 100644 --- a/src/awkward/_connect/cuda/__init__.py +++ b/src/awkward/_connect/cuda/__init__.py @@ -100,6 +100,7 @@ def fetch_template_specializations(kernel_dict): "awkward_ListArray_min_range", "awkward_ListArray_rpad_and_clip_length_axis1", "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", + "awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64", "awkward_ListOffsetArray_rpad_length_axis1", "awkward_MaskedArray_getitem_next_jagged_project", "awkward_RegularArray_getitem_next_array_regularize", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64.cu new file mode 100644 index 0000000000..13a79096c8 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64.cu @@ -0,0 +1,53 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (maxcount, offsetscopy, offsets, length, invocation_index, err_code) = args +// scan_in_array = cupy.empty(length + 1, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64_a", maxcount.dtype, offsetscopy.dtype, offsets.dtype]))(grid, block, (maxcount, offsetscopy, offsets, length, scan_in_array, invocation_index, err_code)) +// if length > 0: +// scan_in_array[0] = cupy.max(scan_in_array) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64_b", maxcount.dtype, offsetscopy.dtype, offsets.dtype]))(grid, block, (maxcount, offsetscopy, offsets, length, scan_in_array, invocation_index, err_code)) +// out["awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64_a", {dtype_specializations}] = None +// out["awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64_a( + T* maxcount, + C* offsetscopy, + const U* offsets, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + offsetscopy[0] = offsets[0]; + + if (thread_id < length) { + if(thread_id == 0) { + scan_in_array[0] = 0; + } + int64_t count = (offsets[thread_id + 1] - offsets[thread_id]); + scan_in_array[thread_id + 1] = count; + offsetscopy[thread_id + 1] = offsets[thread_id + 1]; + } + } +} + +template +__global__ void +awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64_b( + T* maxcount, + C* offsetscopy, + const U* offsets, + int64_t length, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + *maxcount = length > 0 ? scan_in_array[0]: 0; + } +} From f98983f870bafd45d671b9e24cf9ab15c8c861b5 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Fri, 9 Feb 2024 09:32:46 +0100 Subject: [PATCH 15/19] feat: add awkward_UnionArray_regular_index_getsize --- dev/generate-kernel-signatures.py | 1 + dev/generate-tests.py | 1 + kernel-test-data.json | 35 ++++++++++++++++++- src/awkward/_connect/cuda/__init__.py | 1 + ...wkward_UnionArray_regular_index_getsize.cu | 23 ++++++++++++ 5 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_regular_index_getsize.cu diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index a3ff427302..4539b9dbe2 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -95,6 +95,7 @@ "awkward_UnionArray_project", "awkward_ListOffsetArray_reduce_local_nextparents_64", "awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64", + "awkward_UnionArray_regular_index_getsize", "awkward_UnionArray_simplify", "awkward_UnionArray_simplify_one", "awkward_reduce_argmax", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index fe51c5ae81..96781e0e3b 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -761,6 +761,7 @@ def gencpuunittests(specdict): "awkward_UnionArray_project", "awkward_ListOffsetArray_reduce_local_nextparents_64", "awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64", + "awkward_UnionArray_regular_index_getsize", "awkward_UnionArray_simplify", "awkward_UnionArray_simplify_one", "awkward_reduce_argmax", diff --git a/kernel-test-data.json b/kernel-test-data.json index b1ca661cce..003a9469a7 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -14728,8 +14728,41 @@ }, { "name": "awkward_UnionArray_regular_index_getsize", - "status": false, + "status": true, "tests": [ + { + "error": false, + "message": "", + "inputs": { + "fromtags": [], + "length": 0 + }, + "outputs": { + "size": [1] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromtags": [0], + "length": 1 + }, + "outputs": { + "size": [1] + } + }, + { + "error": false, + "message": "", + "inputs": { + "fromtags": [-1], + "length": 1 + }, + "outputs": { + "size": [1] + } + }, { "error": false, "message": "", diff --git a/src/awkward/_connect/cuda/__init__.py b/src/awkward/_connect/cuda/__init__.py index 8f196e992d..721e8bc1cd 100644 --- a/src/awkward/_connect/cuda/__init__.py +++ b/src/awkward/_connect/cuda/__init__.py @@ -107,6 +107,7 @@ def fetch_template_specializations(kernel_dict): "awkward_RegularArray_reduce_local_nextparents", "awkward_RegularArray_reduce_nonlocal_preparenext", "awkward_UnionArray_project", + "awkward_UnionArray_regular_index_getsize", "awkward_reduce_count_64", "awkward_reduce_sum", "awkward_reduce_sum_int32_bool_64", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_regular_index_getsize.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_regular_index_getsize.cu new file mode 100644 index 0000000000..13fdf0937c --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_regular_index_getsize.cu @@ -0,0 +1,23 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (size, fromtags, length, invocation_index, err_code) = args +// if length > 0: +// size[0] = cupy.max(fromtags) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_UnionArray_regular_index_getsize_a", size.dtype, fromtags.dtype]))(grid, block, (size, fromtags, length, invocation_index, err_code)) +// out["awkward_UnionArray_regular_index_getsize_a", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_UnionArray_regular_index_getsize_a( + T* size, + const C* fromtags, + int64_t length, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + *size = length > 0 && *size > 0 ? *size + 1 : 1; + } +} From 33525078c31536378fd598d2bf19f7ae9687a47d Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Fri, 9 Feb 2024 09:37:08 +0100 Subject: [PATCH 16/19] refactor: remove _a from the name of the kernel --- src/awkward/_connect/cuda/__init__.py | 1 - .../awkward_UnionArray_regular_index_getsize.cu | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/awkward/_connect/cuda/__init__.py b/src/awkward/_connect/cuda/__init__.py index 721e8bc1cd..8f196e992d 100644 --- a/src/awkward/_connect/cuda/__init__.py +++ b/src/awkward/_connect/cuda/__init__.py @@ -107,7 +107,6 @@ def fetch_template_specializations(kernel_dict): "awkward_RegularArray_reduce_local_nextparents", "awkward_RegularArray_reduce_nonlocal_preparenext", "awkward_UnionArray_project", - "awkward_UnionArray_regular_index_getsize", "awkward_reduce_count_64", "awkward_reduce_sum", "awkward_reduce_sum_int32_bool_64", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_regular_index_getsize.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_regular_index_getsize.cu index 13fdf0937c..819cc59175 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_regular_index_getsize.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_UnionArray_regular_index_getsize.cu @@ -5,13 +5,13 @@ // (size, fromtags, length, invocation_index, err_code) = args // if length > 0: // size[0] = cupy.max(fromtags) -// cuda_kernel_templates.get_function(fetch_specialization(["awkward_UnionArray_regular_index_getsize_a", size.dtype, fromtags.dtype]))(grid, block, (size, fromtags, length, invocation_index, err_code)) -// out["awkward_UnionArray_regular_index_getsize_a", {dtype_specializations}] = None +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_UnionArray_regular_index_getsize", size.dtype, fromtags.dtype]))(grid, block, (size, fromtags, length, invocation_index, err_code)) +// out["awkward_UnionArray_regular_index_getsize", {dtype_specializations}] = None // END PYTHON template __global__ void -awkward_UnionArray_regular_index_getsize_a( +awkward_UnionArray_regular_index_getsize( T* size, const C* fromtags, int64_t length, From aa5cf58da3a1e1da36d2a362d966bba147f99381 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Fri, 9 Feb 2024 11:55:14 +0100 Subject: [PATCH 17/19] test: generate tests when outarg is also an inarg --- dev/generate-tests.py | 111 +++++++++++++++++++++++++++++------------- kernel-test-data.json | 85 +++++++++++++++++++++++++++++++- 2 files changed, 160 insertions(+), 36 deletions(-) diff --git a/dev/generate-tests.py b/dev/generate-tests.py index 96781e0e3b..4b04cd9f8a 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -288,8 +288,7 @@ def unittestmap(): def getunittests(test_inputs, test_outputs): unit_tests = {**test_outputs, **test_inputs} - num_outputs = len(test_outputs) - return unit_tests, num_outputs + return unit_tests def gettypename(spectype): @@ -604,15 +603,13 @@ def gencpuunittests(specdict): funcName = ( "def test_unit_cpu" + spec.name + "_" + str(num) + "():\n" ) - unit_tests, num_outputs = getunittests( - test["inputs"], test["outputs"] - ) + unit_tests = getunittests(test["inputs"], test["outputs"]) flag = checkuint(unit_tests.items(), spec.args) range = checkintrange(unit_tests.items(), test["error"], spec.args) if flag and range: num += 1 f.write(funcName) - for i, (arg, val) in enumerate(unit_tests.items()): + for arg, val in test["outputs"].items(): typename = remove_const( next( argument @@ -620,16 +617,38 @@ def gencpuunittests(specdict): if argument.name == arg ).typename ) - if i < num_outputs: - f.write( - " " * 4 - + arg - + " = " - + str([gettypeval(typename)] * len(val)) - + "\n" - ) - else: - f.write(" " * 4 + arg + " = " + str(val) + "\n") + f.write( + " " * 4 + + arg + + " = " + + str([gettypeval(typename)] * len(val)) + + "\n" + ) + if "List" in typename: + count = typename.count("List") + typename = gettypename(typename) + if count == 1: + f.write( + " " * 4 + + f"{arg} = (ctypes.c_{typename}*len({arg}))(*{arg})\n" + ) + elif count == 2: + f.write( + " " * 4 + + "{0} = ctypes.pointer(ctypes.cast((ctypes.c_{1}*len({0}[0]))(*{0}[0]),ctypes.POINTER(ctypes.c_{1})))\n".format( + arg, typename + ) + ) + for arg, val in test["inputs"].items(): + typename = remove_const( + next( + argument + for argument in spec.args + if argument.name == arg + ).typename + ) + + f.write(" " * 4 + arg + " = " + str(val) + "\n") if "List" in typename: count = typename.count("List") typename = gettypename(typename) @@ -973,9 +992,7 @@ def gencudaunittests(specdict): "def test_unit_cuda" + spec.name + "_" + str(num) + "():\n" ) dtypes = getdtypes(spec.args) - unit_tests, num_outputs = getunittests( - test["inputs"], test["outputs"] - ) + unit_tests = getunittests(test["inputs"], test["outputs"]) flag = checkuint(unit_tests.items(), spec.args) range = checkintrange(unit_tests.items(), test["error"], spec.args) if flag and range: @@ -985,7 +1002,7 @@ def gencudaunittests(specdict): "@pytest.mark.skip(reason='Kernel is not implemented properly')\n" ) f.write(funcName) - for i, (arg, val) in enumerate(unit_tests.items()): + for arg, val in test["outputs"].items(): typename = remove_const( next( argument @@ -1003,22 +1020,45 @@ def gencudaunittests(specdict): if typename == "float": typename = typename + "32" if count == 1: - if i < num_outputs: - f.write( - " " * 4 - + "{} = cupy.array({}, dtype=cupy.{})\n".format( - arg, - [gettypeval(typename)] * len(val), - typename, - ) + f.write( + " " * 4 + + "{} = cupy.array({}, dtype=cupy.{})\n".format( + arg, + [gettypeval(typename)] * len(val), + typename, ) - else: - f.write( - " " * 4 - + "{} = cupy.array({}, dtype=cupy.{})\n".format( - arg, val, typename - ) + ) + elif count == 2: + f.write( + " " * 4 + + "{} = cupy.array({}, dtype=cupy.{})\n".format( + arg, val, typename + ) + ) + for arg, val in test["inputs"].items(): + typename = remove_const( + next( + argument + for argument in spec.args + if argument.name == arg + ).typename + ) + if "List" not in typename: + f.write(" " * 4 + arg + " = " + str(val) + "\n") + if "List" in typename: + count = typename.count("List") + typename = gettypename(typename) + if typename == "bool": + typename = typename + "_" + if typename == "float": + typename = typename + "32" + if count == 1: + f.write( + " " * 4 + + "{} = cupy.array({}, dtype=cupy.{})\n".format( + arg, val, typename ) + ) elif count == 2: f.write( " " * 4 @@ -1101,7 +1141,8 @@ def genunittests(): for key in test["outputs"]: line += key + " = " + key + "," for key in test["inputs"]: - line += key + " = " + key + "," + if key not in test["outputs"]: + line += key + " = " + key + "," line = line[0 : len(line) - 1] line += ")\n" if test["error"]: diff --git a/kernel-test-data.json b/kernel-test-data.json index 003a9469a7..07452b0e15 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -1040,6 +1040,89 @@ } ] }, + { + "name": "awkward_Index_nones_as_index", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "length": 0, + "toindex": [] + }, + "outputs": { + "toindex": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 1, + "toindex": [0] + }, + "outputs": { + "toindex": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 1, + "toindex": [-1] + }, + "outputs": { + "toindex": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 3, + "toindex": [-1, -1, -1] + }, + "outputs": { + "toindex": [0, 1, 2] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 5, + "toindex": [0, 1, 2, 3, 4] + }, + "outputs": { + "toindex": [0, 1, 2, 3, 4] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 5, + "toindex": [0, -1, -1, 1, -1] + }, + "outputs": { + "toindex": [0, 2, 3, 1, 4] + } + }, + { + "error": false, + "message": "", + "inputs": { + "length": 7, + "toindex": [-1, 0, -1, -1, 1, -1, 2] + }, + "outputs": { + "toindex": [3, 0, 4, 5, 1, 6, 2] + } + } + ] + }, { "name": "awkward_BitMaskedArray_to_ByteMaskedArray", "status": true, @@ -22377,7 +22460,7 @@ }, { "name": "awkward_reduce_sum", - "status": true, + "status": false, "tests": [ { "error": false, From 64bd34c7d5962bc36eaec88f56df085d0de33f77 Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Fri, 9 Feb 2024 15:07:04 +0100 Subject: [PATCH 18/19] feat: add awkward_ListOffsetArray_drop_none_indexes --- dev/generate-kernel-signatures.py | 1 + dev/generate-tests.py | 1 + kernel-test-data.json | 84 +++++++++++++++++++ src/awkward/_connect/cuda/__init__.py | 1 + ...kward_ListOffsetArray_drop_none_indexes.cu | 76 +++++++++++++++++ 5 files changed, 163 insertions(+) create mode 100644 src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_drop_none_indexes.cu diff --git a/dev/generate-kernel-signatures.py b/dev/generate-kernel-signatures.py index 4539b9dbe2..34020e80f2 100644 --- a/dev/generate-kernel-signatures.py +++ b/dev/generate-kernel-signatures.py @@ -93,6 +93,7 @@ # "awkward_ListOffsetArray_rpad_axis1", "awkward_MaskedArray_getitem_next_jagged_project", "awkward_UnionArray_project", + "awkward_ListOffsetArray_drop_none_indexes", "awkward_ListOffsetArray_reduce_local_nextparents_64", "awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64", "awkward_UnionArray_regular_index_getsize", diff --git a/dev/generate-tests.py b/dev/generate-tests.py index 4b04cd9f8a..7a60ff3408 100644 --- a/dev/generate-tests.py +++ b/dev/generate-tests.py @@ -778,6 +778,7 @@ def gencpuunittests(specdict): # "awkward_ListOffsetArray_rpad_axis1", "awkward_MaskedArray_getitem_next_jagged_project", "awkward_UnionArray_project", + "awkward_ListOffsetArray_drop_none_indexes", "awkward_ListOffsetArray_reduce_local_nextparents_64", "awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64", "awkward_UnionArray_regular_index_getsize", diff --git a/kernel-test-data.json b/kernel-test-data.json index 07452b0e15..6909753b3f 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -9869,6 +9869,90 @@ } ] }, + { + "name": "awkward_ListOffsetArray_drop_none_indexes", + "status": true, + "tests": [ + { + "error": false, + "message": "", + "inputs": { + "noneindexes": [], + "length_indexes": 0, + "fromoffsets": [], + "length_offsets": 0 + }, + "outputs": { + "tooffsets": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "noneindexes": [0], + "length_indexes": 1, + "fromoffsets": [], + "length_offsets": 0 + }, + "outputs": { + "tooffsets": [] + } + }, + { + "error": false, + "message": "", + "inputs": { + "noneindexes": [], + "length_indexes": 0, + "fromoffsets": [0], + "length_offsets": 1 + }, + "outputs": { + "tooffsets": [0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "noneindexes": [-1, -1, -1, -1, -1, -1, -1], + "length_indexes": 7, + "fromoffsets": [0, 2, 3, 5, 7], + "length_offsets": 5 + }, + "outputs": { + "tooffsets": [0, 0, 0, 0, 0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "noneindexes": [-1, 0, -1, 0, 0, -1, 0], + "length_indexes": 7, + "fromoffsets": [0, 2, 3, 5, 7], + "length_offsets": 5 + }, + "outputs": { + "tooffsets": [0, 1, 1, 3, 4] + } + }, + { + "error": false, + "message": "", + "inputs": { + "noneindexes": [0, 0, 0, 0, 0, 0], + "length_indexes": 6, + "fromoffsets": [0, 2, 3, 5, 6], + "length_offsets": 5 + }, + "outputs": { + "tooffsets": [0, 2, 3, 5, 6] + } + } + ] + }, { "name": "awkward_ListOffsetArray_flatten_offsets", "status": true, diff --git a/src/awkward/_connect/cuda/__init__.py b/src/awkward/_connect/cuda/__init__.py index 8f196e992d..d99f08b49a 100644 --- a/src/awkward/_connect/cuda/__init__.py +++ b/src/awkward/_connect/cuda/__init__.py @@ -99,6 +99,7 @@ def fetch_template_specializations(kernel_dict): "awkward_ListArray_getitem_jagged_numvalid", "awkward_ListArray_min_range", "awkward_ListArray_rpad_and_clip_length_axis1", + "awkward_ListOffsetArray_drop_none_indexes", "awkward_ListOffsetArray_reduce_nonlocal_nextstarts_64", "awkward_ListOffsetArray_reduce_nonlocal_maxcount_offsetscopy_64", "awkward_ListOffsetArray_rpad_length_axis1", diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_drop_none_indexes.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_drop_none_indexes.cu new file mode 100644 index 0000000000..7721045281 --- /dev/null +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_drop_none_indexes.cu @@ -0,0 +1,76 @@ +// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +// BEGIN PYTHON +// def f(grid, block, args): +// (tooffsets, noneindexes, fromoffsets, length_offsets, length_indexes, invocation_index, err_code) = args +// scan_in_array = cupy.empty(length_indexes, dtype=cupy.int64) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListOffsetArray_drop_none_indexes_a", tooffsets.dtype, noneindexes.dtype, fromoffsets.dtype]))(grid, block, (tooffsets, noneindexes, fromoffsets, length_offsets, length_indexes, scan_in_array, invocation_index, err_code)) +// scan_in_array = cupy.cumsum(scan_in_array) +// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ListOffsetArray_drop_none_indexes_b", tooffsets.dtype, noneindexes.dtype, fromoffsets.dtype]))(grid, block, (tooffsets, noneindexes, fromoffsets, length_offsets, length_indexes, scan_in_array, invocation_index, err_code)) +// out["awkward_ListOffsetArray_drop_none_indexes_a", {dtype_specializations}] = None +// out["awkward_ListOffsetArray_drop_none_indexes_b", {dtype_specializations}] = None +// END PYTHON + +template +__global__ void +awkward_ListOffsetArray_drop_none_indexes_a( + T* tooffsets, + const C* noneindexes, + const U* fromoffsets, + int64_t length_offsets, + int64_t length_indexes, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t offset1 = 0; + int64_t offset2 = 0; + + if (thread_id < length_offsets) { + if (thread_id == 0) { + int64_t offset1 = 0; + } + else { + int64_t offset1 = fromoffsets[thread_id - 1]; + } + int64_t offset2 = fromoffsets[thread_id]; + for (int j = offset1; j < offset2; j++) { + if (noneindexes[j] < 0) { + scan_in_array[j] = 1; + } else { + scan_in_array[j] = 0; + } + } + } + } +} + +template +__global__ void +awkward_ListOffsetArray_drop_none_indexes_b( + T* tooffsets, + const C* noneindexes, + const U* fromoffsets, + int64_t length_offsets, + int64_t length_indexes, + int64_t* scan_in_array, + uint64_t invocation_index, + uint64_t* err_code) { + if (err_code[0] == NO_ERROR) { + int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t offset1 = 0; + + if (thread_id < length_offsets) { + if (thread_id == 0) { + int64_t offset1 = 0; + } + else { + int64_t offset1 = fromoffsets[thread_id - 1]; + } + int64_t offset2 = fromoffsets[thread_id]; + int64_t nr_of_nones = offset2 - offset1 > 0 ? scan_in_array[fromoffsets[thread_id] - 1] : 0; + tooffsets[thread_id] = fromoffsets[thread_id] - nr_of_nones; + } + } +} From 4fdad74016ac2e6f01d882459b0fd21d07c8a50d Mon Sep 17 00:00:00 2001 From: ManasviGoyal Date: Fri, 9 Feb 2024 15:22:37 +0100 Subject: [PATCH 19/19] fix: awkward_ListOffsetArray_drop_none_indexes --- kernel-test-data.json | 39 +++++++++++++++++++ ...kward_ListOffsetArray_drop_none_indexes.cu | 2 +- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/kernel-test-data.json b/kernel-test-data.json index 6909753b3f..5b7f85f687 100644 --- a/kernel-test-data.json +++ b/kernel-test-data.json @@ -9950,6 +9950,45 @@ "outputs": { "tooffsets": [0, 2, 3, 5, 6] } + }, + { + "error": false, + "message": "", + "inputs": { + "noneindexes": [0, 0, 0, 0, 0, 0], + "length_indexes": 6, + "fromoffsets": [0, 0, 0, 0, 0], + "length_offsets": 5 + }, + "outputs": { + "tooffsets": [0, 0, 0, 0, 0] + } + }, + { + "error": false, + "message": "", + "inputs": { + "noneindexes": [0, 0, 0, 0, 0, 0], + "length_indexes": 6, + "fromoffsets": [0, 2, 3, 3, 6], + "length_offsets": 5 + }, + "outputs": { + "tooffsets": [0, 2, 3, 3, 6] + } + }, + { + "error": false, + "message": "", + "inputs": { + "noneindexes": [-1, -1, -1, -1, -1, -1], + "length_indexes": 6, + "fromoffsets": [0, 2, 3, 3, 6], + "length_offsets": 5 + }, + "outputs": { + "tooffsets": [0, 0, 0, 0, 0] + } } ] }, diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_drop_none_indexes.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_drop_none_indexes.cu index 7721045281..e53d830e27 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_drop_none_indexes.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_ListOffsetArray_drop_none_indexes.cu @@ -69,7 +69,7 @@ awkward_ListOffsetArray_drop_none_indexes_b( int64_t offset1 = fromoffsets[thread_id - 1]; } int64_t offset2 = fromoffsets[thread_id]; - int64_t nr_of_nones = offset2 - offset1 > 0 ? scan_in_array[fromoffsets[thread_id] - 1] : 0; + int64_t nr_of_nones = thread_id > 0 ? scan_in_array[fromoffsets[thread_id] - 1] : 0; tooffsets[thread_id] = fromoffsets[thread_id] - nr_of_nones; } }