From bc7a1cc1f1a19dae9bab79f11a443c7d9bd69936 Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Tue, 29 Mar 2022 15:19:06 -0400 Subject: [PATCH] Alias deprecated TexRefInputIterator to TexObjInputIterator. Removes usage of deprecated CUDART texture reference APIs without breaking CUB's API. Other changes: - Remove obsolete checks for CUDART_VERSION >= 5.5. - Split `test_iterator.cu` to create a test that only handles the deprecated `TexRefInputIterator` API. - Reduces the scope of our deprecation suppressions. - Also removed the deprecation suppression from our CMake logic. - Enable testing for `TexObjInputIterator` without `CUB_CDP`. - Add a meaningful implementation for `ostream << TexObjInputIterator`. --- cmake/CubBuildCompilerTargets.cmake | 10 - cub/iterator/tex_obj_input_iterator.cuh | 3 + cub/iterator/tex_ref_input_iterator.cuh | 340 +----------------------- test/test_iterator.cu | 81 +----- test/test_iterator_deprecated.cu | 306 +++++++++++++++++++++ 5 files changed, 316 insertions(+), 424 deletions(-) create mode 100644 test/test_iterator_deprecated.cu diff --git a/cmake/CubBuildCompilerTargets.cmake b/cmake/CubBuildCompilerTargets.cmake index 398dba0ad4..45b640ecbc 100644 --- a/cmake/CubBuildCompilerTargets.cmake +++ b/cmake/CubBuildCompilerTargets.cmake @@ -57,11 +57,6 @@ function(cub_build_compiler_targets) # This complains about functions in CUDA system headers when used with nvcc. append_option_if_available("-Wno-unused-function" cxx_compile_options) - - # CUB uses deprecated texture functions (cudaBindTexture, etc). These - # need to be replaced, but silence the warnings for now. - # This can be removed once NVIDIA/cub#191 is fixed. - append_option_if_available("-Wno-deprecated-declarations" cxx_compile_options) endif() if ("GNU" STREQUAL "${CMAKE_CXX_COMPILER_ID}") @@ -115,10 +110,5 @@ function(cub_build_compiler_targets) $<$:-Xcudafe=--promote_warnings> # Don't complain about deprecated GPU targets. $<$:-Wno-deprecated-gpu-targets> - # Suppress deprecation warnings in nvcc < 11.5. - # TexRefInputIterator uses deprecated CUDART APIs, see NVIDIA/cub#191. - # After 11.5, we will suppress these in-code via pragma, but for older nvcc - # we have to use the big hammer: - $<$,$,11.5>>:-Wno-deprecated-declarations> ) endfunction() diff --git a/cub/iterator/tex_obj_input_iterator.cuh b/cub/iterator/tex_obj_input_iterator.cuh index e289eefcf8..e7659d43c9 100644 --- a/cub/iterator/tex_obj_input_iterator.cuh +++ b/cub/iterator/tex_obj_input_iterator.cuh @@ -306,6 +306,9 @@ public: /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { + os << "cub::TexObjInputIterator( ptr=" << itr.ptr + << ", offset=" << itr.tex_offset + << ", tex_obj=" << itr.tex_obj << " )"; return os; } diff --git a/cub/iterator/tex_ref_input_iterator.cuh b/cub/iterator/tex_ref_input_iterator.cuh index ecb9b5be4f..bcabe2c9b3 100644 --- a/cub/iterator/tex_ref_input_iterator.cuh +++ b/cub/iterator/tex_ref_input_iterator.cuh @@ -33,146 +33,20 @@ #pragma once -#include -#include +#include +#include -#include "../thread/thread_load.cuh" -#include "../thread/thread_store.cuh" -#include "../util_device.cuh" -#include "../util_debug.cuh" -#include "../config.cuh" - -#if (CUDART_VERSION >= 5050) || defined(DOXYGEN_ACTIVE) // This iterator is compatible with CUDA 5.5 and newer - -#if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer - #include - #include -#endif // THRUST_VERSION +#include CUB_NAMESPACE_BEGIN - -/****************************************************************************** - * Static file-scope Tesla/Fermi-style texture references - *****************************************************************************/ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document - -// Anonymous namespace -namespace { - -/// Global texture reference specialized by type -template -struct CUB_DEPRECATED IteratorTexRef -{ - -// This class uses the deprecated cudaBindTexture / cudaUnbindTexture APIs. -// See issue NVIDIA/cub#191. -// Turn off deprecation warnings when compiling class implementation in favor -// of deprecating TexRefInputIterator instead. -#if CUB_HOST_COMPILER == CUB_HOST_COMPILER_MSVC -#pragma warning(disable:4996) -#elif CUB_HOST_COMPILER == CUB_HOST_COMPILER_GCC || \ - CUB_HOST_COMPILER == CUB_HOST_COMPILER_CLANG -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - -#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ -#pragma nv_diagnostic push -#pragma nv_diag_suppress 1215 -#endif - - /// And by unique ID - template - struct TexId - { - // Largest texture word we can use in device - typedef typename UnitWord::DeviceWord DeviceWord; - typedef typename UnitWord::TextureWord TextureWord; - - // Number of texture words per T - enum { - DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord), - TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord) - }; - - // Texture reference type - typedef texture TexRef; - - // Texture reference - static TexRef ref; - - /// Bind texture - static cudaError_t BindTexture(void *d_in, size_t &bytes, size_t &offset) - { - if (d_in) - { - cudaChannelFormatDesc tex_desc = cudaCreateChannelDesc(); - ref.channelDesc = tex_desc; - return (CubDebug(cudaBindTexture(&offset, ref, d_in, bytes))); - } - - return cudaSuccess; - } - - /// Unbind texture - static cudaError_t UnbindTexture() - { - return CubDebug(cudaUnbindTexture(ref)); - } - - /// Fetch element - template - static __device__ __forceinline__ T Fetch(Distance tex_offset) - { - DeviceWord temp[DEVICE_MULTIPLE]; - TextureWord *words = reinterpret_cast(temp); - - #pragma unroll - for (int i = 0; i < TEXTURE_MULTIPLE; ++i) - { - words[i] = tex1Dfetch(ref, (tex_offset * TEXTURE_MULTIPLE) + i); - } - - return reinterpret_cast(temp); - } - }; -}; - -// Texture reference definitions -template -template -typename IteratorTexRef::template TexId::TexRef IteratorTexRef::template TexId::ref = 0; - -// Re-enable deprecation warnings: -#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ -#pragma nv_diagnostic pop -#endif - -#if CUB_HOST_COMPILER == CUB_HOST_COMPILER_MSVC -#pragma warning(default:4996) -#elif CUB_HOST_COMPILER == CUB_HOST_COMPILER_GCC || \ - CUB_HOST_COMPILER == CUB_HOST_COMPILER_CLANG -#pragma GCC diagnostic pop -#endif - -} // Anonymous namespace - - -#endif // DOXYGEN_SHOULD_SKIP_THIS - - - /** * \addtogroup UtilIterator * @{ */ - - /** - * \brief A random-access input wrapper for dereferencing array values through texture cache. Uses older Tesla/Fermi-style texture references. + * \brief A random-access input wrapper for dereferencing array values through texture cache. * * \deprecated [Since 1.13.0] The CUDA texture management APIs used by * TexRefInputIterator are deprecated. Use cub::TexObjInputIterator instead. @@ -225,212 +99,10 @@ typename IteratorTexRef::template TexId::TexRef IteratorTexRef: */ template < typename T, - int UNIQUE_ID, + int /*UNIQUE_ID*/, typename OffsetT = ptrdiff_t> -class CUB_DEPRECATED TexRefInputIterator -{ - -// This class uses the deprecated cudaBindTexture / cudaUnbindTexture APIs. -// See issue NVIDIA/cub#191. -// Turn off deprecation warnings when compiling class implementation in favor -// of deprecating TexRefInputIterator instead. -#if CUB_HOST_COMPILER == CUB_HOST_COMPILER_MSVC -#pragma warning(disable:4996) -#elif CUB_HOST_COMPILER == CUB_HOST_COMPILER_GCC || \ - CUB_HOST_COMPILER == CUB_HOST_COMPILER_CLANG -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - -#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ -#pragma nv_diagnostic push -#pragma nv_diag_suppress 1215 -#endif - -public: - - // Required iterator traits - typedef TexRefInputIterator self_type; ///< My own type - typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another - typedef T value_type; ///< The type of the element the iterator can point to - typedef T* pointer; ///< The type of a pointer to an element the iterator can point to - typedef T reference; ///< The type of a reference to an element the iterator can point to - -#if (THRUST_VERSION >= 100700) - // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods - typedef typename THRUST_NS_QUALIFIER::detail::iterator_facade_category< - THRUST_NS_QUALIFIER::device_system_tag, - THRUST_NS_QUALIFIER::random_access_traversal_tag, - value_type, - reference - >::type iterator_category; ///< The iterator category -#else - typedef std::random_access_iterator_tag iterator_category; ///< The iterator category -#endif // THRUST_VERSION - -private: - - T* ptr; - difference_type tex_offset; - - // Texture reference wrapper (old Tesla/Fermi-style textures) - typedef typename IteratorTexRef::template TexId TexId; - -public: -/* - /// Constructor - __host__ __device__ __forceinline__ TexRefInputIterator() - : - ptr(NULL), - tex_offset(0) - {} -*/ - /// Use this iterator to bind \p ptr with a texture reference - template - cudaError_t BindTexture( - QualifiedT *ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment - size_t bytes, ///< Number of bytes in the range - size_t tex_offset = 0) ///< OffsetT (in items) from \p ptr denoting the position of the iterator - { - this->ptr = const_cast::type *>(ptr); - size_t offset; - cudaError_t retval = TexId::BindTexture(this->ptr + tex_offset, bytes, offset); - this->tex_offset = (difference_type) (offset / sizeof(QualifiedT)); - return retval; - } - - /// Unbind this iterator from its texture reference - cudaError_t UnbindTexture() - { - return TexId::UnbindTexture(); - } - - /// Postfix increment - __host__ __device__ __forceinline__ self_type operator++(int) - { - self_type retval = *this; - tex_offset++; - return retval; - } - - /// Prefix increment - __host__ __device__ __forceinline__ self_type operator++() - { - tex_offset++; - return *this; - } - - /// Indirection - __host__ __device__ __forceinline__ reference operator*() const - { - if (CUB_IS_HOST_CODE) { - // Simply dereference the pointer on the host - return ptr[tex_offset]; - } else { - #if CUB_INCLUDE_DEVICE_CODE - // Use the texture reference - return TexId::Fetch(tex_offset); - #else - // This is dead code that will never be executed. It is here - // only to avoid warnings about missing returns. - return ptr[tex_offset]; - #endif - } - } - - /// Addition - template - __host__ __device__ __forceinline__ self_type operator+(Distance n) const - { - self_type retval; - retval.ptr = ptr; - retval.tex_offset = tex_offset + n; - return retval; - } - - /// Addition assignment - template - __host__ __device__ __forceinline__ self_type& operator+=(Distance n) - { - tex_offset += n; - return *this; - } - - /// Subtraction - template - __host__ __device__ __forceinline__ self_type operator-(Distance n) const - { - self_type retval; - retval.ptr = ptr; - retval.tex_offset = tex_offset - n; - return retval; - } - - /// Subtraction assignment - template - __host__ __device__ __forceinline__ self_type& operator-=(Distance n) - { - tex_offset -= n; - return *this; - } - - /// Distance - __host__ __device__ __forceinline__ difference_type operator-(self_type other) const - { - return tex_offset - other.tex_offset; - } - - /// Array subscript - template - __host__ __device__ __forceinline__ reference operator[](Distance n) const - { - self_type offset = (*this) + n; - return *offset; - } - - /// Structure dereference - __host__ __device__ __forceinline__ pointer operator->() - { - return &(*(*this)); - } - - /// Equal to - __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) - { - return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset)); - } - - /// Not equal to - __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) - { - return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset)); - } - - /// ostream operator - friend std::ostream& operator<<(std::ostream& os, const self_type& /*itr*/) - { - return os; - } - -// Re-enable deprecation warnings: - -#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ -#pragma nv_diagnostic pop -#endif - -#if CUB_HOST_COMPILER == CUB_HOST_COMPILER_MSVC -#pragma warning(default:4996) -#elif CUB_HOST_COMPILER == CUB_HOST_COMPILER_GCC || \ - CUB_HOST_COMPILER == CUB_HOST_COMPILER_CLANG -#pragma GCC diagnostic pop -#endif - -}; - - +using TexRefInputIterator CUB_DEPRECATED = cub::TexObjInputIterator; /** @} */ // end group UtilIterator CUB_NAMESPACE_END - -#endif // CUDART_VERSION diff --git a/test/test_iterator.cu b/test/test_iterator.cu index 1eb6f10730..1b5a6c6d03 100644 --- a/test/test_iterator.cu +++ b/test/test_iterator.cu @@ -43,7 +43,6 @@ #include #include #include -#include #include #include @@ -374,70 +373,6 @@ void TestTexObj() if (d_dummy) CubDebugExit(g_allocator.DeviceFree(d_dummy)); } - -#if CUDART_VERSION >= 5050 - -/** - * Test tex-ref texture iterator - */ -template -void TestTexRef() -{ - printf("\nTesting tex-ref iterator on type %s\n", typeid(T).name()); fflush(stdout); - - // - // Test iterator manipulation in kernel - // - - constexpr int TEST_VALUES = 11000; - constexpr unsigned int DUMMY_OFFSET = 500; - constexpr unsigned int DUMMY_TEST_VALUES = TEST_VALUES - DUMMY_OFFSET; - - T *h_data = new T[TEST_VALUES]; - for (int i = 0; i < TEST_VALUES; ++i) - { - RandomBits(h_data[i]); - } - - // Allocate device arrays - T *d_data = NULL; - T *d_dummy = NULL; - CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); - CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); - - CubDebugExit(g_allocator.DeviceAllocate((void**)&d_dummy, sizeof(T) * DUMMY_TEST_VALUES)); - CubDebugExit(cudaMemcpy(d_dummy, h_data + DUMMY_OFFSET, sizeof(T) * DUMMY_TEST_VALUES, cudaMemcpyHostToDevice)); - - // Initialize reference data - T h_reference[8]; - h_reference[0] = h_data[0]; // Value at offset 0 - h_reference[1] = h_data[100]; // Value at offset 100 - h_reference[2] = h_data[1000]; // Value at offset 1000 - h_reference[3] = h_data[10000]; // Value at offset 10000 - h_reference[4] = h_data[1]; // Value at offset 1 - h_reference[5] = h_data[21]; // Value at offset 21 - h_reference[6] = h_data[11]; // Value at offset 11 - h_reference[7] = h_data[0]; // Value at offset 0; - - // Create and bind ref-based test iterator - TexRefInputIterator d_ref_itr; - CubDebugExit(d_ref_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES)); - - // Create and bind dummy iterator of same type to check with interferance - TexRefInputIterator d_ref_itr2; - CubDebugExit(d_ref_itr2.BindTexture((CastT*) d_dummy, sizeof(T) * DUMMY_TEST_VALUES)); - - Test(d_ref_itr, h_reference); - - CubDebugExit(d_ref_itr.UnbindTexture()); - CubDebugExit(d_ref_itr2.UnbindTexture()); - - if (h_data) delete[] h_data; - if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); - if (d_dummy) CubDebugExit(g_allocator.DeviceFree(d_dummy)); -} - - /** * Test texture transform iterator */ @@ -492,11 +427,6 @@ void TestTexTransform() if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); } -#endif // CUDART_VERSION - - - - /** * Run non-integer tests */ @@ -505,17 +435,8 @@ void Test(Int2Type /* is_integer */) { TestModified(); TestTransform(); - -#if CUB_CDP - // Test tex-obj iterators if CUDA dynamic parallelism enabled - TestTexObj(type_string); -#endif // CUB_CDP - -#if CUDART_VERSION >= 5050 - // Test tex-ref iterators for CUDA 5.5 - TestTexRef(); + TestTexObj(); TestTexTransform(); -#endif // CUDART_VERSION } /** diff --git a/test/test_iterator_deprecated.cu b/test/test_iterator_deprecated.cu new file mode 100644 index 0000000000..b42febe51b --- /dev/null +++ b/test/test_iterator_deprecated.cu @@ -0,0 +1,306 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Test of iterator utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +// This file tests deprecated CUB APIs. Silence deprecation warnings: +#define CUB_IGNORE_DEPRECATED_API + +#include +#include +#include + +#include +#include +#include + +#include "test_util.h" + +using namespace cub; + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; +CachingDeviceAllocator g_allocator(true); + +//--------------------------------------------------------------------- +// Test kernels +//--------------------------------------------------------------------- + +/** + * Test random access input iterator + */ +template < + typename InputIteratorT, + typename T> +__global__ void Kernel( + InputIteratorT d_in, + T *d_out, + InputIteratorT *d_itrs) +{ + d_out[0] = *d_in; // Value at offset 0 + d_out[1] = d_in[100]; // Value at offset 100 + d_out[2] = *(d_in + 1000); // Value at offset 1000 + d_out[3] = *(d_in + 10000); // Value at offset 10000 + + d_in++; + d_out[4] = d_in[0]; // Value at offset 1 + + d_in += 20; + d_out[5] = d_in[0]; // Value at offset 21 + d_itrs[0] = d_in; // Iterator at offset 21 + + d_in -= 10; + d_out[6] = d_in[0]; // Value at offset 11; + + d_in -= 11; + d_out[7] = d_in[0]; // Value at offset 0 + d_itrs[1] = d_in; // Iterator at offset 0 +} + + + +//--------------------------------------------------------------------- +// Host testing subroutines +//--------------------------------------------------------------------- + + +/** + * Run iterator test on device + */ +template < + typename InputIteratorT, + typename T, + int TEST_VALUES> +void Test( + InputIteratorT d_in, + T (&h_reference)[TEST_VALUES]) +{ + // Allocate device arrays + T *d_out = NULL; + InputIteratorT *d_itrs = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(T) * TEST_VALUES)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_itrs, sizeof(InputIteratorT) * 2)); + + int compare; + + // Run unguarded kernel + Kernel<<<1, 1>>>(d_in, d_out, d_itrs); + + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + + // Check results + compare = CompareDeviceResults(h_reference, d_out, TEST_VALUES, g_verbose, g_verbose); + printf("\tValues: %s\n", (compare) ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Check iterator at offset 21 + InputIteratorT h_itr = d_in + 21; + compare = CompareDeviceResults(&h_itr, d_itrs, 1, g_verbose, g_verbose); + printf("\tIterators: %s\n", (compare) ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Check iterator at offset 0 + compare = CompareDeviceResults(&d_in, d_itrs + 1, 1, g_verbose, g_verbose); + printf("\tIterators: %s\n", (compare) ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Cleanup + if (d_out) + { + CubDebugExit(g_allocator.DeviceFree(d_out)); + } + if (d_itrs) + { + CubDebugExit(g_allocator.DeviceFree(d_itrs)); + } +} + +/** + * Test tex-ref texture iterator + */ +template +void TestTexRef() +{ + printf("\nTesting tex-ref iterator on type %s\n", typeid(T).name()); fflush(stdout); + + // + // Test iterator manipulation in kernel + // + + constexpr int TEST_VALUES = 11000; + constexpr unsigned int DUMMY_OFFSET = 500; + constexpr unsigned int DUMMY_TEST_VALUES = TEST_VALUES - DUMMY_OFFSET; + + T *h_data = new T[TEST_VALUES]; + for (int i = 0; i < TEST_VALUES; ++i) + { + RandomBits(h_data[i]); + } + + // Allocate device arrays + T *d_data = NULL; + T *d_dummy = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); + CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); + + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_dummy, sizeof(T) * DUMMY_TEST_VALUES)); + CubDebugExit(cudaMemcpy(d_dummy, h_data + DUMMY_OFFSET, sizeof(T) * DUMMY_TEST_VALUES, cudaMemcpyHostToDevice)); + + // Initialize reference data + T h_reference[8]; + h_reference[0] = h_data[0]; // Value at offset 0 + h_reference[1] = h_data[100]; // Value at offset 100 + h_reference[2] = h_data[1000]; // Value at offset 1000 + h_reference[3] = h_data[10000]; // Value at offset 10000 + h_reference[4] = h_data[1]; // Value at offset 1 + h_reference[5] = h_data[21]; // Value at offset 21 + h_reference[6] = h_data[11]; // Value at offset 11 + h_reference[7] = h_data[0]; // Value at offset 0; + + // Create and bind ref-based test iterator + TexRefInputIterator d_ref_itr; + CubDebugExit(d_ref_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES)); + + // Create and bind dummy iterator of same type to check with interferance + TexRefInputIterator d_ref_itr2; + CubDebugExit(d_ref_itr2.BindTexture((CastT*) d_dummy, sizeof(T) * DUMMY_TEST_VALUES)); + + Test(d_ref_itr, h_reference); + + CubDebugExit(d_ref_itr.UnbindTexture()); + CubDebugExit(d_ref_itr2.UnbindTexture()); + + if (h_data) + { + delete[] h_data; + } + if (d_data) + { + CubDebugExit(g_allocator.DeviceFree(d_data)); + } + if (d_dummy) + { + CubDebugExit(g_allocator.DeviceFree(d_dummy)); + } +} + +/** + * Run non-integer tests + */ +template +void Test() +{ + TestTexRef(); +} + +/** + * Run tests + */ +template +void Test() +{ + // Test non-const type + Test(); + + // Test non-const type + Test(); +} + + +/** + * Main + */ +int main(int argc, char** argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Evaluate different data types + Test(); + Test(); + Test(); + Test(); + Test(); + Test(); + Test(); + + Test(); + Test(); + Test(); + Test(); + Test(); + Test(); + Test(); + + Test(); + Test(); + Test(); + Test(); + Test(); + Test(); + Test(); + + Test(); + Test(); + Test(); + Test(); + Test(); + Test(); + Test(); + + Test(); + Test(); + + printf("\nTest complete\n"); + fflush(stdout); + + return 0; +}