diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e993f548e1d..1e1ad94ab0b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -72,7 +72,7 @@ repos: args: ['-fallback-style=none'] - id: cmake-format name: cmake-format - entry: bash cpp/scripts/run-cmake-format.sh cmake-format + entry: ./cpp/scripts/run-cmake-format.sh cmake-format language: python types: [cmake] # Note that pre-commit autoupdate does not update the versions @@ -81,7 +81,7 @@ repos: - cmake-format==0.6.11 - id: cmake-lint name: cmake-lint - entry: bash cpp/scripts/run-cmake-format.sh cmake-lint + entry: ./cpp/scripts/run-cmake-format.sh cmake-lint language: python types: [cmake] # Note that pre-commit autoupdate does not update the versions diff --git a/CHANGELOG.md b/CHANGELOG.md index 4dd94954a82..3ccc1ccbc8b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,9 +6,262 @@ Please see https://github.com/rapidsai/cudf/releases/tag/v22.02.00a for the late Please see https://github.com/rapidsai/cudf/releases/tag/v21.12.00a for the latest changes to this development branch. -# cuDF 21.10.00 (Date TBD) +# cuDF 21.10.00 (7 Oct 2021) -Please see https://github.com/rapidsai/cudf/releases/tag/v21.10.00a for the latest changes to this development branch. +## 🚨 Breaking Changes + +- Remove Cython APIs for table view generation ([#9199](https://github.com/rapidsai/cudf/pull/9199)) [@vyasr](https://github.com/vyasr) +- Upgrade `pandas` version in `cudf` ([#9147](https://github.com/rapidsai/cudf/pull/9147)) [@galipremsagar](https://github.com/galipremsagar) +- Make AST operators nullable ([#9096](https://github.com/rapidsai/cudf/pull/9096)) [@vyasr](https://github.com/vyasr) +- Remove the option to pass data types as strings to `read_csv` and `read_json` ([#9079](https://github.com/rapidsai/cudf/pull/9079)) [@vuule](https://github.com/vuule) +- Update JNI java CSV APIs to not use deprecated API ([#9066](https://github.com/rapidsai/cudf/pull/9066)) [@revans2](https://github.com/revans2) +- Support additional format specifiers in from_timestamps ([#9047](https://github.com/rapidsai/cudf/pull/9047)) [@davidwendt](https://github.com/davidwendt) +- Expose expression base class publicly and simplify public AST API ([#9045](https://github.com/rapidsai/cudf/pull/9045)) [@vyasr](https://github.com/vyasr) +- Add support for struct type in ORC writer ([#9025](https://github.com/rapidsai/cudf/pull/9025)) [@vuule](https://github.com/vuule) +- Remove aliases of various api.types APIs from utils.dtypes. 
([#9011](https://github.com/rapidsai/cudf/pull/9011)) [@vyasr](https://github.com/vyasr) +- Java bindings for conditional join output sizes ([#9002](https://github.com/rapidsai/cudf/pull/9002)) [@jlowe](https://github.com/jlowe) +- Move compute_column API out of ast namespace ([#8957](https://github.com/rapidsai/cudf/pull/8957)) [@vyasr](https://github.com/vyasr) +- `cudf.dtype` function ([#8949](https://github.com/rapidsai/cudf/pull/8949)) [@shwina](https://github.com/shwina) +- Refactor Frame reductions ([#8944](https://github.com/rapidsai/cudf/pull/8944)) [@vyasr](https://github.com/vyasr) +- Add nested column selection to parquet reader ([#8933](https://github.com/rapidsai/cudf/pull/8933)) [@devavret](https://github.com/devavret) +- JNI Aggregation Type Changes ([#8919](https://github.com/rapidsai/cudf/pull/8919)) [@revans2](https://github.com/revans2) +- Add groupby_aggregation and groupby_scan_aggregation classes and force their usage. ([#8906](https://github.com/rapidsai/cudf/pull/8906)) [@nvdbaranec](https://github.com/nvdbaranec) +- Expand CSV and JSON reader APIs to accept `dtypes` as a vector or map of `data_type` objects ([#8856](https://github.com/rapidsai/cudf/pull/8856)) [@vuule](https://github.com/vuule) +- Change cudf docs theme to pydata theme ([#8746](https://github.com/rapidsai/cudf/pull/8746)) [@galipremsagar](https://github.com/galipremsagar) +- Enable compiled binary ops in libcudf, python and java ([#8741](https://github.com/rapidsai/cudf/pull/8741)) [@karthikeyann](https://github.com/karthikeyann) +- Make groupby transform-like op order match original data order ([#8720](https://github.com/rapidsai/cudf/pull/8720)) [@isVoid](https://github.com/isVoid) + +## 🐛 Bug Fixes + +- `fixed_point` `cudf::groupby` for `mean` aggregation ([#9296](https://github.com/rapidsai/cudf/pull/9296)) [@codereport](https://github.com/codereport) +- Fix `interleave_columns` when the input string lists column having empty child column ([#9292](https://github.com/rapidsai/cudf/pull/9292)) [@ttnghia](https://github.com/ttnghia) +- Update nvcomp to include fixes for installation of headers ([#9276](https://github.com/rapidsai/cudf/pull/9276)) [@devavret](https://github.com/devavret) +- Fix Java column leak in testParquetWriteMap ([#9271](https://github.com/rapidsai/cudf/pull/9271)) [@jlowe](https://github.com/jlowe) +- Fix call to thrust::reduce_by_key in argmin/argmax libcudf groupby ([#9263](https://github.com/rapidsai/cudf/pull/9263)) [@davidwendt](https://github.com/davidwendt) +- Fixing empty input to getMapValue crashing ([#9262](https://github.com/rapidsai/cudf/pull/9262)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Fix duplicate names issue in `MultiIndex.deserialize ` ([#9258](https://github.com/rapidsai/cudf/pull/9258)) [@galipremsagar](https://github.com/galipremsagar) +- `Dataframe.sort_index` optimizations ([#9238](https://github.com/rapidsai/cudf/pull/9238)) [@galipremsagar](https://github.com/galipremsagar) +- Temporarily disabling problematic test in parquet writer ([#9230](https://github.com/rapidsai/cudf/pull/9230)) [@devavret](https://github.com/devavret) +- Explicitly disable groupby on unsupported key types. 
([#9227](https://github.com/rapidsai/cudf/pull/9227)) [@mythrocks](https://github.com/mythrocks) +- Fix `gather` for sliced input structs column ([#9218](https://github.com/rapidsai/cudf/pull/9218)) [@ttnghia](https://github.com/ttnghia) +- Fix JNI code for left semi and anti joins ([#9207](https://github.com/rapidsai/cudf/pull/9207)) [@jlowe](https://github.com/jlowe) +- Only install thrust when using a non 'system' version ([#9206](https://github.com/rapidsai/cudf/pull/9206)) [@robertmaynard](https://github.com/robertmaynard) +- Remove zlib from libcudf public CMake dependencies ([#9204](https://github.com/rapidsai/cudf/pull/9204)) [@robertmaynard](https://github.com/robertmaynard) +- Fix out-of-bounds memory read in orc gpuEncodeOrcColumnData ([#9196](https://github.com/rapidsai/cudf/pull/9196)) [@davidwendt](https://github.com/davidwendt) +- Fix `gather()` for `STRUCT` inputs with no nulls in members. ([#9194](https://github.com/rapidsai/cudf/pull/9194)) [@mythrocks](https://github.com/mythrocks) +- get_cucollections properly uses rapids_cpm_find ([#9189](https://github.com/rapidsai/cudf/pull/9189)) [@robertmaynard](https://github.com/robertmaynard) +- rapids-export correctly reference build code block and doc strings ([#9186](https://github.com/rapidsai/cudf/pull/9186)) [@robertmaynard](https://github.com/robertmaynard) +- Fix logic while parsing the sum statistic for numerical orc columns ([#9183](https://github.com/rapidsai/cudf/pull/9183)) [@ayushdg](https://github.com/ayushdg) +- Add handling for nulls in `dask_cudf.sorting.quantile_divisions` ([#9171](https://github.com/rapidsai/cudf/pull/9171)) [@charlesbluca](https://github.com/charlesbluca) +- Approximate overflow detection in ORC statistics ([#9163](https://github.com/rapidsai/cudf/pull/9163)) [@vuule](https://github.com/vuule) +- Use decimal precision metadata when reading from parquet files ([#9162](https://github.com/rapidsai/cudf/pull/9162)) [@shwina](https://github.com/shwina) +- Fix variable name in Java build script ([#9161](https://github.com/rapidsai/cudf/pull/9161)) [@jlowe](https://github.com/jlowe) +- Import rapids-cmake modules using the correct cmake variable. ([#9149](https://github.com/rapidsai/cudf/pull/9149)) [@robertmaynard](https://github.com/robertmaynard) +- Fix conditional joins with empty left table ([#9146](https://github.com/rapidsai/cudf/pull/9146)) [@vyasr](https://github.com/vyasr) +- Fix joining on indexes with duplicate level names ([#9137](https://github.com/rapidsai/cudf/pull/9137)) [@shwina](https://github.com/shwina) +- Fixes missing child column name in dtype while reading ORC file. 
([#9134](https://github.com/rapidsai/cudf/pull/9134)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Apply type metadata after column is slice-copied ([#9131](https://github.com/rapidsai/cudf/pull/9131)) [@isVoid](https://github.com/isVoid) +- Fix a bug: inner_join_size return zero if build table is empty ([#9128](https://github.com/rapidsai/cudf/pull/9128)) [@PointKernel](https://github.com/PointKernel) +- Fix multi hive-partition parquet reading in dask-cudf ([#9122](https://github.com/rapidsai/cudf/pull/9122)) [@rjzamora](https://github.com/rjzamora) +- Support null literals in expressions ([#9117](https://github.com/rapidsai/cudf/pull/9117)) [@vyasr](https://github.com/vyasr) +- Fix cudf::hash_join output size for struct joins ([#9107](https://github.com/rapidsai/cudf/pull/9107)) [@jlowe](https://github.com/jlowe) +- Import fix ([#9104](https://github.com/rapidsai/cudf/pull/9104)) [@shwina](https://github.com/shwina) +- Fix cudf::strings::is_fixed_point checking of overflow for decimal32 ([#9093](https://github.com/rapidsai/cudf/pull/9093)) [@davidwendt](https://github.com/davidwendt) +- Fix branch_stack calculation in `row_bit_count()` ([#9076](https://github.com/rapidsai/cudf/pull/9076)) [@mythrocks](https://github.com/mythrocks) +- Fetch rapids-cmake to work around cuCollection cmake issue ([#9075](https://github.com/rapidsai/cudf/pull/9075)) [@jlowe](https://github.com/jlowe) +- Fix compilation errors in groupby benchmarks. ([#9072](https://github.com/rapidsai/cudf/pull/9072)) [@nvdbaranec](https://github.com/nvdbaranec) +- Preserve float16 upscaling ([#9069](https://github.com/rapidsai/cudf/pull/9069)) [@galipremsagar](https://github.com/galipremsagar) +- Fix memcheck read error in libcudf contiguous_split ([#9067](https://github.com/rapidsai/cudf/pull/9067)) [@davidwendt](https://github.com/davidwendt) +- Add support for reading ORC file with no row group index ([#9060](https://github.com/rapidsai/cudf/pull/9060)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Various multiindex related fixes ([#9036](https://github.com/rapidsai/cudf/pull/9036)) [@shwina](https://github.com/shwina) +- Avoid rebuilding cython in build.sh ([#9034](https://github.com/rapidsai/cudf/pull/9034)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add support for percentile dispatch in `dask_cudf` ([#9031](https://github.com/rapidsai/cudf/pull/9031)) [@galipremsagar](https://github.com/galipremsagar) +- cudf resolve nvcc 11.0 compiler crashes during codegen ([#9028](https://github.com/rapidsai/cudf/pull/9028)) [@robertmaynard](https://github.com/robertmaynard) +- Fetch correct grouping keys `agg` of dask groupby ([#9022](https://github.com/rapidsai/cudf/pull/9022)) [@galipremsagar](https://github.com/galipremsagar) +- Allow `where()` to work with a Series and `other=cudf.NA` ([#9019](https://github.com/rapidsai/cudf/pull/9019)) [@sarahyurick](https://github.com/sarahyurick) +- Use correct index when returning Series from `GroupBy.apply()` ([#9016](https://github.com/rapidsai/cudf/pull/9016)) [@charlesbluca](https://github.com/charlesbluca) +- Fix `Dataframe` indexer setitem when array is passed ([#9006](https://github.com/rapidsai/cudf/pull/9006)) [@galipremsagar](https://github.com/galipremsagar) +- Fix ORC reading of files with struct columns that have null values ([#9005](https://github.com/rapidsai/cudf/pull/9005)) 
[@vuule](https://github.com/vuule) +- Ensure JNI native libraries load when CompiledExpression loads ([#8997](https://github.com/rapidsai/cudf/pull/8997)) [@jlowe](https://github.com/jlowe) +- Fix memory read error in get_dremel_data in page_enc.cu ([#8995](https://github.com/rapidsai/cudf/pull/8995)) [@davidwendt](https://github.com/davidwendt) +- Fix memory write error in get_list_child_to_list_row_mapping utility ([#8994](https://github.com/rapidsai/cudf/pull/8994)) [@davidwendt](https://github.com/davidwendt) +- Fix debug compile error for csv_test.cpp ([#8981](https://github.com/rapidsai/cudf/pull/8981)) [@davidwendt](https://github.com/davidwendt) +- Fix memory read/write error in concatenate_lists_ignore_null ([#8978](https://github.com/rapidsai/cudf/pull/8978)) [@davidwendt](https://github.com/davidwendt) +- Fix concatenation of `cudf.RangeIndex` ([#8970](https://github.com/rapidsai/cudf/pull/8970)) [@galipremsagar](https://github.com/galipremsagar) +- Java conditional joins should not require matching column counts ([#8955](https://github.com/rapidsai/cudf/pull/8955)) [@jlowe](https://github.com/jlowe) +- Fix concatenate empty structs ([#8947](https://github.com/rapidsai/cudf/pull/8947)) [@sperlingxx](https://github.com/sperlingxx) +- Fix cuda-memcheck errors for some libcudf functions ([#8941](https://github.com/rapidsai/cudf/pull/8941)) [@davidwendt](https://github.com/davidwendt) +- Apply series name to result of `SeriesGroupby.apply()` ([#8939](https://github.com/rapidsai/cudf/pull/8939)) [@charlesbluca](https://github.com/charlesbluca) +- `cdef packed_columns` as `cppclass` instead of `struct` ([#8936](https://github.com/rapidsai/cudf/pull/8936)) [@charlesbluca](https://github.com/charlesbluca) +- Inserting a `cudf.NA` into a DataFrame ([#8923](https://github.com/rapidsai/cudf/pull/8923)) [@sarahyurick](https://github.com/sarahyurick) +- Support casting with Pandas dtype aliases ([#8920](https://github.com/rapidsai/cudf/pull/8920)) [@sarahyurick](https://github.com/sarahyurick) +- Allow `sort_values` to accept same `kind` values as Pandas ([#8912](https://github.com/rapidsai/cudf/pull/8912)) [@sarahyurick](https://github.com/sarahyurick) +- Enable casting to pandas nullable dtypes ([#8889](https://github.com/rapidsai/cudf/pull/8889)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix libcudf memory errors ([#8884](https://github.com/rapidsai/cudf/pull/8884)) [@karthikeyann](https://github.com/karthikeyann) +- Throw KeyError when accessing field from struct with nonexistent key ([#8880](https://github.com/rapidsai/cudf/pull/8880)) [@NV-jpt](https://github.com/NV-jpt) +- replace auto with auto& ref for cast<&> ([#8866](https://github.com/rapidsai/cudf/pull/8866)) [@karthikeyann](https://github.com/karthikeyann) +- Add missing include<optional> in binops ([#8864](https://github.com/rapidsai/cudf/pull/8864)) [@karthikeyann](https://github.com/karthikeyann) +- Fix `select_dtypes` to work when non-class dtypes present in dataframe ([#8849](https://github.com/rapidsai/cudf/pull/8849)) [@sarahyurick](https://github.com/sarahyurick) +- Re-enable JSON tests ([#8843](https://github.com/rapidsai/cudf/pull/8843)) [@vuule](https://github.com/vuule) +- Support header with embedded delimiter in csv writer ([#8798](https://github.com/rapidsai/cudf/pull/8798)) [@davidwendt](https://github.com/davidwendt) + +## 📖 Documentation + +- Add IO docs page 
in `cudf` documentation ([#9145](https://github.com/rapidsai/cudf/pull/9145)) [@galipremsagar](https://github.com/galipremsagar) +- use correct namespace in cuio code examples ([#9037](https://github.com/rapidsai/cudf/pull/9037)) [@cwharris](https://github.com/cwharris) +- Restructuring `Contributing doc` ([#9026](https://github.com/rapidsai/cudf/pull/9026)) [@iskode](https://github.com/iskode) +- Update stable version in readme ([#9008](https://github.com/rapidsai/cudf/pull/9008)) [@galipremsagar](https://github.com/galipremsagar) +- Add spans and more include guidelines to libcudf developer guide ([#8931](https://github.com/rapidsai/cudf/pull/8931)) [@harrism](https://github.com/harrism) +- Update Java build instructions to mention Arrow S3 and Docker ([#8867](https://github.com/rapidsai/cudf/pull/8867)) [@jlowe](https://github.com/jlowe) +- List GDS-enabled formats in the docs ([#8805](https://github.com/rapidsai/cudf/pull/8805)) [@vuule](https://github.com/vuule) +- Change cudf docs theme to pydata theme ([#8746](https://github.com/rapidsai/cudf/pull/8746)) [@galipremsagar](https://github.com/galipremsagar) + +## 🚀 New Features + +- Revert "Add shallow hash function and shallow equality comparison for column_view ([#9185)" (#9283](https://github.com/rapidsai/cudf/pull/9185)" (#9283)) [@karthikeyann](https://github.com/karthikeyann) +- Align `DataFrame.apply` signature with pandas ([#9275](https://github.com/rapidsai/cudf/pull/9275)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add struct type support for `drop_list_duplicates` ([#9202](https://github.com/rapidsai/cudf/pull/9202)) [@ttnghia](https://github.com/ttnghia) +- support CUDA async memory resource in JNI ([#9201](https://github.com/rapidsai/cudf/pull/9201)) [@rongou](https://github.com/rongou) +- Add shallow hash function and shallow equality comparison for column_view ([#9185](https://github.com/rapidsai/cudf/pull/9185)) [@karthikeyann](https://github.com/karthikeyann) +- Superimpose null masks for STRUCT columns. 
([#9144](https://github.com/rapidsai/cudf/pull/9144)) [@mythrocks](https://github.com/mythrocks) +- Implemented bindings for `ceil` timestamp operation ([#9141](https://github.com/rapidsai/cudf/pull/9141)) [@shaneding](https://github.com/shaneding) +- Adding MAP type support for ORC Reader ([#9132](https://github.com/rapidsai/cudf/pull/9132)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Implement `interleave_columns` for lists with arbitrary nested type ([#9130](https://github.com/rapidsai/cudf/pull/9130)) [@ttnghia](https://github.com/ttnghia) +- Add python bindings to fixed-size window and groupby `rolling.var`, `rolling.std` ([#9097](https://github.com/rapidsai/cudf/pull/9097)) [@isVoid](https://github.com/isVoid) +- Make AST operators nullable ([#9096](https://github.com/rapidsai/cudf/pull/9096)) [@vyasr](https://github.com/vyasr) +- Java bindings for approx_percentile ([#9094](https://github.com/rapidsai/cudf/pull/9094)) [@andygrove](https://github.com/andygrove) +- Add `dseries.struct.explode` ([#9086](https://github.com/rapidsai/cudf/pull/9086)) [@isVoid](https://github.com/isVoid) +- Add support for BaseIndexer in Rolling APIs ([#9085](https://github.com/rapidsai/cudf/pull/9085)) [@galipremsagar](https://github.com/galipremsagar) +- Remove the option to pass data types as strings to `read_csv` and `read_json` ([#9079](https://github.com/rapidsai/cudf/pull/9079)) [@vuule](https://github.com/vuule) +- Add handling for nested dicts in dask-cudf groupby ([#9054](https://github.com/rapidsai/cudf/pull/9054)) [@charlesbluca](https://github.com/charlesbluca) +- Added Series.dt.is_quarter_start and Series.dt.is_quarter_end ([#9046](https://github.com/rapidsai/cudf/pull/9046)) [@TravisHester](https://github.com/TravisHester) +- Support nested types for nth_element reduction ([#9043](https://github.com/rapidsai/cudf/pull/9043)) [@sperlingxx](https://github.com/sperlingxx) +- Update sort groupby to use non-atomic operation ([#9035](https://github.com/rapidsai/cudf/pull/9035)) [@karthikeyann](https://github.com/karthikeyann) +- Add support for struct type in ORC writer ([#9025](https://github.com/rapidsai/cudf/pull/9025)) [@vuule](https://github.com/vuule) +- Implement `interleave_columns` for structs columns ([#9012](https://github.com/rapidsai/cudf/pull/9012)) [@ttnghia](https://github.com/ttnghia) +- Add groupby first and last aggregations ([#9004](https://github.com/rapidsai/cudf/pull/9004)) [@shwina](https://github.com/shwina) +- Add `DecimalBaseColumn` and move `as_decimal_column` ([#9001](https://github.com/rapidsai/cudf/pull/9001)) [@isVoid](https://github.com/isVoid) +- Python/Cython bindings for multibyte_split ([#8998](https://github.com/rapidsai/cudf/pull/8998)) [@jdye64](https://github.com/jdye64) +- Support scalar `months` in `add_calendrical_months`, extends API to INT32 support ([#8991](https://github.com/rapidsai/cudf/pull/8991)) [@isVoid](https://github.com/isVoid) +- Added Series.dt.is_month_end ([#8989](https://github.com/rapidsai/cudf/pull/8989)) [@TravisHester](https://github.com/TravisHester) +- Support for using tdigests to compute approximate percentiles. 
([#8983](https://github.com/rapidsai/cudf/pull/8983)) [@nvdbaranec](https://github.com/nvdbaranec) +- Support "unflatten" of columns flattened via `flatten_nested_columns()`: ([#8956](https://github.com/rapidsai/cudf/pull/8956)) [@mythrocks](https://github.com/mythrocks) +- Implement timestamp ceil ([#8942](https://github.com/rapidsai/cudf/pull/8942)) [@shaneding](https://github.com/shaneding) +- Add nested column selection to parquet reader ([#8933](https://github.com/rapidsai/cudf/pull/8933)) [@devavret](https://github.com/devavret) +- Expose conditional join size calculation ([#8928](https://github.com/rapidsai/cudf/pull/8928)) [@vyasr](https://github.com/vyasr) +- Support Nulls in Timeseries Generator ([#8925](https://github.com/rapidsai/cudf/pull/8925)) [@isVoid](https://github.com/isVoid) +- Avoid index equality check in `_CPackedColumns.from_py_table()` ([#8917](https://github.com/rapidsai/cudf/pull/8917)) [@charlesbluca](https://github.com/charlesbluca) +- Add dot product binary op ([#8909](https://github.com/rapidsai/cudf/pull/8909)) [@charlesbluca](https://github.com/charlesbluca) +- Expose `days_in_month` function in libcudf and add python bindings ([#8892](https://github.com/rapidsai/cudf/pull/8892)) [@isVoid](https://github.com/isVoid) +- Series string repeat ([#8882](https://github.com/rapidsai/cudf/pull/8882)) [@sarahyurick](https://github.com/sarahyurick) +- Python binding for quarters ([#8862](https://github.com/rapidsai/cudf/pull/8862)) [@shaneding](https://github.com/shaneding) +- Expand CSV and JSON reader APIs to accept `dtypes` as a vector or map of `data_type` objects ([#8856](https://github.com/rapidsai/cudf/pull/8856)) [@vuule](https://github.com/vuule) +- Add Java bindings for AST transform ([#8846](https://github.com/rapidsai/cudf/pull/8846)) [@jlowe](https://github.com/jlowe) +- Series datetime is_month_start ([#8844](https://github.com/rapidsai/cudf/pull/8844)) [@sarahyurick](https://github.com/sarahyurick) +- Support bracket syntax for cudf::strings::replace_with_backrefs group index values ([#8841](https://github.com/rapidsai/cudf/pull/8841)) [@davidwendt](https://github.com/davidwendt) +- Support `VARIANCE` and `STD` aggregation in rolling op ([#8809](https://github.com/rapidsai/cudf/pull/8809)) [@isVoid](https://github.com/isVoid) +- Add quarters to libcudf datetime ([#8779](https://github.com/rapidsai/cudf/pull/8779)) [@shaneding](https://github.com/shaneding) +- Linear Interpolation of `nan`s via `cupy` ([#8767](https://github.com/rapidsai/cudf/pull/8767)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Enable compiled binary ops in libcudf, python and java ([#8741](https://github.com/rapidsai/cudf/pull/8741)) [@karthikeyann](https://github.com/karthikeyann) +- Make groupby transform-like op order match original data order ([#8720](https://github.com/rapidsai/cudf/pull/8720)) [@isVoid](https://github.com/isVoid) +- multibyte_split ([#8702](https://github.com/rapidsai/cudf/pull/8702)) [@cwharris](https://github.com/cwharris) +- Implement JNI for `strings:repeat_strings` that repeats each string separately by different numbers of times ([#8572](https://github.com/rapidsai/cudf/pull/8572)) [@ttnghia](https://github.com/ttnghia) + +## 🛠️ Improvements + +- Pin max `dask` and `distributed` versions to `2021.09.1` ([#9286](https://github.com/rapidsai/cudf/pull/9286)) 
[@galipremsagar](https://github.com/galipremsagar) +- Optimized fsspec data transfer for remote file-systems ([#9265](https://github.com/rapidsai/cudf/pull/9265)) [@rjzamora](https://github.com/rjzamora) +- Skip dask-cudf tests on arm64 ([#9252](https://github.com/rapidsai/cudf/pull/9252)) [@Ethyling](https://github.com/Ethyling) +- Use nvcomp's snappy compressor in ORC writer ([#9242](https://github.com/rapidsai/cudf/pull/9242)) [@devavret](https://github.com/devavret) +- Only run imports tests on x86_64 ([#9241](https://github.com/rapidsai/cudf/pull/9241)) [@Ethyling](https://github.com/Ethyling) +- Remove unnecessary call to device_uvector::release() ([#9237](https://github.com/rapidsai/cudf/pull/9237)) [@harrism](https://github.com/harrism) +- Use nvcomp's snappy decompression in ORC reader ([#9235](https://github.com/rapidsai/cudf/pull/9235)) [@devavret](https://github.com/devavret) +- Add grouped_rolling test with STRUCT groupby keys. ([#9228](https://github.com/rapidsai/cudf/pull/9228)) [@mythrocks](https://github.com/mythrocks) +- Optimize `cudf.concat` for `axis=0` ([#9222](https://github.com/rapidsai/cudf/pull/9222)) [@galipremsagar](https://github.com/galipremsagar) +- Fix some libcudf calls not passing the stream parameter ([#9220](https://github.com/rapidsai/cudf/pull/9220)) [@davidwendt](https://github.com/davidwendt) +- Add min and max bounds for random dataframe generator numeric types ([#9211](https://github.com/rapidsai/cudf/pull/9211)) [@galipremsagar](https://github.com/galipremsagar) +- Improve performance of expression evaluation ([#9210](https://github.com/rapidsai/cudf/pull/9210)) [@vyasr](https://github.com/vyasr) +- Misc optimizations in `cudf` ([#9203](https://github.com/rapidsai/cudf/pull/9203)) [@galipremsagar](https://github.com/galipremsagar) +- Remove Cython APIs for table view generation ([#9199](https://github.com/rapidsai/cudf/pull/9199)) [@vyasr](https://github.com/vyasr) +- Add JNI support for drop_list_duplicates ([#9198](https://github.com/rapidsai/cudf/pull/9198)) [@revans2](https://github.com/revans2) +- Update pandas versions in conda recipes and requirements.txt files ([#9197](https://github.com/rapidsai/cudf/pull/9197)) [@galipremsagar](https://github.com/galipremsagar) +- Minor C++17 cleanup of `groupby.cu`: structured bindings, more concise lambda, etc ([#9193](https://github.com/rapidsai/cudf/pull/9193)) [@codereport](https://github.com/codereport) +- Explicit about bitwidth difference between cudf boolean and arrow boolean ([#9192](https://github.com/rapidsai/cudf/pull/9192)) [@isVoid](https://github.com/isVoid) +- Remove _source_index from MultiIndex ([#9191](https://github.com/rapidsai/cudf/pull/9191)) [@vyasr](https://github.com/vyasr) +- Fix typo in the name of `cudf-testing-targets.cmake` ([#9190](https://github.com/rapidsai/cudf/pull/9190)) [@trxcllnt](https://github.com/trxcllnt) +- Add support for single-digits in cudf::to_timestamps ([#9173](https://github.com/rapidsai/cudf/pull/9173)) [@davidwendt](https://github.com/davidwendt) +- Fix cufilejni build include path ([#9168](https://github.com/rapidsai/cudf/pull/9168)) [@pxLi](https://github.com/pxLi) +- `dask_cudf` dispatch registering cleanup ([#9160](https://github.com/rapidsai/cudf/pull/9160)) [@galipremsagar](https://github.com/galipremsagar) +- Remove unneeded stream/mr from a cudf::make_strings_column 
([#9148](https://github.com/rapidsai/cudf/pull/9148)) [@davidwendt](https://github.com/davidwendt) +- Upgrade `pandas` version in `cudf` ([#9147](https://github.com/rapidsai/cudf/pull/9147)) [@galipremsagar](https://github.com/galipremsagar) +- make data chunk reader return unique_ptr ([#9129](https://github.com/rapidsai/cudf/pull/9129)) [@cwharris](https://github.com/cwharris) +- Add backend for `percentile_lookup` dispatch ([#9118](https://github.com/rapidsai/cudf/pull/9118)) [@galipremsagar](https://github.com/galipremsagar) +- Refactor implementation of column setitem ([#9110](https://github.com/rapidsai/cudf/pull/9110)) [@vyasr](https://github.com/vyasr) +- Fix compile warnings found using nvcc 11.4 ([#9101](https://github.com/rapidsai/cudf/pull/9101)) [@davidwendt](https://github.com/davidwendt) +- Update to UCX-Py 0.22 ([#9099](https://github.com/rapidsai/cudf/pull/9099)) [@pentschev](https://github.com/pentschev) +- Simplify read_avro by removing unnecessary writer/impl classes ([#9090](https://github.com/rapidsai/cudf/pull/9090)) [@cwharris](https://github.com/cwharris) +- Allowing %f in format to return nanoseconds ([#9081](https://github.com/rapidsai/cudf/pull/9081)) [@marlenezw](https://github.com/marlenezw) +- Java bindings for cudf::hash_join ([#9080](https://github.com/rapidsai/cudf/pull/9080)) [@jlowe](https://github.com/jlowe) +- Remove stale code in `ColumnBase._fill` ([#9078](https://github.com/rapidsai/cudf/pull/9078)) [@isVoid](https://github.com/isVoid) +- Add support for `get_group` in GroupBy ([#9070](https://github.com/rapidsai/cudf/pull/9070)) [@galipremsagar](https://github.com/galipremsagar) +- Remove remaining "support" methods from DataFrame ([#9068](https://github.com/rapidsai/cudf/pull/9068)) [@vyasr](https://github.com/vyasr) +- Update JNI java CSV APIs to not use deprecated API ([#9066](https://github.com/rapidsai/cudf/pull/9066)) [@revans2](https://github.com/revans2) +- Added method to remove null_masks if the column has no nulls ([#9061](https://github.com/rapidsai/cudf/pull/9061)) [@razajafri](https://github.com/razajafri) +- Consolidate Several Series and Dataframe Methods ([#9059](https://github.com/rapidsai/cudf/pull/9059)) [@isVoid](https://github.com/isVoid) +- Remove usage of string based `set_dtypes` for `csv` & `json` readers ([#9049](https://github.com/rapidsai/cudf/pull/9049)) [@galipremsagar](https://github.com/galipremsagar) +- Remove some debug print statements from gtests ([#9048](https://github.com/rapidsai/cudf/pull/9048)) [@davidwendt](https://github.com/davidwendt) +- Support additional format specifiers in from_timestamps ([#9047](https://github.com/rapidsai/cudf/pull/9047)) [@davidwendt](https://github.com/davidwendt) +- Expose expression base class publicly and simplify public AST API ([#9045](https://github.com/rapidsai/cudf/pull/9045)) [@vyasr](https://github.com/vyasr) +- move filepath and mmap logic out of json/csv up to functions.cpp ([#9040](https://github.com/rapidsai/cudf/pull/9040)) [@cwharris](https://github.com/cwharris) +- Refactor Index hierarchy ([#9039](https://github.com/rapidsai/cudf/pull/9039)) [@vyasr](https://github.com/vyasr) +- cudf now leverages rapids-cmake to reduce CMake boilerplate ([#9030](https://github.com/rapidsai/cudf/pull/9030)) [@robertmaynard](https://github.com/robertmaynard) +- Add support for `STRUCT` input to `groupby` 
([#9024](https://github.com/rapidsai/cudf/pull/9024)) [@mythrocks](https://github.com/mythrocks) +- Refactor Frame scans ([#9021](https://github.com/rapidsai/cudf/pull/9021)) [@vyasr](https://github.com/vyasr) +- Remove duplicate `set_categories` code ([#9018](https://github.com/rapidsai/cudf/pull/9018)) [@isVoid](https://github.com/isVoid) +- Map support for ParquetWriter ([#9013](https://github.com/rapidsai/cudf/pull/9013)) [@razajafri](https://github.com/razajafri) +- Remove aliases of various api.types APIs from utils.dtypes. ([#9011](https://github.com/rapidsai/cudf/pull/9011)) [@vyasr](https://github.com/vyasr) +- Java bindings for conditional join output sizes ([#9002](https://github.com/rapidsai/cudf/pull/9002)) [@jlowe](https://github.com/jlowe) +- Remove _copy_construct factory ([#8999](https://github.com/rapidsai/cudf/pull/8999)) [@vyasr](https://github.com/vyasr) +- ENH Allow arbitrary CMake config options in build.sh ([#8996](https://github.com/rapidsai/cudf/pull/8996)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- A small optimization for JNI copy column view to column vector ([#8985](https://github.com/rapidsai/cudf/pull/8985)) [@revans2](https://github.com/revans2) +- Fix nvcc warnings in ORC writer ([#8975](https://github.com/rapidsai/cudf/pull/8975)) [@devavret](https://github.com/devavret) +- Support nested structs in rank and dense rank ([#8962](https://github.com/rapidsai/cudf/pull/8962)) [@rwlee](https://github.com/rwlee) +- Move compute_column API out of ast namespace ([#8957](https://github.com/rapidsai/cudf/pull/8957)) [@vyasr](https://github.com/vyasr) +- Series datetime is_year_end and is_year_start ([#8954](https://github.com/rapidsai/cudf/pull/8954)) [@marlenezw](https://github.com/marlenezw) +- Make Java AstNode public ([#8953](https://github.com/rapidsai/cudf/pull/8953)) [@jlowe](https://github.com/jlowe) +- Replace allocate with device_uvector for subword_tokenize internal tables ([#8952](https://github.com/rapidsai/cudf/pull/8952)) [@davidwendt](https://github.com/davidwendt) +- `cudf.dtype` function ([#8949](https://github.com/rapidsai/cudf/pull/8949)) [@shwina](https://github.com/shwina) +- Refactor Frame reductions ([#8944](https://github.com/rapidsai/cudf/pull/8944)) [@vyasr](https://github.com/vyasr) +- Add deprecation warning for `Series.set_mask` API ([#8943](https://github.com/rapidsai/cudf/pull/8943)) [@galipremsagar](https://github.com/galipremsagar) +- Move AST evaluator into a separate header ([#8930](https://github.com/rapidsai/cudf/pull/8930)) [@vyasr](https://github.com/vyasr) +- JNI Aggregation Type Changes ([#8919](https://github.com/rapidsai/cudf/pull/8919)) [@revans2](https://github.com/revans2) +- Move template parameter to function parameter in cudf::detail::left_semi_anti_join ([#8914](https://github.com/rapidsai/cudf/pull/8914)) [@davidwendt](https://github.com/davidwendt) +- Upgrade `arrow` & `pyarrow` to `5.0.0` ([#8908](https://github.com/rapidsai/cudf/pull/8908)) [@galipremsagar](https://github.com/galipremsagar) +- Add groupby_aggregation and groupby_scan_aggregation classes and force their usage. ([#8906](https://github.com/rapidsai/cudf/pull/8906)) [@nvdbaranec](https://github.com/nvdbaranec) +- Move `structs_column_tests.cu` to `.cpp`. 
([#8902](https://github.com/rapidsai/cudf/pull/8902)) [@mythrocks](https://github.com/mythrocks) +- Add stream and memory-resource parameters to struct-scalar copy ctor ([#8901](https://github.com/rapidsai/cudf/pull/8901)) [@davidwendt](https://github.com/davidwendt) +- Combine linearizer and ast_plan ([#8900](https://github.com/rapidsai/cudf/pull/8900)) [@vyasr](https://github.com/vyasr) +- Add Java bindings for conditional join gather maps ([#8888](https://github.com/rapidsai/cudf/pull/8888)) [@jlowe](https://github.com/jlowe) +- Remove max version pin for `dask` & `distributed` on development branch ([#8881](https://github.com/rapidsai/cudf/pull/8881)) [@galipremsagar](https://github.com/galipremsagar) +- fix cufilejni build w/ c++17 ([#8877](https://github.com/rapidsai/cudf/pull/8877)) [@pxLi](https://github.com/pxLi) +- Add struct accessor to dask-cudf ([#8874](https://github.com/rapidsai/cudf/pull/8874)) [@NV-jpt](https://github.com/NV-jpt) +- Migrate dask-cudf CudfEngine to leverage ArrowDatasetEngine ([#8871](https://github.com/rapidsai/cudf/pull/8871)) [@rjzamora](https://github.com/rjzamora) +- Add JNI for extract_quarter, add_calendrical_months, and is_leap_year ([#8863](https://github.com/rapidsai/cudf/pull/8863)) [@revans2](https://github.com/revans2) +- Change cudf::scalar copy and move constructors to protected ([#8857](https://github.com/rapidsai/cudf/pull/8857)) [@davidwendt](https://github.com/davidwendt) +- Replace `is_same<>::value` with `is_same_v<>` ([#8852](https://github.com/rapidsai/cudf/pull/8852)) [@codereport](https://github.com/codereport) +- Add min `pytorch` version to `importorskip` in pytest ([#8851](https://github.com/rapidsai/cudf/pull/8851)) [@galipremsagar](https://github.com/galipremsagar) +- Java bindings for regex replace ([#8847](https://github.com/rapidsai/cudf/pull/8847)) [@jlowe](https://github.com/jlowe) +- Remove make strings children with null mask ([#8830](https://github.com/rapidsai/cudf/pull/8830)) [@davidwendt](https://github.com/davidwendt) +- Refactor conditional joins ([#8815](https://github.com/rapidsai/cudf/pull/8815)) [@vyasr](https://github.com/vyasr) +- Small cleanup (unused headers / commented code removals) ([#8799](https://github.com/rapidsai/cudf/pull/8799)) [@codereport](https://github.com/codereport) +- ENH Replace gpuci_conda_retry with gpuci_mamba_retry ([#8770](https://github.com/rapidsai/cudf/pull/8770)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- Update cudf java bindings to 21.10.0-SNAPSHOT ([#8765](https://github.com/rapidsai/cudf/pull/8765)) [@pxLi](https://github.com/pxLi) +- Refactor and improve join benchmarks with nvbench ([#8734](https://github.com/rapidsai/cudf/pull/8734)) [@PointKernel](https://github.com/PointKernel) +- Refactor Python factories and remove usage of Table for libcudf output handling ([#8687](https://github.com/rapidsai/cudf/pull/8687)) [@vyasr](https://github.com/vyasr) +- Optimize URL Decoding ([#8622](https://github.com/rapidsai/cudf/pull/8622)) [@gaohao95](https://github.com/gaohao95) +- Parquet writer dictionary encoding refactor ([#8476](https://github.com/rapidsai/cudf/pull/8476)) [@devavret](https://github.com/devavret) +- Use nvcomp's snappy decompression in parquet reader ([#8252](https://github.com/rapidsai/cudf/pull/8252)) [@devavret](https://github.com/devavret) +- Use nvcomp's snappy compressor in parquet writer 
([#8229](https://github.com/rapidsai/cudf/pull/8229)) [@devavret](https://github.com/devavret) # cuDF 21.08.00 (4 Aug 2021) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f83d7c5b759..aae62fbd47c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -62,12 +62,12 @@ The following instructions are for developers and contributors to cuDF OSS devel Compilers: * `gcc` version 9.3+ -* `nvcc` version 11.0+ +* `nvcc` version 11.5+ * `cmake` version 3.20.1+ CUDA/GPU: -* CUDA 11.0+ +* CUDA 11.5+ * NVIDIA driver 450.80.02+ * Pascal architecture or better diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index bc82f638171..979db1b5034 100755 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -37,7 +37,7 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/" export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache" # Dask & Distributed git tag -export DASK_DISTRIBUTED_GIT_TAG='main' +export DASK_DISTRIBUTED_GIT_TAG='2021.11.2' function remove_libcudf_kernel_cache_dir { EXITCODE=$? diff --git a/ci/cpu/prebuild.sh b/ci/cpu/prebuild.sh index a9bc1f4c605..746c0005a47 100755 --- a/ci/cpu/prebuild.sh +++ b/ci/cpu/prebuild.sh @@ -3,15 +3,7 @@ # Copyright (c) 2020, NVIDIA CORPORATION. set -e -ARCH=$(arch) -if [ "${ARCH}" = "x86_64" ]; then - DEFAULT_CUDA_VER="11.0" -elif [ "${ARCH}" = "aarch64" ]; then - DEFAULT_CUDA_VER="11.2" -else - echo "Unsupported arch ${ARCH}" - exit 1 -fi +DEFAULT_CUDA_VER="11.5" #Always upload cudf Python package export UPLOAD_CUDF=1 diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index e2e95c34650..8f83c169330 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. ############################################## # cuDF GPU build and test script for CI # ############################################## @@ -31,7 +31,7 @@ export GIT_DESCRIBE_TAG=`git describe --tags` export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` # Dask & Distributed git tag -export DASK_DISTRIBUTED_GIT_TAG='main' +export DASK_DISTRIBUTED_GIT_TAG='2021.11.2' ################################################################################ # TRAP - Setup trap for removing jitify cache @@ -176,6 +176,28 @@ else ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" done + ################################################################################ + # MEMCHECK - Run compute-sanitizer on GoogleTest (only in nightly builds) + ################################################################################ + if [[ "$BUILD_MODE" == "branch" && "$BUILD_TYPE" == "gpu" ]]; then + if [[ "$COMPUTE_SANITIZER_ENABLE" == "true" ]]; then + gpuci_logger "Memcheck on GoogleTests with rmm_mode=cuda" + export GTEST_CUDF_RMM_MODE=cuda + COMPUTE_SANITIZER_CMD="compute-sanitizer --tool memcheck" + mkdir -p "$WORKSPACE/test-results/" + for gt in gtests/*; do + test_name=$(basename ${gt}) + if [[ "$test_name" == "ERROR_TEST" ]]; then + continue + fi + echo "Running GoogleTest $test_name" + ${COMPUTE_SANITIZER_CMD} ${gt} | tee "$WORKSPACE/test-results/${test_name}.cs.log" + done + unset GTEST_CUDF_RMM_MODE + # test-results/*.cs.log are processed in gpuci + fi + fi + CUDF_CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "libcudf-*.tar.bz2"` CUDF_CONDA_FILE=`basename "$CUDF_CONDA_FILE" .tar.bz2` #get filename without extension CUDF_CONDA_FILE=${CUDF_CONDA_FILE//-/=} #convert to conda install diff --git a/ci/local/README.md b/ci/local/README.md index 
96002802263..7754bcaf647 100644 --- a/ci/local/README.md +++ b/ci/local/README.md @@ -18,12 +18,12 @@ Build and test your local repository using a base gpuCI Docker image where: -H Show this help text -r Path to repository (defaults to working directory) - -i Use Docker image (default is gpuci/rapidsai:${NIGHTLY_VERSION}-cuda10.1-devel-ubuntu16.04-py3.7) + -i Use Docker image (default is gpuci/rapidsai:${NIGHTLY_VERSION}-cuda11.5-devel-ubuntu20.04-py3.8) -s Skip building and testing and start an interactive shell in a container of the Docker image ``` Example Usage: -`bash build.sh -r ~/rapids/cudf -i gpuci/rapidsai:0.16-cuda10.2-devel-ubuntu16.04-py3.7` +`bash build.sh -r ~/rapids/cudf -i gpuci/rapidsai:22.02-cuda11.5-devel-ubuntu20.04-py3.8` For a full list of available gpuCI docker images, visit our [DockerHub](https://hub.docker.com/r/gpuci/rapidsai/tags) page. @@ -42,7 +42,7 @@ There are some caveats to be aware of when using this script, especially if you ### Docker Image Build Repository -The docker image will generate build artifacts in a folder on your machine located in the `root` directory of the repository you passed to the script. For the above example, the directory is named `~/rapids/cudf/build_rapidsai_cuda10.1-ubuntu16.04-py3.7/`. Feel free to remove this directory after the script is finished. +The docker image will generate build artifacts in a folder on your machine located in the `root` directory of the repository you passed to the script. For the above example, the directory is named `~/rapids/cudf/build_rapidsai_cuda11.5-ubuntu20.04-py3.8/`. Feel free to remove this directory after the script is finished. *Note*: The script *will not* override your local build repository. Your local environment stays in tact. diff --git a/ci/local/build.sh b/ci/local/build.sh index 1bfb8b63fef..345db967264 100755 --- a/ci/local/build.sh +++ b/ci/local/build.sh @@ -3,7 +3,7 @@ GIT_DESCRIBE_TAG=`git describe --tags` MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` -DOCKER_IMAGE="gpuci/rapidsai:${MINOR_VERSION}-cuda11.0-devel-ubuntu18.04-py3.7" +DOCKER_IMAGE="gpuci/rapidsai:${MINOR_VERSION}-cuda11.5-devel-ubuntu20.04-py3.8" REPO_PATH=${PWD} RAPIDS_DIR_IN_CONTAINER="/rapids" CPP_BUILD_DIR="cpp/build" diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index eeb76a15fcc..86432a92128 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -30,13 +30,13 @@ function sed_runner() { } # cpp update -sed_runner 's/'"CUDF VERSION .* LANGUAGES"'/'"CUDF VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' cpp/CMakeLists.txt +sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/CMakeLists.txt # cpp libcudf_kafka update -sed_runner 's/'"CUDA_KAFKA VERSION .* LANGUAGES"'/'"CUDA_KAFKA VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' cpp/libcudf_kafka/CMakeLists.txt +sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt # cpp cudf_jni update -sed_runner 's/'"CUDF_JNI VERSION .* LANGUAGES"'/'"CUDF_JNI VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' java/src/main/native/CMakeLists.txt +sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' java/src/main/native/CMakeLists.txt # rapids-cmake version sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index 60a5959a23f..e7b92eddd9e 100644 --- 
a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -7,14 +7,14 @@ channels: - rapidsai-nightly - conda-forge dependencies: - - clang=11.0.0 - - clang-tools=11.0.0 - - cupy>7.1.0,<10.0.0a0 + - clang=11.1.0 + - clang-tools=11.1.0 + - cupy>=9.5.0,<10.0.0a0 - rmm=22.02.* - cmake>=3.20.1 - cmake_setuptools>=0.1.3 - python>=3.7,<3.9 - - numba>=0.53.1 + - numba>=0.54 - numpy - pandas>=1.0,<1.4.0dev0 - pyarrow=5.0.0=*cuda @@ -41,8 +41,8 @@ dependencies: - pydocstyle=6.1.1 - typing_extensions - pre-commit - - dask>=2021.09.1 - - distributed>=2021.09.1 + - dask>=2021.11.1,<=2021.11.2 + - distributed>=2021.11.1,<=2021.11.2 - streamz - arrow-cpp=5.0.0 - dlpack>=0.5,<0.6.0a0 @@ -66,3 +66,4 @@ dependencies: - git+https://github.com/dask/distributed.git@main - git+https://github.com/python-streamz/streamz.git@master - pyorc + - ptxcompiler # [linux64] diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml index 7904593c4c7..6fe8ed0fafe 100644 --- a/conda/environments/cudf_dev_cuda11.2.yml +++ b/conda/environments/cudf_dev_cuda11.2.yml @@ -7,14 +7,14 @@ channels: - rapidsai-nightly - conda-forge dependencies: - - clang=11.0.0 - - clang-tools=11.0.0 - - cupy>7.1.0,<10.0.0a0 + - clang=11.1.0 + - clang-tools=11.1.0 + - cupy>=9.5.0,<10.0.0a0 - rmm=22.02.* - cmake>=3.20.1 - cmake_setuptools>=0.1.3 - python>=3.7,<3.9 - - numba>=0.53.1 + - numba>=0.54 - numpy - pandas>=1.0,<1.4.0dev0 - pyarrow=5.0.0=*cuda @@ -41,8 +41,8 @@ dependencies: - pydocstyle=6.1.1 - typing_extensions - pre-commit - - dask>=2021.09.1 - - distributed>=2021.09.1 + - dask>=2021.11.1,<=2021.11.2 + - distributed>=2021.11.1,<=2021.11.2 - streamz - arrow-cpp=5.0.0 - dlpack>=0.5,<0.6.0a0 @@ -66,3 +66,4 @@ dependencies: - git+https://github.com/dask/distributed.git@main - git+https://github.com/python-streamz/streamz.git@master - pyorc + - ptxcompiler # [linux64] diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index 63800fe786b..cc8d50a1717 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -7,14 +7,14 @@ channels: - rapidsai-nightly - conda-forge dependencies: - - clang=11.0.0 - - clang-tools=11.0.0 - - cupy>7.1.0,<10.0.0a0 - - rmm=21.12.* + - clang=11.1.0 + - clang-tools=11.1.0 + - cupy>=9.5.0,<10.0.0a0 + - rmm=22.02.* - cmake>=3.20.1 - cmake_setuptools>=0.1.3 - python>=3.7,<3.9 - - numba>=0.53.1 + - numba>=0.54 - numpy - pandas>=1.0,<1.4.0dev0 - pyarrow=5.0.0=*cuda @@ -41,8 +41,8 @@ dependencies: - pydocstyle=6.1.1 - typing_extensions - pre-commit - - dask>=2021.09.1 - - distributed>=2021.09.1 + - dask>=2021.11.1,<=2021.11.2 + - distributed>=2021.11.1,<=2021.11.2 - streamz - arrow-cpp=5.0.0 - dlpack>=0.5,<0.6.0a0 @@ -66,3 +66,4 @@ dependencies: - git+https://github.com/dask/distributed.git@main - git+https://github.com/python-streamz/streamz.git@master - pyorc + - ptxcompiler # [linux64] diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 615135a6f6b..46eefbc825f 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -29,7 +29,7 @@ requirements: - python - cython >=0.29,<0.30 - setuptools - - numba >=0.53.1 + - numba >=0.54 - dlpack>=0.5,<0.6.0a0 - pyarrow 5.0.0 *cuda - libcudf {{ version }} @@ -40,8 +40,8 @@ requirements: - python - typing_extensions - pandas >=1.0,<1.4.0dev0 - - cupy >7.1.0,<10.0.0a0 - - numba >=0.53.1 + - cupy >=9.5.0,<10.0.0a0 + - numba >=0.54 - numpy - {{ 
pin_compatible('pyarrow', max_pin='x.x.x') }} *cuda - fastavro >=0.22.0 @@ -51,6 +51,7 @@ requirements: - nvtx >=0.2.1 - packaging - cachetools + - ptxcompiler # [linux64] # CUDA enhanced compatibility. See https://github.com/rapidsai/ptxcompiler test: # [linux64] requires: # [linux64] diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index dc3a17f03ab..db8af9b0bed 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -31,8 +31,8 @@ requirements: - python - streamz - cudf {{ version }} - - dask>=2021.09.1 - - distributed>=2021.09.1 + - dask>=2021.11.1,<=2021.11.2 + - distributed>=2021.11.1,<=2021.11.2 - python-confluent-kafka - cudf_kafka {{ version }} diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml index 5631e262b87..d90de2d628c 100644 --- a/conda/recipes/dask-cudf/meta.yaml +++ b/conda/recipes/dask-cudf/meta.yaml @@ -4,6 +4,7 @@ {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version=environ.get('CONDA_PY', 36) %} {% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} +{% set cuda_major=cuda_version.split('.')[0] %} package: name: dask-cudf @@ -14,7 +15,7 @@ source: build: number: {{ GIT_DESCRIBE_NUMBER }} - string: cuda_{{ cuda_version }}_py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - VERSION_SUFFIX - PARALLEL_LEVEL @@ -26,15 +27,15 @@ requirements: host: - python - cudf {{ version }} - - dask>=2021.09.1 - - distributed>=2021.09.1 + - dask>=2021.11.1,<=2021.11.2 + - distributed>=2021.11.1,<=2021.11.2 - cudatoolkit {{ cuda_version }} run: - python - cudf {{ version }} - - dask>=2021.09.1 - - distributed>=2021.09.1 - - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} + - dask>=2021.11.1,<=2021.11.2 + - distributed>=2021.11.1,<=2021.11.2 + - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} test: # [linux64] requires: # [linux64] diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index d39c7aaa39d..e78110f3233 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -117,6 +117,7 @@ test: - test -f $PREFIX/include/cudf/dictionary/update_keys.hpp - test -f $PREFIX/include/cudf/filling.hpp - test -f $PREFIX/include/cudf/fixed_point/fixed_point.hpp + - test -f $PREFIX/include/cudf/fixed_point/temporary.hpp - test -f $PREFIX/include/cudf/groupby.hpp - test -f $PREFIX/include/cudf/hashing.hpp - test -f $PREFIX/include/cudf/interop.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 966728d7647..86ec24c1b7b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -25,7 +25,7 @@ rapids_cuda_init_architectures(CUDF) project( CUDF - VERSION 21.12.00 + VERSION 22.02.00 LANGUAGES C CXX CUDA ) @@ -185,6 +185,7 @@ add_library( src/binaryop/compiled/LogicalOr.cu src/binaryop/compiled/Mod.cu src/binaryop/compiled/Mul.cu + src/binaryop/compiled/NullEquals.cu src/binaryop/compiled/NullMax.cu src/binaryop/compiled/NullMin.cu src/binaryop/compiled/PMod.cu @@ -310,6 +311,7 @@ add_library( src/io/statistics/parquet_column_statistics.cu src/io/text/multibyte_split.cu src/io/utilities/column_buffer.cpp + src/io/utilities/config_utils.cpp src/io/utilities/data_sink.cpp src/io/utilities/datasource.cpp src/io/utilities/file_io_utilities.cpp @@ -733,6 +735,27 @@ set(install_code_string [=[ set(ArrowCUDA_DIR "${Arrow_DIR}") 
find_dependency(ArrowCUDA) +]=] +) + +if(CUDF_ENABLE_ARROW_PARQUET) + string( + APPEND + install_code_string + [=[ + if(NOT Parquet_DIR) + set(Parquet_DIR "${Arrow_DIR}") + endif() + set(ArrowDataset_DIR "${Arrow_DIR}") + find_dependency(ArrowDataset) + ]=] + ) +endif() + +string( + APPEND + install_code_string + [=[ if(testing IN_LIST cudf_FIND_COMPONENTS) enable_language(CUDA) if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-dependencies.cmake") diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index fa1e61e26fd..72b247ae748 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -208,7 +208,6 @@ ConfigureBench(AST_BENCH ast/transform_benchmark.cpp) # * binaryop benchmark ---------------------------------------------------------------------------- ConfigureBench( BINARYOP_BENCH binaryop/binaryop_benchmark.cpp binaryop/compiled_binaryop_benchmark.cpp - binaryop/jit_binaryop_benchmark.cpp ) # ################################################################################################## diff --git a/cpp/benchmarks/binaryop/binaryop_benchmark.cpp b/cpp/benchmarks/binaryop/binaryop_benchmark.cpp index 9de1112a9db..314d657679b 100644 --- a/cpp/benchmarks/binaryop/binaryop_benchmark.cpp +++ b/cpp/benchmarks/binaryop/binaryop_benchmark.cpp @@ -74,14 +74,14 @@ static void BM_binaryop_transform(benchmark::State& state) auto const op = cudf::binary_operator::ADD; auto result_data_type = cudf::data_type(cudf::type_to_id()); if (reuse_columns) { - auto result = cudf::jit::binary_operation(columns.at(0), columns.at(0), op, result_data_type); + auto result = cudf::binary_operation(columns.at(0), columns.at(0), op, result_data_type); for (cudf::size_type i = 0; i < tree_levels - 1; i++) { - result = cudf::jit::binary_operation(result->view(), columns.at(0), op, result_data_type); + result = cudf::binary_operation(result->view(), columns.at(0), op, result_data_type); } } else { - auto result = cudf::jit::binary_operation(columns.at(0), columns.at(1), op, result_data_type); + auto result = cudf::binary_operation(columns.at(0), columns.at(1), op, result_data_type); std::for_each(std::next(columns.cbegin(), 2), columns.cend(), [&](auto const& col) { - result = cudf::jit::binary_operation(result->view(), col, op, result_data_type); + result = cudf::binary_operation(result->view(), col, op, result_data_type); }); } } diff --git a/cpp/benchmarks/binaryop/compiled_binaryop_benchmark.cpp b/cpp/benchmarks/binaryop/compiled_binaryop_benchmark.cpp index bc0818ace4b..8d04f8bdcb2 100644 --- a/cpp/benchmarks/binaryop/compiled_binaryop_benchmark.cpp +++ b/cpp/benchmarks/binaryop/compiled_binaryop_benchmark.cpp @@ -50,14 +50,14 @@ void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop) } // TODO tparam boolean for null. 
-#define BINARYOP_BENCHMARK_DEFINE(TypeLhs, TypeRhs, binop, TypeOut) \ +#define BINARYOP_BENCHMARK_DEFINE(name, TypeLhs, TypeRhs, binop, TypeOut) \ BENCHMARK_TEMPLATE_DEFINE_F( \ - COMPILED_BINARYOP, binop, TypeLhs, TypeRhs, TypeOut, cudf::binary_operator::binop) \ + COMPILED_BINARYOP, name, TypeLhs, TypeRhs, TypeOut, cudf::binary_operator::binop) \ (::benchmark::State & st) \ { \ BM_compiled_binaryop(st, cudf::binary_operator::binop); \ } \ - BENCHMARK_REGISTER_F(COMPILED_BINARYOP, binop) \ + BENCHMARK_REGISTER_F(COMPILED_BINARYOP, name) \ ->Unit(benchmark::kMicrosecond) \ ->UseManualTime() \ ->Arg(10000) /* 10k */ \ @@ -70,30 +70,36 @@ using namespace cudf; using namespace numeric; // clang-format off -BINARYOP_BENCHMARK_DEFINE(float, int64_t, ADD, int32_t); -BINARYOP_BENCHMARK_DEFINE(duration_s, duration_D, SUB, duration_ms); -BINARYOP_BENCHMARK_DEFINE(float, float, MUL, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, DIV, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, TRUE_DIV, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, FLOOR_DIV, int64_t); -BINARYOP_BENCHMARK_DEFINE(double, double, MOD, double); -BINARYOP_BENCHMARK_DEFINE(int32_t, int64_t, PMOD, double); -BINARYOP_BENCHMARK_DEFINE(int32_t, uint8_t, PYMOD, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, POW, double); -BINARYOP_BENCHMARK_DEFINE(float, double, LOG_BASE, double); -BINARYOP_BENCHMARK_DEFINE(float, double, ATAN2, double); -BINARYOP_BENCHMARK_DEFINE(int, int, SHIFT_LEFT, int); -BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, SHIFT_RIGHT, int); -BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, SHIFT_RIGHT_UNSIGNED, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, BITWISE_AND, int16_t); -BINARYOP_BENCHMARK_DEFINE(int16_t, int32_t, BITWISE_OR, int64_t); -BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, BITWISE_XOR, int32_t); -BINARYOP_BENCHMARK_DEFINE(double, int8_t, LOGICAL_AND, bool); -BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, LOGICAL_OR, bool); -BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, EQUAL, bool); -BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NOT_EQUAL, bool); -BINARYOP_BENCHMARK_DEFINE(timestamp_s, timestamp_s, LESS, bool); -BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s, GREATER, bool); -BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_EQUALS, bool); -BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NULL_MAX, decimal32); -BINARYOP_BENCHMARK_DEFINE(timestamp_D, timestamp_s, NULL_MIN, timestamp_s); +BINARYOP_BENCHMARK_DEFINE(ADD_1, float, float, ADD, float); +BINARYOP_BENCHMARK_DEFINE(ADD_2, timestamp_s, duration_s, ADD, timestamp_s); +BINARYOP_BENCHMARK_DEFINE(SUB_1, duration_s, duration_D, SUB, duration_ms); +BINARYOP_BENCHMARK_DEFINE(SUB_2, int64_t, int64_t, SUB, int64_t); +BINARYOP_BENCHMARK_DEFINE(MUL_1, float, float, MUL, int64_t); +BINARYOP_BENCHMARK_DEFINE(MUL_2, duration_s, int64_t, MUL, duration_s); +BINARYOP_BENCHMARK_DEFINE(DIV_1, int64_t, int64_t, DIV, int64_t); +BINARYOP_BENCHMARK_DEFINE(DIV_2, duration_ms, int32_t, DIV, duration_ms); +BINARYOP_BENCHMARK_DEFINE(TRUE_DIV, int64_t, int64_t, TRUE_DIV, int64_t); +BINARYOP_BENCHMARK_DEFINE(FLOOR_DIV, int64_t, int64_t, FLOOR_DIV, int64_t); +BINARYOP_BENCHMARK_DEFINE(MOD_1, double, double, MOD, double); +BINARYOP_BENCHMARK_DEFINE(MOD_2, duration_ms, int64_t, MOD, duration_ms); +BINARYOP_BENCHMARK_DEFINE(PMOD, int32_t, int64_t, PMOD, double); +BINARYOP_BENCHMARK_DEFINE(PYMOD, int32_t, uint8_t, PYMOD, int64_t); +BINARYOP_BENCHMARK_DEFINE(POW, int64_t, int64_t, POW, double); +BINARYOP_BENCHMARK_DEFINE(LOG_BASE, 
float, double, LOG_BASE, double); +BINARYOP_BENCHMARK_DEFINE(ATAN2, float, double, ATAN2, double); +BINARYOP_BENCHMARK_DEFINE(SHIFT_LEFT, int, int, SHIFT_LEFT, int); +BINARYOP_BENCHMARK_DEFINE(SHIFT_RIGHT, int16_t, int64_t, SHIFT_RIGHT, int); +BINARYOP_BENCHMARK_DEFINE(USHIFT_RIGHT, int64_t, int32_t, SHIFT_RIGHT_UNSIGNED, int64_t); +BINARYOP_BENCHMARK_DEFINE(BITWISE_AND, int64_t, int32_t, BITWISE_AND, int16_t); +BINARYOP_BENCHMARK_DEFINE(BITWISE_OR, int16_t, int32_t, BITWISE_OR, int64_t); +BINARYOP_BENCHMARK_DEFINE(BITWISE_XOR, int16_t, int64_t, BITWISE_XOR, int32_t); +BINARYOP_BENCHMARK_DEFINE(LOGICAL_AND, double, int8_t, LOGICAL_AND, bool); +BINARYOP_BENCHMARK_DEFINE(LOGICAL_OR, int16_t, int64_t, LOGICAL_OR, bool); +BINARYOP_BENCHMARK_DEFINE(EQUAL_1, int32_t, int64_t, EQUAL, bool); +BINARYOP_BENCHMARK_DEFINE(EQUAL_2, duration_ms, duration_ns, EQUAL, bool); +BINARYOP_BENCHMARK_DEFINE(NOT_EQUAL, decimal32, decimal32, NOT_EQUAL, bool); +BINARYOP_BENCHMARK_DEFINE(LESS, timestamp_s, timestamp_s, LESS, bool); +BINARYOP_BENCHMARK_DEFINE(GREATER, timestamp_ms, timestamp_s, GREATER, bool); +BINARYOP_BENCHMARK_DEFINE(NULL_EQUALS, duration_ms, duration_ns, NULL_EQUALS, bool); +BINARYOP_BENCHMARK_DEFINE(NULL_MAX, decimal32, decimal32, NULL_MAX, decimal32); +BINARYOP_BENCHMARK_DEFINE(NULL_MIN, timestamp_D, timestamp_s, NULL_MIN, timestamp_s); diff --git a/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp b/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp deleted file mode 100644 index 7fda4a50ea1..00000000000 --- a/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include - -#include - -#include - -template -class JIT_BINARYOP : public cudf::benchmark { -}; - -template -void BM_binaryop(benchmark::State& state, cudf::binary_operator binop) -{ - const cudf::size_type column_size{(cudf::size_type)state.range(0)}; - - auto data_it = thrust::make_counting_iterator(0); - cudf::test::fixed_width_column_wrapper input1(data_it, data_it + column_size); - cudf::test::fixed_width_column_wrapper input2(data_it, data_it + column_size); - - auto lhs = cudf::column_view(input1); - auto rhs = cudf::column_view(input2); - auto output_dtype = cudf::data_type(cudf::type_to_id()); - - // Call once for hot cache. - cudf::jit::binary_operation(lhs, rhs, binop, output_dtype); - - for (auto _ : state) { - cuda_event_timer timer(state, true); - cudf::jit::binary_operation(lhs, rhs, binop, output_dtype); - } -} - -// TODO tparam boolean for null. 
-#define BINARYOP_BENCHMARK_DEFINE(TypeLhs, TypeRhs, binop, TypeOut) \ - BENCHMARK_TEMPLATE_DEFINE_F( \ - JIT_BINARYOP, binop, TypeLhs, TypeRhs, TypeOut, cudf::binary_operator::binop) \ - (::benchmark::State & st) \ - { \ - BM_binaryop(st, cudf::binary_operator::binop); \ - } \ - BENCHMARK_REGISTER_F(JIT_BINARYOP, binop) \ - ->Unit(benchmark::kMicrosecond) \ - ->UseManualTime() \ - ->Arg(10000) /* 10k */ \ - ->Arg(100000) /* 100k */ \ - ->Arg(1000000) /* 1M */ \ - ->Arg(10000000) /* 10M */ \ - ->Arg(100000000); /* 100M */ - -using namespace cudf; -using namespace numeric; - -// clang-format off -BINARYOP_BENCHMARK_DEFINE(float, int64_t, ADD, int32_t); -BINARYOP_BENCHMARK_DEFINE(duration_s, duration_D, SUB, duration_ms); -BINARYOP_BENCHMARK_DEFINE(float, float, MUL, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, DIV, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, TRUE_DIV, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, FLOOR_DIV, int64_t); -BINARYOP_BENCHMARK_DEFINE(double, double, MOD, double); -BINARYOP_BENCHMARK_DEFINE(int32_t, int64_t, PMOD, double); -BINARYOP_BENCHMARK_DEFINE(int32_t, uint8_t, PYMOD, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, POW, double); -BINARYOP_BENCHMARK_DEFINE(float, double, LOG_BASE, double); -BINARYOP_BENCHMARK_DEFINE(float, double, ATAN2, double); -BINARYOP_BENCHMARK_DEFINE(int, int, SHIFT_LEFT, int); -BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, SHIFT_RIGHT, int); -BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, SHIFT_RIGHT_UNSIGNED, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, BITWISE_AND, int16_t); -BINARYOP_BENCHMARK_DEFINE(int16_t, int32_t, BITWISE_OR, int64_t); -BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, BITWISE_XOR, int32_t); -BINARYOP_BENCHMARK_DEFINE(double, int8_t, LOGICAL_AND, bool); -BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, LOGICAL_OR, bool); -BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, EQUAL, bool); -BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NOT_EQUAL, bool); -BINARYOP_BENCHMARK_DEFINE(timestamp_s, timestamp_s, LESS, bool); -BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s, GREATER, bool); -BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_EQUALS, bool); -BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NULL_MAX, decimal32); -BINARYOP_BENCHMARK_DEFINE(timestamp_D, timestamp_s, NULL_MIN, timestamp_s); diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake index c2ad25760b8..6ab1293ab6f 100644 --- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake +++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake @@ -51,8 +51,8 @@ function(jit_preprocess_files) endfunction() jit_preprocess_files( - SOURCE_DIRECTORY ${CUDF_SOURCE_DIR}/src FILES binaryop/jit/kernel.cu - transform/jit/masked_udf_kernel.cu transform/jit/kernel.cu rolling/jit/kernel.cu + SOURCE_DIRECTORY ${CUDF_SOURCE_DIR}/src FILES binaryop/jit/kernel.cu transform/jit/kernel.cu + rolling/jit/kernel.cu ) add_custom_target( diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 5fe37402096..ae1448da502 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -90,7 +90,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB rapids_cpm_find( Arrow ${VERSION} - GLOBAL_TARGETS arrow_shared arrow_cuda_shared + GLOBAL_TARGETS arrow_shared parquet_shared arrow_cuda_shared arrow_dataset_shared CPM_ARGS GIT_REPOSITORY https://github.com/apache/arrow.git GIT_TAG 
apache-arrow-${VERSION} @@ -142,6 +142,15 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB set(ArrowCUDA_DIR "${Arrow_DIR}") find_package(Arrow REQUIRED QUIET) find_package(ArrowCUDA REQUIRED QUIET) + if(ENABLE_PARQUET) + if(NOT Parquet_DIR) + # Set this to enable `find_package(Parquet)` + set(Parquet_DIR "${Arrow_DIR}") + endif() + # Set this to enable `find_package(ArrowDataset)` + set(ArrowDataset_DIR "${Arrow_DIR}") + find_package(ArrowDataset REQUIRED QUIET) + endif() elseif(Arrow_ADDED) # Copy these files so we can avoid adding paths in Arrow_BINARY_DIR to # target_include_directories. That defeats ccache. @@ -182,24 +191,15 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB endif() if(Arrow_ADDED) + set(arrow_code_string [=[ - if (TARGET cudf::arrow_shared AND (NOT TARGET arrow_shared)) - add_library(arrow_shared ALIAS cudf::arrow_shared) - endif() - if (TARGET cudf::arrow_static AND (NOT TARGET arrow_static)) - add_library(arrow_static ALIAS cudf::arrow_static) - endif() - ]=] - ) - set(arrow_cuda_code_string - [=[ - if (TARGET cudf::arrow_cuda_shared AND (NOT TARGET arrow_cuda_shared)) - add_library(arrow_cuda_shared ALIAS cudf::arrow_cuda_shared) - endif() - if (TARGET cudf::arrow_cuda_static AND (NOT TARGET arrow_cuda_static)) - add_library(arrow_cuda_static ALIAS cudf::arrow_cuda_static) - endif() + if (TARGET cudf::arrow_shared AND (NOT TARGET arrow_shared)) + add_library(arrow_shared ALIAS cudf::arrow_shared) + endif() + if (TARGET cudf::arrow_static AND (NOT TARGET arrow_static)) + add_library(arrow_static ALIAS cudf::arrow_static) + endif() ]=] ) @@ -212,6 +212,17 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB FINAL_CODE_BLOCK arrow_code_string ) + set(arrow_cuda_code_string + [=[ + if (TARGET cudf::arrow_cuda_shared AND (NOT TARGET arrow_cuda_shared)) + add_library(arrow_cuda_shared ALIAS cudf::arrow_cuda_shared) + endif() + if (TARGET cudf::arrow_cuda_static AND (NOT TARGET arrow_cuda_static)) + add_library(arrow_cuda_static ALIAS cudf::arrow_cuda_static) + endif() + ]=] + ) + rapids_export( BUILD ArrowCUDA VERSION ${VERSION} @@ -220,6 +231,49 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB NAMESPACE cudf:: FINAL_CODE_BLOCK arrow_cuda_code_string ) + + if(ENABLE_PARQUET) + + set(arrow_dataset_code_string + [=[ + if (TARGET cudf::arrow_dataset_shared AND (NOT TARGET arrow_dataset_shared)) + add_library(arrow_dataset_shared ALIAS cudf::arrow_dataset_shared) + endif() + if (TARGET cudf::arrow_dataset_static AND (NOT TARGET arrow_dataset_static)) + add_library(arrow_dataset_static ALIAS cudf::arrow_dataset_static) + endif() + ]=] + ) + + rapids_export( + BUILD ArrowDataset + VERSION ${VERSION} + EXPORT_SET arrow_dataset_targets + GLOBAL_TARGETS arrow_dataset_shared arrow_dataset_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK arrow_dataset_code_string + ) + + set(parquet_code_string + [=[ + if (TARGET cudf::parquet_shared AND (NOT TARGET parquet_shared)) + add_library(parquet_shared ALIAS cudf::parquet_shared) + endif() + if (TARGET cudf::parquet_static AND (NOT TARGET parquet_static)) + add_library(parquet_static ALIAS cudf::parquet_static) + endif() + ]=] + ) + + rapids_export( + BUILD Parquet + VERSION ${VERSION} + EXPORT_SET parquet_targets + GLOBAL_TARGETS parquet_shared parquet_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK parquet_code_string + ) + endif() endif() # We generate the arrow-config and arrowcuda-config files when we built 
arrow locally, so always # do `find_dependency` @@ -230,10 +284,18 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB # ArrowCUDA_DIR to be where Arrow was found, since Arrow packages ArrowCUDA.config in a # non-standard location rapids_export_package(BUILD ArrowCUDA cudf-exports) + if(ENABLE_PARQUET) + rapids_export_package(BUILD Parquet cudf-exports) + rapids_export_package(BUILD ArrowDataset cudf-exports) + endif() include("${rapids-cmake-dir}/export/find_package_root.cmake") rapids_export_find_package_root(BUILD Arrow [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) rapids_export_find_package_root(BUILD ArrowCUDA [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) + if(ENABLE_PARQUET) + rapids_export_find_package_root(BUILD Parquet [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) + rapids_export_find_package_root(BUILD ArrowDataset [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) + endif() set(ARROW_FOUND "${ARROW_FOUND}" diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 55e5119040e..6a556bb4b34 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -2089,7 +2089,7 @@ ENABLE_PREPROCESSING = YES # The default value is: NO. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -MACRO_EXPANSION = NO +MACRO_EXPANSION = YES # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then # the macro expansion is limited to the macros specified with the PREDEFINED and @@ -2097,7 +2097,7 @@ MACRO_EXPANSION = NO # The default value is: NO. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -EXPAND_ONLY_PREDEF = NO +EXPAND_ONLY_PREDEF = YES # If the SEARCH_INCLUDES tag is set to YES, the include files in the # INCLUDE_PATH will be searched if a #include is found. @@ -2129,7 +2129,8 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -PREDEFINED = +PREDEFINED = __device__= \ + __host__= # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The diff --git a/cpp/include/cudf/ast/detail/expression_parser.hpp b/cpp/include/cudf/ast/detail/expression_parser.hpp index dc800bde527..4f73cb1ef6e 100644 --- a/cpp/include/cudf/ast/detail/expression_parser.hpp +++ b/cpp/include/cudf/ast/detail/expression_parser.hpp @@ -37,9 +37,9 @@ namespace detail { * linearization process but cannot be explicitly created by the user. */ enum class device_data_reference_type { - COLUMN, // A value in a table column - LITERAL, // A literal value - INTERMEDIATE // An internal temporary value + COLUMN, ///< A value in a table column + LITERAL, ///< A literal value + INTERMEDIATE ///< An internal temporary value }; /** diff --git a/cpp/include/cudf/ast/expressions.hpp b/cpp/include/cudf/ast/expressions.hpp index 7ae40a7d65f..20aaa42fb68 100644 --- a/cpp/include/cudf/ast/expressions.hpp +++ b/cpp/include/cudf/ast/expressions.hpp @@ -122,9 +122,9 @@ enum class ast_operator { * This determines which table to use in cases with two tables (e.g. joins). 
*/ enum class table_reference { - LEFT, // Column index in the left table - RIGHT, // Column index in the right table - OUTPUT // Column index in the output table + LEFT, ///< Column index in the left table + RIGHT, ///< Column index in the right table + OUTPUT ///< Column index in the output table }; /** diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index fe548a36cf0..a514010c1f0 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -210,83 +210,5 @@ cudf::data_type binary_operation_fixed_point_output_type(binary_operator op, cudf::data_type const& lhs, cudf::data_type const& rhs); -namespace jit { -/** - * @brief Performs a binary operation between a scalar and a column. - * - * The output contains the result of `op(lhs, rhs[i])` for all `0 <= i < rhs.size()` - * The scalar is the left operand and the column elements are the right operand. - * This distinction is significant in case of non-commutative binary operations - * - * Regardless of the operator, the validity of the output value is the logical - * AND of the validity of the two operands - * - * @param lhs The left operand scalar - * @param rhs The right operand column - * @param op The binary operator - * @param output_type The desired data type of the output column - * @param mr Device memory resource used to allocate the returned column's device memory - * @return Output column of `output_type` type containing the result of - * the binary operation - * @throw cudf::logic_error if @p output_type dtype isn't fixed-width - */ -std::unique_ptr binary_operation( - scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Performs a binary operation between a column and a scalar. - * - * The output contains the result of `op(lhs[i], rhs)` for all `0 <= i < lhs.size()` - * The column elements are the left operand and the scalar is the right operand. - * This distinction is significant in case of non-commutative binary operations - * - * Regardless of the operator, the validity of the output value is the logical - * AND of the validity of the two operands - * - * @param lhs The left operand column - * @param rhs The right operand scalar - * @param op The binary operator - * @param output_type The desired data type of the output column - * @param mr Device memory resource used to allocate the returned column's device memory - * @return Output column of `output_type` type containing the result of - * the binary operation - * @throw cudf::logic_error if @p output_type dtype isn't fixed-width - */ -std::unique_ptr binary_operation( - column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Performs a binary operation between two columns. 
- * - * The output contains the result of `op(lhs[i], rhs[i])` for all `0 <= i < lhs.size()` - * - * Regardless of the operator, the validity of the output value is the logical - * AND of the validity of the two operands - * - * @param lhs The left operand column - * @param rhs The right operand column - * @param op The binary operator - * @param output_type The desired data type of the output column - * @param mr Device memory resource used to allocate the returned column's device memory - * @return Output column of `output_type` type containing the result of - * the binary operation - * @throw cudf::logic_error if @p lhs and @p rhs are different sizes - * @throw cudf::logic_error if @p output_type dtype isn't fixed-width - */ -std::unique_ptr binary_operation( - column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -} // namespace jit /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 0b7ca2096a3..6ecb0796283 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -421,39 +421,22 @@ class alignas(16) column_device_view : public detail::column_device_view_base { } /** - * @brief Returns a `numeric::decimal32` element at the specified index for a `fixed_point` + * @brief Returns a `numeric::fixed_point` element at the specified index for a `fixed_point` * column. * * If the element at the specified index is NULL, i.e., `is_null(element_index) == true`, * then any attempt to use the result will lead to undefined behavior. * * @param element_index Position of the desired element - * @return numeric::decimal32 representing the element at this index + * @return numeric::fixed_point representing the element at this index */ - template )> + template ())> __device__ T element(size_type element_index) const noexcept { using namespace numeric; + using rep = typename T::rep; auto const scale = scale_type{_type.scale()}; - return decimal32{scaled_integer{data()[element_index], scale}}; - } - - /** - * @brief Returns a `numeric::decimal64` element at the specified index for a `fixed_point` - * column. - * - * If the element at the specified index is NULL, i.e., `is_null(element_index) == true`, - * then any attempt to use the result will lead to undefined behavior. 
- * - * @param element_index Position of the desired element - * @return numeric::decimal64 representing the element at this index - */ - template )> - __device__ T element(size_type element_index) const noexcept - { - using namespace numeric; - auto const scale = scale_type{_type.scale()}; - return decimal64{scaled_integer{data()[element_index], scale}}; + return T{scaled_integer{data()[element_index], scale}}; } /** diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index ba5043fb261..81dddbd284a 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -41,8 +41,8 @@ namespace cudf { */ enum class out_of_bounds_policy : bool { - NULLIFY, /// Output values corresponding to out-of-bounds indices are null - DONT_CHECK /// No bounds checking is performed, better performance + NULLIFY, ///< Output values corresponding to out-of-bounds indices are null + DONT_CHECK ///< No bounds checking is performed, better performance }; /** @@ -901,8 +901,8 @@ std::unique_ptr get_element( * @brief Indicates whether a row can be sampled more than once. */ enum class sample_with_replacement : bool { - FALSE, // A row can be sampled only once - TRUE // A row can be sampled more than once + FALSE, ///< A row can be sampled only once + TRUE ///< A row can be sampled more than once }; /** diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index d67984daa7c..71e5968bf07 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -378,5 +378,96 @@ std::unique_ptr ceil_nanosecond( column_view const& column, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Round down to the nearest day + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_day( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest hour + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_hour( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest minute + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. 
+ * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_minute( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest second + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_second( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest millisecond + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_millisecond( + column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest microsecond + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_microsecond( + column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest nanosecond + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. 
+ * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_nanosecond( + column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + } // namespace datetime } // namespace cudf diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index 53c1f47c201..47aa7d18489 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -131,7 +132,8 @@ struct update_target_element< aggregation::MIN, target_has_nulls, source_has_nulls, - std::enable_if_t() && !is_fixed_point()>> { + std::enable_if_t() && cudf::has_atomic_support() && + !is_fixed_point()>> { __device__ void operator()(mutable_column_device_view target, size_type target_index, column_device_view source, @@ -148,11 +150,13 @@ struct update_target_element< }; template -struct update_target_element()>> { +struct update_target_element< + Source, + aggregation::MIN, + target_has_nulls, + source_has_nulls, + std::enable_if_t() && + cudf::has_atomic_support>()>> { __device__ void operator()(mutable_column_device_view target, size_type target_index, column_device_view source, @@ -177,7 +181,8 @@ struct update_target_element< aggregation::MAX, target_has_nulls, source_has_nulls, - std::enable_if_t() && !is_fixed_point()>> { + std::enable_if_t() && cudf::has_atomic_support() && + !is_fixed_point()>> { __device__ void operator()(mutable_column_device_view target, size_type target_index, column_device_view source, @@ -194,11 +199,13 @@ struct update_target_element< }; template -struct update_target_element()>> { +struct update_target_element< + Source, + aggregation::MAX, + target_has_nulls, + source_has_nulls, + std::enable_if_t() && + cudf::has_atomic_support>()>> { __device__ void operator()(mutable_column_device_view target, size_type target_index, column_device_view source, @@ -223,7 +230,8 @@ struct update_target_element< aggregation::SUM, target_has_nulls, source_has_nulls, - std::enable_if_t() && !is_fixed_point()>> { + std::enable_if_t() && cudf::has_atomic_support() && + !is_fixed_point()>> { __device__ void operator()(mutable_column_device_view target, size_type target_index, column_device_view source, @@ -240,11 +248,13 @@ struct update_target_element< }; template -struct update_target_element()>> { +struct update_target_element< + Source, + aggregation::SUM, + target_has_nulls, + source_has_nulls, + std::enable_if_t() && + cudf::has_atomic_support>()>> { __device__ void operator()(mutable_column_device_view target, size_type target_index, column_device_view source, @@ -267,7 +277,8 @@ struct update_target_element @@ -581,9 +592,7 @@ struct identity_initializer { template static constexpr bool is_supported() { - // Note: !is_fixed_point() means that aggregations for fixed_point should happen on the - // underlying type (see device_storage_type_t), not that fixed_point is not supported - return cudf::is_fixed_width() && !is_fixed_point() and + return cudf::is_fixed_width() and (k == aggregation::SUM or k == aggregation::MIN or k == aggregation::MAX or k == aggregation::COUNT_VALID or k == aggregation::COUNT_ALL or k == aggregation::ARGMAX or k == aggregation::ARGMIN or @@ -596,7 +605,8 @@ struct identity_initializer { std::enable_if_t, void>::value, T> 
identity_from_operator() { - return corresponding_operator_t::template identity(); + using DeviceType = device_storage_type_t; + return corresponding_operator_t::template identity(); } template @@ -613,9 +623,11 @@ struct identity_initializer { if constexpr (cudf::is_timestamp()) return k == aggregation::ARGMAX ? T{typename T::duration(ARGMAX_SENTINEL)} : T{typename T::duration(ARGMIN_SENTINEL)}; - else - return k == aggregation::ARGMAX ? static_cast(ARGMAX_SENTINEL) - : static_cast(ARGMIN_SENTINEL); + else { + using DeviceType = device_storage_type_t; + return k == aggregation::ARGMAX ? static_cast(ARGMAX_SENTINEL) + : static_cast(ARGMIN_SENTINEL); + } } return identity_from_operator(); } @@ -625,7 +637,11 @@ struct identity_initializer { std::enable_if_t(), void> operator()(mutable_column_view const& col, rmm::cuda_stream_view stream) { - thrust::fill(rmm::exec_policy(stream), col.begin(), col.end(), get_identity()); + using DeviceType = device_storage_type_t; + thrust::fill(rmm::exec_policy(stream), + col.begin(), + col.end(), + get_identity()); } template diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 69bde7f57fd..c2bd7a4893c 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -1085,8 +1085,8 @@ template struct target_type_impl< Source, k, - std::enable_if_t() && not is_chrono() && - not is_fixed_point() && (k == aggregation::MEAN)>> { + std::enable_if_t() and not is_chrono() and + not is_fixed_point() and (k == aggregation::MEAN)>> { using type = double; }; @@ -1113,12 +1113,13 @@ struct target_type_impl< using type = int64_t; }; -// Summing fixed_point numbers, always use the decimal64 accumulator +// Summing fixed_point numbers template -struct target_type_impl() && (k == aggregation::SUM)>> { - using type = numeric::decimal64; +struct target_type_impl< + Source, + k, + std::enable_if_t() && (k == aggregation::SUM)>> { + using type = Source; }; // Summing/Multiplying float/doubles, use same type accumulator diff --git a/cpp/include/cudf/detail/binaryop.hpp b/cpp/include/cudf/detail/binaryop.hpp index ce7731ef7d2..9fa31d0e01d 100644 --- a/cpp/include/cudf/detail/binaryop.hpp +++ b/cpp/include/cudf/detail/binaryop.hpp @@ -22,52 +22,9 @@ namespace cudf { //! Inner interfaces and implementations namespace detail { -namespace jit { -/** - * @copydoc cudf::jit::binary_operation(scalar const&, column_view const&, binary_operator, - * data_type, rmm::mr::device_memory_resource *) - * - * @param stream CUDA stream used for device memory operations and kernel launches. - */ -std::unique_ptr binary_operation( - scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @copydoc cudf::jit::binary_operation(column_view const&, scalar const&, binary_operator, - * data_type, rmm::mr::device_memory_resource *) - * - * @param stream CUDA stream used for device memory operations and kernel launches. 
- */ -std::unique_ptr binary_operation( - column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @copydoc cudf::jit::binary_operation(column_view const&, column_view const&, - * binary_operator, data_type, rmm::mr::device_memory_resource *) - * - * @param stream CUDA stream used for device memory operations and kernel launches. - */ -std::unique_ptr binary_operation( - column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -} // namespace jit - -/** - * @copydoc cudf::jit::binary_operation(column_view const&, column_view const&, + * @copydoc cudf::binary_operation(column_view const&, column_view const&, * std::string const&, data_type, rmm::mr::device_memory_resource *) * * @param stream CUDA stream used for device memory operations and kernel launches. diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index bb5cfa5c6e0..fb4c636fcb0 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -217,12 +217,7 @@ struct DeviceType()>> { }; template -struct DeviceType>> { - using type = typename cudf::device_storage_type_t; -}; - -template -struct DeviceType>> { +struct DeviceType()>> { using type = typename cudf::device_storage_type_t; }; diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 594191d275d..08dbdb6f1a0 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -66,9 +66,9 @@ struct bounds_checker { * @brief The operation to perform when a gather map index is out of bounds */ enum class gather_bitmask_op { - DONT_CHECK, // Don't check for out of bounds indices - PASSTHROUGH, // Preserve mask at rows with out of bounds indices - NULLIFY, // Nullify rows with out of bounds indices + DONT_CHECK, ///< Don't check for out of bounds indices + PASSTHROUGH, ///< Preserve mask at rows with out of bounds indices + NULLIFY, ///< Nullify rows with out of bounds indices }; template diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index deb161fd9c2..3e789299716 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -102,9 +102,8 @@ struct null_replaced_value_accessor { bool has_nulls = true) : col{col}, null_replacement{null_val}, has_nulls{has_nulls} { - CUDF_EXPECTS(type_to_id() == device_storage_type_id(col.type().id()), + CUDF_EXPECTS(type_id_matches_device_storage_type(col.type().id()), "the data type mismatch"); - // verify validity bitmask is non-null, otherwise, is_null_nocheck() will crash if (has_nulls) CUDF_EXPECTS(col.nullable(), "column with nulls must have a validity bitmask"); } diff --git a/cpp/include/cudf/detail/scan.hpp b/cpp/include/cudf/detail/scan.hpp index 113c15f19a1..8e3db1c7b10 100644 --- a/cpp/include/cudf/detail/scan.hpp +++ b/cpp/include/cudf/detail/scan.hpp @@ -26,22 +26,25 @@ namespace detail { /** * @brief Computes the exclusive scan of a column. * - * The null values are skipped for the operation, and if an input element - * at `i` is null, then the output element at `i` will also be null. 
+ * The null values are skipped for the operation, and if an input element at `i` is null, then the + * output element at `i` will also be null. * - * The identity value for the column type as per the aggregation type - * is used for the value of the first element in the output column. + * The identity value for the column type as per the aggregation type is used for the value of the + * first element in the output column. * - * @throws cudf::logic_error if column data_type is not an arithmetic type. + * Struct columns are allowed with aggregation types Min and Max. * - * @param input The input column view for the scan - * @param agg unique_ptr to aggregation operator applied by the scan - * @param null_handling Exclude null values when computing the result if - * null_policy::EXCLUDE. Include nulls if null_policy::INCLUDE. - * Any operation with a null results in a null. + * @throws cudf::logic_error if column data_type is not an arithmetic type or struct type but the + * `agg` is not Min or Max. + * + * @param input The input column view for the scan. + * @param agg unique_ptr to aggregation operator applied by the scan. + * @param null_handling Exclude null values when computing the result if null_policy::EXCLUDE. + * Include nulls if null_policy::INCLUDE. Any operation with a null results in + * a null. * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @returns Column with scan results + * @param mr Device memory resource used to allocate the returned scalar's device memory. + * @returns Column with scan results. */ std::unique_ptr scan_exclusive(column_view const& input, std::unique_ptr const& agg, @@ -52,22 +55,22 @@ std::unique_ptr scan_exclusive(column_view const& input, /** * @brief Computes the inclusive scan of a column. * - * The null values are skipped for the operation, and if an input element - * at `i` is null, then the output element at `i` will also be null. + * The null values are skipped for the operation, and if an input element at `i` is null, then the + * output element at `i` will also be null. * - * String columns are allowed with aggregation types Min and Max. + * String and struct columns are allowed with aggregation types Min and Max. * - * @throws cudf::logic_error if column data_type is not an arithmetic type - * or string type but the `agg` is not Min or Max + * @throws cudf::logic_error if column data_type is not an arithmetic type or string/struct types + * but the `agg` is not Min or Max. * - * @param input The input column view for the scan - * @param agg unique_ptr to aggregation operator applied by the scan - * @param null_handling Exclude null values when computing the result if - * null_policy::EXCLUDE. Include nulls if null_policy::INCLUDE. - * Any operation with a null results in a null. + * @param input The input column view for the scan. + * @param agg unique_ptr to aggregation operator applied by the scan. + * @param null_handling Exclude null values when computing the result if null_policy::EXCLUDE. + * Include nulls if null_policy::INCLUDE. Any operation with a null results in + * a null. * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @returns Column with scan results + * @param mr Device memory resource used to allocate the returned scalar's device memory. 
+ * @returns Column with scan results. */ std::unique_ptr scan_inclusive(column_view const& input, std::unique_ptr const& agg, @@ -76,24 +79,24 @@ std::unique_ptr scan_inclusive(column_view const& input, rmm::mr::device_memory_resource* mr); /** - * @brief Generate row ranks for a column + * @brief Generate row ranks for a column. * - * @param order_by Input column to generate ranks for - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned column's device memory - * @return rank values + * @param order_by Input column to generate ranks for. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return rank values. */ std::unique_ptr inclusive_rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); /** - * @brief Generate row dense ranks for a column + * @brief Generate row dense ranks for a column. * - * @param order_by Input column to generate ranks for - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned column's device memory - * @return rank values + * @param order_by Input column to generate ranks for. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return rank values. */ std::unique_ptr inclusive_dense_rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index aece79107c6..6f32e3190bf 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -28,8 +28,8 @@ namespace structs { namespace detail { enum class column_nullability { - MATCH_INCOMING, // generate a null column if the incoming column has nulls - FORCE // always generate a null column + MATCH_INCOMING, ///< generate a null column if the incoming column has nulls + FORCE ///< always generate a null column }; /** diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh index 7524593e5ea..95605dc8a71 100644 --- a/cpp/include/cudf/detail/utilities/device_operators.cuh +++ b/cpp/include/cudf/detail/utilities/device_operators.cuh @@ -22,6 +22,7 @@ */ #include +#include #include #include #include @@ -119,7 +120,7 @@ struct DeviceMin { CUDA_HOST_DEVICE_CALLABLE auto operator()(const T& lhs, const T& rhs) -> decltype(cudf::detail::min(lhs, rhs)) { - return cudf::detail::min(lhs, rhs); + return numeric::detail::min(lhs, rhs); } template < @@ -128,14 +129,15 @@ struct DeviceMin { !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { - return std::numeric_limits::max(); + if constexpr (cudf::is_chrono()) return T::max(); + return cuda::std::numeric_limits::max(); } template ()>* = nullptr> static constexpr T identity() { CUDF_FAIL("fixed_point does not yet support DeviceMin identity"); - return std::numeric_limits::max(); + return cuda::std::numeric_limits::max(); } // @brief identity specialized for string_view @@ -160,7 +162,7 @@ struct DeviceMax { CUDA_HOST_DEVICE_CALLABLE auto operator()(const T& lhs, const T& rhs) -> decltype(cudf::detail::max(lhs, rhs)) { - return 
cudf::detail::max(lhs, rhs); + return numeric::detail::max(lhs, rhs); } template < @@ -169,14 +171,15 @@ struct DeviceMax { !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { - return std::numeric_limits::lowest(); + if constexpr (cudf::is_chrono()) return T::min(); + return cuda::std::numeric_limits::lowest(); } template ()>* = nullptr> static constexpr T identity() { CUDF_FAIL("fixed_point does not yet support DeviceMax identity"); - return std::numeric_limits::lowest(); + return cuda::std::numeric_limits::lowest(); } template >* = nullptr> diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index ebb21492be9..f3390d9387b 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -274,6 +274,13 @@ MurmurHash3_32::operator()(numeric::decimal64 const& key) co return this->compute(key.value()); } +template <> +hash_value_type CUDA_DEVICE_CALLABLE +MurmurHash3_32::operator()(numeric::decimal128 const& key) const +{ + return this->compute(key.value()); +} + template <> hash_value_type CUDA_DEVICE_CALLABLE MurmurHash3_32::operator()(cudf::list_view const& key) const @@ -419,6 +426,13 @@ SparkMurmurHash3_32::operator()(numeric::decimal64 const& ke return this->compute(key.value()); } +template <> +hash_value_type CUDA_DEVICE_CALLABLE +SparkMurmurHash3_32::operator()(numeric::decimal128 const& key) const +{ + return this->compute<__int128_t>(key.value()); +} + template <> hash_value_type CUDA_DEVICE_CALLABLE SparkMurmurHash3_32::operator()(cudf::list_view const& key) const diff --git a/cpp/include/cudf/detail/utilities/integer_utils.hpp b/cpp/include/cudf/detail/utilities/integer_utils.hpp index dc919433da7..ddedab3944c 100644 --- a/cpp/include/cudf/detail/utilities/integer_utils.hpp +++ b/cpp/include/cudf/detail/utilities/integer_utils.hpp @@ -22,6 +22,8 @@ * @file Utility code involving integer arithmetic */ +#include + #include #include #include @@ -151,17 +153,11 @@ constexpr inline bool is_a_power_of_two(I val) noexcept * @return Absolute value if value type is signed. */ template -std::enable_if_t::value, T> constexpr inline absolute_value(T value) -{ - return std::abs(value); -} -// Unsigned type just returns itself. -template -std::enable_if_t::value, T> constexpr inline absolute_value(T value) +constexpr inline auto absolute_value(T value) -> T { + if constexpr (cuda::std::is_signed()) return numeric::detail::abs(value); return value; } } // namespace util - } // namespace cudf diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index af2715d1290..e8223b53997 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include // Note: The versions are used in order for Jitify to work with our fixed_point type. 
@@ -48,13 +49,15 @@ enum class Radix : int32_t { BASE_2 = 2, BASE_10 = 10 }; template constexpr inline auto is_supported_representation_type() { - return cuda::std::is_same_v || cuda::std::is_same_v; + return cuda::std::is_same_v || // + cuda::std::is_same_v || // + cuda::std::is_same_v; } template constexpr inline auto is_supported_construction_value_type() { - return cuda::std::is_integral::value || cuda::std::is_floating_point::value; + return cuda::std::is_integral() || cuda::std::is_floating_point::value; } // Helper functions for `fixed_point` type @@ -551,17 +554,18 @@ class fixed_point { explicit operator std::string() const { if (_scale < 0) { - auto const av = std::abs(_value); - int64_t const n = std::pow(10, -_scale); - int64_t const f = av % n; + auto const av = detail::abs(_value); + Rep const n = detail::exp10(-_scale); + Rep const f = av % n; auto const num_zeros = - std::max(0, (-_scale - static_cast(std::to_string(f).size()))); + std::max(0, (-_scale - static_cast(detail::to_string(f).size()))); auto const zeros = std::string(num_zeros, '0'); auto const sign = _value < 0 ? std::string("-") : std::string(); - return sign + std::to_string(av / n) + std::string(".") + zeros + std::to_string(av % n); + return sign + detail::to_string(av / n) + std::string(".") + zeros + + detail::to_string(av % n); } else { auto const zeros = std::string(_scale, '0'); - return std::to_string(_value) + zeros; + return detail::to_string(_value) + zeros; } } }; @@ -752,8 +756,9 @@ CUDA_HOST_DEVICE_CALLABLE bool operator>(fixed_point const& lhs, return lhs.rescaled(scale)._value > rhs.rescaled(scale)._value; } -using decimal32 = fixed_point; -using decimal64 = fixed_point; +using decimal32 = fixed_point; +using decimal64 = fixed_point; +using decimal128 = fixed_point<__int128_t, Radix::BASE_10>; /** @} */ // end of group } // namespace numeric diff --git a/cpp/include/cudf/fixed_point/temporary.hpp b/cpp/include/cudf/fixed_point/temporary.hpp new file mode 100644 index 00000000000..2b50e273517 --- /dev/null +++ b/cpp/include/cudf/fixed_point/temporary.hpp @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +// Note: The versions are used in order for Jitify to work with our fixed_point type. 
+// Jitify is needed for several algorithms (binaryop, rolling, etc) +#include +#include + +#include +#include + +namespace numeric { +namespace detail { + +template +auto to_string(T value) -> std::string +{ + if constexpr (cuda::std::is_same_v) { + auto s = std::string{}; + auto const sign = value < 0; + if (sign) { + value += 1; // avoid overflowing if value == __int128_t lowest + value *= -1; + if (value == cuda::std::numeric_limits<__int128_t>::max()) + return "-170141183460469231731687303715884105728"; + value += 1; // can add back the one, no need to avoid overflow anymore + } + while (value) { + s.push_back("0123456789"[value % 10]); + value /= 10; + } + if (sign) s.push_back('-'); + std::reverse(s.begin(), s.end()); + return s; + } else { + return std::to_string(value); + } + return std::string{}; // won't ever hit here, need to suppress warning though +} + +template +constexpr auto abs(T value) +{ + return value >= 0 ? value : -value; +} + +template +CUDA_HOST_DEVICE_CALLABLE auto min(T lhs, T rhs) +{ + return lhs < rhs ? lhs : rhs; +} + +template +CUDA_HOST_DEVICE_CALLABLE auto max(T lhs, T rhs) +{ + return lhs > rhs ? lhs : rhs; +} + +template +constexpr auto exp10(int32_t exponent) +{ + BaseType value = 1; + while (exponent > 0) + value *= 10, --exponent; + return value; +} + +} // namespace detail +} // namespace numeric diff --git a/cpp/include/cudf/io/detail/csv.hpp b/cpp/include/cudf/io/detail/csv.hpp index aac44bed50e..c190340f6c1 100644 --- a/cpp/include/cudf/io/detail/csv.hpp +++ b/cpp/include/cudf/io/detail/csv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,45 +40,23 @@ table_with_metadata read_csv(std::unique_ptr&& source, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); -class writer { - public: - class impl; - - private: - std::unique_ptr _impl; - - public: - /** - * @brief Constructor for output to a file. - * - * @param sinkp The data sink to write the data to - * @param options Settings for controlling writing behavior - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource to use for device memory allocation - */ - writer(std::unique_ptr sinkp, - csv_writer_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); // cannot provide definition here (because - // _impl is incomplete hence unique_ptr has - // not enough sizeof() info) - - /** - * @brief Destructor explicitly-declared to avoid inlined in header - */ - ~writer(); +/** + * @brief Write an entire dataset to CSV format. + * + * @param sink Output sink + * @param table The set of columns + * @param metadata The metadata associated with the table + * @param options Settings for controlling behavior + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource to use for device memory allocation + */ +void write_csv(data_sink* sink, + table_view const& table, + const table_metadata* metadata, + csv_writer_options const& options, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** - * @brief Writes the entire dataset.
- * - * @param table Set of columns to output - * @param metadata Table metadata and column names - * @param stream CUDA stream used for device memory operations and kernel launches. - */ - void write(table_view const& table, - const table_metadata* metadata = nullptr, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); -}; } // namespace csv } // namespace detail } // namespace io diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 14f27ef8eef..98922ad10a4 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -148,7 +148,7 @@ class writer { * @param[in] metadata_list List of input file metadata * @return A parquet-compatible blob that contains the data for all rowgroups in the list */ - static std::unique_ptr> merge_rowgroup_metadata( + static std::unique_ptr> merge_row_group_metadata( const std::vector>>& metadata_list); }; diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 2a95b85465b..3bc2e6c9ef2 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -70,6 +70,9 @@ class orc_reader_options { // Columns that should be converted from Decimal to Float64 std::vector _decimal_cols_as_float; + // Columns that should be read as Decimal128 + std::vector _decimal128_columns; + friend orc_reader_options_builder; /** @@ -136,13 +139,18 @@ class orc_reader_options { data_type get_timestamp_type() const { return _timestamp_type; } /** - * @brief Columns that should be converted from Decimal to Float64. + * @brief Fully qualified names of columns that should be converted from Decimal to Float64. */ std::vector const& get_decimal_cols_as_float() const { return _decimal_cols_as_float; } + /** + * @brief Fully qualified names of columns that should be read as 128-bit Decimal. + */ + std::vector const& get_decimal128_columns() const { return _decimal128_columns; } + // Setters /** @@ -210,12 +218,22 @@ class orc_reader_options { /** * @brief Set columns that should be converted from Decimal to Float64 * - * @param val Vector of column names. + * @param val Vector of fully qualified column names. */ void set_decimal_cols_as_float(std::vector val) { _decimal_cols_as_float = std::move(val); } + + /** + * @brief Set columns that should be read as 128-bit Decimal + * + * @param val Vector of fully qualified column names. + */ + void set_decimal128_columns(std::vector val) + { + _decimal128_columns = std::move(val); + } }; class orc_reader_options_builder { @@ -332,6 +350,18 @@ class orc_reader_options_builder { return *this; } + /** + * @brief Columns that should be read as 128-bit Decimal + * + * @param val Vector of column names. + * @return this for chaining. + */ + orc_reader_options_builder& decimal128_columns(std::vector val) + { + options._decimal128_columns = std::move(val); + return *this; + } + /** * @brief move orc_reader_options member once it's built. */ @@ -445,24 +475,24 @@ class orc_writer_options { /** * @brief Whether writing column statistics is enabled/disabled. */ - bool enable_statistics() const { return _enable_statistics; } + bool is_enabled_statistics() const { return _enable_statistics; } /** * @brief Returns maximum stripe size, in bytes. */ - auto stripe_size_bytes() const { return _stripe_size_bytes; } + auto get_stripe_size_bytes() const { return _stripe_size_bytes; } /** * @brief Returns maximum stripe size, in rows. 
*/ - auto stripe_size_rows() const { return _stripe_size_rows; } + auto get_stripe_size_rows() const { return _stripe_size_rows; } /** * @brief Returns the row index stride. */ - auto row_index_stride() const + auto get_row_index_stride() const { - auto const unaligned_stride = std::min(_row_index_stride, stripe_size_rows()); + auto const unaligned_stride = std::min(_row_index_stride, get_stripe_size_rows()); return unaligned_stride - unaligned_stride % 8; } @@ -739,24 +769,24 @@ class chunked_orc_writer_options { /** * @brief Whether writing column statistics is enabled/disabled. */ - bool enable_statistics() const { return _enable_statistics; } + bool is_enabled_statistics() const { return _enable_statistics; } /** * @brief Returns maximum stripe size, in bytes. */ - auto stripe_size_bytes() const { return _stripe_size_bytes; } + auto get_stripe_size_bytes() const { return _stripe_size_bytes; } /** * @brief Returns maximum stripe size, in rows. */ - auto stripe_size_rows() const { return _stripe_size_rows; } + auto get_stripe_size_rows() const { return _stripe_size_rows; } /** * @brief Returns the row index stride. */ - auto row_index_stride() const + auto get_row_index_stride() const { - auto const unaligned_stride = std::min(_row_index_stride, stripe_size_rows()); + auto const unaligned_stride = std::min(_row_index_stride, get_stripe_size_rows()); return unaligned_stride - unaligned_stride % 8; } diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 660ec051304..88cf7416506 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -37,6 +37,9 @@ namespace io { * @file */ +constexpr size_t default_row_group_size_bytes = 128 * 1024 * 1024; // 128MB +constexpr size_type default_row_group_size_rows = 1000000; + /** * @brief Builds parquet_reader_options to use for `read_parquet()`. */ @@ -398,6 +401,10 @@ class parquet_writer_options { bool _write_timestamps_as_int96 = false; // Column chunks file path to be set in the raw output metadata std::string _column_chunks_file_path; + // Maximum size of each row group (unless smaller than a single page) + size_t _row_group_size_bytes = default_row_group_size_bytes; + // Maximum number of rows in row group (unless smaller than a single page) + size_type _row_group_size_rows = default_row_group_size_rows; /** * @brief Constructor from sink and table. @@ -472,6 +479,16 @@ class parquet_writer_options { */ std::string get_column_chunks_file_path() const { return _column_chunks_file_path; } + /** + * @brief Returns maximum row group size, in bytes. + */ + auto get_row_group_size_bytes() const { return _row_group_size_bytes; } + + /** + * @brief Returns maximum row group size, in rows. + */ + auto get_row_group_size_rows() const { return _row_group_size_rows; } + /** * @brief Sets metadata. * @@ -510,6 +527,28 @@ class parquet_writer_options { { _column_chunks_file_path.assign(file_path); } + + /** + * @brief Sets the maximum row group size, in bytes. + */ + void set_row_group_size_bytes(size_t size_bytes) + { + CUDF_EXPECTS( + size_bytes >= 512 * 1024, + "The maximum row group size cannot be smaller than the page size, which is 512KB."); + _row_group_size_bytes = size_bytes; + } + + /** + * @brief Sets the maximum row group size, in rows. 
+ */ + void set_row_group_size_rows(size_type size_rows) + { + CUDF_EXPECTS( + size_rows >= 5000, + "The maximum row group size cannot be smaller than the page size, which is 5000 rows."); + _row_group_size_rows = size_rows; + } }; class parquet_writer_options_builder { @@ -582,6 +621,30 @@ class parquet_writer_options_builder { return *this; } + /** + * @brief Sets the maximum row group size, in bytes. + * + * @param val maximum row group size + * @return this for chaining. + */ + parquet_writer_options_builder& row_group_size_bytes(size_t val) + { + options.set_row_group_size_bytes(val); + return *this; + } + + /** + * @brief Sets the maximum number of rows in output row groups. + * + * @param val maximum number or rows + * @return this for chaining. + */ + parquet_writer_options_builder& row_group_size_rows(size_type val) + { + options.set_row_group_size_rows(val); + return *this; + } + /** * @brief Sets whether int96 timestamps are written or not in parquet_writer_options. * @@ -637,7 +700,7 @@ std::unique_ptr> write_parquet( * @param[in] metadata_list List of input file metadata. * @return A parquet-compatible blob that contains the data for all row groups in the list. */ -std::unique_ptr> merge_rowgroup_metadata( +std::unique_ptr> merge_row_group_metadata( const std::vector>>& metadata_list); /** @@ -660,6 +723,10 @@ class chunked_parquet_writer_options { // Parquet writer can write INT96 or TIMESTAMP_MICROS. Defaults to TIMESTAMP_MICROS. // If true then overrides any per-column setting in _metadata. bool _write_timestamps_as_int96 = false; + // Maximum size of each row group (unless smaller than a single page) + size_t _row_group_size_bytes = default_row_group_size_bytes; + // Maximum number of rows in row group (unless smaller than a single page) + size_type _row_group_size_rows = default_row_group_size_rows; /** * @brief Constructor from sink. @@ -703,6 +770,16 @@ class chunked_parquet_writer_options { */ bool is_enabled_int96_timestamps() const { return _write_timestamps_as_int96; } + /** + * @brief Returns maximum row group size, in bytes. + */ + auto get_row_group_size_bytes() const { return _row_group_size_bytes; } + + /** + * @brief Returns maximum row group size, in rows. + */ + auto get_row_group_size_rows() const { return _row_group_size_rows; } + /** * @brief Sets metadata. * @@ -732,6 +809,28 @@ class chunked_parquet_writer_options { */ void enable_int96_timestamps(bool req) { _write_timestamps_as_int96 = req; } + /** + * @brief Sets the maximum row group size, in bytes. + */ + void set_row_group_size_bytes(size_t size_bytes) + { + CUDF_EXPECTS( + size_bytes >= 512 * 1024, + "The maximum row group size cannot be smaller than the page size, which is 512KB."); + _row_group_size_bytes = size_bytes; + } + + /** + * @brief Sets the maximum row group size, in rows. + */ + void set_row_group_size_rows(size_type size_rows) + { + CUDF_EXPECTS( + size_rows >= 5000, + "The maximum row group size cannot be smaller than the page size, which is 5000 rows."); + _row_group_size_rows = size_rows; + } + /** * @brief creates builder to build chunked_parquet_writer_options. * @@ -811,6 +910,30 @@ class chunked_parquet_writer_options_builder { return *this; } + /** + * @brief Sets the maximum row group size, in bytes. + * + * @param val maximum row group size + * @return this for chaining. 
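// Editor's aside (not part of the diff): a minimal end-to-end sketch of the new row-group size
// knobs, assuming an existing cudf::table_view `tbl` and a hypothetical output path. Values below
// the 512 KB / 5000-row floors would be rejected by the CUDF_EXPECTS checks above.
#include <cudf/io/parquet.hpp>

auto pq_opts = cudf::io::parquet_writer_options_builder(cudf::io::sink_info{"example.parquet"}, tbl)
                 .row_group_size_bytes(64 * 1024 * 1024)  // 64 MB row groups
                 .row_group_size_rows(500000)             // at most 500k rows per row group
                 .build();
cudf::io::write_parquet(pq_opts);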
+ */ + chunked_parquet_writer_options_builder& row_group_size_bytes(size_t val) + { + options.set_row_group_size_bytes(val); + return *this; + } + + /** + * @brief Sets the maximum number of rows in output row groups. + * + * @param val maximum number or rows + * @return this for chaining. + */ + chunked_parquet_writer_options_builder& row_group_size_rows(size_type val) + { + options.set_row_group_size_rows(val); + return *this; + } + /** * @brief move chunked_parquet_writer_options member once it's built. */ diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp index ac965e2d416..cf6be8a20af 100644 --- a/cpp/include/cudf/io/types.hpp +++ b/cpp/include/cudf/io/types.hpp @@ -87,9 +87,9 @@ enum class quote_style { * @brief Column statistics granularity type for parquet/orc writers */ enum statistics_freq { - STATISTICS_NONE = 0, //!< No column statistics - STATISTICS_ROWGROUP = 1, //!< Per-Rowgroup column statistics - STATISTICS_PAGE = 2, //!< Per-page column statistics + STATISTICS_NONE = 0, ///< No column statistics + STATISTICS_ROWGROUP = 1, ///< Per-Rowgroup column statistics + STATISTICS_PAGE = 2, ///< Per-page column statistics }; /** diff --git a/cpp/include/cudf/lists/combine.hpp b/cpp/include/cudf/lists/combine.hpp index a9407ed57ca..61a81e8a745 100644 --- a/cpp/include/cudf/lists/combine.hpp +++ b/cpp/include/cudf/lists/combine.hpp @@ -26,7 +26,7 @@ namespace lists { * @file */ -/* +/** * @brief Flag to specify whether a null list element will be ignored from concatenation, or the * entire concatenation result involving null list elements will be a null element. */ diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index aa98c2e6404..dc2df368bae 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -359,7 +359,7 @@ class fixed_point_scalar : public scalar { rep_type value(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; /** - * @brief Get the decimal32 or decimal64. + * @brief Get the decimal32, decimal64 or decimal128. * * @param stream CUDA stream used for device memory operations. 
*/ diff --git a/cpp/include/cudf/scalar/scalar_device_view.cuh b/cpp/include/cudf/scalar/scalar_device_view.cuh index 884b412d3e2..56afa150dfc 100644 --- a/cpp/include/cudf/scalar/scalar_device_view.cuh +++ b/cpp/include/cudf/scalar/scalar_device_view.cuh @@ -91,6 +91,12 @@ class fixed_width_scalar_device_view_base : public detail::scalar_device_view_ba return *data(); } + /** + * @brief Stores the value in scalar + * + * @tparam T The desired type + * @param value The value to store in scalar + */ template __device__ void set_value(T value) { @@ -159,6 +165,11 @@ class fixed_width_scalar_device_view : public detail::fixed_width_scalar_device_ return fixed_width_scalar_device_view_base::value(); } + /** + * @brief Stores the value in scalar + * + * @param value The value to store in scalar + */ __device__ void set_value(T value) { fixed_width_scalar_device_view_base::set_value(value); } /** @@ -218,6 +229,11 @@ class fixed_point_scalar_device_view : public detail::scalar_device_view_base { { } + /** + * @brief Stores the value in scalar + * + * @param value The value to store in scalar + */ __device__ void set_value(rep_type value) { *_data = value; } /** diff --git a/cpp/include/cudf/strings/char_types/char_types.hpp b/cpp/include/cudf/strings/char_types/char_types.hpp index 2af79de0716..04d65065bd3 100644 --- a/cpp/include/cudf/strings/char_types/char_types.hpp +++ b/cpp/include/cudf/strings/char_types/char_types.hpp @@ -37,16 +37,16 @@ namespace strings { * does not match to any explicitly named enumerator. */ enum string_character_types : uint32_t { - DECIMAL = 1 << 0, /// all decimal characters - NUMERIC = 1 << 1, /// all numeric characters - DIGIT = 1 << 2, /// all digit characters - ALPHA = 1 << 3, /// all alphabetic characters - SPACE = 1 << 4, /// all space characters - UPPER = 1 << 5, /// all upper case characters - LOWER = 1 << 6, /// all lower case characters - ALPHANUM = DECIMAL | NUMERIC | DIGIT | ALPHA, /// all alphanumeric characters - CASE_TYPES = UPPER | LOWER, /// all case-able characters - ALL_TYPES = ALPHANUM | CASE_TYPES | SPACE /// all character types + DECIMAL = 1 << 0, ///< all decimal characters + NUMERIC = 1 << 1, ///< all numeric characters + DIGIT = 1 << 2, ///< all digit characters + ALPHA = 1 << 3, ///< all alphabetic characters + SPACE = 1 << 4, ///< all space characters + UPPER = 1 << 5, ///< all upper case characters + LOWER = 1 << 6, ///< all lower case characters + ALPHANUM = DECIMAL | NUMERIC | DIGIT | ALPHA, ///< all alphanumeric characters + CASE_TYPES = UPPER | LOWER, ///< all case-able characters + ALL_TYPES = ALPHANUM | CASE_TYPES | SPACE ///< all character types }; /** diff --git a/cpp/include/cudf/strings/convert/convert_lists.hpp b/cpp/include/cudf/strings/convert/convert_lists.hpp index ec22186ea99..279bf44e7fc 100644 --- a/cpp/include/cudf/strings/convert/convert_lists.hpp +++ b/cpp/include/cudf/strings/convert/convert_lists.hpp @@ -50,7 +50,7 @@ namespace strings { * * @param input Lists column to format. * @param na_rep Replacment string for null elements. - * @param separator Strings to use for enclosing list components and separating elements. + * @param separators Strings to use for enclosing list components and separating elements. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New strings column. 
*/ diff --git a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh index 56205c161b1..aa3f544202f 100644 --- a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh +++ b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh @@ -17,7 +17,7 @@ #include #include -#include +#include namespace cudf { namespace strings { @@ -46,7 +46,7 @@ __device__ inline thrust::pair parse_integer( constexpr UnsignedDecimalType decimal_max = (std::numeric_limits::max() - 9L) / 10L; - uint64_t value = 0; // for checking overflow + __uint128_t value = 0; // for checking overflow int32_t exp_offset = 0; bool decimal_found = false; @@ -137,7 +137,7 @@ __device__ DecimalType parse_decimal(char const* iter, char const* iter_end, int // if string begins with a sign, continue with next character if (sign != 0) ++iter; - using UnsignedDecimalType = std::make_unsigned_t; + using UnsignedDecimalType = cuda::std::make_unsigned_t; auto [value, exp_offset] = parse_integer(iter, iter_end); if (value == 0) { return DecimalType{0}; } @@ -150,11 +150,9 @@ __device__ DecimalType parse_decimal(char const* iter, char const* iter_end, int exp_ten += exp_offset; // shift the output value based on the exp_ten and the scale values - if (exp_ten < scale) { - value = value / static_cast(exp10(static_cast(scale - exp_ten))); - } else { - value = value * static_cast(exp10(static_cast(exp_ten - scale))); - } + value = exp_ten < scale + ? value / static_cast(exp10(static_cast(scale - exp_ten))) + : value * static_cast(exp10(static_cast(exp_ten - scale))); return static_cast(value) * (sign == 0 ? 1 : sign); } diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh index ec4a88a0e46..eb7258830ce 100644 --- a/cpp/include/cudf/strings/detail/gather.cuh +++ b/cpp/include/cudf/strings/detail/gather.cuh @@ -315,7 +315,7 @@ std::unique_ptr gather( d_out_offsets + output_count, [] __device__(auto size) { return static_cast(size); }, size_t{0}, - thrust::plus{}); + thrust::plus{}); CUDF_EXPECTS(total_bytes < static_cast(std::numeric_limits::max()), "total size of output strings is too large for a cudf column"); diff --git a/cpp/include/cudf/strings/regex/flags.hpp b/cpp/include/cudf/strings/regex/flags.hpp index f6aee6d22cc..637b3b0851b 100644 --- a/cpp/include/cudf/strings/regex/flags.hpp +++ b/cpp/include/cudf/strings/regex/flags.hpp @@ -33,9 +33,9 @@ namespace strings { * and to match the Python flag values. */ enum regex_flags : uint32_t { - DEFAULT = 0, /// default - MULTILINE = 8, /// the '^' and '$' honor new-line characters - DOTALL = 16 /// the '.' matching includes new-line characters + DEFAULT = 0, ///< default + MULTILINE = 8, ///< the '^' and '$' honor new-line characters + DOTALL = 16 ///< the '.' matching includes new-line characters }; /** diff --git a/cpp/include/cudf/strings/string.cuh b/cpp/include/cudf/strings/string.cuh index 82da5ad8f10..d85d19d7f10 100644 --- a/cpp/include/cudf/strings/string.cuh +++ b/cpp/include/cudf/strings/string.cuh @@ -52,6 +52,43 @@ inline __device__ bool is_integer(string_view const& d_str) thrust::seq, begin, end, [] __device__(auto chr) { return chr >= '0' && chr <= '9'; }); } +/** + * @brief Returns true if input contains the not-a-number string. + * + * The following are valid for this function: "NAN" and "NaN" + * @param d_str input string + * @return true if input is as valid NaN string. 
+ */ +inline __device__ bool is_nan_str(string_view const& d_str) +{ + auto const ptr = d_str.data(); + return (d_str.size_bytes() == 3) && (ptr[0] == 'N' || ptr[0] == 'n') && + (ptr[1] == 'A' || ptr[1] == 'a') && (ptr[2] == 'N' || ptr[2] == 'n'); +} + +/** + * @brief Returns true if input contains the infinity string. + * + * The following are valid for this function: "INF", "INFINITY", and "Inf" + * @param d_str input string + * @return true if input is as valid Inf string. + */ +inline __device__ bool is_inf_str(string_view const& d_str) +{ + auto const ptr = d_str.data(); + auto const size = d_str.size_bytes(); + + if (size != 3 && size != 8) return false; + + auto const prefix_valid = (ptr[0] == 'I' || ptr[0] == 'i') && (ptr[1] == 'N' || ptr[1] == 'n') && + (ptr[2] == 'F' || ptr[2] == 'f'); + + return prefix_valid && + ((size == 3) || ((ptr[3] == 'I' || ptr[3] == 'i') && (ptr[4] == 'N' || ptr[4] == 'n') && + (ptr[5] == 'I' || ptr[5] == 'i') && (ptr[6] == 'T' || ptr[6] == 't') && + (ptr[7] == 'Y' || ptr[7] == 'y'))); +} + /** * @brief Returns `true` if all characters in the string * are valid for conversion to a float type. @@ -65,8 +102,8 @@ inline __device__ bool is_integer(string_view const& d_str) * An empty string returns `false`. * No bounds checking is performed to verify if the value would fit * within a specific float type. - * The following strings are also allowed "NaN", "Inf" and, "-Inf" - * and will return true. + * The following strings are also allowed and will return true: + * "NaN", "NAN", "Inf", "INF", "INFINITY" * * @param d_str String to check. * @return true if string has valid float characters @@ -74,29 +111,32 @@ inline __device__ bool is_integer(string_view const& d_str) inline __device__ bool is_float(string_view const& d_str) { if (d_str.empty()) return false; - // strings allowed by the converter - if (d_str.compare("NaN", 3) == 0) return true; - if (d_str.compare("Inf", 3) == 0) return true; - if (d_str.compare("-Inf", 4) == 0) return true; bool decimal_found = false; bool exponent_found = false; size_type bytes = d_str.size_bytes(); const char* data = d_str.data(); // sign character allowed at the beginning of the string - size_type chidx = (*data == '-' || *data == '+') ? 1 : 0; - bool result = chidx < bytes; + size_type ch_idx = (*data == '-' || *data == '+') ? 1 : 0; + + bool result = ch_idx < bytes; + // check for nan and infinity strings + if (result && data[ch_idx] > '9') { + auto const inf_nan = string_view(data + ch_idx, bytes - ch_idx); + if (is_nan_str(inf_nan) || is_inf_str(inf_nan)) return true; + } + // check for float chars [0-9] and a single decimal '.' 
// and scientific notation [eE][+-][0-9] - for (; chidx < bytes; ++chidx) { - auto chr = data[chidx]; + for (; ch_idx < bytes; ++ch_idx) { + auto chr = data[ch_idx]; if (chr >= '0' && chr <= '9') continue; if (!decimal_found && chr == '.') { decimal_found = true; // no more decimals continue; } if (!exponent_found && (chr == 'e' || chr == 'E')) { - if (chidx + 1 < bytes) chr = data[chidx + 1]; - if (chr == '-' || chr == '+') ++chidx; + if (ch_idx + 1 < bytes) chr = data[ch_idx + 1]; + if (chr == '-' || chr == '+') ++ch_idx; decimal_found = true; // no decimal allowed in exponent exponent_found = true; // no more exponents continue; diff --git a/cpp/include/cudf/strings/strip.hpp b/cpp/include/cudf/strings/strip.hpp index 72863bdf23b..fe9cd41e780 100644 --- a/cpp/include/cudf/strings/strip.hpp +++ b/cpp/include/cudf/strings/strip.hpp @@ -31,9 +31,9 @@ namespace strings { * @brief Direction identifier for strip() function. */ enum class strip_type { - LEFT, //<< strip characters from the beginning of the string - RIGHT, //<< strip characters from the end of the string - BOTH //<< strip characters from the beginning and end of the string + LEFT, ///< strip characters from the beginning of the string + RIGHT, ///< strip characters from the end of the string + BOTH ///< strip characters from the beginning and end of the string }; /** diff --git a/cpp/include/cudf/strings/translate.hpp b/cpp/include/cudf/strings/translate.hpp index e014f88c451..0cbf6b22029 100644 --- a/cpp/include/cudf/strings/translate.hpp +++ b/cpp/include/cudf/strings/translate.hpp @@ -60,7 +60,10 @@ std::unique_ptr translate( /** * @brief Removes or keeps the specified character ranges in cudf::strings::filter_characters */ -enum class filter_type : bool { KEEP, REMOVE }; +enum class filter_type : bool { + KEEP, ///< All characters but those specified are removed + REMOVE ///< Only the specified characters are removed +}; /** * @brief Removes ranges of characters from each string in a strings column. diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index c719c564a87..70ccac2f75d 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -67,7 +67,7 @@ __device__ weak_ordering compare_elements(Element lhs, Element rhs) } } // namespace detail -/* +/** * @brief A specialization for floating-point `Element` type relational comparison * to derive the order of the elements with respect to `lhs`. Specialization is to * handle `nan` in the order shown below. @@ -187,6 +187,7 @@ class element_equality_comparator { * * @param lhs_element_index The index of the first element * @param rhs_element_index The index of the second element + * @return True if both lhs and rhs element are both nulls and `nulls_are_equal` is true, or equal * */ template transform( bool is_ptx, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -std::unique_ptr generalized_masked_op( - table_view const& data_view, - std::string const& binary_udf, - data_type output_type, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** * @brief Creates a null_mask from `input` by converting `NaN` to null and * preserving existing null values and also returns new null_count. 
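As a rough illustration of the broadened NaN/Inf handling in the is_float device predicate above: the public cudf::strings::is_float column API (assumed here to route through this predicate, and paired with the cudf::test column wrapper for brevity) should now flag the case-insensitive NaN/Inf spellings as valid. A sketch, not part of the diff:

#include <cudf/strings/convert/convert_floats.hpp>
#include <cudf_test/column_wrapper.hpp>

cudf::test::strings_column_wrapper input({"1.25", "-3.5e2", "NaN", "INFINITY", "-inf", "infin"});
auto result = cudf::strings::is_float(cudf::strings_column_view{input});
// expected BOOL8 values: true, true, true, true, true, false ("infin" is neither 3 nor 8 characters)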
diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index e026ae9ac0f..13d5f8e06bc 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -236,6 +236,7 @@ enum class type_id : int32_t { LIST, ///< List elements DECIMAL32, ///< Fixed-point type with int32_t DECIMAL64, ///< Fixed-point type with int64_t + DECIMAL128, ///< Fixed-point type with __int128_t STRUCT, ///< Struct elements // `NUM_TYPE_IDS` must be last! NUM_TYPE_IDS ///< Total number of type ids @@ -271,7 +272,7 @@ class data_type { */ explicit data_type(type_id id, int32_t scale) : _id{id}, _fixed_point_scale{scale} { - assert(id == type_id::DECIMAL32 || id == type_id::DECIMAL64); + assert(id == type_id::DECIMAL32 || id == type_id::DECIMAL64 || id == type_id::DECIMAL128); } /** diff --git a/cpp/include/cudf/unary.hpp b/cpp/include/cudf/unary.hpp index 254a7988e2e..36f08b7f23e 100644 --- a/cpp/include/cudf/unary.hpp +++ b/cpp/include/cudf/unary.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,28 +28,28 @@ namespace cudf { */ enum class unary_operator : int32_t { - SIN, // < Trigonometric sine - COS, // < Trigonometric cosine - TAN, // < Trigonometric tangent - ARCSIN, // < Trigonometric sine inverse - ARCCOS, // < Trigonometric cosine inverse - ARCTAN, // < Trigonometric tangent inverse - SINH, // < Hyperbolic sine - COSH, // < Hyperbolic cosine - TANH, // < Hyperbolic tangent - ARCSINH, // < Hyperbolic sine inverse - ARCCOSH, // < Hyperbolic cosine inverse - ARCTANH, // < Hyperbolic tangent inverse - EXP, // < Exponential (base e, Euler number) - LOG, // < Natural Logarithm (base e) - SQRT, // < Square-root (x^0.5) - CBRT, // < Cube-root (x^(1.0/3)) - CEIL, // < Smallest integer value not less than arg - FLOOR, // < largest integer value not greater than arg - ABS, // < Absolute value - RINT, // < Rounds the floating-point argument arg to an integer value - BIT_INVERT, // < Bitwise Not (~) - NOT, // < Logical Not (!) + SIN, ///< Trigonometric sine + COS, ///< Trigonometric cosine + TAN, ///< Trigonometric tangent + ARCSIN, ///< Trigonometric sine inverse + ARCCOS, ///< Trigonometric cosine inverse + ARCTAN, ///< Trigonometric tangent inverse + SINH, ///< Hyperbolic sine + COSH, ///< Hyperbolic cosine + TANH, ///< Hyperbolic tangent + ARCSINH, ///< Hyperbolic sine inverse + ARCCOSH, ///< Hyperbolic cosine inverse + ARCTANH, ///< Hyperbolic tangent inverse + EXP, ///< Exponential (base e, Euler number) + LOG, ///< Natural Logarithm (base e) + SQRT, ///< Square-root (x^0.5) + CBRT, ///< Cube-root (x^(1.0/3)) + CEIL, ///< Smallest integer value not less than arg + FLOOR, ///< largest integer value not greater than arg + ABS, ///< Absolute value + RINT, ///< Rounds the floating-point argument arg to an integer value + BIT_INVERT, ///< Bitwise Not (~) + NOT, ///< Logical Not (!) }; /** diff --git a/cpp/include/cudf/utilities/traits.cuh b/cpp/include/cudf/utilities/traits.cuh new file mode 100644 index 00000000000..43587ffa583 --- /dev/null +++ b/cpp/include/cudf/utilities/traits.cuh @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include + +namespace cudf { + +/** + * @addtogroup utility_types + * @{ + * @file + */ + +/** + * @brief Indicates whether the type `T` has support for atomics + * + * @tparam T The type to verify + * @return true `T` has support for atomics + * @return false `T` no support for atomics + */ +template +constexpr inline bool has_atomic_support() +{ + return cuda::std::atomic::is_always_lock_free; +} + +struct has_atomic_support_impl { + template + constexpr bool operator()() + { + return has_atomic_support(); + } +}; + +/** + * @brief Indicates whether `type` has support for atomics + * + * @param type The `data_type` to verify + * @return true `type` has support for atomics + * @return false `type` no support for atomics + */ +constexpr inline bool has_atomic_support(data_type type) +{ + return cudf::type_dispatcher(type, has_atomic_support_impl{}); +} + +/** @} */ + +} // namespace cudf diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index 40a833112e1..d1bd3049ba3 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -177,7 +177,7 @@ inline bool is_equality_comparable(data_type type) template constexpr inline bool is_numeric() { - return std::is_integral::value or std::is_floating_point::value; + return cuda::std::is_integral() or std::is_floating_point::value; } struct is_numeric_impl { @@ -404,7 +404,8 @@ constexpr inline bool is_timestamp(data_type type) template constexpr inline bool is_fixed_point() { - return std::is_same_v || std::is_same_v; + return std::is_same_v || std::is_same_v || + std::is_same_v; } struct is_fixed_point_impl { diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp index 857ddafa82c..d7d38aba4f3 100644 --- a/cpp/include/cudf/utilities/type_dispatcher.hpp +++ b/cpp/include/cudf/utilities/type_dispatcher.hpp @@ -85,8 +85,9 @@ using id_to_type = typename id_to_type_impl::type; /** * @brief "Returns" the corresponding type that is stored on the device when using `cudf::column` * - * For `decimal32`, the storage type is an `int32_t`. - * For `decimal64`, the storage type is an `int64_t`. + * For `decimal32`, the storage type is an `int32_t`. + * For `decimal64`, the storage type is an `int64_t`. + * For `decimal128`, the storage type is an `__int128_t`. 
* * Use this "type function" with the `using` type alias: * @code @@ -98,25 +99,11 @@ using id_to_type = typename id_to_type_impl::type; // clang-format off template using device_storage_type_t = - std::conditional_t, int32_t, - std::conditional_t, int64_t, T>>; + std::conditional_t, int32_t, + std::conditional_t, int64_t, + std::conditional_t, __int128_t, T>>>; // clang-format on -/** - * @brief Returns the corresponding `type_id` of type stored on device for a given `type_id` - * - * @param id The given `type_id` - * @return Corresponding `type_id` of type stored on device - */ -inline type_id device_storage_type_id(type_id id) -{ - switch (id) { - case type_id::DECIMAL32: return type_id::INT32; - case type_id::DECIMAL64: return type_id::INT64; - default: return id; - } -} - /** * @brief Checks if `fixed_point`-like types have template type `T` matching the column's * stored type id @@ -127,10 +114,24 @@ inline type_id device_storage_type_id(type_id id) * @return `false` If T does not match the stored column `type_id` */ template -bool type_id_matches_device_storage_type(type_id id) +constexpr bool type_id_matches_device_storage_type(type_id id) { return (id == type_id::DECIMAL32 && std::is_same_v) || - (id == type_id::DECIMAL64 && std::is_same_v) || id == type_to_id(); + (id == type_id::DECIMAL64 && std::is_same_v) || + (id == type_id::DECIMAL128 && std::is_same_v) || id == type_to_id(); +} + +/** + * @brief Checks if `id` is fixed_point (DECIMAL32/64/128) + * + * @return `true` if `id` is `DECIMAL32`, `DECIMAL64` or `DECIMAL128` + * @return `false` otherwise + */ +constexpr bool is_fixed_point(cudf::type_id id) +{ + return id == type_id::DECIMAL32 or // + id == type_id::DECIMAL64 or // + id == type_id::DECIMAL128; } /** @@ -188,6 +189,7 @@ CUDF_TYPE_MAPPING(dictionary32, type_id::DICTIONARY32); CUDF_TYPE_MAPPING(cudf::list_view, type_id::LIST); CUDF_TYPE_MAPPING(numeric::decimal32, type_id::DECIMAL32); CUDF_TYPE_MAPPING(numeric::decimal64, type_id::DECIMAL64); +CUDF_TYPE_MAPPING(numeric::decimal128, type_id::DECIMAL128); CUDF_TYPE_MAPPING(cudf::struct_view, type_id::STRUCT); /** @@ -221,6 +223,7 @@ MAP_NUMERIC_SCALAR(int8_t) MAP_NUMERIC_SCALAR(int16_t) MAP_NUMERIC_SCALAR(int32_t) MAP_NUMERIC_SCALAR(int64_t) +MAP_NUMERIC_SCALAR(__int128_t) MAP_NUMERIC_SCALAR(uint8_t) MAP_NUMERIC_SCALAR(uint16_t) MAP_NUMERIC_SCALAR(uint32_t) @@ -253,6 +256,12 @@ struct type_to_scalar_type_impl { using ScalarDeviceType = cudf::fixed_point_scalar_device_view; }; +template <> +struct type_to_scalar_type_impl { + using ScalarType = cudf::fixed_point_scalar; + using ScalarDeviceType = cudf::fixed_point_scalar_device_view; +}; + template <> // TODO: this is a temporary solution for make_pair_iterator struct type_to_scalar_type_impl { using ScalarType = cudf::numeric_scalar; @@ -492,6 +501,9 @@ CUDF_HDFI constexpr decltype(auto) type_dispatcher(cudf::data_type dtype, Functo case type_id::DECIMAL64: return f.template operator()::type>( std::forward(args)...); + case type_id::DECIMAL128: + return f.template operator()::type>( + std::forward(args)...); case type_id::STRUCT: return f.template operator()::type>( std::forward(args)...); @@ -519,7 +531,7 @@ template struct double_type_dispatcher_second_type { #pragma nv_exec_check_disable template - CUDA_HOST_DEVICE_CALLABLE decltype(auto) operator()(F&& f, Ts&&... args) const + CUDF_HDFI decltype(auto) operator()(F&& f, Ts&&... args) const { return f.template operator()(std::forward(args)...); } @@ -529,9 +541,7 @@ template
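Taken together, the type-dispatch changes above wire numeric::decimal128 through the same machinery as the existing decimal types. A small editorial sketch (not part of the diff, assuming the numeric::decimal128 alias added elsewhere in this release) of what now compiles:

#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/utilities/type_dispatcher.hpp>
#include <type_traits>

// DECIMAL128 is a first-class type id whose device storage type is __int128_t.
static_assert(cudf::type_to_id<numeric::decimal128>() == cudf::type_id::DECIMAL128);
static_assert(std::is_same_v<cudf::device_storage_type_t<numeric::decimal128>, __int128_t>);

// data_type accepts DECIMAL128 with a scale, and a 128-bit fixed_point scalar can be constructed.
auto dtype  = cudf::data_type{cudf::type_id::DECIMAL128, -2};
auto scalar = cudf::fixed_point_scalar<numeric::decimal128>{12345, numeric::scale_type{-2}};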