Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Add cudf.set_allocator function for easier RMM config #2682

Merged
merged 9 commits into from
Sep 11, 2019
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
- PR #2648 Cython/Python reorg
- PR #2588 Update Series.append documentation
- PR #2632 Replace dask-cudf set_index code with upstream
- PR #2682 Add cudf.set_allocator() function for easier allocator init
- PR #2642 Improve null printing and testing
- PR #2747 Add missing Cython headers / cudftestutil lib to conda package for cuspatial build
- PR #2706 Compute CSV format in device code to speedup performance
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Copyright (c) 2018-2019, NVIDIA CORPORATION.

from librmm_cffi import librmm as rmm

from cudf import core, datasets
from cudf._version import get_versions
Expand Down Expand Up @@ -30,6 +29,7 @@
read_orc,
read_parquet,
)
from cudf.utils.utils import set_allocator

__version__ = get_versions()["version"]
del get_versions
77 changes: 76 additions & 1 deletion python/cudf/cudf/utils/utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import functools
from math import ceil, isinf, isnan

import numpy as np
import pandas as pd
import pyarrow as pa
from numba import njit

from librmm_cffi import librmm as rmm
from librmm_cffi import librmm as rmm, librmm_config as rmm_cfg

mask_dtype = np.dtype(np.int8)
mask_bitsize = mask_dtype.itemsize * 8
Expand Down Expand Up @@ -179,6 +180,23 @@ def compare_and_get_name(a, b):
return None


def initfunc(f):
    """
    Decorator that makes ``f`` run at most once.

    The first call executes ``f`` and returns its result; every later
    call is a no-op returning ``None``.  The one-shot state is exposed
    on the wrapper as the ``initialized`` attribute.
    """

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        # Guard clause: subsequent invocations fall through immediately.
        if not wrapper.initialized:
            wrapper.initialized = True
            return f(*args, **kwargs)
        return None

    wrapper.initialized = False
    return wrapper


def get_null_series(size, dtype=np.bool):
"""
Creates a null series of provided dtype and size
Expand Down Expand Up @@ -212,3 +230,60 @@ def __array_function__(self, *args, **kwargs):


IS_NEP18_ACTIVE = _is_nep18_active()


def _set_rmm_config(
    use_managed_memory=False,
    use_pool_allocator=False,
    initial_pool_size=None,
    enable_logging=False,
):
    """
    Reconfigure and reinitialize RMM (the RAPIDS Memory Manager).

    Finalizes the current RMM state, writes the requested settings into
    ``librmm_config``, then reinitializes RMM so they take effect.

    Parameters
    ----------
    use_managed_memory : bool, optional
        If ``True``, use cudaMallocManaged as underlying allocator.
        If ``False`` (default), use cudaMalloc.
    use_pool_allocator : bool
        If ``True``, enable pool mode.
        If ``False`` (default), disable pool mode.
    initial_pool_size : int, optional
        If ``use_pool_allocator=True``, sets initial pool size.
        If ``None``, uses 1/2 of total GPU memory.
    enable_logging : bool, optional
        Enable logging (default ``False``).
        Enabling this option will introduce performance overhead.
    """
    # Tear down any existing allocator state before changing the config.
    rmm.finalize()
    rmm_cfg.use_managed_memory = use_managed_memory
    if use_pool_allocator:
        rmm_cfg.use_pool_allocator = use_pool_allocator
        if initial_pool_size is None:
            initial_pool_size = 0  # 0 means 1/2 GPU memory
        elif initial_pool_size == 0:
            initial_pool_size = 1  # Since "0" is semantic value, use 1 byte
        if not isinstance(initial_pool_size, int):
            raise TypeError("initial_pool_size must be an integer")
        rmm_cfg.initial_pool_size = initial_pool_size
    rmm_cfg.enable_logging = enable_logging
    # Bring RMM back up with the new configuration applied.
    rmm.initialize()


@initfunc
def set_allocator(allocator="default", pool=False, initial_pool_size=None):
    """
    Set the GPU memory allocator. This function should be run only once,
    before any cudf objects are created.

    Parameters
    ----------
    allocator : {"default", "managed"}
        "default": use default allocator.
        "managed": use managed memory allocator.
    pool : bool
        Enable memory pool.
    initial_pool_size : int
        Memory pool size in bytes. If ``None`` (default), 1/2 of total
        GPU memory is used. If ``pool=False``, this argument is ignored.

    Raises
    ------
    ValueError
        If ``allocator`` is not one of the documented choices; previously
        unknown values were silently treated as "default".
    """
    if allocator not in ("default", "managed"):
        raise ValueError("allocator must be 'default' or 'managed'")
    use_managed_memory = allocator == "managed"
    _set_rmm_config(use_managed_memory, pool, initial_pool_size)