diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3623db5a283..69f6634b5c2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,6 +32,15 @@ repos: language: system files: \.(cu|cuh|h|hpp|cpp|inl)$ args: ['-fallback-style=none'] + - repo: local + hooks: + - id: mypy + name: mypy + description: mypy + pass_filenames: false + entry: mypy --config-file=python/cudf/setup.cfg python/cudf/cudf + language: system + types: [python] default_language_version: python: python3 diff --git a/ci/checks/style.sh b/ci/checks/style.sh index 2534f857ee4..17599c6d74d 100755 --- a/ci/checks/style.sh +++ b/ci/checks/style.sh @@ -29,6 +29,10 @@ FLAKE_RETVAL=$? FLAKE_CYTHON=`flake8 --config=python/.flake8.cython` FLAKE_CYTHON_RETVAL=$? +# Run mypy and get results/return code +MYPY_CUDF=`mypy --config-file=python/cudf/setup.cfg python/cudf/cudf` +MYPY_CUDF_RETVAL=$? + # Run clang-format and check for a consistent code format CLANG_FORMAT=`python cpp/scripts/run-clang-format.py 2>&1` CLANG_FORMAT_RETVAL=$? @@ -66,6 +70,14 @@ else echo -e "\n\n>>>> PASSED: flake8-cython style check\n\n" fi +if [ "$MYPY_CUDF_RETVAL" != "0" ]; then + echo -e "\n\n>>>> FAILED: mypy style check; begin output\n\n" + echo -e "$MYPY_CUDF" + echo -e "\n\n>>>> FAILED: mypy style check; end output\n\n" +else + echo -e "\n\n>>>> PASSED: mypy style check\n\n" +fi + if [ "$CLANG_FORMAT_RETVAL" != "0" ]; then echo -e "\n\n>>>> FAILED: clang format check; begin output\n\n" echo -e "$CLANG_FORMAT" @@ -79,7 +91,7 @@ HEADER_META=`ci/checks/headers_test.sh` HEADER_META_RETVAL=$? echo -e "$HEADER_META" -RETVALS=($ISORT_RETVAL $BLACK_RETVAL $FLAKE_RETVAL $FLAKE_CYTHON_RETVAL $CLANG_FORMAT_RETVAL $HEADER_META_RETVAL) +RETVALS=($ISORT_RETVAL $BLACK_RETVAL $FLAKE_RETVAL $FLAKE_CYTHON_RETVAL $CLANG_FORMAT_RETVAL $HEADER_META_RETVAL $MYPY_CUDF_RETVAL) IFS=$'\n' RETVAL=`echo "${RETVALS[*]}" | sort -nr | head -n1` diff --git a/conda/environments/cudf_dev_cuda10.1.yml b/conda/environments/cudf_dev_cuda10.1.yml index 24882d9b3e2..b810b87111a 100644 --- a/conda/environments/cudf_dev_cuda10.1.yml +++ b/conda/environments/cudf_dev_cuda10.1.yml @@ -40,6 +40,8 @@ dependencies: - flake8=3.8.3 - black=19.10 - isort=5.0.7 + - mypy=0.782 + - typing_extensions - pre_commit - dask>=2.22.0 - distributed>=2.22.0 diff --git a/conda/environments/cudf_dev_cuda10.2.yml b/conda/environments/cudf_dev_cuda10.2.yml index 49675fe2154..b4e95bc6730 100644 --- a/conda/environments/cudf_dev_cuda10.2.yml +++ b/conda/environments/cudf_dev_cuda10.2.yml @@ -40,6 +40,8 @@ dependencies: - flake8=3.8.3 - black=19.10 - isort=5.0.7 + - mypy=0.782 + - typing_extensions - pre_commit - dask>=2.22.0 - distributed>=2.22.0 diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index 2917c2c3ce0..3b21f00ab16 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -40,6 +40,8 @@ dependencies: - flake8=3.8.3 - black=19.10 - isort=5.0.7 + - mypy=0.782 + - typing_extensions - pre_commit - dask>=2.22.0 - distributed>=2.22.0 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index ea93c5eb279..c5f7bd34c25 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -34,6 +34,7 @@ requirements: run: - protobuf - python + - typing_extensions - pandas >=1.0,<1.2.0dev0 - cupy >7.1.0,<9.0.0a0 - numba >=0.49.0 diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index be2d4ef5f51..0293518a5d9 100644 --- 
a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -10,13 +10,16 @@ datetime, filling, gpuarrow, + groupby, hash, interop, join, + json, merge, null_mask, nvtext, orc, + parquet, partitioning, quantiles, reduce, @@ -27,6 +30,7 @@ search, sort, stream_compaction, + string_casting, strings, table, transpose, diff --git a/python/cudf/cudf/_lib/column.pyi b/python/cudf/cudf/_lib/column.pyi new file mode 100644 index 00000000000..0f8c044410d --- /dev/null +++ b/python/cudf/cudf/_lib/column.pyi @@ -0,0 +1,124 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + +from __future__ import annotations +from typing import Tuple, Union, TypeVar, Optional + +from cudf._typing import DtypeObj, Dtype, ScalarLike +from cudf.core.buffer import Buffer +from cudf.core.column import ColumnBase + + +T = TypeVar("T") + +class Column: + _data: Optional[Buffer] + _mask: Optional[Buffer] + _base_data: Optional[Buffer] + _base_mask: Optional[Buffer] + _dtype: DtypeObj + _offset: int + _null_count: int + _children: Tuple[ColumnBase, ...] + _base_children: Tuple[ColumnBase, ...] + + def __init__( + self, + data: Optional[Buffer], + size: int, + dtype: Dtype, + mask: Optional[Buffer] = None, + offset: int = None, + null_count: int = None, + children: Tuple[ColumnBase, ...] = (), + ) -> None: + ... + + @property + def base_size(self) -> int: + ... + + @property + def dtype(self) -> DtypeObj: + ... + + @property + def size(self) -> int: + ... + + @property + def base_data(self) -> Optional[Buffer]: + ... + + @property + def base_data_ptr(self) -> int: + ... + + @property + def data(self) -> Optional[Buffer]: + ... + + @property + def data_ptr(self) -> int: + ... + + def set_base_data(self, value: Buffer) -> None: + ... + + @property + def nullable(self) -> bool: + ... + + @property + def has_nulls(self) -> bool: + ... + + @property + def base_mask(self) -> Optional[Buffer]: + ... + + @property + def base_mask_ptr(self) -> int: + ... + + @property + def mask(self) -> Optional[Buffer]: + ... + + @property + def mask_ptr(self) -> int: + ... + + def set_base_mask(self, value: Optional[Buffer]) -> None: + ... + + def set_mask(self: T, value: Optional[Buffer]) -> T: + ... + + @property + def null_count(self) -> int: + ... + + @property + def offset(self) -> int: + ... + + @property + def base_children(self) -> Tuple[ColumnBase, ...]: + ... + + @property + def children(self) -> Tuple[ColumnBase, ...]: + ... + + def set_base_children(self, value: Tuple[ColumnBase, ...]) -> None: + ... + + def _mimic_inplace(self, other_col: ColumnBase, inplace=False) -> Optional[ColumnBase]: + ... + + @staticmethod + def from_scalar( + val: ScalarLike, + size: int + ) -> ColumnBase: # TODO: This should be Scalar, not ScalarLike + ... diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 7989b62d8c7..28dacb5e944 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -60,14 +60,14 @@ cdef class Column: The *dtype* indicates the Column's element type. 
""" def __init__( - self, - object data, - int size, - object dtype, - object mask=None, - int offset=0, - object null_count=None, - object children=() + self, + object data, + int size, + object dtype, + object mask=None, + int offset=0, + object null_count=None, + object children=() ): self._size = size @@ -247,10 +247,10 @@ cdef class Column: ) return cudf.core.column.build_column( - self.data, - self.dtype, - mask, - self.size, + data=self.data, + dtype=self.dtype, + mask=mask, + size=self.size, offset=0, children=self.children ) @@ -561,25 +561,22 @@ cdef class Column: children = tuple(children) result = cudf.core.column.build_column( - data, - dtype, - mask, - size, - offset, - null_count, - tuple(children) + data=data, + dtype=dtype, + mask=mask, + size=size, + offset=offset, + null_count=null_count, + children=tuple(children) ) return result - -def make_column_from_scalar(object py_val, size_type size): - - cdef DeviceScalar val = py_val.device_value - - cdef const scalar* c_val = val.get_raw_ptr() - cdef unique_ptr[column] c_result - with nogil: - c_result = move(cpp_make_column_from_scalar(c_val[0], size)) - - return Column.from_unique_ptr(move(c_result)) + @staticmethod + def from_scalar(py_val, size_type size): + cdef DeviceScalar val = py_val.device_value + cdef const scalar* c_val = val.get_raw_ptr() + cdef unique_ptr[column] c_result + with nogil: + c_result = move(cpp_make_column_from_scalar(c_val[0], size)) + return Column.from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/_lib/table.pyi b/python/cudf/cudf/_lib/table.pyi new file mode 100644 index 00000000000..772e940f812 --- /dev/null +++ b/python/cudf/cudf/_lib/table.pyi @@ -0,0 +1,29 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + +from typing import List, Any, Optional, TYPE_CHECKING + +import cudf + +class Table(object): + _data: cudf.core.column_accessor.ColumnAccessor + _index: Optional[cudf.core.index.Index] + + def __init__(self, data: object = None, index: object = None) -> None: ... + + @property + def _num_columns(self) -> int: ... + + @property + def _num_indices(self) -> int: ... + + @property + def _num_rows(self) -> int: ... + + @property + def _column_names(self) -> List[Any]: ... + + @property + def _index_names(self) -> List[Any]: ... + + @property + def _columns(self) -> List[Any]: ... # TODO: actually, a list of columns diff --git a/python/cudf/cudf/_typing.py b/python/cudf/cudf/_typing.py new file mode 100644 index 00000000000..0087daa1676 --- /dev/null +++ b/python/cudf/cudf/_typing.py @@ -0,0 +1,28 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + +from typing import TYPE_CHECKING, Any, TypeVar, Union + +import numpy as np +from pandas import Period, Timedelta, Timestamp +from pandas.api.extensions import ExtensionDtype + +if TYPE_CHECKING: + import cudf + +# Many of these are from +# https://github.com/pandas-dev/pandas/blob/master/pandas/_typing.py + +Dtype = Union["ExtensionDtype", str, np.dtype] +DtypeObj = Union["ExtensionDtype", np.dtype] + +# scalars +DatetimeLikeScalar = TypeVar( + "DatetimeLikeScalar", Period, Timestamp, Timedelta +) +ScalarLike = Any + +# columns +ColumnLike = Any + +# binary operation +BinaryOperand = Union["cudf.Scalar", "cudf.core.column.ColumnBase"] diff --git a/python/cudf/cudf/core/__init__.py b/python/cudf/cudf/core/__init__.py index d6c232373c7..91a369c31f8 100644 --- a/python/cudf/cudf/core/__init__.py +++ b/python/cudf/cudf/core/__init__.py @@ -1,6 +1,6 @@ # Copyright (c) 2018-2020, NVIDIA CORPORATION. 
-from cudf.core import buffer, column, common +from cudf.core import buffer, column, column_accessor, common from cudf.core.buffer import Buffer from cudf.core.dataframe import DataFrame, from_pandas, merge from cudf.core.index import ( diff --git a/python/cudf/cudf/core/abc.py b/python/cudf/cudf/core/abc.py index 02150a79d57..0550b1d4de0 100644 --- a/python/cudf/cudf/core/abc.py +++ b/python/cudf/cudf/core/abc.py @@ -12,9 +12,9 @@ try: import pickle5 as pickle except ImportError: - import pickle + import pickle # type: ignore else: - import pickle + import pickle # type: ignore class Serializable(abc.ABC): diff --git a/python/cudf/cudf/core/buffer.py b/python/cudf/cudf/core/buffer.py index 08bc068c28c..350346a87f9 100644 --- a/python/cudf/cudf/core/buffer.py +++ b/python/cudf/cudf/core/buffer.py @@ -1,7 +1,10 @@ # Copyright (c) 2020, NVIDIA CORPORATION. +from __future__ import annotations + import functools import operator import pickle +from typing import Any, Dict, Optional, Tuple import numpy as np @@ -12,7 +15,13 @@ class Buffer(Serializable): - def __init__(self, data=None, size=None, owner=None): + ptr: int + size: int + _owner: Any + + def __init__( + self, data: Any = None, size: Optional[int] = None, owner: Any = None + ): """ A Buffer represents a device memory allocation. @@ -36,7 +45,6 @@ def __init__(self, data=None, size=None, owner=None): elif hasattr(data, "__array_interface__") or hasattr( data, "__cuda_array_interface__" ): - self._init_from_array_like(data, owner) elif isinstance(data, memoryview): self._init_from_array_like(np.asarray(data), owner) @@ -57,15 +65,15 @@ def __init__(self, data=None, size=None, owner=None): raise TypeError("data must be Buffer, array-like or integer") self._init_from_array_like(np.asarray(data), owner) - def __len__(self): + def __len__(self) -> int: return self.size @property - def nbytes(self): + def nbytes(self) -> int: return self.size @property - def __cuda_array_interface__(self): + def __cuda_array_interface__(self) -> dict: intf = { "data": (self.ptr, False), "shape": (self.size,), @@ -102,8 +110,8 @@ def _init_from_array_like(self, data, owner): f"Cannot construct Buffer from {data.__class__.__name__}" ) - def serialize(self): - header = {} + def serialize(self) -> Tuple[dict, list]: + header = {} # type: Dict[Any, Any] header["type-serialized"] = pickle.dumps(type(self)) header["constructor-kwargs"] = {} header["desc"] = self.__cuda_array_interface__.copy() @@ -112,7 +120,7 @@ def serialize(self): return header, frames @classmethod - def deserialize(cls, header, frames): + def deserialize(cls, header: dict, frames: list) -> Buffer: buf = cls(frames[0], **header["constructor-kwargs"]) if header["desc"]["shape"] != buf.__cuda_array_interface__["shape"]: @@ -125,7 +133,7 @@ def deserialize(cls, header, frames): return buf @classmethod - def empty(cls, size): + def empty(cls, size: int) -> Buffer: dbuf = DeviceBuffer(size=size) return Buffer(dbuf) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index ff514e6c6f0..498851c47ee 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1,12 +1,27 @@ # Copyright (c) 2018-2020, NVIDIA CORPORATION. 
+from __future__ import annotations + import pickle +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Mapping, + Optional, + Tuple, + Union, + cast, +) import numpy as np import pandas as pd +from numba import cuda import cudf from cudf import _lib as libcudf +from cudf._lib.scalar import as_device_scalar from cudf._lib.transform import bools_to_mask +from cudf._typing import ColumnLike, Dtype, ScalarLike from cudf.core.buffer import Buffer from cudf.core.column import column from cudf.core.column.methods import ColumnMethodsMixin @@ -18,9 +33,23 @@ min_unsigned_type, ) +if TYPE_CHECKING: + from cudf.core.column import ( + ColumnBase, + DatetimeColumn, + NumericalColumn, + StringColumn, + TimeDeltaColumn, + ) + + +ParentType = Union["cudf.Series", "cudf.Index"] + class CategoricalAccessor(ColumnMethodsMixin): - def __init__(self, column, parent=None): + _column: CategoricalColumn + + def __init__(self, column: Any, parent: ParentType = None): """ Accessor object for categorical properties of the Series values. Be aware that assigning to `categories` is an inplace operation, @@ -28,7 +57,8 @@ def __init__(self, column, parent=None): Parameters ---------- - data : Series or CategoricalIndex + column : Column + parent : Series or CategoricalIndex Examples -------- @@ -77,34 +107,35 @@ def __init__(self, column, parent=None): raise AttributeError( "Can only use .cat accessor with a 'category' dtype" ) - self._column = column - self._parent = parent + super().__init__(column=column, parent=parent) @property - def categories(self): + def categories(self) -> "cudf.Index": """ The categories of this categorical. """ return cudf.core.index.as_index(self._column.categories) @property - def codes(self): + def codes(self) -> "cudf.Series": """ Return Series of codes as well as the index. """ - return cudf.Series( - self._column.codes, - index=self._parent.index if self._parent is not None else None, + index = ( + self._parent.index + if isinstance(self._parent, cudf.Series) + else None ) + return cudf.Series(self._column.codes, index=index) @property - def ordered(self): + def ordered(self) -> bool: """ Whether the categories have an ordered relationship. """ return self._column.ordered - def as_ordered(self, inplace=False): + def as_ordered(self, inplace: bool = False) -> Optional[ParentType]: """ Set the Categorical to be ordered. @@ -165,7 +196,7 @@ def as_ordered(self, inplace=False): return self._return_or_inplace(out_col, inplace=inplace) - def as_unordered(self, inplace=False): + def as_unordered(self, inplace: bool = False) -> Optional[ParentType]: """ Set the Categorical to be unordered. @@ -237,7 +268,9 @@ def as_unordered(self, inplace=False): return self._return_or_inplace(out_col, inplace=inplace) - def add_categories(self, new_categories, inplace=False): + def add_categories( + self, new_categories: Any, inplace: bool = False + ) -> Optional[ParentType]: """ Add new categories. @@ -320,7 +353,9 @@ def add_categories(self, new_categories, inplace=False): return self._return_or_inplace(out_col, inplace=inplace) - def remove_categories(self, removals, inplace=False): + def remove_categories( + self, removals: Any, inplace: bool = False, + ) -> Optional[ParentType]: """ Remove the specified categories. 
@@ -411,8 +446,12 @@ def remove_categories(self, removals, inplace=False): return self._return_or_inplace(out_col, inplace=inplace) def set_categories( - self, new_categories, ordered=None, rename=False, inplace=False, - ): + self, + new_categories: Any, + ordered: bool = False, + rename: bool = False, + inplace: bool = False, + ) -> Optional[ParentType]: """ Set the categories to the specified new_categories. @@ -539,7 +578,12 @@ def set_categories( ) return self._return_or_inplace(out_col, inplace=inplace) - def reorder_categories(self, new_categories, ordered=False, inplace=False): + def reorder_categories( + self, + new_categories: Any, + ordered: bool = False, + inplace: bool = False, + ) -> Optional[ParentType]: """ Reorder categories as specified in new_categories. @@ -621,9 +665,9 @@ def reorder_categories(self, new_categories, ordered=False, inplace=False): return self._return_or_inplace(out_col, inplace=inplace) - def _categories_equal(self, new_categories, ordered=None): - ordered = ordered if ordered is not None else self.ordered - + def _categories_equal( + self, new_categories: ColumnBase, ordered=False + ) -> bool: cur_categories = self._column.categories if len(new_categories) != len(cur_categories): return False @@ -640,8 +684,12 @@ def _categories_equal(self, new_categories, ordered=None): return cur_categories.equals(new_categories) def _set_categories( - self, current_categories, new_categories, is_unique=False, ordered=None - ): + self, + current_categories: Any, + new_categories: Any, + is_unique: bool = False, + ordered: bool = False, + ) -> CategoricalColumn: """Returns a new CategoricalColumn with the categories set to the specified *new_categories*. @@ -705,14 +753,17 @@ class CategoricalColumn(column.ColumnBase): """Implements operations for Columns of Categorical type """ + _codes: Optional[NumericalColumn] + _children: Tuple[NumericalColumn] + def __init__( self, - dtype, - mask=None, - size=None, - offset=0, - null_count=None, - children=(), + dtype: CategoricalDtype, + mask: Buffer = None, + size: int = None, + offset: int = 0, + null_count: int = None, + children: Tuple["column.ColumnBase", ...] 
= (), ): """ Parameters @@ -722,7 +773,7 @@ def __init__( The validity mask offset : int Data offset - children : Tuple[Column] + children : Tuple[ColumnBase] Two non-null columns containing the categories and codes respectively """ @@ -745,24 +796,23 @@ def __init__( null_count=null_count, children=children, ) - self._codes = None @property - def base_size(self): + def base_size(self) -> int: return int( (self.base_children[0].size) / self.base_children[0].dtype.itemsize ) - def __contains__(self, item): + def __contains__(self, item: ScalarLike) -> bool: try: self._encode(item) except ValueError: return False return self._encode(item) in self.as_numerical - def serialize(self): - header = {} + def serialize(self) -> Tuple[dict, list]: + header = {} # type: Dict[Any, Any] frames = [] header["type-serialized"] = pickle.dumps(type(self)) header["dtype"], dtype_frames = self.dtype.serialize() @@ -771,7 +821,7 @@ def serialize(self): header["data"], data_frames = self.codes.serialize() header["data_frames_count"] = len(data_frames) frames.extend(data_frames) - if self.nullable: + if self.mask is not None: mask_header, mask_frames = self.mask.serialize() header["mask"] = mask_header frames.extend(mask_frames) @@ -779,7 +829,7 @@ def serialize(self): return header, frames @classmethod - def deserialize(cls, header, frames): + def deserialize(cls, header: dict, frames: list) -> CategoricalColumn: n_dtype_frames = header["dtype_frames_count"] dtype = CategoricalDtype.deserialize( header["dtype"], frames[:n_dtype_frames] @@ -796,11 +846,14 @@ def deserialize(cls, header, frames): mask = Buffer.deserialize( header["mask"], [frames[n_dtype_frames + n_data_frames]] ) - return column.build_column( - data=None, - dtype=dtype, - mask=mask, - children=(column.as_column(data.base_data, dtype=data.dtype),), + return cast( + CategoricalColumn, + column.build_column( + data=None, + dtype=dtype, + mask=mask, + children=(column.as_column(data.base_data, dtype=data.dtype),), + ), ) def set_base_data(self, value): @@ -812,16 +865,16 @@ def set_base_data(self, value): else: super().set_base_data(value) - def set_base_mask(self, value): + def set_base_mask(self, value: Optional[Buffer]): super().set_base_mask(value) self._codes = None - def set_base_children(self, value): + def set_base_children(self, value: Tuple[ColumnBase, ...]): super().set_base_children(value) self._codes = None @property - def children(self): + def children(self) -> Tuple[NumericalColumn]: if self._children is None: codes_column = self.base_children[0] @@ -829,20 +882,26 @@ def children(self): buf.ptr = buf.ptr + (self.offset * codes_column.dtype.itemsize) buf.size = self.size * codes_column.dtype.itemsize - codes_column = column.build_column( - data=buf, dtype=codes_column.dtype, size=self.size, + codes_column = cast( + cudf.core.column.NumericalColumn, + column.build_column( + data=buf, dtype=codes_column.dtype, size=self.size, + ), ) self._children = (codes_column,) return self._children @property - def as_numerical(self): - return column.build_column( - data=self.codes.data, dtype=self.codes.dtype, mask=self.mask + def as_numerical(self) -> NumericalColumn: + return cast( + cudf.core.column.NumericalColumn, + column.build_column( + data=self.codes.data, dtype=self.codes.dtype, mask=self.mask + ), ) @property - def categories(self): + def categories(self) -> ColumnBase: return self.dtype.categories._values @categories.setter @@ -852,30 +911,82 @@ def categories(self, value): ) @property - def codes(self): + def codes(self) -> 
NumericalColumn: if self._codes is None: self._codes = self.children[0].set_mask(self.mask) - return self._codes + return cast(cudf.core.column.NumericalColumn, self._codes) @property - def ordered(self): + def ordered(self) -> bool: return self.dtype.ordered @ordered.setter - def ordered(self, value): + def ordered(self, value: bool): self.dtype.ordered = value - def cat(self, parent=None): + def cat(self, parent: ParentType = None): return CategoricalAccessor(self, parent=parent) - def unary_operator(self, unaryop): + def unary_operator(self, unaryop: str): raise TypeError( f"Series of dtype `category` cannot perform the operation: " f"{unaryop}" ) - def binary_operator(self, op, rhs, reflect=False): + def __setitem__(self, key, value): + if cudf.utils.dtypes.is_scalar(value): + value = self._encode(value) if value is not None else value + else: + value = cudf.core.column.as_column(value).astype(self.dtype) + value = value.codes + codes = self.codes + codes[key] = value + out = cudf.core.column.build_categorical_column( + categories=self.categories, + codes=codes, + mask=codes.base_mask, + size=codes.size, + offset=self.offset, + ordered=self.ordered, + ) + self._mimic_inplace(out, inplace=True) + + def _fill( + self, + fill_value: ScalarLike, + begin: int, + end: int, + inplace: bool = False, + ) -> "column.ColumnBase": + if end <= begin or begin >= self.size: + return self if inplace else self.copy() + + fill_code = self._encode(fill_value) + fill_scalar = as_device_scalar(fill_code, self.codes.dtype) + + result = self if inplace else self.copy() + + libcudf.filling.fill_in_place(result.codes, begin, end, fill_scalar) + return result + + def slice( + self, start: int, stop: int, stride: int = None + ) -> "column.ColumnBase": + codes = self.codes.slice(start, stop, stride) + return cudf.core.column.build_categorical_column( + categories=self.categories, + codes=cudf.core.column.as_column( + codes.base_data, dtype=codes.dtype + ), + mask=codes.base_mask, + ordered=self.ordered, + size=codes.size, + offset=codes.offset, + ) + def binary_operator( + self, op: str, rhs, reflect: bool = False + ) -> ColumnBase: if not (self.ordered and rhs.ordered) and op not in ("eq", "ne"): if op in ("lt", "gt", "le", "ge"): raise TypeError( @@ -889,7 +1000,7 @@ def binary_operator(self, op, rhs, reflect=False): raise TypeError("Categoricals can only compare with the same type") return self.as_numerical.binary_operator(op, rhs.as_numerical) - def normalize_binop_value(self, other): + def normalize_binop_value(self, other: ScalarLike) -> CategoricalColumn: if isinstance(other, np.ndarray) and other.ndim == 0: other = other.item() @@ -905,7 +1016,9 @@ def normalize_binop_value(self, other): ) return col - def sort_by_values(self, ascending=True, na_position="last"): + def sort_by_values( + self, ascending: bool = True, na_position="last" + ) -> Tuple[CategoricalColumn, NumericalColumn]: codes, inds = self.as_numerical.sort_by_values(ascending, na_position) col = column.build_categorical_column( categories=self.dtype.categories, @@ -916,19 +1029,21 @@ def sort_by_values(self, ascending=True, na_position="last"): ) return col, inds - def element_indexing(self, index): + def element_indexing(self, index: int) -> ScalarLike: val = self.as_numerical.element_indexing(index) - return self._decode(val) if val is not None else val + return self._decode(int(val)) if val is not None else val @property - def __cuda_array_interface__(self): + def __cuda_array_interface__(self) -> Mapping[str, Any]: raise TypeError( 
"Categorical does not support `__cuda_array_interface__`." " Please consider using `.codes` or `.categories`" " if you need this functionality." ) - def to_pandas(self, index=None, nullable=False): + def to_pandas( + self, index: ColumnLike = None, nullable: bool = False, **kwargs + ) -> pd.Series: signed_dtype = min_signed_type(len(self.categories)) codes = self.cat().codes.astype(signed_dtype).fillna(-1).to_array() categories = self.categories.to_pandas() @@ -938,7 +1053,7 @@ def to_pandas(self, index=None, nullable=False): return pd.Series(data, index=index) @property - def values_host(self): + def values_host(self) -> np.ndarray: """ Return a numpy representation of the CategoricalColumn. """ @@ -951,7 +1066,16 @@ def values(self): """ raise NotImplementedError("cudf.Categorical is not yet implemented") - def unique(self): + def clip(self, lo: ScalarLike, hi: ScalarLike) -> "column.ColumnBase": + return ( + self.astype(self.categories.dtype).clip(lo, hi).astype(self.dtype) + ) + + @property + def data_array_view(self) -> cuda.devicearray.DeviceNDArray: + return self.codes.data_array_view + + def unique(self) -> CategoricalColumn: codes = self.as_numerical.unique() return column.build_categorical_column( categories=self.categories, @@ -962,18 +1086,23 @@ def unique(self): ordered=self.ordered, ) - def _encode(self, value): + def _encode(self, value) -> ScalarLike: return self.categories.find_first_value(value) - def _decode(self, value): + def _decode(self, value: int) -> ScalarLike: if value == self.default_na_value(): return None return self.categories.element_indexing(value) - def default_na_value(self): + def default_na_value(self) -> ScalarLike: return -1 - def find_and_replace(self, to_replace, replacement, all_nan): + def find_and_replace( + self, + to_replace: ColumnLike, + replacement: ColumnLike, + all_nan: bool = False, + ) -> CategoricalColumn: """ Return col with *to_replace* replaced with *replacement*. 
""" @@ -1038,7 +1167,9 @@ def find_and_replace(self, to_replace, replacement, all_nan): ordered=self.dtype.ordered, ) - def fillna(self, fill_value=None, method=None): + def fillna( + self, fill_value: Any = None, method: Any = None, dtype: Dtype = None + ) -> CategoricalColumn: """ Fill null values with *fill_value* """ @@ -1084,20 +1215,22 @@ def fillna(self, fill_value=None, method=None): return result - def find_first_value(self, value, closest=False): + def find_first_value( + self, value: ScalarLike, closest: bool = False + ) -> int: """ Returns offset of first value that matches """ return self.as_numerical.find_first_value(self._encode(value)) - def find_last_value(self, value, closest=False): + def find_last_value(self, value: ScalarLike, closest: bool = False) -> int: """ Returns offset of last value that matches """ return self.as_numerical.find_last_value(self._encode(value)) @property - def is_monotonic_increasing(self): + def is_monotonic_increasing(self) -> bool: if not hasattr(self, "_is_monotonic_increasing"): self._is_monotonic_increasing = ( self.ordered and self.as_numerical.is_monotonic_increasing @@ -1105,14 +1238,16 @@ def is_monotonic_increasing(self): return self._is_monotonic_increasing @property - def is_monotonic_decreasing(self): + def is_monotonic_decreasing(self) -> bool: if not hasattr(self, "_is_monotonic_decreasing"): self._is_monotonic_decreasing = ( self.ordered and self.as_numerical.is_monotonic_decreasing ) return self._is_monotonic_decreasing - def as_categorical_column(self, dtype, **kwargs): + def as_categorical_column( + self, dtype: Dtype, **kwargs + ) -> CategoricalColumn: if isinstance(dtype, str) and dtype == "category": return self if ( @@ -1129,6 +1264,9 @@ def as_categorical_column(self, dtype, **kwargs): categories=dtype.categories, ordered=dtype.ordered ) + if not isinstance(dtype, CategoricalDtype): + raise ValueError("dtype must be CategoricalDtype") + if not isinstance(self.categories, type(dtype.categories._values)): # If both categories are of different Column types, # return a column full of Nulls. 
@@ -1138,25 +1276,25 @@ def as_categorical_column(self, dtype, **kwargs): new_categories=dtype.categories, ordered=dtype.ordered ) - def as_numerical_column(self, dtype): + def as_numerical_column(self, dtype: Dtype) -> NumericalColumn: return self._get_decategorized_column().as_numerical_column(dtype) - def as_string_column(self, dtype, **kwargs): + def as_string_column(self, dtype, format=None) -> StringColumn: return self._get_decategorized_column().as_string_column( - dtype, **kwargs + dtype, format=format ) - def as_datetime_column(self, dtype, **kwargs): + def as_datetime_column(self, dtype, **kwargs) -> DatetimeColumn: return self._get_decategorized_column().as_datetime_column( dtype, **kwargs ) - def as_timedelta_column(self, dtype, **kwargs): + def as_timedelta_column(self, dtype, **kwargs) -> TimeDeltaColumn: return self._get_decategorized_column().as_timedelta_column( dtype, **kwargs ) - def _get_decategorized_column(self): + def _get_decategorized_column(self) -> ColumnBase: if self.null_count == len(self): # self.categories is empty; just return codes return self.cat().codes._column @@ -1165,7 +1303,7 @@ def _get_decategorized_column(self): out = out.set_mask(self.mask) return out - def copy(self, deep=True): + def copy(self, deep: bool = True) -> CategoricalColumn: if deep: copied_col = libcudf.copying.copy_column(self) copied_cat = libcudf.copying.copy_column(self.dtype._categories) @@ -1192,12 +1330,13 @@ def copy(self, deep=True): size=self.size, ) - def __sizeof__(self): + def __sizeof__(self) -> int: return ( self.cat().categories.__sizeof__() + self.cat().codes.__sizeof__() ) - def _memory_usage(self, deep=False): + def _memory_usage(self, **kwargs) -> int: + deep = kwargs.get("deep", False) if deep: return self.__sizeof__() else: @@ -1206,22 +1345,25 @@ def _memory_usage(self, deep=False): + self.cat().codes.memory_usage() ) - def _mimic_inplace(self, other_col, inplace=False): + def _mimic_inplace( + self, other_col: ColumnBase, inplace: bool = False + ) -> Optional[ColumnBase]: out = super()._mimic_inplace(other_col, inplace=inplace) - if inplace: + if inplace and isinstance(other_col, CategoricalColumn): self._codes = other_col._codes return out - def view(self, dtype): + def view(self, dtype: Dtype) -> ColumnBase: raise NotImplementedError( "Categorical column views are not currently supported" ) -def _create_empty_categorical_column(categorical_column, dtype): - +def _create_empty_categorical_column( + categorical_column: CategoricalColumn, dtype: "CategoricalDtype" +) -> CategoricalColumn: return column.build_categorical_column( - categories=dtype.categories, + categories=column.as_column(dtype.categories), codes=column.as_column( cudf.utils.utils.scalar_broadcast_to( categorical_column.default_na_value(), @@ -1236,7 +1378,9 @@ def _create_empty_categorical_column(categorical_column, dtype): ) -def pandas_categorical_as_column(categorical, codes=None): +def pandas_categorical_as_column( + categorical: ColumnLike, codes: ColumnLike = None +) -> CategoricalColumn: """Creates a CategoricalColumn from a pandas.Categorical diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 7008604a1c3..670dd456de9 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1,9 +1,24 @@ # Copyright (c) 2018-2021, NVIDIA CORPORATION. 
+from __future__ import annotations +import builtins import pickle import warnings -from numbers import Number +from collections.abc import MutableSequence from types import SimpleNamespace +from typing import ( + Any, + Callable, + Dict, + List, + Mapping, + Optional, + Sequence, + Tuple, + TypeVar, + Union, + cast, +) import cupy import numpy as np @@ -22,6 +37,7 @@ from cudf._lib.scalar import as_device_scalar from cudf._lib.stream_compaction import distinct_count as cpp_distinct_count from cudf._lib.transform import bools_to_mask +from cudf._typing import BinaryOperand, ColumnLike, Dtype, ScalarLike from cudf.core.abc import Serializable from cudf.core.buffer import Buffer from cudf.core.dtypes import CategoricalDtype @@ -44,68 +60,34 @@ ) from cudf.utils.utils import mask_dtype +T = TypeVar("T", bound="ColumnBase") -class ColumnBase(Column, Serializable): - def __init__( - self, - data, - size, - dtype, - mask=None, - offset=0, - null_count=None, - children=(), - ): - """ - Parameters - ---------- - data : Buffer - dtype - The type associated with the data Buffer - mask : Buffer, optional - children : tuple, optional - """ - super().__init__( - data, - size=size, - dtype=dtype, - mask=mask, - offset=offset, - children=children, - ) - def as_frame(self): +class ColumnBase(Column, Serializable): + def as_frame(self) -> "cudf.core.frame.Frame": """ Converts a Column to Frame """ return cudf.core.frame.Frame({None: self.copy(deep=False)}) @property - def data_array_view(self): + def data_array_view(self) -> "cuda.devicearray.DeviceNDArray": """ View the data as a device array object """ - if self.dtype == "object": - raise ValueError("Cannot get an array view of a StringColumn") - - if is_categorical_dtype(self.dtype): - return self.codes.data_array_view - else: - dtype = self.dtype - result = cuda.as_cuda_array(self.data) # Workaround until `.view(...)` can change itemsize # xref: https://github.com/numba/numba/issues/4829 result = cuda.devicearray.DeviceNDArray( - shape=(result.nbytes // dtype.itemsize,), - strides=(dtype.itemsize,), - dtype=dtype, + shape=(result.nbytes // self.dtype.itemsize,), + strides=(self.dtype.itemsize,), + dtype=self.dtype, gpu_data=result.gpu_data, ) return result @property - def mask_array_view(self): + def mask_array_view(self) -> "cuda.devicearray.DeviceNDArray": """ View the mask as a device array """ @@ -122,10 +104,12 @@ def mask_array_view(self): ) return result - def __len__(self): + def __len__(self) -> int: return self.size - def to_pandas(self, index=None, nullable=False, **kwargs): + def to_pandas( + self, index: ColumnLike = None, nullable: bool = False, **kwargs + ) -> "pd.Series": if nullable and self.dtype in cudf_dtypes_to_pandas_dtypes: pandas_nullable_dtype = cudf_dtypes_to_pandas_dtypes[self.dtype] arrow_array = self.to_arrow() @@ -144,14 +128,14 @@ def __iter__(self): cudf.utils.utils.raise_iteration_error(obj=self) @property - def values_host(self): + def values_host(self) -> "np.ndarray": """ Return a numpy representation of the Column. """ return self.data_array_view.copy_to_host() @property - def values(self): + def values(self) -> "cupy.ndarray": """ Return a CuPy representation of the Column. 
""" @@ -163,14 +147,18 @@ def values(self): return cupy.asarray(self.data_array_view) - def clip(self, lo, hi): - if is_categorical_dtype(self): - input_col = self.astype(self.categories.dtype) - return libcudf.replace.clip(input_col, lo, hi).astype(self.dtype) - else: - return libcudf.replace.clip(self, lo, hi) + def find_and_replace( + self: T, + to_replace: ColumnLike, + replacement: ColumnLike, + all_nan: bool = False, + ) -> T: + raise NotImplementedError - def equals(self, other, check_dtypes=False): + def clip(self, lo: ScalarLike, hi: ScalarLike) -> ColumnBase: + return libcudf.replace.clip(self, lo, hi) + + def equals(self, other: ColumnBase, check_dtypes: bool = False) -> bool: if self is other: return True if other is None or len(self) != len(other): @@ -180,21 +168,32 @@ def equals(self, other, check_dtypes=False): return False return (self == other).min() - def all(self): + def all(self) -> bool: return bool(libcudf.reduce.reduce("all", self, dtype=np.bool_)) - def any(self): + def any(self) -> bool: return bool(libcudf.reduce.reduce("any", self, dtype=np.bool_)) - def __sizeof__(self): - n = self.data.size + def __sizeof__(self) -> int: + n = 0 + if self.data is not None: + n += self.data.size if self.nullable: n += bitmask_allocation_size_bytes(self.size) return n - @classmethod - def _concat(cls, objs, dtype=None): + def cat( + self, parent=None + ) -> "cudf.core.column.categorical.CategoricalAccessor": + raise NotImplementedError() + def str(self, parent=None) -> "cudf.core.column.string.StringMethods": + raise NotImplementedError() + + @classmethod + def _concat( + cls, objs: "MutableSequence[ColumnBase]", dtype: Dtype = None + ) -> ColumnBase: if len(objs) == 0: dtype = pd.api.types.pandas_dtype(dtype) if is_categorical_dtype(dtype): @@ -282,7 +281,7 @@ def _concat(cls, objs, dtype=None): if is_categorical: col = build_categorical_column( - categories=cats, + categories=as_column(cats), codes=as_column(col.base_data, dtype=col.dtype), mask=col.base_mask, size=col.size, @@ -291,7 +290,7 @@ def _concat(cls, objs, dtype=None): return col - def dropna(self, drop_nan=False): + def dropna(self, drop_nan: bool = False) -> ColumnBase: if drop_nan: col = self.nans_to_nulls() else: @@ -301,7 +300,7 @@ def dropna(self, drop_nan=False): ) return dropped_col - def to_arrow(self): + def to_arrow(self) -> pa.Array: """Convert to PyArrow Array Examples @@ -350,7 +349,7 @@ def to_arrow(self): )["None"].chunk(0) @classmethod - def from_arrow(cls, array): + def from_arrow(cls, array: pa.Array) -> ColumnBase: """ Convert PyArrow Array/ChunkedArray to column @@ -412,15 +411,18 @@ def from_arrow(cls, array): "None" ] - def _get_mask_as_column(self): + def _get_mask_as_column(self) -> ColumnBase: return libcudf.transform.mask_to_bools( self.base_mask, self.offset, self.offset + len(self) ) - def _memory_usage(self, **kwargs): + def _memory_usage(self, **kwargs) -> int: return self.__sizeof__() - def to_gpu_array(self, fillna=None): + def default_na_value(self) -> Any: + raise NotImplementedError() + + def to_gpu_array(self, fillna=None) -> "cuda.devicearray.DeviceNDArray": """Get a dense numba device array for the data. Parameters @@ -439,7 +441,7 @@ def to_gpu_array(self, fillna=None): else: return self.dropna(drop_nan=False).data_array_view - def to_array(self, fillna=None): + def to_array(self, fillna=None) -> "np.array": """Get a dense numpy array for the data. 
Parameters @@ -458,13 +460,16 @@ def to_array(self, fillna=None): return self.to_gpu_array(fillna=fillna).copy_to_host() - def _fill(self, fill_value, begin=0, end=-1, inplace=False): + def _fill( + self, + fill_value: ScalarLike, + begin: int, + end: int, + inplace: bool = False, + ) -> Optional[ColumnBase]: if end <= begin or begin >= self.size: return self if inplace else self.copy() - if is_categorical_dtype(self.dtype): - return self._fill_categorical(fill_value, begin, end, inplace) - fill_scalar = as_device_scalar(fill_value, self.dtype) if not inplace: @@ -484,7 +489,6 @@ def _fill(self, fill_value, begin=0, end=-1, inplace=False): return self - def _fill_categorical(self, fill_value, begin, end, inplace): fill_code = self._encode(fill_value) fill_scalar = as_device_scalar(fill_code, self.codes.dtype) @@ -493,16 +497,16 @@ def _fill_categorical(self, fill_value, begin, end, inplace): libcudf.filling.fill_in_place(result.codes, begin, end, fill_scalar) return result - def shift(self, offset, fill_value): + def shift(self, offset: int, fill_value: ScalarLike) -> ColumnBase: return libcudf.copying.shift(self, offset, fill_value) @property - def valid_count(self): + def valid_count(self) -> int: """Number of non-null values""" return len(self) - self.null_count @property - def nullmask(self): + def nullmask(self) -> Buffer: """The gpu buffer for the null-mask """ if self.nullable: @@ -510,7 +514,7 @@ def nullmask(self): else: raise ValueError("Column has no null mask") - def copy(self, deep=True): + def copy(self, deep: bool = True) -> ColumnBase: """Columns are immutable, so a deep copy produces a copy of the underlying data and mask and a shallow copy creates a new column and copies the references of the data and mask. @@ -527,7 +531,7 @@ def copy(self, deep=True): children=self.base_children, ) - def view(self, dtype): + def view(self, dtype: Dtype) -> ColumnBase: """ View the data underlying a column as different dtype. 
The source column must divide evenly into the size of @@ -569,6 +573,7 @@ def view(self, dtype): + f" total bytes into {dtype} with size {dtype.itemsize}" ) + assert self.base_data is not None new_buf_ptr = ( self.base_data.ptr + self.offset * self.dtype.itemsize ) @@ -580,7 +585,7 @@ def view(self, dtype): ) return build_column(view_buf, dtype=dtype) - def element_indexing(self, index): + def element_indexing(self, index: int): """Default implementation for indexing to an element Raises @@ -595,46 +600,29 @@ def element_indexing(self, index): return libcudf.copying.get_element(self, index).value - def __getitem__(self, arg): + def slice(self, start: int, stop: int, stride: int = None) -> ColumnBase: + if start < 0: + start = start + len(self) + if stop < 0: + stop = stop + len(self) + if start >= stop: + return column_empty(0, self.dtype, masked=True) + # compute mask slice + if stride == 1 or stride is None: + return libcudf.copying.column_slice(self, [start, stop])[0] + else: + # Need to create a gather map for given slice with stride + gather_map = arange( + start=start, stop=stop, step=stride, dtype=np.dtype(np.int32), + ) + return self.take(gather_map) - if isinstance(arg, Number): - arg = int(arg) - return self.element_indexing(arg) + def __getitem__(self, arg) -> Union[ScalarLike, ColumnBase]: + if is_scalar(arg): + return self.element_indexing(int(arg)) elif isinstance(arg, slice): - - if is_categorical_dtype(self): - codes = self.codes[arg] - return build_categorical_column( - categories=self.categories, - codes=as_column(codes.base_data, dtype=codes.dtype), - mask=codes.base_mask, - ordered=self.ordered, - size=codes.size, - offset=codes.offset, - ) - start, stop, stride = arg.indices(len(self)) - - if start < 0: - start = start + len(self) - if stop < 0: - stop = stop + len(self) - - if start >= stop: - return column_empty(0, self.dtype, masked=True) - # compute mask slice - if stride == 1 or stride is None: - - return libcudf.copying.column_slice(self, [start, stop])[0] - else: - # Need to create a gather map for given slice with stride - gather_map = arange( - start=start, - stop=stop, - step=stride, - dtype=np.dtype(np.int32), - ) - return self.take(gather_map) + return self.slice(start, stop, stride) else: arg = as_column(arg) if len(arg) == 0: @@ -645,7 +633,7 @@ def __getitem__(self, arg): return self.apply_boolean_mask(arg) raise NotImplementedError(type(arg)) - def __setitem__(self, key, value): + def __setitem__(self, key: Any, value: Any): """ Set the value of self[key] to value. 
@@ -686,10 +674,7 @@ def __setitem__(self, key, value): nelem = len(key) if is_scalar(value): - if is_categorical_dtype(self.dtype): - value = self._encode(value) - else: - value = self.dtype.type(value) if value is not None else value + value = self.dtype.type(value) if value is not None else value else: if len(value) != nelem: msg = ( @@ -699,9 +684,6 @@ def __setitem__(self, key, value): ) raise ValueError(msg) value = as_column(value).astype(self.dtype) - if is_categorical_dtype(value.dtype): - value = value.cat().set_categories(self.categories) - assert self.dtype == value.dtype if ( isinstance(key, slice) @@ -712,34 +694,11 @@ def __setitem__(self, key, value): out = libcudf.copying.copy_range( value, self, 0, nelem, key_start, key_stop, False ) - if is_categorical_dtype(value.dtype): - out = build_categorical_column( - categories=value.categories, - codes=as_column(out.base_data, dtype=out.dtype), - mask=out.base_mask, - size=out.size, - offset=out.offset, - ordered=value.ordered, - ) else: try: if is_scalar(value): input = self - if is_categorical_dtype(self.dtype): - input = self.codes - out = input.as_frame()._scatter(key, [value])._as_column() - - if is_categorical_dtype(self.dtype): - out = build_categorical_column( - categories=self.categories, - codes=as_column(out.base_data, dtype=out.dtype), - mask=out.base_mask, - size=out.size, - offset=out.offset, - ordered=self.ordered, - ) - else: if not isinstance(value, Column): value = as_column(value) @@ -757,7 +716,12 @@ def __setitem__(self, key, value): self._mimic_inplace(out, inplace=True) - def fillna(self, value=None, method=None, dtype=None): + def fillna( + self: T, + value: Any = None, + method: builtins.str = None, + dtype: Dtype = None, + ) -> T: """Fill null values with ``value``. Returns a copy with null filled. @@ -766,7 +730,7 @@ def fillna(self, value=None, method=None, dtype=None): input_col=self, replacement=value, method=method, dtype=dtype ) - def isnull(self): + def isnull(self) -> ColumnBase: """Identify missing values in a Column. """ result = libcudf.unary.is_null(self) @@ -778,12 +742,12 @@ def isnull(self): return result - def isna(self): + def isna(self) -> ColumnBase: """Identify missing values in a Column. Alias for isnull. """ return self.isnull() - def notnull(self): + def notnull(self) -> ColumnBase: """Identify non-missing values in a Column. """ result = libcudf.unary.is_valid(self) @@ -795,12 +759,14 @@ def notnull(self): return result - def notna(self): + def notna(self) -> ColumnBase: """Identify non-missing values in a Column. Alias for notnull. 
""" return self.notnull() - def find_first_value(self, value): + def find_first_value( + self, value: ScalarLike, closest: bool = False + ) -> int: """ Returns offset of first value that matches """ @@ -811,7 +777,7 @@ def find_first_value(self, value): raise ValueError("value not found") return indices[0] - def find_last_value(self, value): + def find_last_value(self, value: ScalarLike, closest: bool = False) -> int: """ Returns offset of last value that matches """ @@ -822,21 +788,26 @@ def find_last_value(self, value): raise ValueError("value not found") return indices[-1] - def append(self, other): + def append(self, other: ColumnBase) -> ColumnBase: return ColumnBase._concat([self, as_column(other)]) - def quantile(self, q, interpolation, exact): + def quantile( + self, + q: Union[float, Sequence[float]], + interpolation: builtins.str, + exact: bool, + ) -> ColumnBase: raise TypeError(f"cannot perform quantile with type {self.dtype}") - def median(self, skipna=None): + def median(self, skipna: bool = None) -> ScalarLike: raise TypeError(f"cannot perform median with type {self.dtype}") - def take(self, indices, keep_index=True): + def take(self: T, indices: ColumnBase, keep_index: bool = True) -> T: """Return Column by taking values from the corresponding *indices*. """ # Handle zero size if indices.size == 0: - return column_empty_like(self, newsize=0) + return cast(T, column_empty_like(self, newsize=0)) try: return ( self.as_frame() @@ -850,7 +821,7 @@ def take(self, indices, keep_index=True): ) from e raise - def isin(self, values): + def isin(self, values: Sequence) -> ColumnBase: """Check whether values are contained in the Column. Parameters @@ -905,17 +876,17 @@ def isin(self, values): rhs = as_column(pd.Categorical.from_codes([-1], categories=[])) rhs = rhs.cat().set_categories(lhs_cats).astype(self.dtype) - lhs = cudf.DataFrame({"x": lhs, "orig_order": arange(len(lhs))}) - rhs = cudf.DataFrame( + ldf = cudf.DataFrame({"x": lhs, "orig_order": arange(len(lhs))}) + rdf = cudf.DataFrame( {"x": rhs, "bool": full(len(rhs), True, dtype="bool")} ) - res = lhs.merge(rhs, on="x", how="left").sort_values(by="orig_order") + res = ldf.merge(rdf, on="x", how="left").sort_values(by="orig_order") res = res.drop_duplicates(subset="orig_order", ignore_index=True) res = res._data["bool"].fillna(False) return res - def as_mask(self): + def as_mask(self) -> Buffer: """Convert booleans to bitmask Returns @@ -935,15 +906,15 @@ def to_dlpack(self): return cudf.io.dlpack.to_dlpack(self) @property - def is_unique(self): + def is_unique(self) -> bool: return self.distinct_count() == len(self) @property - def is_monotonic(self): + def is_monotonic(self) -> bool: return self.is_monotonic_increasing @property - def is_monotonic_increasing(self): + def is_monotonic_increasing(self) -> bool: if not hasattr(self, "_is_monotonic_increasing"): if self.has_nulls: self._is_monotonic_increasing = False @@ -954,7 +925,7 @@ def is_monotonic_increasing(self): return self._is_monotonic_increasing @property - def is_monotonic_decreasing(self): + def is_monotonic_decreasing(self) -> bool: if not hasattr(self, "_is_monotonic_decreasing"): if self.has_nulls: self._is_monotonic_decreasing = False @@ -964,14 +935,16 @@ def is_monotonic_decreasing(self): ) return self._is_monotonic_decreasing - def get_slice_bound(self, label, side, kind): + def get_slice_bound( + self, label: ScalarLike, side: builtins.str, kind: builtins.str + ) -> int: """ Calculate slice bound that corresponds to given label. 
Returns leftmost (one-past-the-rightmost if ``side=='right'``) position of given label. Parameters ---------- - label : object + label : Scalar side : {'left', 'right'} kind : {'ix', 'loc', 'getitem'} """ @@ -986,21 +959,29 @@ def get_slice_bound(self, label, side, kind): # Not currently using `kind` argument. if side == "left": return self.find_first_value(label, closest=True) - if side == "right": + elif side == "right": return self.find_last_value(label, closest=True) + 1 + else: + raise ValueError(f"Invalid value for side: {side}") - def sort_by_values(self, ascending=True, na_position="last"): + def sort_by_values( + self: ColumnBase, + ascending: bool = True, + na_position: builtins.str = "last", + ) -> Tuple[ColumnBase, "cudf.core.column.NumericalColumn"]: col_inds = self.as_frame()._get_sorted_inds(ascending, na_position) - col_keys = self[col_inds] + col_keys = self.take(col_inds) return col_keys, col_inds - def distinct_count(self, method="sort", dropna=True): + def distinct_count( + self, method: builtins.str = "sort", dropna: bool = True + ) -> int: if method != "sort": msg = "non sort based distinct_count() not implemented yet" raise NotImplementedError(msg) return cpp_distinct_count(self, ignore_nulls=dropna) - def astype(self, dtype, **kwargs): + def astype(self, dtype: Dtype, **kwargs) -> ColumnBase: if is_categorical_dtype(dtype): return self.as_categorical_column(dtype, **kwargs) elif pd.api.types.pandas_dtype(dtype).type in { @@ -1022,7 +1003,7 @@ def astype(self, dtype, **kwargs): else: return self.as_numerical_column(dtype) - def as_categorical_column(self, dtype, **kwargs): + def as_categorical_column(self, dtype, **kwargs) -> ColumnBase: if "ordered" in kwargs: ordered = kwargs["ordered"] else: @@ -1065,26 +1046,36 @@ def as_categorical_column(self, dtype, **kwargs): ordered=ordered, ) - def as_numerical_column(self, dtype): + def as_numerical_column( + self, dtype: Dtype + ) -> "cudf.core.column.NumericalColumn": raise NotImplementedError - def as_datetime_column(self, dtype, **kwargs): + def as_datetime_column( + self, dtype: Dtype, **kwargs + ) -> "cudf.core.column.DatetimeColumn": raise NotImplementedError - def as_timedelta_column(self, dtype, **kwargs): + def as_timedelta_column( + self, dtype: Dtype, **kwargs + ) -> "cudf.core.column.TimeDeltaColumn": raise NotImplementedError - def as_string_column(self, dtype, **kwargs): + def as_string_column( + self, dtype: Dtype, format=None + ) -> "cudf.core.column.StringColumn": raise NotImplementedError - def apply_boolean_mask(self, mask): + def apply_boolean_mask(self, mask) -> ColumnBase: mask = as_column(mask, dtype="bool") result = ( self.as_frame()._apply_boolean_mask(boolean_mask=mask)._as_column() ) return result - def argsort(self, ascending=True, na_position="last"): + def argsort( + self, ascending: bool = True, na_position: builtins.str = "last" + ) -> ColumnBase: sorted_indices = self.as_frame()._get_sorted_inds( ascending=ascending, na_position=na_position @@ -1092,7 +1083,7 @@ def argsort(self, ascending=True, na_position="last"): return sorted_indices @property - def __cuda_array_interface__(self): + def __cuda_array_interface__(self) -> Mapping[builtins.str, Any]: output = { "shape": (len(self),), "strides": (self.dtype.itemsize,), @@ -1164,14 +1155,18 @@ def __ge__(self, other): return self.binary_operator("ge", other) def searchsorted( - self, value, side="left", ascending=True, na_position="last" + self, + value, + side: builtins.str = "left", + ascending: bool = True, + na_position: builtins.str = 
"last", ): values = as_column(value).as_frame() return self.as_frame().searchsorted( values, side, ascending=ascending, na_position=na_position ) - def unique(self): + def unique(self) -> ColumnBase: """ Get unique values in the data """ @@ -1181,17 +1176,18 @@ def unique(self): ._as_column() ) - def serialize(self): - header = {} + def serialize(self) -> Tuple[dict, list]: + header = {} # type: Dict[Any, Any] frames = [] header["type-serialized"] = pickle.dumps(type(self)) header["dtype"] = self.dtype.str - data_header, data_frames = self.data.serialize() - header["data"] = data_header - frames.extend(data_frames) + if self.data is not None: + data_header, data_frames = self.data.serialize() + header["data"] = data_header + frames.extend(data_frames) - if self.nullable: + if self.mask is not None: mask_header, mask_frames = self.mask.serialize() header["mask"] = mask_header frames.extend(mask_frames) @@ -1200,7 +1196,7 @@ def serialize(self): return header, frames @classmethod - def deserialize(cls, header, frames): + def deserialize(cls, header: dict, frames: list) -> ColumnBase: dtype = header["dtype"] data = Buffer.deserialize(header["data"], [frames[0]]) mask = None @@ -1208,63 +1204,71 @@ def deserialize(cls, header, frames): mask = Buffer.deserialize(header["mask"], [frames[1]]) return build_column(data=data, dtype=dtype, mask=mask) - def binary_operator(self, op, other, reflect=False): + def binary_operator( + self, op: builtins.str, other: BinaryOperand, reflect: bool = False + ) -> ColumnBase: raise NotImplementedError - def min(self, skipna=None, dtype=None): + def min(self, skipna: bool = None, dtype: Dtype = None): result_col = self._process_for_reduction(skipna=skipna) if isinstance(result_col, ColumnBase): return libcudf.reduce.reduce("min", result_col, dtype=dtype) else: return result_col - def max(self, skipna=None, dtype=None): + def max(self, skipna: bool = None, dtype: Dtype = None): result_col = self._process_for_reduction(skipna=skipna) if isinstance(result_col, ColumnBase): return libcudf.reduce.reduce("max", result_col, dtype=dtype) else: return result_col - def sum(self, skipna=None, dtype=None, min_count=0): + def sum( + self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0 + ): raise TypeError(f"cannot perform sum with type {self.dtype}") - def product(self, skipna=None, dtype=None, min_count=0): + def product( + self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0 + ): raise TypeError(f"cannot perform prod with type {self.dtype}") - def mean(self, skipna=None, dtype=None): + def mean(self, skipna: bool = None, dtype: Dtype = None): raise TypeError(f"cannot perform mean with type {self.dtype}") - def std(self, skipna=None, ddof=1, dtype=np.float64): + def std(self, skipna: bool = None, ddof=1, dtype: Dtype = np.float64): raise TypeError(f"cannot perform std with type {self.dtype}") - def var(self, skipna=None, ddof=1, dtype=np.float64): + def var(self, skipna: bool = None, ddof=1, dtype: Dtype = np.float64): raise TypeError(f"cannot perform var with type {self.dtype}") - def kurtosis(self, skipna=None): + def kurtosis(self, skipna: bool = None): raise TypeError(f"cannot perform kurt with type {self.dtype}") - def skew(self, skipna=None): + def skew(self, skipna: bool = None): raise TypeError(f"cannot perform skew with type {self.dtype}") - def cov(self, other): + def cov(self, other: ColumnBase): raise TypeError( f"cannot perform covarience with types {self.dtype}, " f"{other.dtype}" ) - def corr(self, other): + def corr(self, other: 
ColumnBase): raise TypeError( f"cannot perform corr with types {self.dtype}, {other.dtype}" ) - def nans_to_nulls(self): + def nans_to_nulls(self: T) -> T: if self.dtype.kind == "f": newmask = libcudf.transform.nans_to_nulls(self) return self.set_mask(newmask) else: return self - def _process_for_reduction(self, skipna=None, min_count=0): + def _process_for_reduction( + self, skipna: bool = None, min_count: int = 0 + ) -> Union[ColumnBase, ScalarLike]: skipna = True if skipna is None else skipna if skipna: @@ -1289,8 +1293,13 @@ def _process_for_reduction(self, skipna=None, min_count=0): return result_col def scatter_to_table( - self, row_indices, column_indices, names, nrows=None, ncols=None - ): + self, + row_indices: ColumnBase, + column_indices: ColumnBase, + names: List[Any], + nrows: int = None, + ncols: int = None, + ) -> "cudf.core.frame.Frame": """ Scatters values from the column into a table. @@ -1335,7 +1344,12 @@ def scatter_to_table( ) -def column_empty_like(column, dtype=None, masked=False, newsize=None): +def column_empty_like( + column: ColumnBase, + dtype: Dtype = None, + masked: bool = False, + newsize: int = None, +) -> ColumnBase: """Allocate a new column like the given *column* """ if dtype is None: @@ -1347,6 +1361,7 @@ def column_empty_like(column, dtype=None, masked=False, newsize=None): and is_categorical_dtype(column.dtype) and dtype == column.dtype ): + column = cast("cudf.core.column.CategoricalColumn", column) codes = column_empty_like(column.codes, masked=masked, newsize=newsize) return build_column( data=None, @@ -1359,7 +1374,9 @@ def column_empty_like(column, dtype=None, masked=False, newsize=None): return column_empty(row_count, dtype, masked) -def column_empty_like_same_mask(column, dtype): +def column_empty_like_same_mask( + column: ColumnBase, dtype: Dtype +) -> ColumnBase: """Create a new empty Column with the same length and the same mask. Parameters @@ -1373,11 +1390,13 @@ def column_empty_like_same_mask(column, dtype): return result -def column_empty(row_count, dtype="object", masked=False): +def column_empty( + row_count: int, dtype: Dtype = "object", masked: bool = False +) -> ColumnBase: """Allocate a new column like the given row_count and dtype. """ dtype = pd.api.types.pandas_dtype(dtype) - children = () + children = () # type: Tuple[ColumnBase, ...] if is_categorical_dtype(dtype): data = None @@ -1410,8 +1429,15 @@ def column_empty(row_count, dtype="object", masked=False): def build_column( - data, dtype, mask=None, size=None, offset=0, null_count=None, children=() -): + data: Union[Buffer, None], + dtype: Dtype, + *, + size: int = None, + mask: Buffer = None, + offset: int = 0, + null_count: int = None, + children: Tuple[ColumnBase, ...] 
= (), +) -> ColumnBase: """ Build a Column of the appropriate type from the given parameters @@ -1446,6 +1472,7 @@ def build_column( children=children, ) elif dtype.type is np.datetime64: + assert data is not None return cudf.core.column.DatetimeColumn( data=data, dtype=dtype, @@ -1455,6 +1482,7 @@ def build_column( null_count=null_count, ) elif dtype.type is np.timedelta64: + assert data is not None return cudf.core.column.TimeDeltaColumn( data=data, dtype=dtype, @@ -1483,8 +1511,8 @@ def build_column( elif is_struct_dtype(dtype): return cudf.core.column.StructColumn( data=data, - size=size, dtype=dtype, + size=size, mask=mask, null_count=null_count, children=children, @@ -1499,6 +1527,7 @@ def build_column( children=children, ) else: + assert data is not None return cudf.core.column.NumericalColumn( data=data, dtype=dtype, @@ -1510,14 +1539,14 @@ def build_column( def build_categorical_column( - categories, - codes, - mask=None, - size=None, - offset=0, - null_count=None, - ordered=None, -): + categories: ColumnBase, + codes: ColumnBase, + mask: Buffer = None, + size: int = None, + offset: int = 0, + null_count: int = None, + ordered: bool = None, +) -> "cudf.core.column.CategoricalColumn": """ Build a CategoricalColumn @@ -1541,9 +1570,9 @@ def build_categorical_column( if codes.dtype != codes_dtype: codes = codes.astype(codes_dtype) - dtype = CategoricalDtype(categories=as_column(categories), ordered=ordered) + dtype = CategoricalDtype(categories=categories, ordered=ordered) - return build_column( + result = build_column( data=None, dtype=dtype, mask=mask, @@ -1552,9 +1581,15 @@ def build_categorical_column( null_count=null_count, children=(codes,), ) + return cast("cudf.core.column.CategoricalColumn", result) -def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): +def as_column( + arbitrary: Any, + nan_as_null: bool = None, + dtype: Dtype = None, + length: int = None, +): """Create a Column from an arbitrary object Parameters @@ -1791,7 +1826,10 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): mask = data.mask data = cudf.core.column.timedelta.TimeDeltaColumn( - data=buffer, mask=mask, dtype=arbitrary.dtype + data=buffer, + size=len(arbitrary), + mask=mask, + dtype=arbitrary.dtype, ) elif arb_dtype.kind in ("O", "U"): data = as_column( @@ -1840,9 +1878,7 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): np.asarray(arbitrary), dtype=dtype, nan_as_null=nan_as_null ) elif isinstance(arbitrary, cudf.Scalar): - data = libcudf.column.make_column_from_scalar( - arbitrary, length if length else 1 - ) + data = ColumnBase.from_scalar(arbitrary, length if length else 1) elif isinstance(arbitrary, pd.core.arrays.masked.BaseMaskedArray): cudf_dtype = arbitrary._data.dtype @@ -1924,7 +1960,11 @@ def as_column(arbitrary, nan_as_null=None, dtype=None, length=None): return data -def column_applymap(udf, column, out_dtype): +def column_applymap( + udf: Callable[[ScalarLike], ScalarLike], + column: ColumnBase, + out_dtype: Dtype, +) -> ColumnBase: """Apply an element-wise function to transform the values in the Column. 
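The annotated `Callable[[ScalarLike], ScalarLike]` makes the UDF contract explicit. For context, this helper backs the public `Series.applymap`; a minimal usage sketch with illustrative values (not from the diff), assuming the usual compiled-kernel path:

import cudf

s = cudf.Series([1, 2, 3])
# The UDF is applied element-wise on the GPU; when out_dtype is not
# given, the result keeps the input column's dtype.
doubled = s.applymap(lambda x: x * 2)
# expected: [2, 4, 6]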
Parameters @@ -1972,7 +2012,7 @@ def kernel_non_masked(values, results): return as_column(results) -def _data_from_cuda_array_interface_desc(obj): +def _data_from_cuda_array_interface_desc(obj) -> Buffer: desc = obj.__cuda_array_interface__ ptr = desc["data"][0] nelem = desc["shape"][0] if len(desc["shape"]) > 0 else 1 @@ -1982,7 +2022,7 @@ def _data_from_cuda_array_interface_desc(obj): return data -def _mask_from_cuda_array_interface_desc(obj): +def _mask_from_cuda_array_interface_desc(obj) -> Union[Buffer, None]: desc = obj.__cuda_array_interface__ mask = desc.get("mask", None) @@ -2005,7 +2045,7 @@ def _mask_from_cuda_array_interface_desc(obj): return mask -def serialize_columns(columns): +def serialize_columns(columns) -> Tuple[List[dict], List]: """ Return the headers and frames resulting from serializing a list of Column @@ -2020,7 +2060,7 @@ def serialize_columns(columns): frames : list list of frames """ - headers = [] + headers = [] # type: List[Dict[Any, Any]] frames = [] if len(columns) > 0: @@ -2032,7 +2072,7 @@ def serialize_columns(columns): return headers, frames -def deserialize_columns(headers, frames): +def deserialize_columns(headers: List[dict], frames: List) -> List[ColumnBase]: """ Construct a list of Columns from a list of headers and frames. @@ -2050,7 +2090,12 @@ def deserialize_columns(headers, frames): return columns -def arange(start, stop=None, step=1, dtype=None): +def arange( + start: Union[int, float], + stop: Union[int, float] = None, + step: Union[int, float] = 1, + dtype=None, +) -> ColumnBase: """ Returns a column with evenly spaced values within a given interval. @@ -2103,7 +2148,7 @@ def arange(start, stop=None, step=1, dtype=None): ) -def full(size, fill_value, dtype=None): +def full(size: int, fill_value: ScalarLike, dtype: Dtype = None) -> ColumnBase: """ Returns a column of given size and dtype, filled with a given value. @@ -2134,7 +2179,4 @@ def full(size, fill_value, dtype=None): 4 7 dtype: int8 """ - - return libcudf.column.make_column_from_scalar( - cudf.Scalar(fill_value, dtype), size - ) + return ColumnBase.from_scalar(cudf.Scalar(fill_value, dtype), size) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 4561b1f68f2..8ae16288050 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -1,7 +1,10 @@ # Copyright (c) 2019-2020, NVIDIA CORPORATION.
+from __future__ import annotations + import datetime as dt import re from numbers import Number +from typing import Any, Sequence, Union, cast import numpy as np import pandas as pd @@ -9,7 +12,9 @@ import cudf from cudf import _lib as libcudf -from cudf.core.column import column, string +from cudf._typing import DatetimeLikeScalar, Dtype, DtypeObj, ScalarLike +from cudf.core.buffer import Buffer +from cudf.core.column import ColumnBase, column, string from cudf.utils.dtypes import is_scalar from cudf.utils.utils import _fillna_natwise @@ -34,7 +39,13 @@ class DatetimeColumn(column.ColumnBase): def __init__( - self, data, dtype, mask=None, size=None, offset=0, null_count=None + self, + data: Buffer, + dtype: DtypeObj, + mask: Buffer = None, + size: int = None, + offset: int = 0, + null_count: int = None, ): """ Parameters @@ -66,49 +77,51 @@ def __init__( self._time_unit, _ = np.datetime_data(self.dtype) - def __contains__(self, item): + def __contains__(self, item: ScalarLike) -> bool: try: - item = np.datetime64(item, self._time_unit) + item_as_dt64 = np.datetime64(item, self._time_unit) except ValueError: # If item cannot be converted to datetime type # np.datetime64 raises ValueError, hence `item` # cannot exist in `self`. return False - return item.astype("int64") in self.as_numerical + return item_as_dt64.astype("int64") in self.as_numerical @property - def time_unit(self): + def time_unit(self) -> str: return self._time_unit @property - def year(self): + def year(self) -> ColumnBase: return self.get_dt_field("year") @property - def month(self): + def month(self) -> ColumnBase: return self.get_dt_field("month") @property - def day(self): + def day(self) -> ColumnBase: return self.get_dt_field("day") @property - def hour(self): + def hour(self) -> ColumnBase: return self.get_dt_field("hour") @property - def minute(self): + def minute(self) -> ColumnBase: return self.get_dt_field("minute") @property - def second(self): + def second(self) -> ColumnBase: return self.get_dt_field("second") @property - def weekday(self): + def weekday(self) -> ColumnBase: return self.get_dt_field("weekday") - def to_pandas(self, index=None, **kwargs): + def to_pandas( + self, index: "cudf.Index" = None, nullable: bool = False, **kwargs + ) -> "cudf.Series": # Workaround until following issue is fixed: # https://issues.apache.org/jira/browse/ARROW-9772 @@ -122,10 +135,10 @@ def to_pandas(self, index=None, **kwargs): return pd_series - def get_dt_field(self, field): + def get_dt_field(self, field: str) -> ColumnBase: return libcudf.datetime.extract_datetime_component(self, field) - def normalize_binop_value(self, other): + def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: if isinstance(other, cudf.Scalar): return other @@ -162,30 +175,41 @@ def normalize_binop_value(self, other): raise TypeError(f"cannot normalize {type(other)}") @property - def as_numerical(self): - return column.build_column( - data=self.base_data, - dtype=np.int64, - mask=self.base_mask, - offset=self.offset, - size=self.size, + def as_numerical(self) -> "cudf.core.column.NumericalColumn": + return cast( + "cudf.core.column.NumericalColumn", + column.build_column( + data=self.base_data, + dtype=np.int64, + mask=self.base_mask, + offset=self.offset, + size=self.size, + ), ) - def as_datetime_column(self, dtype, **kwargs): + def as_datetime_column(self, dtype: Dtype, **kwargs) -> DatetimeColumn: dtype = np.dtype(dtype) if dtype == self.dtype: return self return libcudf.unary.cast(self, dtype=dtype) - def 
as_timedelta_column(self, dtype, **kwargs): + def as_timedelta_column( + self, dtype: Dtype, **kwargs + ) -> "cudf.core.column.TimeDeltaColumn": raise TypeError( f"cannot astype a datetimelike from [{self.dtype}] to [{dtype}]" ) - def as_numerical_column(self, dtype): - return self.as_numerical.astype(dtype) + def as_numerical_column( + self, dtype: Dtype + ) -> "cudf.core.column.NumericalColumn": + return cast( + "cudf.core.column.NumericalColumn", self.as_numerical.astype(dtype) + ) - def as_string_column(self, dtype, format=None): + def as_string_column( + self, dtype: Dtype, format=None + ) -> "cudf.core.column.StringColumn": if format is None: format = _dtype_to_format_conversion.get( self.dtype.name, "%Y-%m-%d %H:%M:%S" @@ -195,20 +219,25 @@ def as_string_column(self, dtype, format=None): np.dtype(self.dtype) ](self, format) else: - return column.column_empty(0, dtype="object", masked=False) + return cast( + "cudf.core.column.StringColumn", + column.column_empty(0, dtype="object", masked=False), + ) - def default_na_value(self): + def default_na_value(self) -> DatetimeLikeScalar: """Returns the default NA value for this column """ return np.datetime64("nat", self.time_unit) - def mean(self, skipna=None, dtype=np.float64): + def mean(self, skipna=None, dtype=np.float64) -> ScalarLike: return pd.Timestamp( self.as_numerical.mean(skipna=skipna, dtype=dtype), unit=self.time_unit, ) - def quantile(self, q, interpolation, exact): + def quantile( + self, q: Union[float, Sequence[float]], interpolation: str, exact: bool + ) -> ColumnBase: result = self.as_numerical.quantile( q=q, interpolation=interpolation, exact=exact ) @@ -216,18 +245,23 @@ def quantile(self, q, interpolation, exact): return pd.Timestamp(result, unit=self.time_unit) return result.astype(self.dtype) - def binary_operator(self, op, rhs, reflect=False): + def binary_operator( + self, + op: str, + rhs: Union[ColumnBase, "cudf.Scalar"], + reflect: bool = False, + ) -> ColumnBase: if isinstance(rhs, cudf.DateOffset): return binop_offset(self, rhs, op) lhs, rhs = self, rhs if op in ("eq", "ne", "lt", "gt", "le", "ge"): out_dtype = np.bool elif op == "add" and pd.api.types.is_timedelta64_dtype(rhs.dtype): - out_dtype = cudf.core.column.timedelta._timedelta_binary_op_add( + out_dtype = cudf.core.column.timedelta._timedelta_add_result_dtype( rhs, lhs ) elif op == "sub" and pd.api.types.is_timedelta64_dtype(rhs.dtype): - out_dtype = cudf.core.column.timedelta._timedelta_binary_op_sub( + out_dtype = cudf.core.column.timedelta._timedelta_sub_result_dtype( rhs if reflect else lhs, lhs if reflect else rhs ) elif op == "sub" and pd.api.types.is_datetime64_dtype(rhs.dtype): @@ -244,13 +278,11 @@ def binary_operator(self, op, rhs, reflect=False): f"Series of dtype {self.dtype} cannot perform " f" the operation {op}" ) + return binop(lhs, rhs, op=op, out_dtype=out_dtype, reflect=reflect) - if reflect: - lhs, rhs = rhs, lhs - - return binop(lhs, rhs, op=op, out_dtype=out_dtype) - - def fillna(self, fill_value=None, method=None): + def fillna( + self, fill_value: Any = None, method: str = None, dtype: Dtype = None + ) -> DatetimeColumn: if fill_value is not None: if cudf.utils.utils.isnat(fill_value): return _fillna_natwise(self) @@ -262,7 +294,9 @@ def fillna(self, fill_value=None, method=None): return super().fillna(fill_value, method) - def find_first_value(self, value, closest=False): + def find_first_value( + self, value: ScalarLike, closest: bool = False + ) -> int: """ Returns offset of first value that matches """ @@ -270,7 +304,7 @@ 
def find_first_value(self, value, closest=False): value = column.as_column(value, dtype=self.dtype).as_numerical[0] return self.as_numerical.find_first_value(value, closest=closest) - def find_last_value(self, value, closest=False): + def find_last_value(self, value: ScalarLike, closest: bool = False) -> int: """ Returns offset of last value that matches """ @@ -279,10 +313,10 @@ def find_last_value(self, value, closest=False): return self.as_numerical.find_last_value(value, closest=closest) @property - def is_unique(self): + def is_unique(self) -> bool: return self.as_numerical.is_unique - def can_cast_safely(self, to_dtype): + def can_cast_safely(self, to_dtype: Dtype) -> bool: if np.issubdtype(to_dtype, np.datetime64): to_res, _ = np.datetime_data(to_dtype) @@ -315,7 +349,15 @@ def can_cast_safely(self, to_dtype): @annotate("BINARY_OP", color="orange", domain="cudf_python") -def binop(lhs, rhs, op, out_dtype): +def binop( + lhs: Union[ColumnBase, ScalarLike], + rhs: Union[ColumnBase, ScalarLike], + op: str, + out_dtype: Dtype, + reflect: bool, +) -> ColumnBase: + if reflect: + lhs, rhs = rhs, lhs out = libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) return out @@ -329,11 +371,10 @@ def binop_offset(lhs, rhs, op): return out -def infer_format(element, **kwargs): +def infer_format(element: str, **kwargs) -> str: """ Infers datetime format from a string, also takes care of `ms` and `ns` """ - fmt = pd.core.tools.datetimes._guess_datetime_format(element, **kwargs) if fmt is not None: @@ -345,8 +386,8 @@ def infer_format(element, **kwargs): # There is a possibility that the element is of the following format # '00:00:03.333333 2016-01-01' - second_part = re.split(r"(\D+)", element_parts[1], maxsplit=1) - subsecond_fmt = ".%" + str(len(second_part[0])) + "f" + second_parts = re.split(r"(\D+)", element_parts[1], maxsplit=1) + subsecond_fmt = ".%" + str(len(second_parts[0])) + "f" first_part = pd.core.tools.datetimes._guess_datetime_format( element_parts[0], **kwargs @@ -360,16 +401,16 @@ def infer_format(element, **kwargs): if first_part is None: raise ValueError("Unable to infer the timestamp format from the data") - if len(second_part) > 1: + if len(second_parts) > 1: # "Z" indicates Zulu time (widely used in aviation), which is the # UTC timezone; it is the only timezone cudf currently supports. Any # other unsupported timezone will cause the code to fail below # with a ValueError. - second_part.remove("Z") - second_part = "".join(second_part[1:]) + second_parts.remove("Z") + second_part = "".join(second_parts[1:]) if len(second_part) > 1: - # Only infer if second_part is not an empty string. + # Only infer if second_parts is not an empty string. second_part = pd.core.tools.datetimes._guess_datetime_format( second_part, **kwargs ) diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index c2aa41a5de1..8641bc88806 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -173,8 +173,7 @@ def __init__(self, column, parent=None): raise AttributeError( "Can only use .list accessor with a 'list' dtype" ) - self._column = column - self._parent = parent + super().__init__(column=column, parent=parent) @property def leaves(self): diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py index 8395c9c3da6..eec9c2a7860 100644 --- a/python/cudf/cudf/core/column/methods.py +++ b/python/cudf/cudf/core/column/methods.py @@ -1,9 +1,57 @@ # Copyright (c) 2020, NVIDIA CORPORATION.
+from __future__ import annotations + +from typing import TYPE_CHECKING, Optional, Union, overload + +from typing_extensions import Literal + import cudf +if TYPE_CHECKING: + from cudf.core.column import ColumnBase + class ColumnMethodsMixin: + _column: ColumnBase + _parent: Optional[Union["cudf.Series", "cudf.Index"]] + + def __init__( + self, + column: ColumnBase, + parent: Union["cudf.Series", "cudf.Index"] = None, + ): + self._column = column + self._parent = parent + + @overload + def _return_or_inplace( + self, new_col, inplace: Literal[False], expand=False, retain_index=True + ) -> Union["cudf.Series", "cudf.Index"]: + ... + + @overload + def _return_or_inplace( + self, new_col, expand: bool = False, retain_index: bool = True + ) -> Union["cudf.Series", "cudf.Index"]: + ... + + @overload + def _return_or_inplace( + self, new_col, inplace: Literal[True], expand=False, retain_index=True + ) -> None: + ... + + @overload + def _return_or_inplace( + self, + new_col, + inplace: bool = False, + expand: bool = False, + retain_index: bool = True, + ) -> Optional[Union["cudf.Series", "cudf.Index"]]: + ... + def _return_or_inplace( self, new_col, inplace=False, expand=False, retain_index=True ): @@ -19,31 +67,29 @@ def _return_or_inplace( ), inplace=True, ) + return None else: self._column._mimic_inplace(new_col, inplace=True) + return None else: + if self._parent is None: + return new_col if expand or isinstance( self._parent, (cudf.DataFrame, cudf.MultiIndex) ): # This branch indicates that the object passed as new_col - # is actually a table-like data + # is a Table table = new_col - if isinstance(table, cudf._lib.table.Table): - if isinstance(self._parent, cudf.Index): - idx = self._parent._constructor_expanddim._from_table( - table=table - ) - idx.names = None - return idx - else: - return self._parent._constructor_expanddim( - data=table._data, index=self._parent.index - ) + if isinstance(self._parent, cudf.Index): + idx = self._parent._constructor_expanddim._from_table( + table=table + ) + idx.names = None + return idx else: return self._parent._constructor_expanddim( - {index: value for index, value in enumerate(table)}, - index=self._parent.index, + data=table._data, index=self._parent.index ) elif isinstance(self._parent, cudf.Series): if retain_index: @@ -59,7 +105,4 @@ def _return_or_inplace( new_col, name=self._parent.name ) else: - if self._parent is None: - return new_col - else: - return self._parent._mimic_inplace(new_col, inplace=False) + return self._parent._mimic_inplace(new_col, inplace=False) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index f302a4519ed..f77c408f205 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -1,6 +1,8 @@ # Copyright (c) 2018-2021, NVIDIA CORPORATION.
+from __future__ import annotations from numbers import Number +from typing import Any, Callable, Sequence, Union, cast import numpy as np import pandas as pd @@ -10,8 +12,15 @@ import cudf from cudf import _lib as libcudf from cudf._lib.quantiles import quantile as cpp_quantile +from cudf._typing import BinaryOperand, ColumnLike, Dtype, DtypeObj, ScalarLike from cudf.core.buffer import Buffer -from cudf.core.column import as_column, build_column, column, string +from cudf.core.column import ( + ColumnBase, + as_column, + build_column, + column, + string, +) from cudf.utils import cudautils, utils from cudf.utils.dtypes import ( min_column_type, @@ -21,9 +30,15 @@ ) -class NumericalColumn(column.ColumnBase): +class NumericalColumn(ColumnBase): def __init__( - self, data, dtype, mask=None, size=None, offset=0, null_count=None + self, + data: Buffer, + dtype: DtypeObj, + mask: Buffer = None, + size: int = None, + offset: int = 0, + null_count: int = None, ): """ Parameters @@ -39,6 +54,7 @@ def __init__( if size is None: size = data.size // dtype.itemsize size = size - offset + super().__init__( data, size=size, @@ -48,7 +64,7 @@ def __init__( null_count=null_count, ) - def __contains__(self, item): + def __contains__(self, item: ScalarLike) -> bool: """ Returns True if column contains item, else False. """ @@ -66,10 +82,12 @@ def __contains__(self, item): self, column.as_column([item], dtype=self.dtype) ).any() - def unary_operator(self, unaryop): + def unary_operator(self, unaryop: str) -> ColumnBase: return _numeric_column_unaryop(self, op=unaryop) - def binary_operator(self, binop, rhs, reflect=False): + def binary_operator( + self, binop: str, rhs: BinaryOperand, reflect: bool = False, + ) -> ColumnBase: int_dtypes = [ np.dtype("int8"), np.dtype("int16"), @@ -80,32 +98,33 @@ def binary_operator(self, binop, rhs, reflect=False): np.dtype("uint32"), np.dtype("uint64"), ] - tmp = rhs - if reflect: - tmp = self - if isinstance(rhs, (NumericalColumn, cudf.Scalar)) or np.isscalar(rhs): + if rhs is None: + out_dtype = self.dtype + else: + if not ( + isinstance(rhs, (NumericalColumn, cudf.Scalar,),) + or np.isscalar(rhs) + ): + msg = "{!r} operator not supported between {} and {}" + raise TypeError(msg.format(binop, type(self), type(rhs))) out_dtype = np.result_type(self.dtype, rhs.dtype) if binop in ["mod", "floordiv"]: + tmp = self if reflect else rhs if (tmp.dtype in int_dtypes) and ( (np.isscalar(tmp) and (0 == tmp)) or ((isinstance(tmp, NumericalColumn)) and (0.0 in tmp)) ): out_dtype = np.dtype("float64") - elif rhs is None: - out_dtype = self.dtype - else: - raise TypeError( - f"'{binop}' operator not supported between " - f"{type(self).__name__} and {type(rhs).__name__}" - ) return _numeric_column_binop( lhs=self, rhs=rhs, op=binop, out_dtype=out_dtype, reflect=reflect ) - def _apply_scan_op(self, op): + def _apply_scan_op(self, op: str) -> ColumnBase: return libcudf.reduce.scan(op, self, True) - def normalize_binop_value(self, other): + def normalize_binop_value( + self, other: ScalarLike + ) -> Union[ColumnBase, ScalarLike]: if other is None: return other if isinstance(other, cudf.Scalar): @@ -122,8 +141,8 @@ def normalize_binop_value(self, other): return other other_dtype = np.promote_types(self.dtype, other_dtype) if other_dtype == np.dtype("float16"): - other = np.dtype("float32").type(other) - other_dtype = other.dtype + other_dtype = np.dtype("float32") + other = other_dtype.type(other) if self.dtype.kind == "b": other_dtype = min_signed_type(other) if np.isscalar(other): @@ 
-134,104 +153,110 @@ def normalize_binop_value(self, other): other, size=len(self), dtype=other_dtype ) return column.build_column( - data=Buffer.from_array_like(ary), - dtype=ary.dtype, - mask=self.mask, + data=Buffer(ary), dtype=ary.dtype, mask=self.mask, ) else: raise TypeError(f"cannot broadcast {type(other)}") - def int2ip(self): + def int2ip(self) -> "cudf.core.column.StringColumn": if self.dtype != np.dtype("int64"): raise TypeError("Only int64 type can be converted to ip") return libcudf.string_casting.int2ip(self) - def as_string_column(self, dtype, **kwargs): + def as_string_column( + self, dtype: Dtype, format=None + ) -> "cudf.core.column.StringColumn": if len(self) > 0: return string._numeric_to_str_typecast_functions[ np.dtype(self.dtype) ](self) else: - return as_column([], dtype="object") - - def as_datetime_column(self, dtype, **kwargs): + return cast( + "cudf.core.column.StringColumn", as_column([], dtype="object") + ) - return build_column( - data=self.astype("int64").base_data, - dtype=dtype, - mask=self.base_mask, - offset=self.offset, - size=self.size, + def as_datetime_column( + self, dtype: Dtype, **kwargs + ) -> "cudf.core.column.DatetimeColumn": + return cast( + "cudf.core.column.DatetimeColumn", + build_column( + data=self.astype("int64").base_data, + dtype=dtype, + mask=self.base_mask, + offset=self.offset, + size=self.size, + ), ) - def as_timedelta_column(self, dtype, **kwargs): - - return build_column( - data=self.astype("int64").base_data, - dtype=dtype, - mask=self.base_mask, - offset=self.offset, - size=self.size, + def as_timedelta_column( + self, dtype: Dtype, **kwargs + ) -> "cudf.core.column.TimeDeltaColumn": + return cast( + "cudf.core.column.TimeDeltaColumn", + build_column( + data=self.astype("int64").base_data, + dtype=dtype, + mask=self.base_mask, + offset=self.offset, + size=self.size, + ), ) - def as_numerical_column(self, dtype): + def as_numerical_column(self, dtype: Dtype) -> NumericalColumn: dtype = np.dtype(dtype) if dtype == self.dtype: return self return libcudf.unary.cast(self, dtype) - def sum(self, skipna=None, dtype=None, min_count=0): - result_col = self._process_for_reduction( + def reduce(self, op: str, skipna: bool = None, **kwargs) -> float: + min_count = kwargs.pop("min_count", 0) + preprocessed = self._process_for_reduction( skipna=skipna, min_count=min_count ) - if isinstance(result_col, cudf.core.column.ColumnBase): - return libcudf.reduce.reduce("sum", result_col, dtype=dtype) + if isinstance(preprocessed, ColumnBase): + return libcudf.reduce.reduce(op, preprocessed, **kwargs) else: - return result_col + return cast(float, preprocessed) - def product(self, skipna=None, dtype=None, min_count=0): - result_col = self._process_for_reduction( - skipna=skipna, min_count=min_count + def sum( + self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0 + ) -> float: + return self.reduce( + "sum", skipna=skipna, dtype=dtype, min_count=min_count ) - if isinstance(result_col, cudf.core.column.ColumnBase): - return libcudf.reduce.reduce("product", result_col, dtype=dtype) - else: - return result_col - def mean(self, skipna=None, dtype=np.float64): - result_col = self._process_for_reduction(skipna=skipna) - if isinstance(result_col, cudf.core.column.ColumnBase): - return libcudf.reduce.reduce("mean", result_col, dtype=dtype) - else: - return result_col + def product( + self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0 + ) -> float: + return self.reduce( + "product", skipna=skipna, dtype=dtype, min_count=min_count 
+ ) - def var(self, skipna=None, ddof=1, dtype=np.float64): - result = self._process_for_reduction(skipna=skipna) - if isinstance(result, cudf.core.column.ColumnBase): - return libcudf.reduce.reduce("var", result, dtype=dtype, ddof=ddof) - else: - return result + def mean(self, skipna: bool = None, dtype: Dtype = np.float64) -> float: + return self.reduce("mean", skipna=skipna, dtype=dtype) - def std(self, skipna=None, ddof=1, dtype=np.float64): - result_col = self._process_for_reduction(skipna=skipna) - if isinstance(result_col, cudf.core.column.ColumnBase): - return libcudf.reduce.reduce( - "std", result_col, dtype=dtype, ddof=ddof - ) - else: - return result_col + def var( + self, skipna: bool = None, ddof: int = 1, dtype: Dtype = np.float64 + ) -> float: + return self.reduce("var", skipna=skipna, dtype=dtype, ddof=ddof) - def sum_of_squares(self, dtype=None): + def std( + self, skipna: bool = None, ddof: int = 1, dtype: Dtype = np.float64 + ) -> float: + return self.reduce("std", skipna=skipna, dtype=dtype, ddof=ddof) + + def sum_of_squares(self, dtype: Dtype = None) -> float: return libcudf.reduce.reduce("sum_of_squares", self, dtype=dtype) - def kurtosis(self, skipna=None): + def kurtosis(self, skipna: bool = None) -> float: skipna = True if skipna is None else skipna if len(self) == 0 or (not skipna and self.has_nulls): return cudf.utils.dtypes._get_nan_for_dtype(self.dtype) - self = self.nans_to_nulls().dropna() + self = self.nans_to_nulls().dropna() # type: ignore if len(self) < 4: return cudf.utils.dtypes._get_nan_for_dtype(self.dtype) @@ -250,13 +275,13 @@ def kurtosis(self, skipna=None): kurt = term_one_section_one * term_one_section_two - 3 * term_two return kurt - def skew(self, skipna=None): + def skew(self, skipna: bool = None) -> ScalarLike: skipna = True if skipna is None else skipna if len(self) == 0 or (not skipna and self.has_nulls): return cudf.utils.dtypes._get_nan_for_dtype(self.dtype) - self = self.nans_to_nulls().dropna() + self = self.nans_to_nulls().dropna() # type: ignore if len(self) < 3: return cudf.utils.dtypes._get_nan_for_dtype(self.dtype) @@ -273,7 +298,9 @@ def skew(self, skipna=None): skew = unbiased_coef * m3 / (m2 ** (3 / 2)) return skew - def quantile(self, q, interpolation, exact): + def quantile( + self, q: Union[float, Sequence[float]], interpolation: str, exact: bool + ) -> NumericalColumn: if isinstance(q, Number) or cudf.utils.dtypes.is_list_like(q): np_array_q = np.asarray(q) if np.logical_or(np_array_q < 0, np_array_q > 1).any(): @@ -284,15 +311,14 @@ def quantile(self, q, interpolation, exact): # will only have values in range [0, 1] result = self._numeric_quantile(q, interpolation, exact) if isinstance(q, Number): - result = result[0] return ( cudf.utils.dtypes._get_nan_for_dtype(self.dtype) - if result is cudf.NA - else result + if result[0] is cudf.NA + else result[0] ) return result - def median(self, skipna=None): + def median(self, skipna: bool = None) -> NumericalColumn: skipna = True if skipna is None else skipna if not skipna and self.has_nulls: @@ -301,24 +327,17 @@ def median(self, skipna=None): # enforce linear in case the default ever changes return self.quantile(0.5, interpolation="linear", exact=True) - def _numeric_quantile(self, q, interpolation, exact): - is_number = isinstance(q, Number) - - if is_number: - quant = [float(q)] - elif isinstance(q, list) or isinstance(q, np.ndarray): - quant = q - else: - msg = "`q` must be either a single element, list or numpy array" - raise TypeError(msg) - + def _numeric_quantile( + 
self, q: Union[float, Sequence[float]], interpolation: str, exact: bool + ) -> NumericalColumn: + quant = [float(q)] if not isinstance(q, (Sequence, np.ndarray)) else q # get sorted indices and exclude nulls sorted_indices = self.as_frame()._get_sorted_inds(True, "first") sorted_indices = sorted_indices[self.null_count :] return cpp_quantile(self, quant, interpolation, sorted_indices, exact) - def cov(self, other): + def cov(self, other: ColumnBase) -> float: if ( len(self) == 0 or len(other) == 0 @@ -330,7 +349,7 @@ def cov(self, other): cov_sample = result.sum() / (len(self) - 1) return cov_sample - def corr(self, other): + def corr(self, other: ColumnBase) -> float: if len(self) == 0 or len(other) == 0: return cudf.utils.dtypes._get_nan_for_dtype(self.dtype) @@ -341,12 +360,14 @@ def corr(self, other): return cudf.utils.dtypes._get_nan_for_dtype(self.dtype) return cov / lhs_std / rhs_std - def round(self, decimals=0): + def round(self, decimals: int = 0) -> NumericalColumn: """Round the values in the Column to the given number of decimals. """ return libcudf.round.round(self, decimal_places=decimals) - def applymap(self, udf, out_dtype=None): + def applymap( + self, udf: Callable[[ScalarLike], ScalarLike], out_dtype: Dtype = None + ) -> ColumnBase: """Apply an element-wise function to transform the values in the Column. Parameters @@ -367,7 +388,7 @@ def applymap(self, udf, out_dtype=None): out = column.column_applymap(udf=udf, column=self, out_dtype=out_dtype) return out - def default_na_value(self): + def default_na_value(self) -> ScalarLike: """Returns the default NA value for this column """ dkind = self.dtype.kind @@ -382,7 +403,12 @@ def default_na_value(self): else: raise TypeError(f"numeric column of {self.dtype} has no NaN value") - def find_and_replace(self, to_replace, replacement, all_nan): + def find_and_replace( + self, + to_replace: ColumnLike, + replacement: ColumnLike, + all_nan: bool = False, + ) -> NumericalColumn: """ Return col with *to_replace* replaced with *value*. """ @@ -409,11 +435,16 @@ def find_and_replace(self, to_replace, replacement, all_nan): replaced, to_replace_col, replacement_col ) - def fillna(self, fill_value=None, method=None, fill_nan=True): + def fillna( + self, + fill_value: Any = None, + method: str = None, + dtype: Dtype = None, + fill_nan: bool = True, + ) -> NumericalColumn: """ Fill null values with *fill_value* """ - if fill_nan: col = self.nans_to_nulls() else: @@ -447,7 +478,9 @@ def fillna(self, fill_value=None, method=None, fill_nan=True): return super(NumericalColumn, col).fillna(fill_value, method) - def find_first_value(self, value, closest=False): + def find_first_value( + self, value: ScalarLike, closest: bool = False + ) -> int: """ Returns offset of first value that matches. For monotonic columns, returns the offset of the first larger value @@ -476,7 +509,7 @@ def find_first_value(self, value, closest=False): raise ValueError("value not found") return found - def find_last_value(self, value, closest=False): + def find_last_value(self, value: ScalarLike, closest: bool = False) -> int: """ Returns offset of last value that matches. 
For monotonic columns, returns the offset of the last smaller value @@ -505,7 +538,7 @@ def find_last_value(self, value, closest=False): raise ValueError("value not found") return found - def can_cast_safely(self, to_dtype): + def can_cast_safely(self, to_dtype: DtypeObj) -> bool: """ Returns true if all the values in self can be safely cast to dtype @@ -603,9 +636,17 @@ def can_cast_safely(self, to_dtype): else: return False + return False + @annotate("BINARY_OP", color="orange", domain="cudf_python") -def _numeric_column_binop(lhs, rhs, op, out_dtype, reflect=False): +def _numeric_column_binop( + lhs: Union[ColumnBase, ScalarLike], + rhs: Union[ColumnBase, ScalarLike], + op: str, + out_dtype: Dtype, + reflect: bool = False, +) -> ColumnBase: if reflect: lhs, rhs = rhs, lhs @@ -622,7 +663,7 @@ def _numeric_column_binop(lhs, rhs, op, out_dtype, reflect=False): return out -def _numeric_column_unaryop(operand, op): +def _numeric_column_unaryop(operand: ColumnBase, op: str) -> ColumnBase: if callable(op): return libcudf.transform.transform(operand, op) @@ -630,7 +671,7 @@ def _numeric_column_unaryop(operand, op): return libcudf.unary.unary_operation(operand, op) -def _safe_cast_to_int(col, dtype): +def _safe_cast_to_int(col: ColumnBase, dtype: DtypeObj) -> ColumnBase: """ Cast given NumericalColumn to given integer dtype safely. """ @@ -649,7 +690,9 @@ def _safe_cast_to_int(col, dtype): ) -def _normalize_find_and_replace_input(input_column_dtype, col_to_normalize): +def _normalize_find_and_replace_input( + input_column_dtype: DtypeObj, col_to_normalize: Union[ColumnBase, list] +) -> ColumnBase: normalized_column = column.as_column( col_to_normalize, dtype=input_column_dtype if len(col_to_normalize) <= 0 else None, @@ -691,7 +734,9 @@ def _normalize_find_and_replace_input(input_column_dtype, col_to_normalize): return normalized_column.astype(input_column_dtype) -def digitize(column, bins, right=False): +def digitize( + column: ColumnBase, bins: np.ndarray, right: bool = False +) -> ColumnBase: """Return the indices of the bins to which each value in column belongs. Parameters @@ -706,7 +751,7 @@ def digitize(column, bins, right=False): Returns ------- - A device array containing the indices + A column containing the indices """ if not column.dtype == bins.dtype: raise ValueError( diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index f5df440b865..0124b421266 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -1,10 +1,15 @@ # Copyright (c) 2019-2020, NVIDIA CORPORATION. 
+from __future__ import annotations + +import builtins import pickle import warnings +from typing import Any, Dict, Optional, Sequence, Tuple, Union, cast, overload import cupy import numpy as np import pandas as pd +from numba import cuda from nvtx import annotate import cudf @@ -140,6 +145,7 @@ translate as cpp_translate, ) from cudf._lib.strings.wrap import wrap as cpp_wrap +from cudf._typing import ColumnLike, Dtype, ScalarLike from cudf.core.buffer import Buffer from cudf.core.column import column, datetime from cudf.core.column.methods import ColumnMethodsMixin @@ -197,6 +203,9 @@ } + +ParentType = Union["cudf.Series", "cudf.Index"] + + class StringMethods(ColumnMethodsMixin): def __init__(self, column, parent=None): """ @@ -214,10 +223,9 @@ def __init__(self, column, parent=None): raise AttributeError( "Can only use .str accessor with string values" ) - self._column = column - self._parent = parent + super().__init__(column=column, parent=parent) - def htoi(self): + def htoi(self) -> ParentType: """ Returns integer value represented by each hex string. String is interpreted to have hex (base-16) characters. @@ -242,7 +250,7 @@ def htoi(self): return self._return_or_inplace(out, inplace=False) - def ip2int(self): + def ip2int(self) -> ParentType: """ This converts IP strings to integers @@ -279,7 +287,7 @@ def __getitem__(self, key): else: return self.get(key) - def len(self): + def len(self) -> ParentType: """ Computes the length of each element in the Series/Index. @@ -301,7 +309,7 @@ def len(self): return self._return_or_inplace(cpp_count_characters(self._column)) - def byte_count(self): + def byte_count(self) -> ParentType: """ Computes the number of bytes of each string in the Series/Index. @@ -328,6 +336,16 @@ def byte_count(self): """ return self._return_or_inplace(cpp_count_bytes(self._column),) + @overload + def cat(self, sep: str = None, na_rep: str = None) -> str: + ... + + @overload + def cat( + self, others, sep: str = None, na_rep: str = None + ) -> Union[ParentType, "cudf.core.column.StringColumn"]: + ... + def cat(self, others=None, sep=None, na_rep=None): """ Concatenate strings in the Series/Index with given separator. @@ -339,28 +357,28 @@ def cat(self, others=None, sep=None, na_rep=None): Parameters ---------- - others : Series or List of str - Strings to be appended. - The number of strings must match ``size()`` of this instance. - This must be either a Series of string dtype or a Python - list of strings. + others : Series or List of str + Strings to be appended. + The number of strings must match ``size()`` of this instance. + This must be either a Series of string dtype or a Python + list of strings. - sep : str - If specified, this separator will be appended to each string - before appending the others. + sep : str + If specified, this separator will be appended to each string + before appending the others. - na_rep : str - This character will take the place of any null strings - (not empty strings) in either list. + na_rep : str + This character will take the place of any null strings + (not empty strings) in either list. - - If ``na_rep`` is ``None``, and ``others`` is ``None``, - missing values in the Series/Index are - omitted from the result. + - If ``na_rep`` is ``None``, and ``others`` is ``None``, + missing values in the Series/Index are + omitted from the result. - - If ``na_rep`` is ``None``, and ``others`` is - not ``None``, a row containing a missing value - in any of the columns (before concatenation) - will have a missing value in the result.
+ - If ``na_rep`` is ``None``, and ``others`` is + not ``None``, a row containing a missing value + in any of the columns (before concatenation) + will have a missing value in the result. Returns ------- @@ -441,7 +459,7 @@ def cat(self, others=None, sep=None, na_rep=None): out = out[0] return out - def join(self, sep): + def join(self, sep) -> ParentType: """ Join lists contained as elements in the Series/Index with passed delimiter. @@ -453,7 +471,9 @@ def join(self, sep): "Columns of arrays / lists are not yet " "supported" ) - def extract(self, pat, flags=0, expand=True): + def extract( + self, pat: str, flags: int = 0, expand: bool = True + ) -> ParentType: """ Extract capture groups in the regex `pat` as columns in a DataFrame. @@ -517,7 +537,14 @@ def extract(self, pat, flags=0, expand=True): else: return self._return_or_inplace(out, expand=expand) - def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): + def contains( + self, + pat: Union[str, Sequence], + case: bool = True, + flags: int = 0, + na=np.nan, + regex: bool = True, + ) -> ParentType: """ Test if pattern or regex is contained within a string of a Series or Index. @@ -646,7 +673,15 @@ def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): ) return self._return_or_inplace(result_col) - def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): + def replace( + self, + pat: Union[str, Sequence], + repl: Union[str, Sequence], + n: int = -1, + case=None, + flags: int = 0, + regex: bool = True, + ) -> ParentType: """ Replace occurrences of pattern/regex in the Series/Index with some other string. Equivalent to `str.replace() @@ -748,7 +783,7 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): ), ) - def replace_with_backrefs(self, pat, repl): + def replace_with_backrefs(self, pat: str, repl: str) -> ParentType: """ Use the ``repl`` back-ref template to create a new string with the extracted elements found using the ``pat`` expression. @@ -778,7 +813,9 @@ def replace_with_backrefs(self, pat, repl): cpp_replace_with_backrefs(self._column, pat, repl) ) - def slice(self, start=None, stop=None, step=None): + def slice( + self, start: int = None, stop: int = None, step: int = None + ) -> ParentType: """ Slice substrings from each element in the Series or Index. @@ -847,7 +884,7 @@ def slice(self, start=None, stop=None, step=None): cpp_slice_strings(self._column, start, stop, step), ) - def isinteger(self): + def isinteger(self) -> ParentType: """ Check whether all characters in each string form integer. @@ -907,7 +944,7 @@ def isinteger(self): """ return self._return_or_inplace(cpp_is_integer(self._column)) - def ishex(self): + def ishex(self) -> ParentType: """ Check whether all characters in each string form a hex integer. @@ -946,7 +983,7 @@ def ishex(self): """ return self._return_or_inplace(str_cast.is_hex(self._column)) - def istimestamp(self, format): + def istimestamp(self, format: str) -> ParentType: """ Check whether all characters in each string can be converted to a timestamp using the given format. @@ -970,7 +1007,7 @@ def istimestamp(self, format): str_cast.istimestamp(self._column, format) ) - def isfloat(self): + def isfloat(self) -> ParentType: """ Check whether all characters in each string form floating value. @@ -1033,7 +1070,7 @@ def isfloat(self): """ return self._return_or_inplace(cpp_is_float(self._column)) - def isdecimal(self): + def isdecimal(self) -> ParentType: """ Check whether all characters in each string are decimal. 
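Since `isdecimal` now declares a `ParentType` return, the distinction it draws against the sibling predicates is worth a quick sketch (values are illustrative; expected results follow the usual Unicode definitions, with `isdecimal` the strictest of the family):

import cudf

s = cudf.Series(["123", "3.14", "²", ""])
s.str.isdecimal()  # expected: [True, False, False, False]; plain decimal digits only
s.str.isdigit()    # expected: [True, False, True, False]; also accepts digit-like chars such as superscripts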
@@ -1094,7 +1131,7 @@ def isdecimal(self): """ return self._return_or_inplace(cpp_is_decimal(self._column)) - def isalnum(self): + def isalnum(self) -> ParentType: """ Check whether all characters in each string are alphanumeric. @@ -1163,7 +1200,7 @@ def isalnum(self): """ return self._return_or_inplace(cpp_is_alnum(self._column)) - def isalpha(self): + def isalpha(self) -> ParentType: """ Check whether all characters in each string are alphabetic. @@ -1219,7 +1256,7 @@ def isalpha(self): """ return self._return_or_inplace(cpp_is_alpha(self._column)) - def isdigit(self): + def isdigit(self) -> ParentType: """ Check whether all characters in each string are digits. @@ -1281,7 +1318,7 @@ def isdigit(self): """ return self._return_or_inplace(cpp_is_digit(self._column)) - def isnumeric(self): + def isnumeric(self) -> ParentType: """ Check whether all characters in each string are numeric. @@ -1349,7 +1386,7 @@ def isnumeric(self): """ return self._return_or_inplace(cpp_is_numeric(self._column)) - def isupper(self): + def isupper(self) -> ParentType: """ Check whether all characters in each string are uppercase. @@ -1406,7 +1443,7 @@ def isupper(self): """ return self._return_or_inplace(cpp_is_upper(self._column)) - def islower(self): + def islower(self) -> ParentType: """ Check whether all characters in each string are lowercase. @@ -1463,7 +1500,7 @@ def islower(self): """ return self._return_or_inplace(cpp_is_lower(self._column)) - def isipv4(self): + def isipv4(self) -> ParentType: """ Check whether all characters in each string form an IPv4 address. @@ -1487,7 +1524,7 @@ def isipv4(self): """ return self._return_or_inplace(str_cast.is_ipv4(self._column)) - def lower(self): + def lower(self) -> ParentType: """ Converts all characters to lowercase. @@ -1526,7 +1563,7 @@ def lower(self): """ return self._return_or_inplace(cpp_to_lower(self._column)) - def upper(self): + def upper(self) -> ParentType: """ Convert each string to uppercase. This only applies to ASCII characters at this time. @@ -1575,7 +1612,7 @@ def upper(self): """ return self._return_or_inplace(cpp_to_upper(self._column)) - def capitalize(self): + def capitalize(self) -> ParentType: """ Convert strings in the Series/Index to be capitalized. This only applies to ASCII characters at this time. @@ -1603,7 +1640,7 @@ def capitalize(self): """ return self._return_or_inplace(cpp_capitalize(self._column)) - def swapcase(self): + def swapcase(self) -> ParentType: """ Change each lowercase character to uppercase and vice versa. This only applies to ASCII characters at this time. @@ -1648,7 +1685,7 @@ def swapcase(self): """ return self._return_or_inplace(cpp_swapcase(self._column)) - def title(self): + def title(self) -> ParentType: """ Uppercase the first letter of each letter after a space and lowercase the rest. @@ -1693,7 +1730,9 @@ def title(self): """ return self._return_or_inplace(cpp_title(self._column)) - def filter_alphanum(self, repl=None, keep=True): + def filter_alphanum( + self, repl: str = None, keep: bool = True + ) -> ParentType: """ Remove non-alphanumeric characters from strings in this column. @@ -1728,7 +1767,9 @@ def filter_alphanum(self, repl=None, keep=True): cpp_filter_alphanum(self._column, cudf.Scalar(repl), keep), ) - def slice_from(self, starts, stops): + def slice_from( + self, starts: "cudf.Series", stops: "cudf.Series" + ) -> ParentType: """ Return substring of each string using positions for each string. 
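`slice_from` is one of the few accessors that takes columnar arguments, which is why `starts` and `stops` are now annotated as `cudf.Series`; a small sketch with assumed values:

import cudf

s = cudf.Series(["hello", "there"])
starts = cudf.Series([1, 2])
stops = cudf.Series([4, 5])
# Each row is sliced with its own [start, stop) pair:
# "hello"[1:4] -> "ell", "there"[2:5] -> "ere"
out = s.str.slice_from(starts=starts, stops=stops)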
@@ -1771,7 +1812,9 @@ def slice_from(self, starts, stops): ), ) - def slice_replace(self, start=None, stop=None, repl=None): + def slice_replace( + self, start: int = None, stop: int = None, repl: str = None + ) -> ParentType: """ Replace the specified section of each string with a new string. @@ -1856,7 +1899,7 @@ def slice_replace(self, start=None, stop=None, repl=None): cpp_slice_replace(self._column, start, stop, cudf.Scalar(repl)), ) - def insert(self, start=0, repl=None): + def insert(self, start: int = 0, repl: str = None) -> ParentType: """ Insert the specified string into each string in the specified position. @@ -1906,7 +1949,7 @@ def insert(self, start=0, repl=None): cpp_string_insert(self._column, start, cudf.Scalar(repl)), ) - def get(self, i=0): + def get(self, i: int = 0) -> ParentType: """ Extract element from each component at specified position. @@ -1950,7 +1993,9 @@ def get(self, i=0): return self._return_or_inplace(cpp_string_get(self._column, i)) - def split(self, pat=None, n=-1, expand=None): + def split( + self, pat: str = None, n: int = -1, expand: bool = None + ) -> ParentType: """ Split strings around given separator/delimiter. @@ -2079,14 +2124,14 @@ def split(self, pat=None, n=-1, expand=None): if expand: if self._column.null_count == len(self._column): - result_table = [self._column.copy()] + result_table = cudf.core.frame.Frame({0: self._column.copy()}) else: result_table = cpp_split( self._column, cudf.Scalar(pat, "str"), n ) if len(result_table._data) == 1: - if result_table._data[0].null_count == len(self._parent): - result_table = [] + if result_table._data[0].null_count == len(self._column): + result_table = cudf.core.frame.Frame({}) else: result_table = cpp_split_record( self._column, cudf.Scalar(pat, "str"), n @@ -2094,7 +2139,9 @@ def split(self, pat=None, n=-1, expand=None): return self._return_or_inplace(result_table, expand=expand) - def rsplit(self, pat=None, n=-1, expand=None): + def rsplit( + self, pat: str = None, n: int = -1, expand: bool = None + ) -> ParentType: """ Split strings around given separator/delimiter. @@ -2232,18 +2279,18 @@ def rsplit(self, pat=None, n=-1, expand=None): if expand: if self._column.null_count == len(self._column): - result_table = [self._column.copy()] + result_table = cudf.core.frame.Frame({0: self._column.copy()}) else: result_table = cpp_rsplit(self._column, cudf.Scalar(pat), n) if len(result_table._data) == 1: - if result_table._data[0].null_count == len(self._parent): - result_table = [] + if result_table._data[0].null_count == len(self._column): + result_table = cudf.core.frame.Frame({}) else: result_table = cpp_rsplit_record(self._column, cudf.Scalar(pat), n) return self._return_or_inplace(result_table, expand=expand) - def partition(self, sep=" ", expand=True): + def partition(self, sep: str = " ", expand: bool = True) -> ParentType: """ Split the string at the first occurrence of sep. @@ -2323,7 +2370,7 @@ def partition(self, sep=" ", expand=True): cpp_partition(self._column, cudf.Scalar(sep)), expand=expand ) - def rpartition(self, sep=" ", expand=True): + def rpartition(self, sep: str = " ", expand: bool = True) -> ParentType: """ Split the string at the last occurrence of sep. @@ -2387,7 +2434,9 @@ def rpartition(self, sep=" ", expand=True): cpp_rpartition(self._column, cudf.Scalar(sep)), expand=expand ) - def pad(self, width, side="left", fillchar=" "): + def pad( + self, width: int, side: str = "left", fillchar: str = " " + ) -> ParentType: """ Pad strings in the Series/Index up to width. 
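For reference, the now-annotated `width`/`side`/`fillchar` parameters of `pad` behave like their pandas counterparts; a short sketch with illustrative values:

import cudf

s = cudf.Series(["a", "bb", "ccc"])
# side="left" prepends fillchar until each string reaches the target width
padded = s.str.pad(width=4, side="left", fillchar="0")
# expected: ["000a", "00bb", "0ccc"]; side="both" would center instead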
@@ -2472,7 +2521,7 @@ def pad(self, width, side="left", fillchar=" "): cpp_pad(self._column, width, fillchar, side) ) - def zfill(self, width): + def zfill(self, width: int) -> ParentType: """ Pad strings in the Series/Index by prepending ‘0’ characters. @@ -2545,7 +2594,7 @@ def zfill(self, width): return self._return_or_inplace(cpp_zfill(self._column, width)) - def center(self, width, fillchar=" "): + def center(self, width: int, fillchar: str = " ") -> ParentType: """ Filling left and right side of strings in the Series/Index with an additional character. @@ -2617,7 +2666,7 @@ def center(self, width, fillchar=" "): cpp_center(self._column, width, fillchar) ) - def ljust(self, width, fillchar=" "): + def ljust(self, width: int, fillchar: str = " ") -> ParentType: """ Filling right side of strings in the Series/Index with an additional character. Equivalent to `str.ljust() @@ -2671,7 +2720,7 @@ def ljust(self, width, fillchar=" "): cpp_ljust(self._column, width, fillchar) ) - def rjust(self, width, fillchar=" "): + def rjust(self, width: int, fillchar: str = " ") -> ParentType: """ Filling left side of strings in the Series/Index with an additional character. Equivalent to `str.rjust() @@ -2725,7 +2774,7 @@ def rjust(self, width, fillchar=" "): cpp_rjust(self._column, width, fillchar) ) - def strip(self, to_strip=None): + def strip(self, to_strip: str = None) -> ParentType: """ Remove leading and trailing characters. @@ -2784,7 +2833,7 @@ def strip(self, to_strip=None): cpp_strip(self._column, cudf.Scalar(to_strip)) ) - def lstrip(self, to_strip=None): + def lstrip(self, to_strip: str = None) -> ParentType: """ Remove leading and trailing characters. @@ -2831,7 +2880,7 @@ def lstrip(self, to_strip=None): cpp_lstrip(self._column, cudf.Scalar(to_strip)) ) - def rstrip(self, to_strip=None): + def rstrip(self, to_strip: str = None) -> ParentType: """ Remove leading and trailing characters. @@ -2886,7 +2935,7 @@ def rstrip(self, to_strip=None): cpp_rstrip(self._column, cudf.Scalar(to_strip)) ) - def wrap(self, width, **kwargs): + def wrap(self, width: int, **kwargs) -> ParentType: """ Wrap long strings in the Series/Index to be formatted in paragraphs with length less than a given width. @@ -2980,7 +3029,7 @@ def wrap(self, width, **kwargs): return self._return_or_inplace(cpp_wrap(self._column, width)) - def count(self, pat, flags=0): + def count(self, pat: str, flags: int = 0) -> ParentType: """ Count occurrences of pattern in each string of the Series/Index. @@ -3040,7 +3089,9 @@ def count(self, pat, flags=0): return self._return_or_inplace(cpp_count_re(self._column, pat)) - def findall(self, pat, flags=0, expand=True): + def findall( + self, pat: str, flags: int = 0, expand: bool = True + ) -> ParentType: """ Find all occurrences of pattern or regular expression in the Series/Index. @@ -3108,7 +3159,7 @@ def findall(self, pat, flags=0, expand=True): cpp_findall(self._column, pat), expand=expand ) - def isempty(self): + def isempty(self) -> ParentType: """ Check whether each string is an empty string. @@ -3128,9 +3179,9 @@ def isempty(self): 4 False dtype: bool """ - return self._return_or_inplace((self._parent == "").fillna(False)) + return self._return_or_inplace((self._column == "").fillna(False)) - def isspace(self): + def isspace(self) -> ParentType: """ Check whether all characters in each string are whitespace. 
@@ -3186,7 +3237,7 @@ def isspace(self): """ return self._return_or_inplace(cpp_isspace(self._column)) - def endswith(self, pat): + def endswith(self, pat: str) -> ParentType: """ Test if the end of each string element matches a pattern. @@ -3240,7 +3291,7 @@ def endswith(self, pat): return self._return_or_inplace(result_col) - def startswith(self, pat): + def startswith(self, pat: Union[str, Sequence]) -> ParentType: """ Test if the start of each string element matches a pattern. @@ -3300,7 +3351,7 @@ def startswith(self, pat): return self._return_or_inplace(result_col) - def find(self, sub, start=0, end=None): + def find(self, sub: str, start: int = 0, end: int = None) -> ParentType: """ Return lowest indexes in each strings in the Series/Index where the substring is fully contained between ``[start:end]``. @@ -3355,7 +3406,7 @@ def find(self, sub, start=0, end=None): return self._return_or_inplace(result_col) - def rfind(self, sub, start=0, end=None): + def rfind(self, sub: str, start: int = 0, end: int = None) -> ParentType: """ Return highest indexes in each strings in the Series/Index where the substring is fully contained between ``[start:end]``. @@ -3414,7 +3465,7 @@ def rfind(self, sub, start=0, end=None): return self._return_or_inplace(result_col) - def index(self, sub, start=0, end=None): + def index(self, sub: str, start: int = 0, end: int = None) -> ParentType: """ Return lowest indexes in each strings where the substring is fully contained between ``[start:end]``. This is the same @@ -3474,7 +3525,7 @@ def index(self, sub, start=0, end=None): else: return result - def rindex(self, sub, start=0, end=None): + def rindex(self, sub: str, start: int = 0, end: int = None) -> ParentType: """ Return highest indexes in each strings where the substring is fully contained between ``[start:end]``. This is the same @@ -3534,7 +3585,7 @@ def rindex(self, sub, start=0, end=None): else: return result - def match(self, pat, case=True, flags=0): + def match(self, pat: str, case: bool = True, flags: int = 0) -> ParentType: """ Determine if each string matches a regular expression. @@ -3579,7 +3630,7 @@ def match(self, pat, case=True, flags=0): return self._return_or_inplace(cpp_match_re(self._column, pat)) - def url_decode(self): + def url_decode(self) -> ParentType: """ Returns a URL-decoded format of each string. No format checking is performed. All characters @@ -3609,7 +3660,7 @@ def url_decode(self): return self._return_or_inplace(cpp_url_decode(self._column)) - def url_encode(self): + def url_encode(self) -> ParentType: """ Returns a URL-encoded format of each string. No format checking is performed. @@ -3640,7 +3691,7 @@ def url_encode(self): """ return self._return_or_inplace(cpp_url_encode(self._column)) - def code_points(self): + def code_points(self) -> ParentType: """ Returns an array by filling it with the UTF-8 code point values for each character of each string. @@ -3673,14 +3724,14 @@ def code_points(self): """ new_col = cpp_code_points(self._column) - if self._parent is None: - return new_col - elif isinstance(self._parent, cudf.Series): + if isinstance(self._parent, cudf.Series): return cudf.Series(new_col, name=self._parent.name) elif isinstance(self._parent, cudf.Index): return cudf.core.index.as_index(new_col, name=self._parent.name) + else: + return new_col - def translate(self, table): + def translate(self, table: dict) -> ParentType: """ Map all characters in the string through the given mapping table. 
@@ -3723,7 +3774,9 @@ def translate(self, table): table = str.maketrans(table) return self._return_or_inplace(cpp_translate(self._column, table)) - def filter_characters(self, table, keep=True, repl=None): + def filter_characters( + self, table: dict, keep: bool = True, repl: str = None + ) -> ParentType: """ Remove characters from each string using the character ranges in the given mapping table. @@ -3774,7 +3827,7 @@ def filter_characters(self, table, keep=True, repl=None): ), ) - def normalize_spaces(self): + def normalize_spaces(self) -> ParentType: """ Remove extra whitespace between tokens and trim whitespace from the beginning and the end of each string. @@ -3794,7 +3847,7 @@ def normalize_spaces(self): """ return self._return_or_inplace(cpp_normalize_spaces(self._column)) - def normalize_characters(self, do_lower=True): + def normalize_characters(self, do_lower: bool = True) -> ParentType: """ Normalizes strings characters for tokenizing. @@ -3843,7 +3896,7 @@ def normalize_characters(self, do_lower=True): cpp_normalize_characters(self._column, do_lower) ) - def tokenize(self, delimiter=" "): + def tokenize(self, delimiter: str = " ") -> ParentType: """ Each string is split into tokens using the provided delimiter(s). The sequence returned contains the tokens in the order @@ -3890,7 +3943,9 @@ def tokenize(self, delimiter=" "): for delimiters, but got {type(delimiter)}" ) - def detokenize(self, indices, separator=" "): + def detokenize( + self, indices: "cudf.Series", separator: str = " " + ) -> ParentType: """ Combines tokens into strings by concatenating them in the order in which they appear in the ``indices`` column. The ``separator`` is @@ -3898,7 +3953,7 @@ def detokenize(self, indices, separator=" "): Parameters ---------- - indices : list of ints + indices : Series Each value identifies the output row for the corresponding token. separator : str The string concatenated between each token in an output row. @@ -3925,7 +3980,7 @@ def detokenize(self, indices, separator=" "): retain_index=False, ) - def character_tokenize(self): + def character_tokenize(self) -> ParentType: """ Each string is split into individual characters. The sequence returned contains each character as an individual string. @@ -3973,14 +4028,14 @@ def character_tokenize(self): dtype: object """ result_col = cpp_character_tokenize(self._column) - if self._parent is None: - return result_col - elif isinstance(self._parent, cudf.Series): + if isinstance(self._parent, cudf.Series): return cudf.Series(result_col, name=self._parent.name) elif isinstance(self._parent, cudf.Index): return cudf.core.index.as_index(result_col, name=self._parent.name) + else: + return result_col - def token_count(self, delimiter=" "): + def token_count(self, delimiter: str = " ") -> ParentType: """ Each string is split into tokens using the provided delimiter. The returned integer sequence is the number of tokens in each string. @@ -4022,7 +4077,7 @@ def token_count(self, delimiter=" "): for delimiters, but got {type(delimiter)}" ) - def ngrams(self, n=2, separator="_"): + def ngrams(self, n: int = 2, separator: str = "_") -> ParentType: """ Generate the n-grams from a set of tokens, each record in series is treated a token. @@ -4059,7 +4114,7 @@ def ngrams(self, n=2, separator="_"): cpp_generate_ngrams(self._column, n, separator), retain_index=False ) - def character_ngrams(self, n=2): + def character_ngrams(self, n: int = 2) -> ParentType: """ Generate the n-grams from characters in a column of strings. 
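The `detokenize` docstring fix above matters in practice: `indices` is a cudf Series of output-row ids, not a host list of ints. A usage sketch of the documented semantics (output shown as expected, not verified here):

```python
import cudf

tokens = cudf.Series(["hello", "world", "good", "bye"])
indices = cudf.Series([0, 0, 1, 1], dtype="int32")  # output row per token
print(tokens.str.detokenize(indices))
# 0    hello world
# 1    good bye
# dtype: object
```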
@@ -4095,7 +4150,9 @@ def character_ngrams(self, n=2): cpp_generate_character_ngrams(self._column, n), retain_index=False ) - def ngrams_tokenize(self, n=2, delimiter=" ", separator="_"): + def ngrams_tokenize( + self, n: int = 2, delimiter: str = " ", separator: str = "_" + ) -> ParentType: """ Generate the n-grams using tokens from each string. This will tokenize each string and then generate ngrams for each @@ -4131,7 +4188,9 @@ def ngrams_tokenize(self, n=2, delimiter=" ", separator="_"): retain_index=False, ) - def replace_tokens(self, targets, replacements, delimiter=None): + def replace_tokens( + self, targets, replacements, delimiter: str = None + ) -> ParentType: """ The targets tokens are searched for within each string in the series and replaced with the corresponding replacements if found. @@ -4213,8 +4272,11 @@ def replace_tokens(self, targets, replacements, delimiter=None): ) def filter_tokens( - self, min_token_length, replacement=None, delimiter=None - ): + self, + min_token_length: int, + replacement: str = None, + delimiter: str = None, + ) -> ParentType: """ Remove tokens from within each string in the series that are smaller than min_token_length and optionally replace them @@ -4282,13 +4344,13 @@ def filter_tokens( def subword_tokenize( self, - hash_file, - max_length=64, - stride=48, - do_lower=True, - do_truncate=False, - max_rows_tensor=500, - ): + hash_file: str, + max_length: int = 64, + stride: int = 48, + do_lower: bool = True, + do_truncate: bool = False, + max_rows_tensor: int = 500, + ) -> Tuple[cupy.ndarray, cupy.ndarray, cupy.ndarray]: """ Run CUDA BERT subword tokenizer on cuDF strings column. Encodes words to token ids using vocabulary from a pretrained @@ -4337,12 +4399,12 @@ def subword_tokenize( Returns ------- - token-ids : Column + token-ids : cupy.ndarray The token-ids for each string padded with 0s to max_length. - attention-mask : Column + attention-mask : cupy.ndarray The mask for token-ids result where corresponding positions identify valid token-id values. - metadata : Column + metadata : cupy.ndarray Each row contains the index id of the original string and the first and last index of the token-ids that are non-padded and non-overlapping. @@ -4383,7 +4445,7 @@ def subword_tokenize( cupy.asarray(metadata), ) - def porter_stemmer_measure(self): + def porter_stemmer_measure(self) -> ParentType: """ Compute the Porter Stemmer measure for each string. The Porter Stemmer algorithm is described `here @@ -4406,7 +4468,7 @@ def porter_stemmer_measure(self): cpp_porter_stemmer_measure(self._column) ) - def is_consonant(self, position): + def is_consonant(self, position) -> ParentType: """ Return true for strings where the character at ``position`` is a consonant. The ``position`` parameter may also be a list of integers @@ -4450,7 +4512,7 @@ def is_consonant(self, position): cpp_is_letter(self._column, ltype, position) ) - def is_vowel(self, position): + def is_vowel(self, position) -> ParentType: """ Return true for strings where the character at ``position`` is a vowel -- not a consonant. The ``position`` parameter may also be @@ -4494,7 +4556,7 @@ def is_vowel(self, position): cpp_is_letter(self._column, ltype, position) ) - def edit_distance(self, targets): + def edit_distance(self, targets) -> ParentType: """ The ``targets`` strings are measured against the strings in this instance using the Levenshtein edit distance algorithm. 
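`subword_tokenize` gets the most meaningful annotation change in this stretch: the docstring previously promised `Column`s, but the function returns device arrays. A hypothetical call, with `"vocab-hash.txt"` standing in for a real pretrained-vocabulary hash file:

```python
import cudf

ser = cudf.Series(["this is the", "best book"])
tokens, masks, metadata = ser.str.subword_tokenize(
    "vocab-hash.txt",  # placeholder path, not shipped with this patch
    max_length=32,
    stride=32,
    max_rows_tensor=2,
)
tokens = tokens.reshape(-1, 32)  # a cupy.ndarray: one row per input string
```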
@@ -4576,8 +4638,17 @@ class StringColumn(column.ColumnBase): """Implements operations for Columns of String type """ + _start_offset: Optional[int] + _end_offset: Optional[int] + _cached_sizeof: Optional[int] + def __init__( - self, mask=None, size=None, offset=0, null_count=None, children=() + self, + mask: Buffer = None, + size: int = None, + offset: int = 0, + null_count: int = None, + children: Tuple["column.ColumnBase", ...] = (), ): """ Parameters @@ -4627,34 +4698,38 @@ def __init__( self._end_offset = None @property - def start_offset(self): + def start_offset(self) -> int: if self._start_offset is None: if ( len(self.base_children) == 2 and self.offset < self.base_children[0].size ): - self._start_offset = int(self.base_children[0][self.offset]) + self._start_offset = int( + self.base_children[0].element_indexing(self.offset) + ) else: self._start_offset = 0 return self._start_offset @property - def end_offset(self): + def end_offset(self) -> int: if self._end_offset is None: if ( len(self.base_children) == 2 and (self.offset + self.size) < self.base_children[0].size ): self._end_offset = int( - self.base_children[0][self.offset + self.size] + self.base_children[0].element_indexing( + self.offset + self.size + ) ) else: self._end_offset = 0 return self._end_offset - def __sizeof__(self): + def __sizeof__(self) -> int: if self._cached_sizeof is None: n = 0 if len(self.base_children) == 2: @@ -4676,7 +4751,7 @@ def __sizeof__(self): return self._cached_sizeof @property - def base_size(self): + def base_size(self) -> int: if len(self.base_children) == 0: return 0 else: @@ -4685,7 +4760,13 @@ def base_size(self): / self.base_children[0].dtype.itemsize ) - def sum(self, skipna=None, dtype=None, min_count=0): + @property + def data_array_view(self) -> cuda.devicearray.DeviceNDArray: + raise ValueError("Cannot get an array view of a StringColumn") + + def sum( + self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0 + ): result_col = self._process_for_reduction( skipna=skipna, min_count=min_count ) @@ -4703,39 +4784,38 @@ def set_base_data(self, value): else: super().set_base_data(value) - def set_base_mask(self, value): + def set_base_mask(self, value: Optional[Buffer]): super().set_base_mask(value) - def set_base_children(self, value): + def set_base_children(self, value: Tuple["column.ColumnBase", ...]): # TODO: Implement dtype validation of the children here somehow super().set_base_children(value) - def __contains__(self, item): + def __contains__(self, item: ScalarLike) -> bool: return True in self.str().contains(f"^{item}$") - def str(self, parent=None): + def str(self, parent: ParentType = None) -> StringMethods: return StringMethods(self, parent=parent) - def unary_operator(self, unaryop): + def unary_operator(self, unaryop: builtins.str): raise TypeError( f"Series of dtype `str` cannot perform the operation: " f"{unaryop}" ) - def __len__(self): + def __len__(self) -> int: return self.size - def _set_mask(self, value): - super()._set_mask(value) - @property - def _nbytes(self): + def _nbytes(self) -> int: if self.size == 0: return 0 else: return self.children[1].size - def as_numerical_column(self, dtype): + def as_numerical_column( + self, dtype: Dtype + ) -> "cudf.core.column.NumericalColumn": out_dtype = np.dtype(dtype) if out_dtype.kind in {"i", "u"}: @@ -4775,42 +4855,49 @@ def _as_datetime_or_timedelta_column(self, dtype, format): return result_col - def as_datetime_column(self, dtype, format=None): + def as_datetime_column( + self, dtype: Dtype, **kwargs + ) 
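`element_indexing` is the internal single-row device read that `col[i]` used to spell; switching to it sidesteps the `__getitem__` overloads mypy would otherwise have to resolve. A sketch of the offsets arithmetic on a small column, using internal APIs purely for illustration:

```python
import cudf

col = cudf.Series(["ab", "cde", ""])._column  # internal API, illustration
offsets = col.base_children[0]                # int32 offsets: 0, 2, 5, 5
print(offsets.element_indexing(0))            # 0 -> start_offset
print(offsets.element_indexing(len(col)))     # 5 -> end_offset
```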
-> "cudf.core.column.DatetimeColumn": out_dtype = np.dtype(dtype) + # infer on host from the first not na element + # or return all null column if all values + # are null in current column + format = kwargs.get("format", None) if format is None: - # infer on host from the first not na element - # or return all null column if all values - # are null in current column if self.null_count == len(self): - return column.column_empty( - len(self), dtype=out_dtype, masked=True + return cast( + "cudf.core.column.DatetimeColumn", + column.column_empty( + len(self), dtype=out_dtype, masked=True + ), ) else: - format = datetime.infer_format(self[self.notna()][0]) + format = datetime.infer_format( + self.apply_boolean_mask(self.notna()).element_indexing(0) + ) return self._as_datetime_or_timedelta_column(out_dtype, format) - def as_timedelta_column(self, dtype, format=None): + def as_timedelta_column( + self, dtype: Dtype, **kwargs + ) -> "cudf.core.column.TimeDeltaColumn": out_dtype = np.dtype(dtype) - - if format is None: - format = "%D days %H:%M:%S" - + format = "%D days %H:%M:%S" return self._as_datetime_or_timedelta_column(out_dtype, format) - def as_string_column(self, dtype): + def as_string_column(self, dtype: Dtype, format=None) -> StringColumn: return self @property - def values_host(self): + def values_host(self) -> np.ndarray: """ Return a numpy representation of the StringColumn. """ return self.to_pandas().values @property - def values(self): + def values(self) -> cupy.ndarray: """ Return a CuPy representation of the StringColumn. """ @@ -4818,7 +4905,7 @@ def values(self): "String Arrays is not yet implemented in cudf" ) - def to_array(self, fillna=None): + def to_array(self, fillna: bool = None) -> np.ndarray: """Get a dense numpy array for the data. 
Notes @@ -4851,8 +4938,8 @@ def __arrow_array__(self, type=None): "consider using .to_arrow()" ) - def serialize(self): - header = {"null_count": self.null_count} + def serialize(self) -> Tuple[dict, list]: + header = {"null_count": self.null_count} # type: Dict[Any, Any] header["type-serialized"] = pickle.dumps(type(self)) header["size"] = self.size @@ -4872,7 +4959,7 @@ def serialize(self): return header, frames @classmethod - def deserialize(cls, header, frames): + def deserialize(cls, header: dict, frames: list) -> StringColumn: size = header["size"] if not isinstance(size, int): size = pickle.loads(size) @@ -4880,26 +4967,28 @@ def deserialize(cls, header, frames): # Deserialize the mask, value, and offset frames buffers = [Buffer(each_frame) for each_frame in frames] + nbuf = None if header["null_count"] > 0: nbuf = buffers[2] - else: - nbuf = None children = [] for h, b in zip(header["subheaders"], buffers[:2]): column_type = pickle.loads(h["type-serialized"]) children.append(column_type.deserialize(h, [b])) - col = column.build_column( - data=None, - dtype="str", - mask=nbuf, - children=tuple(children), - size=size, + col = cast( + StringColumn, + column.build_column( + data=None, + dtype="str", + mask=nbuf, + children=tuple(children), + size=size, + ), ) return col - def can_cast_safely(self, to_dtype): + def can_cast_safely(self, to_dtype: Dtype) -> bool: to_dtype = np.dtype(to_dtype) if self.dtype == to_dtype: @@ -4911,7 +5000,12 @@ def can_cast_safely(self, to_dtype): else: return True - def find_and_replace(self, to_replace, replacement, all_nan): + def find_and_replace( + self, + to_replace: ColumnLike, + replacement: ColumnLike, + all_nan: bool = False, + ) -> StringColumn: """ Return col with *to_replace* replaced with *value* """ @@ -4919,7 +5013,12 @@ def find_and_replace(self, to_replace, replacement, all_nan): replacement = column.as_column(replacement, dtype=self.dtype) return libcudf.replace.replace(self, to_replace, replacement) - def fillna(self, fill_value=None, method=None): + def fillna( + self, + fill_value: Any = None, + method: builtins.str = None, + dtype: Dtype = None, + ) -> StringColumn: if fill_value is not None: if not is_scalar(fill_value): fill_value = column.as_column(fill_value, dtype=self.dtype) @@ -4927,24 +5026,26 @@ def fillna(self, fill_value=None, method=None): else: return super().fillna(method=method) - def _find_first_and_last(self, value): + def _find_first_and_last(self, value: ScalarLike) -> Tuple[int, int]: found_indices = self.str().contains(f"^{value}$") found_indices = libcudf.unary.cast(found_indices, dtype=np.int32) first = column.as_column(found_indices).find_first_value(1) last = column.as_column(found_indices).find_last_value(1) return first, last - def find_first_value(self, value, closest=False): + def find_first_value( + self, value: ScalarLike, closest: bool = False + ) -> int: return self._find_first_and_last(value)[0] - def find_last_value(self, value, closest=False): + def find_last_value(self, value: ScalarLike, closest: bool = False) -> int: return self._find_first_and_last(value)[1] - def normalize_binop_value(self, other): + def normalize_binop_value(self, other) -> "column.ColumnBase": # fastpath: gpu scalar if isinstance(other, cudf.Scalar) and other.dtype == "object": return column.as_column(other, length=len(self)) - if isinstance(other, column.Column): + if isinstance(other, column.ColumnBase): return other.astype(self.dtype) elif isinstance(other, str) or other is None: col = utils.scalar_broadcast_to( @@ 
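The type comment on `header` above is load-bearing: from the bare literal mypy infers `Dict[str, int]` and then rejects the byte-string values assigned on the next lines. The failure mode and the fix, reduced:

```python
from typing import Any, Dict

header = {"null_count": 3}  # type: Dict[Any, Any]
header["type-serialized"] = b"\x80\x04"  # ok only with the wide annotation
print(sorted(header))  # ['null_count', 'type-serialized']
```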
-4959,16 +5060,18 @@ def normalize_binop_value(self, other): else: raise TypeError(f"cannot broadcast {type(other)}") - def default_na_value(self): + def default_na_value(self) -> ScalarLike: return None - def binary_operator(self, op, rhs, reflect=False): + def binary_operator( + self, op: builtins.str, rhs, reflect: bool = False + ) -> "column.ColumnBase": lhs = self if reflect: lhs, rhs = rhs, lhs if isinstance(rhs, (StringColumn, str, cudf.Scalar)): if op == "add": - return lhs.str().cat(others=rhs) + return cast("column.ColumnBase", lhs.str().cat(others=rhs)) elif op in ("eq", "ne", "gt", "lt", "ge", "le"): return _string_column_binop(self, rhs, op=op, out_dtype="bool") @@ -4977,7 +5080,7 @@ def binary_operator(self, op, rhs, reflect=False): ) @property - def is_unique(self): + def is_unique(self) -> bool: return len(self.unique()) == len(self) @property @@ -4986,19 +5089,17 @@ def __cuda_array_interface__(self): "Strings are not yet supported via `__cuda_array_interface__`" ) - def _mimic_inplace(self, other_col, inplace=False): - out = super()._mimic_inplace(other_col, inplace=inplace) - return out - @copy_docstring(column.ColumnBase.view) - def view(self, dtype): + def view(self, dtype) -> "cudf.core.column.ColumnBase": if self.null_count > 0: raise ValueError( "Can not produce a view of a string column with nulls" ) dtype = np.dtype(dtype) - str_byte_offset = self.base_children[0][self.offset] - str_end_byte_offset = self.base_children[0][self.offset + self.size] + str_byte_offset = self.base_children[0].element_indexing(self.offset) + str_end_byte_offset = self.base_children[0].element_indexing( + self.offset + self.size + ) char_dtype_size = self.base_children[1].dtype.itemsize n_bytes_to_view = ( @@ -5016,7 +5117,12 @@ def view(self, dtype): @annotate("BINARY_OP", color="orange", domain="cudf_python") -def _string_column_binop(lhs, rhs, op, out_dtype): +def _string_column_binop( + lhs: "column.ColumnBase", + rhs: "column.ColumnBase", + op: str, + out_dtype: Dtype, +) -> "column.ColumnBase": out = libcudf.binaryop.binaryop(lhs=lhs, rhs=rhs, op=op, dtype=out_dtype) return out diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 9036f1e2962..f797bdf9635 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -1,6 +1,9 @@ # Copyright (c) 2020, NVIDIA CORPORATION. 
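The `view` hunk swaps two child `__getitem__` reads for `element_indexing`, leaving the byte arithmetic intact. That arithmetic worked through by hand, assuming a column of two 4-byte strings viewed as `int32`:

```python
import numpy as np

offsets = [0, 4, 8]   # offsets child of a hypothetical ["abcd", "efgh"]
offset, size = 0, 2   # column slice: start row and row count
n_bytes = offsets[offset + size] - offsets[offset]  # 8 bytes of chars
print(n_bytes // np.dtype("int32").itemsize)        # 2 int32 elements
```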
+from __future__ import annotations + import datetime as dt from numbers import Number +from typing import Any, Sequence, Tuple, Union, cast import numpy as np import pandas as pd @@ -9,6 +12,14 @@ import cudf from cudf import _lib as libcudf +from cudf._typing import ( + BinaryOperand, + DatetimeLikeScalar, + Dtype, + DtypeObj, + ScalarLike, +) +from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, column, string from cudf.core.column.datetime import _numpy_to_pandas_conversion from cudf.utils.dtypes import is_scalar, np_to_pa_dtype @@ -24,7 +35,13 @@ class TimeDeltaColumn(column.ColumnBase): def __init__( - self, data, dtype, size, mask=None, offset=0, null_count=None + self, + data: Buffer, + dtype: Dtype, + size: int = None, + mask: Buffer = None, + offset: int = 0, + null_count: int = None, ): """ Parameters @@ -46,7 +63,9 @@ def __init__( dtype = np.dtype(dtype) if data.size % dtype.itemsize: raise ValueError("Buffer size must be divisible by element size") - + if size is None: + size = data.size // dtype.itemsize + size = size - offset super().__init__( data, size=size, @@ -61,7 +80,7 @@ def __init__( self._time_unit, _ = np.datetime_data(self.dtype) - def __contains__(self, item): + def __contains__(self, item: DatetimeLikeScalar) -> bool: try: item = np.timedelta64(item, self._time_unit) except ValueError: @@ -71,7 +90,7 @@ def __contains__(self, item): return False return item.view("int64") in self.as_numerical - def to_arrow(self): + def to_arrow(self) -> pa.Array: mask = None if self.nullable: mask = pa.py_buffer(self.mask_array_view.copy_to_host()) @@ -84,7 +103,9 @@ def to_arrow(self): null_count=self.null_count, ) - def to_pandas(self, index=None, **kwargs): + def to_pandas( + self, index=None, nullable: bool = False, **kwargs + ) -> pd.Series: # Workaround until following issue is fixed: # https://issues.apache.org/jira/browse/ARROW-9772 @@ -98,8 +119,10 @@ def to_pandas(self, index=None, **kwargs): return pd_series - def _binary_op_floordiv(self, rhs): - lhs, rhs = self, rhs + def _binary_op_floordiv( + self, rhs: BinaryOperand + ) -> Tuple["column.ColumnBase", BinaryOperand, DtypeObj]: + lhs = self # type: column.ColumnBase if pd.api.types.is_timedelta64_dtype(rhs.dtype): common_dtype = determine_out_dtype(self.dtype, rhs.dtype) lhs = lhs.astype(common_dtype).astype("float64") @@ -122,7 +145,7 @@ def _binary_op_floordiv(self, rhs): return lhs, rhs, out_dtype - def _binary_op_mul(self, rhs): + def _binary_op_mul(self, rhs: BinaryOperand) -> DtypeObj: if rhs.dtype.kind in ("f", "i", "u"): out_dtype = self.dtype else: @@ -132,7 +155,7 @@ def _binary_op_mul(self, rhs): ) return out_dtype - def _binary_op_mod(self, rhs): + def _binary_op_mod(self, rhs: BinaryOperand) -> DtypeObj: if pd.api.types.is_timedelta64_dtype(rhs.dtype): out_dtype = determine_out_dtype(self.dtype, rhs.dtype) elif rhs.dtype.kind in ("f", "i", "u"): @@ -144,7 +167,7 @@ def _binary_op_mod(self, rhs): ) return out_dtype - def _binary_op_eq_ne(self, rhs): + def _binary_op_eq_ne(self, rhs: BinaryOperand) -> DtypeObj: if pd.api.types.is_timedelta64_dtype(rhs.dtype): out_dtype = np.bool else: @@ -154,7 +177,7 @@ def _binary_op_eq_ne(self, rhs): ) return out_dtype - def _binary_op_lt_gt_le_ge(self, rhs): + def _binary_op_lt_gt_le_ge(self, rhs: BinaryOperand) -> DtypeObj: if pd.api.types.is_timedelta64_dtype(rhs.dtype): return np.bool else: @@ -163,8 +186,10 @@ def _binary_op_lt_gt_le_ge(self, rhs): f" and {rhs.dtype}" ) - def _binary_op_truediv(self, rhs): - lhs, rhs = self, rhs + def 
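The new `size` computation in `TimeDeltaColumn.__init__` fills in the row count when the caller omits it: derive it from the buffer length, then discount the leading offset. Worked through with nanosecond timedeltas:

```python
import numpy as np

itemsize = np.dtype("timedelta64[ns]").itemsize  # 8 bytes per element
size = 32 // itemsize - 1  # 32-byte buffer, offset 1 -> 3 addressable rows
print(size)  # 3
```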
_binary_op_truediv( + self, rhs: BinaryOperand + ) -> Tuple["column.ColumnBase", BinaryOperand, DtypeObj]: + lhs = self # type: column.ColumnBase if pd.api.types.is_timedelta64_dtype(rhs.dtype): common_dtype = determine_out_dtype(self.dtype, rhs.dtype) lhs = lhs.astype(common_dtype).astype("float64") @@ -187,7 +212,9 @@ def _binary_op_truediv(self, rhs): return lhs, rhs, out_dtype - def binary_operator(self, op, rhs, reflect=False): + def binary_operator( + self, op: str, rhs: BinaryOperand, reflect: bool = False + ) -> "column.ColumnBase": lhs, rhs = self, rhs if op in ("eq", "ne"): @@ -199,14 +226,14 @@ def binary_operator(self, op, rhs, reflect=False): elif op == "mod": out_dtype = self._binary_op_mod(rhs) elif op == "truediv": - lhs, rhs, out_dtype = self._binary_op_truediv(rhs) + lhs, rhs, out_dtype = self._binary_op_truediv(rhs) # type: ignore elif op == "floordiv": - lhs, rhs, out_dtype = self._binary_op_floordiv(rhs) + lhs, rhs, out_dtype = self._binary_op_floordiv(rhs) # type: ignore op = "truediv" elif op == "add": - out_dtype = _timedelta_binary_op_add(lhs, rhs) + out_dtype = _timedelta_add_result_dtype(lhs, rhs) elif op == "sub": - out_dtype = _timedelta_binary_op_sub(lhs, rhs) + out_dtype = _timedelta_sub_result_dtype(lhs, rhs) else: raise TypeError( f"Series of dtype {self.dtype} cannot perform " @@ -214,10 +241,11 @@ def binary_operator(self, op, rhs, reflect=False): ) if reflect: - lhs, rhs = rhs, lhs + lhs, rhs = rhs, lhs # type: ignore + return binop(lhs, rhs, op=op, out_dtype=out_dtype) - def normalize_binop_value(self, other): + def normalize_binop_value(self, other) -> BinaryOperand: if isinstance(other, cudf.Scalar): return other @@ -247,30 +275,34 @@ def normalize_binop_value(self, other): raise TypeError(f"cannot normalize {type(other)}") @property - def as_numerical(self): - - return column.build_column( - data=self.base_data, - dtype=np.int64, - mask=self.base_mask, - offset=self.offset, - size=self.size, + def as_numerical(self) -> "cudf.core.column.NumericalColumn": + return cast( + "cudf.core.column.NumericalColumn", + column.build_column( + data=self.base_data, + dtype=np.int64, + mask=self.base_mask, + offset=self.offset, + size=self.size, + ), ) - def default_na_value(self): + def default_na_value(self) -> ScalarLike: """Returns the default NA value for this column """ return np.timedelta64("nat", self.time_unit) @property - def time_unit(self): + def time_unit(self) -> str: return self._time_unit - def fillna(self, fill_value=None, method=None): + def fillna( + self, fill_value: Any = None, method: str = None, dtype: Dtype = None + ) -> TimeDeltaColumn: if fill_value is not None: if cudf.utils.utils.isnat(fill_value): return _fillna_natwise(self) - col = self + col = self # type: column.ColumnBase if is_scalar(fill_value): if isinstance(fill_value, np.timedelta64): dtype = determine_out_dtype(self.dtype, fill_value.dtype) @@ -280,51 +312,61 @@ def fillna(self, fill_value=None, method=None): fill_value = cudf.Scalar(fill_value, dtype=dtype) else: fill_value = column.as_column(fill_value, nan_as_null=False) - - return ColumnBase.fillna(col, fill_value) + return cast(TimeDeltaColumn, ColumnBase.fillna(col, fill_value)) else: return super().fillna(method=method) - def as_numerical_column(self, dtype): - return self.as_numerical.astype(dtype) + def as_numerical_column( + self, dtype: Dtype + ) -> "cudf.core.column.NumericalColumn": + return cast( + "cudf.core.column.NumericalColumn", self.as_numerical.astype(dtype) + ) - def as_datetime_column(self, dtype, 
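The `# type: ignore` on `lhs, rhs = rhs, lhs` exists because mypy pins each name to the type of its first binding, so a deliberate type-changing swap under `reflect=True` is reported even though it is correct. In miniature:

```python
reflect = True
lhs, rhs = 1, "x"
if reflect:
    lhs, rhs = rhs, lhs  # type: ignore  # intentional type-changing swap
print(lhs, rhs)  # x 1
```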
**kwargs): + def as_datetime_column( + self, dtype: Dtype, **kwargs + ) -> "cudf.core.column.DatetimeColumn": raise TypeError( f"cannot astype a timedelta from [{self.dtype}] to [{dtype}]" ) - def as_string_column(self, dtype, **kwargs): - - if not kwargs.get("format"): - fmt = _dtype_to_format_conversion.get( + def as_string_column( + self, dtype: Dtype, format=None + ) -> "cudf.core.column.StringColumn": + if format is None: + format = _dtype_to_format_conversion.get( self.dtype.name, "%D days %H:%M:%S" ) - kwargs["format"] = fmt if len(self) > 0: return string._timedelta_to_str_typecast_functions[ np.dtype(self.dtype) - ](self, **kwargs) + ](self, format=format) else: - return column.column_empty(0, dtype="object", masked=False) + return cast( + "cudf.core.column.StringColumn", + column.column_empty(0, dtype="object", masked=False), + ) - def as_timedelta_column(self, dtype, **kwargs): + def as_timedelta_column(self, dtype: Dtype, **kwargs) -> TimeDeltaColumn: dtype = np.dtype(dtype) if dtype == self.dtype: return self return libcudf.unary.cast(self, dtype=dtype) - def mean(self, skipna=None, dtype=np.float64): + def mean(self, skipna=None, dtype: Dtype = np.float64) -> pd.Timedelta: return pd.Timedelta( self.as_numerical.mean(skipna=skipna, dtype=dtype), unit=self.time_unit, ) - def median(self, skipna=None): + def median(self, skipna: bool = None) -> pd.Timedelta: return pd.Timedelta( self.as_numerical.median(skipna=skipna), unit=self.time_unit ) - def quantile(self, q, interpolation, exact): + def quantile( + self, q: Union[float, Sequence[float]], interpolation: str, exact: bool + ) -> "column.ColumnBase": result = self.as_numerical.quantile( q=q, interpolation=interpolation, exact=exact ) @@ -332,7 +374,9 @@ def quantile(self, q, interpolation, exact): return pd.Timedelta(result, unit=self.time_unit) return result.astype(self.dtype) - def sum(self, skipna=None, dtype=None, min_count=0): + def sum( + self, skipna: bool = None, dtype: Dtype = None, min_count=0 + ) -> pd.Timedelta: if len(self) == 0: return pd.Timedelta(None, unit=self.time_unit) else: @@ -343,13 +387,15 @@ def sum(self, skipna=None, dtype=None, min_count=0): unit=self.time_unit, ) - def std(self, skipna=None, ddof=1, dtype=np.float64): + def std( + self, skipna: bool = None, ddof: int = 1, dtype: Dtype = np.float64 + ) -> pd.Timedelta: return pd.Timedelta( self.as_numerical.std(skipna=skipna, ddof=ddof, dtype=dtype), unit=self.time_unit, ) - def components(self, index=None): + def components(self, index=None) -> "cudf.DataFrame": """ Return a Dataframe of the components of the Timedeltas. @@ -443,7 +489,7 @@ def components(self, index=None): ) @property - def days(self): + def days(self) -> "cudf.core.column.NumericalColumn": """ Number of days for each element. @@ -456,7 +502,7 @@ def days(self): ) @property - def seconds(self): + def seconds(self) -> "cudf.core.column.NumericalColumn": """ Number of seconds (>= 0 and less than 1 day). @@ -479,7 +525,7 @@ def seconds(self): ) @property - def microseconds(self): + def microseconds(self) -> "cudf.core.column.NumericalColumn": """ Number of microseconds (>= 0 and less than 1 second). @@ -499,7 +545,7 @@ def microseconds(self): ) @property - def nanoseconds(self): + def nanoseconds(self) -> "cudf.core.column.NumericalColumn": """ Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. 
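The reduction annotations above surface in the public API: `mean` on a timedelta Series hands back a pandas `Timedelta` scalar. A quick sketch of the expected (not verified here) behaviour:

```python
import cudf

s = cudf.Series([1, 3], dtype="timedelta64[s]")
print(repr(s.mean()))  # expected: Timedelta('0 days 00:00:02')
```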
@@ -524,12 +570,17 @@ def nanoseconds(self): @annotate("BINARY_OP", color="orange", domain="cudf_python") -def binop(lhs, rhs, op, out_dtype): +def binop( + lhs: "column.ColumnBase", + rhs: "column.ColumnBase", + op: str, + out_dtype: DtypeObj, +) -> "cudf.core.column.ColumnBase": out = libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) return out -def determine_out_dtype(lhs_dtype, rhs_dtype): +def determine_out_dtype(lhs_dtype: Dtype, rhs_dtype: Dtype) -> Dtype: if np.can_cast(np.dtype(lhs_dtype), np.dtype(rhs_dtype)): return rhs_dtype elif np.can_cast(np.dtype(rhs_dtype), np.dtype(lhs_dtype)): @@ -538,7 +589,9 @@ def determine_out_dtype(lhs_dtype, rhs_dtype): raise TypeError(f"Cannot type-cast {lhs_dtype} and {rhs_dtype}") -def _timedelta_binary_op_add(lhs, rhs): +def _timedelta_add_result_dtype( + lhs: BinaryOperand, rhs: BinaryOperand +) -> Dtype: if pd.api.types.is_timedelta64_dtype(rhs.dtype): out_dtype = determine_out_dtype(lhs.dtype, rhs.dtype) elif pd.api.types.is_datetime64_dtype(rhs.dtype): @@ -557,7 +610,9 @@ def _timedelta_binary_op_add(lhs, rhs): return out_dtype -def _timedelta_binary_op_sub(lhs, rhs): +def _timedelta_sub_result_dtype( + lhs: BinaryOperand, rhs: BinaryOperand +) -> Dtype: if pd.api.types.is_timedelta64_dtype( lhs.dtype ) and pd.api.types.is_timedelta64_dtype(rhs.dtype): diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index c750cc92f30..f5823528d02 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -1,5 +1,11 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + +from __future__ import annotations + import itertools +from collections import OrderedDict from collections.abc import MutableMapping +from typing import TYPE_CHECKING, Any, Tuple, Union import pandas as pd @@ -11,9 +17,22 @@ to_nested_dict, ) +if TYPE_CHECKING: + from cudf.core.column import ColumnBase + class ColumnAccessor(MutableMapping): - def __init__(self, data=None, multiindex=False, level_names=None): + + _data: "OrderedDict[Any, ColumnBase]" + multiindex: bool + _level_names: Tuple[Any, ...] 
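The `if TYPE_CHECKING:` guard above imports `ColumnBase` for annotations only, keeping a runtime import cycle out of `column_accessor`; with `from __future__ import annotations`, every annotation is a lazily-evaluated string, so the guarded name never needs to exist at runtime. In miniature, with `decimal` standing in for the cyclic module:

```python
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:               # only the type checker takes this branch
    from decimal import Decimal

def scale(x: Decimal) -> Decimal:  # fine: annotations stay unevaluated
    return x

print(scale(2))  # 2 -- the runtime never touches the Decimal name
```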
+ + def __init__( + self, + data: Union[MutableMapping, ColumnAccessor] = None, + multiindex: bool = False, + level_names=None, + ): """ Parameters ---------- @@ -33,7 +52,7 @@ def __init__(self, data=None, multiindex=False, level_names=None): if isinstance(data, ColumnAccessor): multiindex = multiindex or data.multiindex level_names = level_names or data.level_names - self._data = data + self._data = data._data self.multiindex = multiindex self._level_names = level_names @@ -44,21 +63,21 @@ def __init__(self, data=None, multiindex=False, level_names=None): def __iter__(self): return self._data.__iter__() - def __getitem__(self, key): + def __getitem__(self, key: Any) -> ColumnBase: return self._data[key] - def __setitem__(self, key, value): + def __setitem__(self, key: Any, value: Any): self.set_by_label(key, value) self._clear_cache() - def __delitem__(self, key): + def __delitem__(self, key: Any): self._data.__delitem__(key) self._clear_cache() - def __len__(self): + def __len__(self) -> int: return len(self._data) - def __repr__(self): + def __repr__(self) -> str: data_repr = self._data.__repr__() multiindex_repr = self.multiindex.__repr__() level_names_repr = self.level_names.__repr__() @@ -70,14 +89,14 @@ def __repr__(self): ) @property - def level_names(self): + def level_names(self) -> Tuple[Any, ...]: if self._level_names is None or len(self._level_names) == 0: return tuple((None,) * max(1, self.nlevels)) else: return self._level_names @property - def nlevels(self): + def nlevels(self) -> int: if len(self._data) == 0: return 0 if not self.multiindex: @@ -86,28 +105,28 @@ def nlevels(self): return len(next(iter(self.keys()))) @property - def name(self): + def name(self) -> Any: if len(self._data) == 0: return None return self.level_names[-1] @property - def nrows(self): + def nrows(self) -> int: if len(self._data) == 0: return 0 else: return len(next(iter(self.values()))) @cached_property - def names(self): + def names(self) -> Tuple[Any, ...]: return tuple(self.keys()) @cached_property - def columns(self): + def columns(self) -> Tuple[ColumnBase, ...]: return tuple(self.values()) @cached_property - def _grouped_data(self): + def _grouped_data(self) -> MutableMapping: """ If self.multiindex is True, return the underlying mapping as a nested mapping. @@ -125,7 +144,7 @@ def _clear_cache(self): except AttributeError: pass - def to_pandas_index(self): + def to_pandas_index(self) -> pd.Index: """" Convert the keys of the ColumnAccessor to a Pandas Index object. """ @@ -142,7 +161,7 @@ def to_pandas_index(self): result = pd.Index(self.names, name=self.name, tupleize_cols=False) return result - def insert(self, name, value, loc=-1): + def insert(self, name: Any, value: Any, loc: int = -1): """ Insert column into the ColumnAccessor at the specified location. @@ -176,10 +195,10 @@ def insert(self, name, value, loc=-1): else: new_keys = self.names[:loc] + (name,) + self.names[loc:] new_values = self.columns[:loc] + (value,) + self.columns[loc:] - self._data = self._data.__class__(zip(new_keys, new_values),) + self._data = self._data.__class__(zip(new_keys, new_values)) self._clear_cache() - def copy(self, deep=False): + def copy(self, deep=False) -> ColumnAccessor: """ Make a copy of this ColumnAccessor. """ @@ -195,7 +214,7 @@ def copy(self, deep=False): level_names=self.level_names, ) - def select_by_label(self, key): + def select_by_label(self, key: Any) -> ColumnAccessor: """ Return a subset of this column accessor, composed of the keys specified by `key`. 
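`self._data = data._data` is the one behavioural fix in this file: constructing a ColumnAccessor from another accessor used to store the accessor object itself where a mapping of columns belonged. The shape of the bug, reduced to plain dicts with a hypothetical `Acc` class:

```python
class Acc:
    def __init__(self, data):
        if isinstance(data, Acc):
            data = data._data  # the fix; before, the Acc was stored as-is
        self._data = dict(data)

inner = Acc({"x": [1, 2]})
outer = Acc(inner)
print(outer._data)  # {'x': [1, 2]}, a mapping rather than a wrapper
```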
@@ -218,7 +237,7 @@ def select_by_label(self, key): return self._select_by_label_with_wildcard(key) return self._select_by_label_grouped(key) - def select_by_index(self, index): + def select_by_index(self, index: Any) -> ColumnAccessor: """ Return a ColumnAccessor composed of the columns specified by index. @@ -243,7 +262,7 @@ def select_by_index(self, index): data, multiindex=self.multiindex, level_names=self.level_names, ) - def set_by_label(self, key, value): + def set_by_label(self, key: Any, value: Any): """ Add (or modify) column by name. @@ -256,14 +275,14 @@ def set_by_label(self, key, value): self._data[key] = value self._clear_cache() - def _select_by_label_list_like(self, key): + def _select_by_label_list_like(self, key: Any) -> ColumnAccessor: return self.__class__( to_flat_dict({k: self._grouped_data[k] for k in key}), multiindex=self.multiindex, level_names=self.level_names, ) - def _select_by_label_grouped(self, key): + def _select_by_label_grouped(self, key: Any) -> ColumnAccessor: result = self._grouped_data[key] if isinstance(result, cudf.core.column.ColumnBase): return self.__class__({key: result}) @@ -277,7 +296,7 @@ def _select_by_label_grouped(self, key): level_names=self.level_names[len(key) :], ) - def _select_by_label_slice(self, key): + def _select_by_label_slice(self, key: slice) -> ColumnAccessor: start, stop = key.start, key.stop if key.step is not None: raise TypeError("Label slicing with step is not supported") @@ -303,7 +322,7 @@ def _select_by_label_slice(self, key): level_names=self.level_names, ) - def _select_by_label_with_wildcard(self, key): + def _select_by_label_with_wildcard(self, key: Any) -> ColumnAccessor: key = self._pad_key(key, slice(None)) return self.__class__( {k: self._data[k] for k in self._data if _compare_keys(k, key)}, @@ -311,7 +330,7 @@ def _select_by_label_with_wildcard(self, key): level_names=self.level_names, ) - def _pad_key(self, key, pad_value=""): + def _pad_key(self, key: Any, pad_value="") -> Any: """ Pad the provided key to a length equal to the number of levels. @@ -323,7 +342,7 @@ def _pad_key(self, key, pad_value=""): return key + (pad_value,) * (self.nlevels - len(key)) -def _compare_keys(target, key): +def _compare_keys(target: Any, key: Any) -> bool: """ Compare `key` to `target`. 
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 6523b08cb27..e5626190098 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -9,6 +9,7 @@ import warnings from collections import OrderedDict, defaultdict from collections.abc import Iterable, Mapping, Sequence +from typing import Any, Set import cupy import numpy as np @@ -2364,7 +2365,7 @@ def iteritems(self): for k in self: yield (k, self[k]) - @property + @property # type: ignore @annotate("DATAFRAME_LOC", color="blue", domain="cudf_python") def loc(self): """ @@ -2535,14 +2536,14 @@ def at(self): """ return self.loc - @property + @property # type: ignore @annotate("DATAFRAME_COLUMNS_GETTER", color="yellow", domain="cudf_python") def columns(self): """Returns a tuple of columns """ return self._data.to_pandas_index() - @columns.setter + @columns.setter # type: ignore @annotate("DATAFRAME_COLUMNS_SETTER", color="yellow", domain="cudf_python") def columns(self, columns): if isinstance(columns, (cudf.MultiIndex, cudf.Index)): @@ -4229,7 +4230,6 @@ def join( ) return df - @copy_docstring(DataFrameGroupBy.__init__) def groupby( self, by=None, @@ -4274,7 +4274,6 @@ def groupby( sort=sort, ) - @copy_docstring(Rolling) def rolling( self, window, min_periods=None, center=False, axis=0, win_type=None ): @@ -7272,7 +7271,7 @@ def equals(self, other): return False return super().equals(other) - _accessors = set() + _accessors = set() # type: Set[Any] def from_pandas(obj, nan_as_null=None): diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 1f23fcd50f8..b89b3ddb2be 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -2,6 +2,7 @@ import decimal import pickle +from typing import Any import numpy as np import pandas as pd @@ -9,10 +10,11 @@ from pandas.api.extensions import ExtensionDtype import cudf +from cudf._typing import Dtype class CategoricalDtype(ExtensionDtype): - def __init__(self, categories=None, ordered=None): + def __init__(self, categories=None, ordered: bool = None) -> None: """ dtype similar to pd.CategoricalDtype with the categories stored on the GPU. 
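`@property  # type: ignore` recurs in the dataframe hunks because mypy cannot type a property whose getter is wrapped by an untyped decorator (`annotate` here); the ignore silences that one false positive without touching runtime behaviour. A self-contained reduction with a hypothetical pass-through standing in for `nvtx.annotate`:

```python
import functools

def annotate(**_kwargs):          # untyped stand-in for nvtx.annotate
    def deco(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kw):
            return fn(*args, **kw)
        return wrapper
    return deco

class DF:
    @property  # type: ignore     # mypy flags the untyped decorator below
    @annotate(color="blue")
    def loc(self):
        return "loc-indexer"

print(DF().loc)  # loc-indexer
```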
@@ -21,7 +23,7 @@ def __init__(self, categories=None, ordered=None): self.ordered = ordered @property - def categories(self): + def categories(self) -> "cudf.core.index.Index": if self._categories is None: return cudf.core.index.as_index( cudf.core.column.column_empty(0, dtype="object", masked=False) @@ -41,23 +43,23 @@ def str(self): return "|O08" @classmethod - def from_pandas(cls, dtype): + def from_pandas(cls, dtype: pd.CategoricalDtype) -> "CategoricalDtype": return CategoricalDtype( categories=dtype.categories, ordered=dtype.ordered ) - def to_pandas(self): + def to_pandas(self) -> pd.CategoricalDtype: if self.categories is None: categories = None else: categories = self.categories.to_pandas() return pd.CategoricalDtype(categories=categories, ordered=self.ordered) - def _init_categories(self, categories): + def _init_categories(self, categories: Any): if categories is None: return categories if len(categories) == 0: - dtype = "object" + dtype = "object" # type: Any else: dtype = None @@ -68,7 +70,7 @@ def _init_categories(self, categories): else: return column - def __eq__(self, other): + def __eq__(self, other: Dtype) -> bool: if isinstance(other, str): return other == self.name elif other is self: @@ -111,10 +113,10 @@ def deserialize(cls, header, frames): class ListDtype(ExtensionDtype): + _typ: pa.ListType + name: str = "list" - name = "list" - - def __init__(self, element_type): + def __init__(self, element_type: Any) -> None: if isinstance(element_type, ListDtype): self._typ = pa.list_(element_type._typ) else: @@ -124,7 +126,7 @@ def __init__(self, element_type): self._typ = pa.list_(element_type) @property - def element_type(self): + def element_type(self) -> Dtype: if isinstance(self._typ.value_type, pa.ListType): return ListDtype.from_arrow(self._typ.value_type) else: diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index e60c8c52944..3d12ac2e6cc 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -5,6 +5,7 @@ import operator import warnings from collections import OrderedDict, abc as abc +from typing import overload import cupy import numpy as np @@ -12,6 +13,7 @@ import pyarrow as pa from nvtx import annotate from pandas.api.types import is_dict_like, is_dtype_equal +from typing_extensions import Literal import cudf from cudf import _lib as libcudf @@ -39,9 +41,23 @@ class Frame(libcudf.table.Table): """ @classmethod - def _from_table(cls, table): + def _from_table(cls, table: "Frame"): return cls(table._data, index=table._index) + @overload + def _mimic_inplace(self, result: "Frame") -> "Frame": + ... + + @overload + def _mimic_inplace(self, result: "Frame", inplace: Literal[True]): + ... + + @overload + def _mimic_inplace( + self, result: "Frame", inplace: Literal[False] + ) -> "Frame": + ... + def _mimic_inplace(self, result, inplace=False): if inplace: for col in self._data: diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 219d355d3cc..e3899a403f1 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1,8 +1,9 @@ # Copyright (c) 2018-2020, NVIDIA CORPORATION. 
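The `Literal` overloads above give `_mimic_inplace` a precise contract: `inplace=True` yields `None`, anything else yields a `Frame`. The pattern in isolation, using the same `typing_extensions` dependency this patch adds:

```python
from typing import overload

from typing_extensions import Literal

@overload
def mimic(result: int, inplace: Literal[True]) -> None: ...
@overload
def mimic(result: int, inplace: Literal[False] = ...) -> int: ...

def mimic(result, inplace=False):
    # single runtime implementation behind the two checker-visible shapes
    return None if inplace else result

print(mimic(7))        # 7
print(mimic(7, True))  # None
```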
-from __future__ import division, print_function +from __future__ import annotations, division, print_function import pickle from numbers import Number +from typing import Any, Dict, Set, Type import cupy import numpy as np @@ -132,6 +133,13 @@ def __init__( """ pass + @cached_property + def _values(self) -> ColumnBase: + raise NotImplementedError + + def __getitem__(self, key): + raise NotImplementedError() + def drop_duplicates(self, keep="first"): """ Return Index with duplicate values removed @@ -1485,7 +1493,11 @@ def _from_table(cls, table): else: return as_index(table) - _accessors = set() + _accessors = set() # type: Set[Any] + + @property + def _constructor_expanddim(self): + return cudf.MultiIndex class RangeIndex(Index): @@ -1773,7 +1785,7 @@ def find_label_range(self, first=None, last=None): return begin, end - @copy_docstring(_to_frame) + @copy_docstring(_to_frame) # type: ignore def to_frame(self, index=True, name=None): return _to_frame(self, index, name) @@ -2028,7 +2040,7 @@ def __getitem__(self, index): else: return res - @copy_docstring(_to_frame) + @copy_docstring(_to_frame) # type: ignore def to_frame(self, index=True, name=None): return _to_frame(self, index, name) @@ -2705,15 +2717,11 @@ def __repr__(self): + ")" ) - @copy_docstring(StringMethods.__init__) + @copy_docstring(StringMethods.__init__) # type: ignore @property def str(self): return StringMethods(column=self._values, parent=self) - @property - def _constructor_expanddim(self): - return cudf.MultiIndex - def _clean_nulls_from_index(self): """ Convert all na values(if any) in Index object @@ -2725,7 +2733,7 @@ def _clean_nulls_from_index(self): return self -def as_index(arbitrary, **kwargs): +def as_index(arbitrary, **kwargs) -> Index: """Create an Index from an arbitrary object Currently supported inputs are: @@ -2794,7 +2802,7 @@ def as_index(arbitrary, **kwargs): np.uint64: UInt64Index, np.float32: Float32Index, np.float64: Float64Index, -} +} # type: Dict[Any, Type[Index]] _index_to_dtype = { Int8Index: np.int8, diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index 3872e296ed5..4ea32c77724 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -329,6 +329,9 @@ def _dispatch_scalar_unaop(self, op): return np.ceil(self.value) return getattr(self.value, op)() + def astype(self, dtype): + return Scalar(self.device_value, dtype) + class _NAType(object): def __init__(self): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 76d342eab2c..dfc687eb76d 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5,6 +5,7 @@ from collections import abc as abc from numbers import Number from shutil import get_terminal_size +from typing import Any, Set from uuid import uuid4 import cupy @@ -1707,17 +1708,17 @@ def __neg__(self): """ return self.__mul__(-1) - @copy_docstring(CategoricalAccessor.__init__) + @copy_docstring(CategoricalAccessor.__init__) # type: ignore @property def cat(self): return CategoricalAccessor(column=self._column, parent=self) - @copy_docstring(StringMethods.__init__) + @copy_docstring(StringMethods.__init__) # type: ignore @property def str(self): return StringMethods(column=self._column, parent=self) - @copy_docstring(ListMethods.__init__) + @copy_docstring(ListMethods.__init__) # type: ignore @property def list(self): return ListMethods(column=self._column, parent=self) @@ -4444,7 +4445,7 @@ def keys(self): """ return self.index - _accessors = set() + _accessors = set() # 
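`Scalar.astype` is the lone functional addition in this stretch: it re-wraps the existing device value under a new dtype. A quick usage sketch:

```python
import cudf

s = cudf.Scalar(1)                # defaults to int64
print(s.astype("float64").dtype)  # float64
```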
type: Set[Any] truediv_int_dtype_corrections = { diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py index d6e0fedf8e0..7c8455b6575 100644 --- a/python/cudf/cudf/io/orc.py +++ b/python/cudf/cudf/io/orc.py @@ -7,9 +7,11 @@ from pyarrow import orc as orc import cudf -from cudf import _lib as libcudf +from cudf._lib import orc as liborc from cudf.utils import ioutils -from cudf.utils.metadata import orc_column_statistics_pb2 as cs_pb2 +from cudf.utils.metadata import ( # type: ignore + orc_column_statistics_pb2 as cs_pb2, +) def _make_empty_df(filepath_or_buffer, columns): @@ -127,7 +129,7 @@ def read_orc_statistics( column_names, raw_file_statistics, raw_stripes_statistics, - ) = libcudf.orc.read_raw_orc_statistics(filepath_or_buffer) + ) = liborc.read_raw_orc_statistics(filepath_or_buffer) # Parse column names column_names = [ @@ -257,7 +259,7 @@ def read_orc( if engine == "cudf": df = DataFrame._from_table( - libcudf.orc.read_orc( + liborc.read_orc( filepath_or_buffer, columns, stripes, @@ -324,9 +326,9 @@ def to_orc(df, fname, compression=None, enable_statistics=True, **kwargs): if ioutils.is_fsspec_open_file(path_or_buf): with path_or_buf as file_obj: file_obj = ioutils.get_IOBase_writer(file_obj) - libcudf.orc.write_orc(df, file_obj, compression, enable_statistics) + liborc.write_orc(df, file_obj, compression, enable_statistics) else: - libcudf.orc.write_orc(df, path_or_buf, compression, enable_statistics) + liborc.write_orc(df, path_or_buf, compression, enable_statistics) -ORCWriter = libcudf.orc.ORCWriter +ORCWriter = liborc.ORCWriter diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index bf8898825c0..2048e574acc 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -1,5 +1,7 @@ # Copyright (c) 2020, NVIDIA CORPORATION. +from __future__ import annotations + from typing import Union import numpy as np diff --git a/python/cudf/cudf/tests/test_column_accessor.py b/python/cudf/cudf/tests/test_column_accessor.py index 62427cc593e..964e79a57b0 100644 --- a/python/cudf/cudf/tests/test_column_accessor.py +++ b/python/cudf/cudf/tests/test_column_accessor.py @@ -1,5 +1,6 @@ # Copyright (c) 2020, NVIDIA CORPORATION. + import pandas as pd import pytest diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index d590a3ddb52..85e61acd8e6 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -1,8 +1,7 @@ # Copyright (c) 2019-2021, NVIDIA CORPORATION. 
-import os import datetime -import math +import os from io import BytesIO import numpy as np @@ -12,9 +11,8 @@ import pytest import cudf -from cudf.tests.utils import assert_eq, supported_numpy_dtypes, gen_rand_series - from cudf.io.orc import ORCWriter +from cudf.tests.utils import assert_eq, gen_rand_series, supported_numpy_dtypes @pytest.fixture(scope="module") @@ -565,7 +563,7 @@ def normalized_equals(value1, value2): # Compare integers with floats now if isinstance(value1, float) or isinstance(value2, float): - return math.isclose(value1, value2) + return np.isclose(value1, value2) return value1 == value2 diff --git a/python/cudf/cudf/tests/test_serialize.py b/python/cudf/cudf/tests/test_serialize.py index c95f408f309..656b66bf793 100644 --- a/python/cudf/cudf/tests/test_serialize.py +++ b/python/cudf/cudf/tests/test_serialize.py @@ -150,7 +150,7 @@ def test_serialize_groupby_df(): outgb = gb.deserialize(*gb.serialize()) expect = gb.mean() got = outgb.mean() - assert_eq(got, expect) + assert_eq(got.sort_index(), expect.sort_index()) def test_serialize_groupby_external(): @@ -160,7 +160,7 @@ def test_serialize_groupby_external(): outgb = gb.deserialize(*gb.serialize()) expect = gb.mean() got = outgb.mean() - assert_eq(got, expect) + assert_eq(got.sort_index(), expect.sort_index()) def test_serialize_groupby_level(): @@ -171,7 +171,7 @@ def test_serialize_groupby_level(): expect = gb.mean() outgb = gb.deserialize(*gb.serialize()) got = outgb.mean() - assert_eq(expect, got) + assert_eq(expect.sort_index(), got.sort_index()) def test_serialize_groupby_sr(): @@ -180,7 +180,7 @@ def test_serialize_groupby_sr(): outgb = gb.deserialize(*gb.serialize()) got = gb.mean() expect = outgb.mean() - assert_eq(got, expect) + assert_eq(got.sort_index(), expect.sort_index()) def test_serialize_datetime(): diff --git a/python/cudf/cudf/utils/applyutils.py b/python/cudf/cudf/utils/applyutils.py index cc580bedc08..1e8beb18234 100644 --- a/python/cudf/cudf/utils/applyutils.py +++ b/python/cudf/cudf/utils/applyutils.py @@ -1,5 +1,7 @@ # Copyright (c) 2018, NVIDIA CORPORATION. + import functools +from typing import Any, Dict from numba import cuda @@ -332,7 +334,7 @@ def chunk_wise_kernel(nrows, chunks, {args}): return kernel -_cache = dict() # WeakKeyDictionary() +_cache = dict() # type: Dict[Any, Any] @functools.wraps(_make_row_wise_kernel) diff --git a/python/cudf/cudf/utils/queryutils.py b/python/cudf/cudf/utils/queryutils.py index 82a51b3f9b4..c71a6dbccb1 100644 --- a/python/cudf/cudf/utils/queryutils.py +++ b/python/cudf/cudf/utils/queryutils.py @@ -2,6 +2,7 @@ import ast import datetime as dt +from typing import Any, Dict import numpy as np import six @@ -101,7 +102,7 @@ def _check_error(tree): raise QuerySyntaxError("too many expressions") -_cache = {} +_cache = {} # type: Dict[Any, Any] def query_compile(expr): diff --git a/python/cudf/setup.cfg b/python/cudf/setup.cfg index 0b2711155d7..3067d2daafd 100644 --- a/python/cudf/setup.cfg +++ b/python/cudf/setup.cfg @@ -46,6 +46,21 @@ skip= dist __init__.py +[mypy] +ignore_missing_imports = True + +[mypy-cudf._lib.*] +ignore_errors = True + +[mypy-cudf._version] +ignore_errors = True + +[mypy-cudf.utils.metadata.orc_column_statistics_pb2] +ignore_errors = True + +[mypy-cudf.tests.*] +ignore_errors = True + [tool:pytest] addopts = --benchmark-warmup=off @@ -60,4 +75,3 @@ python_files = python_functions = bench_* test_* -
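One test change above deserves a note: swapping `math.isclose` for `np.isclose` in `normalized_equals` is not cosmetic. `np.isclose` applies an absolute tolerance (`atol=1e-08`) on top of a looser relative one and accepts NumPy scalars natively, so statistics that round-tripped through float32 stop failing the comparison. The difference in two lines:

```python
import math

import numpy as np

a = float(np.float32(0.1)) * 3  # 0.30000000447..., a float32 round-trip
b = 0.3
print(math.isclose(a, b))  # False: rel_tol=1e-09 is too strict here
print(np.isclose(a, b))    # True: rtol=1e-05 plus atol=1e-08 absorbs it
```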