Skip to content

Commit

Permalink
REF: names no longer needed in _form_blocks (#43114)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Aug 20, 2021
1 parent f58ee5d commit f9cf479
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 43 deletions.
6 changes: 4 additions & 2 deletions pandas/core/internals/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from pandas.core.internals.api import make_block # pseudo-public version
from pandas.core.internals.api import (
create_block_manager_from_arrays,
make_block,
)
from pandas.core.internals.array_manager import (
ArrayManager,
SingleArrayManager,
Expand All @@ -18,7 +21,6 @@
from pandas.core.internals.managers import (
BlockManager,
SingleBlockManager,
create_block_manager_from_arrays,
create_block_manager_from_blocks,
)

Expand Down
125 changes: 124 additions & 1 deletion pandas/core/internals/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,16 @@
"""
from __future__ import annotations

from collections import defaultdict
from typing import DefaultDict

import numpy as np

from pandas._libs.internals import BlockPlacement
from pandas._typing import Dtype
from pandas._typing import (
ArrayLike,
Dtype,
)

from pandas.core.dtypes.common import (
is_datetime64tz_dtype,
Expand All @@ -20,14 +26,24 @@

from pandas.core.arrays import DatetimeArray
from pandas.core.construction import extract_array
from pandas.core.indexes.api import Index
from pandas.core.internals.blocks import (
Block,
CategoricalBlock,
DatetimeTZBlock,
ExtensionBlock,
check_ndim,
ensure_block_shape,
extract_pandas_array,
get_block_type,
maybe_coerce_values,
new_block,
)
from pandas.core.internals.managers import (
BlockManager,
construction_error,
multi_blockify,
simple_blockify,
)


Expand Down Expand Up @@ -86,3 +102,110 @@ def maybe_infer_ndim(values, placement: BlockPlacement, ndim: int | None) -> int
else:
ndim = values.ndim
return ndim


def create_block_manager_from_arrays(
    arrays,
    names: Index,
    axes: list[Index],
    consolidate: bool = True,
) -> BlockManager:
    """
    Build a BlockManager out of a collection of arrays.

    Parameters
    ----------
    arrays : iterable of array-likes
        Each entry is unwrapped with ``extract_array(..., extract_numpy=True)``
        before any block is formed.
    names : Index
        Labels that ``_form_blocks`` matches against ``axes[0]`` to locate
        each array.
    axes : list[Index]
        Axes handed to the BlockManager constructor.
    consolidate : bool, default True
        Forwarded to ``_form_blocks``; when True the resulting manager is
        additionally consolidated in place before it is returned.

    Returns
    -------
    BlockManager

    Notes
    -----
    A ValueError raised while forming the blocks or constructing the manager
    is replaced by whatever ``construction_error`` returns for the number of
    arrays, the shape of the first array, ``axes`` and the original error.
    """
    # Assertions disabled for performance
    # assert isinstance(names, Index)
    # assert isinstance(axes, list)
    # assert all(isinstance(x, Index) for x in axes)

    unwrapped = [extract_array(arr, extract_numpy=True) for arr in arrays]

    try:
        manager = BlockManager(
            _form_blocks(unwrapped, names, axes, consolidate), axes
        )
    except ValueError as err:
        raise construction_error(len(unwrapped), unwrapped[0].shape, axes, err)

    if consolidate:
        manager._consolidate_inplace()
    return manager


def _form_blocks(
    arrays: list[ArrayLike], names: Index, axes: list[Index], consolidate: bool
) -> list[Block]:
    """
    Bucket ``arrays`` by block type and turn each bucket into Blocks.

    Every position of ``axes[0]`` is looked up in ``names``.  Positions that
    are found contribute ``(position, array)`` to the bucket named after the
    class returned by ``get_block_type``; positions that are not found are
    collected and end up in a single object-dtype block filled with NaN.

    Parameters
    ----------
    arrays : list of ArrayLike
        Arrays ordered like ``names``.
    names : Index
        Label of each entry of ``arrays``.
    axes : list[Index]
        ``axes[0]`` gives the item labels; the lengths of ``axes[1:]`` give
        the trailing dimensions of the NaN filler block.
    consolidate : bool
        Passed through to ``multi_blockify`` / ``simple_blockify``.

    Returns
    -------
    list[Block]
        Blocks in the order: numeric, datetime-like, datetime-tz, object,
        categorical, extension, and finally the NaN filler (if needed).
    """
    # put "leftover" items in float bucket, where else?
    # generalize?
    items_axis = axes[0]
    if names.equals(items_axis):
        # names already line up one-to-one with the items axis
        indexer = np.arange(len(names))
    else:
        # Assertion disabled for performance
        # assert names.intersection(axes[0]).is_unique
        indexer = names.get_indexer_for(items_axis)

    by_type: DefaultDict[str, list] = defaultdict(list)
    missing_locs = []
    for loc, arr_pos in enumerate(indexer):
        if arr_pos != -1:
            arr = arrays[arr_pos]
            by_type[get_block_type(arr).__name__].append((loc, arr))
        else:
            # no array supplied for this item label
            missing_locs.append(loc)

    result: list[Block] = []

    numeric_items = by_type["NumericBlock"]
    if numeric_items:
        result.extend(multi_blockify(numeric_items, consolidate=consolidate))

    dtlike_items = by_type["DatetimeLikeBlock"]
    if dtlike_items:
        result.extend(multi_blockify(dtlike_items, consolidate=consolidate))

    # tz-aware datetimes: one 2D-shaped block per column
    for loc, arr in by_type["DatetimeTZBlock"]:
        tz_values = ensure_block_shape(extract_array(arr), 2)
        result.append(
            DatetimeTZBlock(tz_values, placement=BlockPlacement(loc), ndim=2)
        )

    object_items = by_type["ObjectBlock"]
    if object_items:
        result.extend(
            simple_blockify(object_items, np.object_, consolidate=consolidate)
        )

    # categoricals and other extension arrays: one block per column
    for loc, arr in by_type["CategoricalBlock"]:
        result.append(CategoricalBlock(arr, placement=BlockPlacement(loc), ndim=2))

    for loc, arr in by_type["ExtensionBlock"]:
        result.append(ExtensionBlock(arr, placement=BlockPlacement(loc), ndim=2))

    if missing_locs:
        na_shape = (len(missing_locs), *(len(ax) for ax in axes[1:]))

        # empty items -> dtype object
        na_values = np.empty(na_shape, dtype=object)
        na_values.fill(np.nan)

        result.append(new_block(na_values, placement=missing_locs, ndim=2))

    return result
6 changes: 3 additions & 3 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@
from pandas.core.internals.managers import (
BlockManager,
SingleBlockManager,
create_block_manager_from_arrays,
create_block_manager_from_blocks,
create_block_manager_from_column_arrays,
)

if TYPE_CHECKING:
Expand Down Expand Up @@ -131,8 +131,8 @@ def arrays_to_mgr(
axes = [columns, index]

if typ == "block":
return create_block_manager_from_arrays(
arrays, columns, axes, consolidate=consolidate
return create_block_manager_from_column_arrays(
arrays, axes, consolidate=consolidate
)
elif typ == "array":
if len(columns) != len(arrays):
Expand Down
47 changes: 10 additions & 37 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1808,21 +1808,19 @@ def create_block_manager_from_blocks(
return mgr


def create_block_manager_from_arrays(
def create_block_manager_from_column_arrays(
arrays,
names: Index,
axes: list[Index],
consolidate: bool = True,
) -> BlockManager:
# Assertions disabled for performance
# assert isinstance(names, Index)
# assert isinstance(axes, list)
# assert all(isinstance(x, Index) for x in axes)

arrays = [extract_array(x, extract_numpy=True) for x in arrays]

try:
blocks = _form_blocks(arrays, names, axes, consolidate)
blocks = _form_blocks(arrays, consolidate)
mgr = BlockManager(blocks, axes)
except ValueError as e:
raise construction_error(len(arrays), arrays[0].shape, axes, e)
Expand Down Expand Up @@ -1860,26 +1858,11 @@ def construction_error(
# -----------------------------------------------------------------------


def _form_blocks(
arrays: list[ArrayLike], names: Index, axes: list[Index], consolidate: bool
) -> list[Block]:
# put "leftover" items in float bucket, where else?
# generalize?
items_dict: DefaultDict[str, list] = defaultdict(list)
extra_locs = []
def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]:

names_idx = names
if names_idx.equals(axes[0]):
names_indexer = np.arange(len(names_idx))
else:
# Assertion disabled for performance
# assert names_idx.intersection(axes[0]).is_unique
names_indexer = names_idx.get_indexer_for(axes[0])
items_dict: DefaultDict[str, list] = defaultdict(list)

for i, name_idx in enumerate(names_indexer):
if name_idx == -1:
extra_locs.append(i)
continue
for i, name_idx in enumerate(range(len(arrays))):

v = arrays[name_idx]

Expand All @@ -1888,13 +1871,13 @@ def _form_blocks(

blocks: list[Block] = []
if len(items_dict["NumericBlock"]):
numeric_blocks = _multi_blockify(
numeric_blocks = multi_blockify(
items_dict["NumericBlock"], consolidate=consolidate
)
blocks.extend(numeric_blocks)

if len(items_dict["DatetimeLikeBlock"]):
dtlike_blocks = _multi_blockify(
dtlike_blocks = multi_blockify(
items_dict["DatetimeLikeBlock"], consolidate=consolidate
)
blocks.extend(dtlike_blocks)
Expand All @@ -1911,7 +1894,7 @@ def _form_blocks(
blocks.extend(dttz_blocks)

if len(items_dict["ObjectBlock"]) > 0:
object_blocks = _simple_blockify(
object_blocks = simple_blockify(
items_dict["ObjectBlock"], np.object_, consolidate=consolidate
)
blocks.extend(object_blocks)
Expand All @@ -1931,20 +1914,10 @@ def _form_blocks(

blocks.extend(external_blocks)

if len(extra_locs):
shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:])

# empty items -> dtype object
block_values = np.empty(shape, dtype=object)
block_values.fill(np.nan)

na_block = new_block(block_values, placement=extra_locs, ndim=2)
blocks.append(na_block)

return blocks


def _simple_blockify(tuples, dtype, consolidate: bool) -> list[Block]:
def simple_blockify(tuples, dtype, consolidate: bool) -> list[Block]:
"""
return a single array of a block that has a single dtype; if dtype is
not None, coerce to this dtype
Expand All @@ -1962,7 +1935,7 @@ def _simple_blockify(tuples, dtype, consolidate: bool) -> list[Block]:
return [block]


def _multi_blockify(tuples, dtype: DtypeObj | None = None, consolidate: bool = True):
def multi_blockify(tuples, dtype: DtypeObj | None = None, consolidate: bool = True):
"""return an array of blocks that potentially have different dtypes"""

if not consolidate:
Expand Down

0 comments on commit f9cf479

Please sign in to comment.