Skip to content

Commit

Permalink
c
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion committed Jun 20, 2024
1 parent c11bbd1 commit f8205a5
Showing 1 changed file with 186 additions and 188 deletions.
374 changes: 186 additions & 188 deletions py-polars/polars/functions/col.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,212 +67,210 @@ def _create_col(
raise TypeError(msg)


"""
Create Polars column expressions.
Notes
-----
An instance of this class is exported under the name `col`. It can be used as
though it were a function by calling, for example, `pl.col("foo")`.
See the :func:`__call__` method for further documentation.
This helper class enables an alternative syntax for creating a column expression
through attribute lookup. For example `col.foo` creates an expression equal to
`col("foo")`.
See the :func:`__getattr__` method for further documentation.
The function call syntax is considered the idiomatic way of constructing a column
expression. The alternative attribute syntax can be useful for quick prototyping as
it can save some keystrokes, but has drawbacks in both expressiveness and
readability.
Examples
--------
>>> from polars import col
>>> df = pl.DataFrame(
... {
... "foo": [1, 2],
... "bar": [3, 4],
... }
... )
Create a new column expression using the standard syntax:
>>> df.with_columns(baz=(col("foo") * col("bar")) / 2)
shape: (2, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ baz │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ f64 │
╞═════╪═════╪═════╡
│ 1 ┆ 3 ┆ 1.5 │
│ 2 ┆ 4 ┆ 4.0 │
└─────┴─────┴─────┘
Use attribute lookup to create a new column expression:
>>> df.with_columns(baz=(col.foo + col.bar))
shape: (2, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ baz │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 1 ┆ 3 ┆ 4 │
│ 2 ┆ 4 ┆ 6 │
└─────┴─────┴─────┘
"""


class Col:
"""
Create one or more column expressions representing column(s) in a DataFrame.
Parameters
----------
name
The name or datatype of the column(s) to represent.
Accepts regular expression input.
Regular expressions should start with `^` and end with `$`.
*more_names
Additional names or datatypes of columns to represent,
specified as positional arguments.
See Also
--------
first
last
nth
Create Polars column expressions.
Notes
-----
An instance of this class is exported under the name `col`. It can be used as
though it were a function by calling, for example, `pl.col("foo")`.
See the :func:`__call__` method for further documentation.
This helper class enables an alternative syntax for creating a column expression
through attribute lookup. For example `col.foo` creates an expression equal to
`col("foo")`.
See the :func:`__getattr__` method for further documentation.
The function call syntax is considered the idiomatic way of constructing a column
expression. The alternative attribute syntax can be useful for quick prototyping as
it can save some keystrokes, but has drawbacks in both expressiveness and
readability.
Examples
--------
Pass a single column name to represent that column.
>>> from polars import col
>>> df = pl.DataFrame(
... {
... "ham": [1, 2],
... "hamburger": [11, 22],
... "foo": [2, 1],
... "bar": ["a", "b"],
... "foo": [1, 2],
... "bar": [3, 4],
... }
... )
>>> df.select(pl.col("foo"))
shape: (2, 1)
┌─────┐
│ foo │
│ --- │
│ i64 │
╞═════╡
│ 2 │
│ 1 │
└─────┘
Use dot syntax to save keystrokes for quick prototyping.
>>> from polars import col as c
>>> df.select(c.foo + c.ham)
shape: (2, 1)
┌─────┐
│ foo │
│ --- │
│ i64 │
╞═════╡
│ 3 │
│ 3 │
└─────┘
Use the wildcard `*` to represent all columns.
>>> df.select(pl.col("*"))
shape: (2, 4)
┌─────┬───────────┬─────┬─────┐
│ ham ┆ hamburger ┆ foo ┆ bar │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 ┆ str │
╞═════╪═══════════╪═════╪═════╡
│ 1 ┆ 11 ┆ 2 ┆ a │
│ 2 ┆ 22 ┆ 1 ┆ b │
└─────┴───────────┴─────┴─────┘
>>> df.select(pl.col("*").exclude("ham"))
Create a new column expression using the standard syntax:
>>> df.with_columns(baz=(col("foo") * col("bar")) / 2)
shape: (2, 3)
┌───────────┬─────┬─────┐
│ hamburger ┆ foo ┆ bar │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str │
╞═══════════╪═════╪═════╡
│ 11 ┆ 2 ┆ a │
│ 22 ┆ 1 ┆ b │
└───────────┴─────┴─────┘
Regular expression input is supported.
>>> df.select(pl.col("^ham.*$"))
shape: (2, 2)
┌─────┬───────────┐
│ ham ┆ hamburger │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═══════════╡
│ 1 ┆ 11 │
│ 2 ┆ 22 │
└─────┴───────────┘
Multiple columns can be represented by passing a list of names.
>>> df.select(pl.col(["hamburger", "foo"]))
shape: (2, 2)
┌───────────┬─────┐
│ hamburger ┆ foo │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═══════════╪═════╡
│ 11 ┆ 2 │
│ 22 ┆ 1 │
└───────────┴─────┘
Or use positional arguments to represent multiple columns in the same way.
>>> df.select(pl.col("hamburger", "foo"))
shape: (2, 2)
┌───────────┬─────┐
│ hamburger ┆ foo │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═══════════╪═════╡
│ 11 ┆ 2 │
│ 22 ┆ 1 │
└───────────┴─────┘
Easily select all columns that match a certain data type by passing that
datatype.
>>> df.select(pl.col(pl.String))
shape: (2, 1)
┌─────┐
│ bar │
│ --- │
│ str │
╞═════╡
│ a │
│ b │
└─────┘
>>> df.select(pl.col(pl.Int64, pl.Float64))
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ baz │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ f64 │
╞═════╪═════╪═════╡
│ 1 ┆ 3 ┆ 1.5 │
│ 2 ┆ 4 ┆ 4.0 │
└─────┴─────┴─────┘
Use attribute lookup to create a new column expression:
>>> df.with_columns(baz=(col.foo + col.bar))
shape: (2, 3)
┌─────┬───────────┬─────┐
│ ham ┆ hamburger ┆ foo │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═══════════╪═════╡
│ 1 ┆ 11 ┆ 2 │
│ 2 ┆ 22 ┆ 1 │
└─────┴───────────┴─────┘
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ baz │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 1 ┆ 3 ┆ 4 │
│ 2 ┆ 4 ┆ 6 │
└─────┴─────┴─────┘
"""

def __call__(
    self,
    name: str | PolarsDataType | Iterable[str] | Iterable[PolarsDataType],
    *more_names: str | PolarsDataType,
) -> Expr:
    """
    Create one or more column expressions representing column(s) in a DataFrame.

    Parameters
    ----------
    name
        The name or datatype of the column(s) to represent.
        Accepts regular expression input.
        Regular expressions should start with `^` and end with `$`.
    *more_names
        Additional names or datatypes of columns to represent,
        specified as positional arguments.

    Returns
    -------
    Expr
        The expression produced by `_create_col` for the given names or
        datatypes.

    See Also
    --------
    first
    last
    nth

    Examples
    --------
    Pass a single column name to represent that column.

    >>> df = pl.DataFrame(
    ...     {
    ...         "ham": [1, 2],
    ...         "hamburger": [11, 22],
    ...         "foo": [2, 1],
    ...         "bar": ["a", "b"],
    ...     }
    ... )
    >>> df.select(pl.col("foo"))
    shape: (2, 1)
    ┌─────┐
    │ foo │
    │ --- │
    │ i64 │
    ╞═════╡
    │ 2   │
    │ 1   │
    └─────┘

    Use dot syntax to save keystrokes for quick prototyping.

    >>> from polars import col as c
    >>> df.select(c.foo + c.ham)
    shape: (2, 1)
    ┌─────┐
    │ foo │
    │ --- │
    │ i64 │
    ╞═════╡
    │ 3   │
    │ 3   │
    └─────┘

    Use the wildcard `*` to represent all columns.

    >>> df.select(pl.col("*"))
    shape: (2, 4)
    ┌─────┬───────────┬─────┬─────┐
    │ ham ┆ hamburger ┆ foo ┆ bar │
    │ --- ┆ ---       ┆ --- ┆ --- │
    │ i64 ┆ i64       ┆ i64 ┆ str │
    ╞═════╪═══════════╪═════╪═════╡
    │ 1   ┆ 11        ┆ 2   ┆ a   │
    │ 2   ┆ 22        ┆ 1   ┆ b   │
    └─────┴───────────┴─────┴─────┘
    >>> df.select(pl.col("*").exclude("ham"))
    shape: (2, 3)
    ┌───────────┬─────┬─────┐
    │ hamburger ┆ foo ┆ bar │
    │ ---       ┆ --- ┆ --- │
    │ i64       ┆ i64 ┆ str │
    ╞═══════════╪═════╪═════╡
    │ 11        ┆ 2   ┆ a   │
    │ 22        ┆ 1   ┆ b   │
    └───────────┴─────┴─────┘

    Regular expression input is supported.

    >>> df.select(pl.col("^ham.*$"))
    shape: (2, 2)
    ┌─────┬───────────┐
    │ ham ┆ hamburger │
    │ --- ┆ ---       │
    │ i64 ┆ i64       │
    ╞═════╪═══════════╡
    │ 1   ┆ 11        │
    │ 2   ┆ 22        │
    └─────┴───────────┘

    Multiple columns can be represented by passing a list of names.

    >>> df.select(pl.col(["hamburger", "foo"]))
    shape: (2, 2)
    ┌───────────┬─────┐
    │ hamburger ┆ foo │
    │ ---       ┆ --- │
    │ i64       ┆ i64 │
    ╞═══════════╪═════╡
    │ 11        ┆ 2   │
    │ 22        ┆ 1   │
    └───────────┴─────┘

    Or use positional arguments to represent multiple columns in the same way.

    >>> df.select(pl.col("hamburger", "foo"))
    shape: (2, 2)
    ┌───────────┬─────┐
    │ hamburger ┆ foo │
    │ ---       ┆ --- │
    │ i64       ┆ i64 │
    ╞═══════════╪═════╡
    │ 11        ┆ 2   │
    │ 22        ┆ 1   │
    └───────────┴─────┘

    Easily select all columns that match a certain data type by passing that
    datatype.

    >>> df.select(pl.col(pl.String))
    shape: (2, 1)
    ┌─────┐
    │ bar │
    │ --- │
    │ str │
    ╞═════╡
    │ a   │
    │ b   │
    └─────┘
    >>> df.select(pl.col(pl.Int64, pl.Float64))
    shape: (2, 3)
    ┌─────┬───────────┬─────┐
    │ ham ┆ hamburger ┆ foo │
    │ --- ┆ ---       ┆ --- │
    │ i64 ┆ i64       ┆ i64 │
    ╞═════╪═══════════╪═════╡
    │ 1   ┆ 11        ┆ 2   │
    │ 2   ┆ 22        ┆ 1   │
    └─────┴───────────┴─────┘
    """
    # Delegate to `_create_col`, forwarding the arguments unchanged.
    return _create_col(name, *more_names)

def __getattr__(self, name: str) -> Expr:
Expand Down

0 comments on commit f8205a5

Please sign in to comment.