Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-23913][SQL] Add array_intersect function #21102

Closed
wants to merge 21 commits into from
Closed
19 changes: 19 additions & 0 deletions python/pyspark/sql/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2033,6 +2033,25 @@ def array_distinct(col):
return Column(sc._jvm.functions.array_distinct(_to_java_column(col)))


@ignore_unicode_prefix
@since(2.4)
def array_intersect(col1, col2):
"""
Collection function: returns an array of the elements in the intersection of col1 and col2,
without duplicates.

:param col1: name of column containing array
:param col2: name of column containing array

>>> from pyspark.sql import Row
>>> df = spark.createDataFrame([Row(c1=["b", "a", "c"], c2=["c", "d", "a", "f"])])
>>> df.select(array_intersect(df.c1, df.c2)).collect()
[Row(array_intersect(c1, c2)=[u'a', u'c'])]
"""
sc = SparkContext._active_spark_context
return Column(sc._jvm.functions.array_intersect(_to_java_column(col1), _to_java_column(col2)))


@ignore_unicode_prefix
@since(2.4)
def array_union(col1, col2):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ object FunctionRegistry {
expression[CreateArray]("array"),
expression[ArrayContains]("array_contains"),
expression[ArraysOverlap]("arrays_overlap"),
expression[ArrayIntersect]("array_intersect"),
expression[ArrayJoin]("array_join"),
expression[ArrayPosition]("array_position"),
expression[ArraySort]("array_sort"),
Expand Down
Loading