Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions python/pyspark/sql/connect/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2394,22 +2394,31 @@ def unbase64(col: "ColumnOrName") -> Column:
unbase64.__doc__ = pysparkfuncs.unbase64.__doc__


def ltrim(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("ltrim", col)
def ltrim(col: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column:
if trim is not None:
return _invoke_function_over_columns("ltrim", trim, col)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The argument order here is somewhat confusing:
1. connect: _invoke_function_over_columns("ltrim", trim, col) follows the parameter order of the constructor:

def this(trimStr: Expression, srcStr: Expression) = this(srcStr, Option(trimStr))

2. classic: _invoke_function_over_columns("ltrim", col, trim) follows the signatures in the Scala functions:

  def ltrim(e: Column, trimString: String): Column
  def ltrim(e: Column, trim: Column): Column

Copy link
Contributor Author

@zhengruifeng zhengruifeng Oct 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can probably unify the function invocations of connect and classic in some way.

else:
return _invoke_function_over_columns("ltrim", col)


ltrim.__doc__ = pysparkfuncs.ltrim.__doc__


def rtrim(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("rtrim", col)
def rtrim(col: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column:
if trim is not None:
return _invoke_function_over_columns("rtrim", trim, col)
else:
return _invoke_function_over_columns("rtrim", col)


rtrim.__doc__ = pysparkfuncs.rtrim.__doc__


def trim(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("trim", col)
def trim(col: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column:
if trim is not None:
return _invoke_function_over_columns("trim", trim, col)
else:
return _invoke_function_over_columns("trim", col)


trim.__doc__ = pysparkfuncs.trim.__doc__
Expand Down
131 changes: 100 additions & 31 deletions python/pyspark/sql/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -10913,7 +10913,7 @@ def unbase64(col: "ColumnOrName") -> Column:


@_try_remote_functions
def ltrim(col: "ColumnOrName") -> Column:
def ltrim(col: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column:
"""
Trim the spaces from left end for the specified string value.

Expand All @@ -10926,6 +10926,10 @@ def ltrim(col: "ColumnOrName") -> Column:
----------
col : :class:`~pyspark.sql.Column` or str
target column to work on.
trim : :class:`~pyspark.sql.Column` or str, optional
The trim string characters to trim, the default value is a single space

.. versionadded:: 4.0.0

Returns
-------
Expand All @@ -10934,21 +10938,40 @@ def ltrim(col: "ColumnOrName") -> Column:

Examples
--------
Example 1: Trim the spaces

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([" Spark", "Spark ", " Spark"], "STRING")
>>> df.select(ltrim("value").alias("r")).withColumn("length", length("r")).show()
+-------+------+
| r|length|
+-------+------+
| Spark| 5|
|Spark | 7|
| Spark| 5|
+-------+------+
>>> df.select("*", sf.ltrim("value")).show()
+--------+------------+
| value|ltrim(value)|
+--------+------------+
| Spark| Spark|
| Spark | Spark |
| Spark| Spark|
+--------+------------+

Example 2: Trim specified characters

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame(["***Spark", "Spark**", "*Spark"], "STRING")
>>> df.select("*", sf.ltrim("value", sf.lit("*"))).show()
+--------+--------------------------+
| value|TRIM(LEADING * FROM value)|
+--------+--------------------------+
|***Spark| Spark|
| Spark**| Spark**|
| *Spark| Spark|
+--------+--------------------------+
"""
return _invoke_function_over_columns("ltrim", col)
if trim is not None:
return _invoke_function_over_columns("ltrim", col, trim)
else:
return _invoke_function_over_columns("ltrim", col)


@_try_remote_functions
def rtrim(col: "ColumnOrName") -> Column:
def rtrim(col: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column:
"""
Trim the spaces from right end for the specified string value.

Expand All @@ -10961,6 +10984,10 @@ def rtrim(col: "ColumnOrName") -> Column:
----------
col : :class:`~pyspark.sql.Column` or str
target column to work on.
trim : :class:`~pyspark.sql.Column` or str, optional
The trim string characters to trim, the default value is a single space

.. versionadded:: 4.0.0

Returns
-------
Expand All @@ -10969,21 +10996,40 @@ def rtrim(col: "ColumnOrName") -> Column:

Examples
--------
Example 1: Trim the spaces

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([" Spark", "Spark ", " Spark"], "STRING")
>>> df.select(rtrim("value").alias("r")).withColumn("length", length("r")).show()
+--------+------+
| r|length|
+--------+------+
| Spark| 8|
| Spark| 5|
| Spark| 6|
+--------+------+
>>> df.select("*", sf.rtrim("value")).show()
+--------+------------+
| value|rtrim(value)|
+--------+------------+
| Spark| Spark|
| Spark | Spark|
| Spark| Spark|
+--------+------------+

Example 2: Trim specified characters

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame(["***Spark", "Spark**", "*Spark"], "STRING")
>>> df.select("*", sf.rtrim("value", sf.lit("*"))).show()
+--------+---------------------------+
| value|TRIM(TRAILING * FROM value)|
+--------+---------------------------+
|***Spark| ***Spark|
| Spark**| Spark|
| *Spark| *Spark|
+--------+---------------------------+
"""
return _invoke_function_over_columns("rtrim", col)
if trim is not None:
return _invoke_function_over_columns("rtrim", col, trim)
else:
return _invoke_function_over_columns("rtrim", col)


@_try_remote_functions
def trim(col: "ColumnOrName") -> Column:
def trim(col: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column:
"""
Trim the spaces from both ends for the specified string column.

Expand All @@ -10996,6 +11042,10 @@ def trim(col: "ColumnOrName") -> Column:
----------
col : :class:`~pyspark.sql.Column` or str
target column to work on.
trim : :class:`~pyspark.sql.Column` or str, optional
The trim string characters to trim, the default value is a single space

.. versionadded:: 4.0.0

Returns
-------
Expand All @@ -11004,17 +11054,36 @@ def trim(col: "ColumnOrName") -> Column:

Examples
--------
Example 1: Trim the spaces

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([" Spark", "Spark ", " Spark"], "STRING")
>>> df.select(trim("value").alias("r")).withColumn("length", length("r")).show()
+-----+------+
| r|length|
+-----+------+
|Spark| 5|
|Spark| 5|
|Spark| 5|
+-----+------+
"""
return _invoke_function_over_columns("trim", col)
>>> df.select("*", sf.trim("value")).show()
+--------+-----------+
| value|trim(value)|
+--------+-----------+
| Spark| Spark|
| Spark | Spark|
| Spark| Spark|
+--------+-----------+

Example 2: Trim specified characters

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame(["***Spark", "Spark**", "*Spark"], "STRING")
>>> df.select("*", sf.trim("value", sf.lit("*"))).show()
+--------+-----------------------+
| value|TRIM(BOTH * FROM value)|
+--------+-----------------------+
|***Spark| Spark|
| Spark**| Spark|
| *Spark| Spark|
+--------+-----------------------+
"""
if trim is not None:
return _invoke_function_over_columns("trim", col, trim)
else:
return _invoke_function_over_columns("trim", col)


@_try_remote_functions
Expand Down
27 changes: 24 additions & 3 deletions sql/api/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4061,7 +4061,14 @@ object functions {
* @group string_funcs
* @since 2.3.0
*/
def ltrim(e: Column, trimString: String): Column = Column.fn("ltrim", lit(trimString), e)
def ltrim(e: Column, trimString: String): Column = ltrim(e, lit(trimString))

/**
* Trim the specified character string from left end for the specified string column.
* @group string_funcs
* @since 4.0.0
*/
def ltrim(e: Column, trim: Column): Column = Column.fn("ltrim", trim, e)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is `trim` the expression for the `trimString`? It looks a bit odd to name this parameter `trim`. It is also inconsistent with the function doc of the `StringTrim` expression, which names this parameter `trimStr`.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, the argument names were already inconsistent between Python and Scala in many functions.

In this PR I use `trim` to be consistent with the Python-side `btrim`.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

def btrim(str: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column:


/**
* Calculates the byte length for the specified string column.
Expand Down Expand Up @@ -4258,7 +4265,14 @@ object functions {
* @group string_funcs
* @since 2.3.0
*/
def rtrim(e: Column, trimString: String): Column = Column.fn("rtrim", lit(trimString), e)
def rtrim(e: Column, trimString: String): Column = rtrim(e, lit(trimString))

/**
* Trim the specified character string from right end for the specified string column.
* @group string_funcs
* @since 4.0.0
*/
def rtrim(e: Column, trim: Column): Column = Column.fn("rtrim", trim, e)

/**
* Returns the soundex code for the specified expression.
Expand Down Expand Up @@ -4444,7 +4458,14 @@ object functions {
* @group string_funcs
* @since 2.3.0
*/
def trim(e: Column, trimString: String): Column = Column.fn("trim", lit(trimString), e)
def trim(e: Column, trimString: String): Column = trim(e, lit(trimString))

/**
* Trim the specified character from both ends for the specified string column.
* @group string_funcs
* @since 4.0.0
*/
def trim(e: Column, trim: Column): Column = Column.fn("trim", trim, e)

/**
* Converts a string column to upper case.
Expand Down