Skip to content

Commit

Permalink
Add numRows param to DataFrame.show()
Browse files Browse the repository at this point in the history
  • Loading branch information
jackylk committed Feb 25, 2015
1 parent d641fbb commit 981be52
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
6 changes: 3 additions & 3 deletions python/pyspark/sql/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,9 @@ def isLocal(self):
"""
return self._jdf.isLocal()

def show(self):
def show(self, numRows=20):
"""
Print the first 20 rows.
Print the first ``numRows`` rows (20 by default).
>>> df
DataFrame[age: int, name: string]
Expand All @@ -283,7 +283,7 @@ def show(self):
2 Alice
5 Bob
"""
print self._jdf.showString().encode('utf8', 'ignore')
print self._jdf.showString(numRows).encode('utf8', 'ignore')

def __repr__(self):
return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes))
Expand Down
9 changes: 6 additions & 3 deletions sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,11 @@ class DataFrame protected[sql](

/**
* Internal API for Python
* @param numRows Number of rows to show
*/
private[sql] def showString(): String = {
val data = take(20)
private[sql] def showString(numRows: Int = 20): String = {
val size = count()
val data = if (numRows > size) collect() else take(numRows)
val numCols = schema.fieldNames.length

// For cells that are longer than 20 characters, replace them with the first 17 characters and "..."
Expand Down Expand Up @@ -293,9 +295,10 @@ class DataFrame protected[sql](
* 1983 03 0.410516 0.442194
* 1984 04 0.450090 0.483521
* }}}
* @param numRows Number of rows to show
* @group basic
*/
def show(): Unit = println(showString())
def show(numRows: Int = 20): Unit = println(showString(numRows))

/**
* Cartesian join with another [[DataFrame]].
Expand Down

0 comments on commit 981be52

Please sign in to comment.