From 6c8f92e4394022017894425feb36769f3b0bd448 Mon Sep 17 00:00:00 2001
From: Yin Huai
Date: Thu, 12 Feb 2015 15:33:34 -0800
Subject: [PATCH] Add tableNames.

---
 python/pyspark/sql/context.py              | 18 +++++++++++-
 .../org/apache/spark/sql/SQLContext.scala  | 28 +++++++++++++++----
 2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index 98940b3359ad8..082f1b691b196 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -622,7 +622,7 @@ def table(self, tableName):
         return DataFrame(self._ssql_ctx.table(tableName), self)
 
     def tables(self, dbName=None):
-        """Returns a DataFrame containing names of table in the given database.
+        """Returns a DataFrame containing names of tables in the given database.
 
         If `dbName` is not specified, the current database will be used.
 
@@ -639,6 +639,22 @@ def tables(self, dbName=None):
         else:
             return DataFrame(self._ssql_ctx.tables(dbName), self)
 
+    def tableNames(self, dbName=None):
+        """Returns a list of names of tables in the database `dbName`.
+
+        If `dbName` is not specified, the current database will be used.
+
+        >>> sqlCtx.registerRDDAsTable(df, "table1")
+        >>> "table1" in sqlCtx.tableNames()
+        True
+        >>> "table1" in sqlCtx.tableNames("db")
+        True
+        """
+        if dbName is None:
+            return [name for name in self._ssql_ctx.tableNames()]
+        else:
+            return [name for name in self._ssql_ctx.tableNames(dbName)]
+
     def cacheTable(self, tableName):
         """Caches the specified table in-memory."""
         self._ssql_ctx.cacheTable(tableName)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 762301c50c72b..0d3485a9d5b0a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -734,6 +734,15 @@ class SQLContext(@transient val sparkContext: SparkContext)
   def table(tableName: String): DataFrame =
     DataFrame(this, catalog.lookupRelation(Seq(tableName)))
 
+  /**
+   * Returns a [[DataFrame]] containing names of existing tables in the given database.
+   * The returned DataFrame has two columns, tableName and isTemporary (a column with BooleanType
+   * indicating if a table is a temporary one or not).
+   */
+  def tables(): DataFrame = {
+    createDataFrame(catalog.getTables(None)).toDataFrame("tableName", "isTemporary")
+  }
+
   /**
    * Returns a [[DataFrame]] containing names of existing tables in the current database.
    * The returned DataFrame has two columns, tableName and isTemporary (a column with BooleanType
@@ -744,12 +753,21 @@
   }
 
   /**
-   * Returns a [[DataFrame]] containing names of existing tables in the given database.
-   * The returned DataFrame has two columns, tableName and isTemporary (a column with BooleanType
-   * indicating if a table is a temporary one or not).
+   * Returns an array of names of tables in the current database.
    */
-  def tables(): DataFrame = {
-    createDataFrame(catalog.getTables(None)).toDataFrame("tableName", "isTemporary")
+  def tableNames(): Array[String] = {
+    catalog.getTables(None).map {
+      case (tableName, _) => tableName
+    }.toArray
+  }
+
+  /**
+   * Returns an array of names of tables in the given database.
+   */
+  def tableNames(databaseName: String): Array[String] = {
+    catalog.getTables(Some(databaseName)).map {
+      case (tableName, _) => tableName
+    }.toArray
   }
 
   protected[sql] class SparkPlanner extends SparkStrategies {
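
A minimal PySpark sketch (not part of the patch) of how the tableNames API added above could be exercised, alongside the existing tables(): it reuses registerRDDAsTable from the doctests, and assumes inferSchema is available to build df and that the application name is arbitrary:

from pyspark import SparkContext
from pyspark.sql import Row, SQLContext

sc = SparkContext(appName="tableNamesExample")
sqlCtx = SQLContext(sc)

# Build a small DataFrame and register it as a temporary table,
# roughly mirroring the doctest fixtures in context.py.
df = sqlCtx.inferSchema(sc.parallelize([Row(field1=1, field2="row1")]))
sqlCtx.registerRDDAsTable(df, "table1")

# tableNames() returns a plain Python list of table names in the
# current database, so membership checks need no DataFrame handling.
print("table1" in sqlCtx.tableNames())   # True

# tables() still returns a DataFrame with tableName and isTemporary columns.
print(sqlCtx.tables().collect())

sc.stop()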