Skip to content

Commit

Permalink
sql: Move approx_len to backend
Browse files Browse the repository at this point in the history
  • Loading branch information
astaric committed Oct 21, 2016
1 parent c8fb39c commit c494da4
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 8 deletions.
13 changes: 13 additions & 0 deletions Orange/data/sql/backend/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,19 @@ def create_variable(self, field_name, field_metadata,
"""
raise NotImplementedError

def count_approx(self, query):
    """Return the estimated number of rows returned by query.

    This base implementation provides no estimate; it only raises
    ``NotImplementedError``.

    Parameters
    ----------
    query : str
        Query whose result size should be estimated.

    Returns
    -------
    int
        Approximate number of rows.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.
    """
    raise NotImplementedError

# query related methods

def create_sql_query(
Expand Down
9 changes: 8 additions & 1 deletion Orange/data/sql/backend/postgres.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import re
import warnings
from contextlib import contextmanager
from time import time
Expand All @@ -7,7 +8,7 @@
from psycopg2.pool import ThreadedConnectionPool

from Orange.data import ContinuousVariable, DiscreteVariable, StringVariable, TimeVariable
from Orange.data.sql.backend.base import Backend, TableDesc, ToSql, BackendError
from Orange.data.sql.backend.base import Backend, ToSql, BackendError

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -167,6 +168,12 @@ def _guess_variable(self, field_name, field_metadata, inspect_table):

return StringVariable(field_name)

def count_approx(self, query):
    """Return the row-count estimate reported by ``EXPLAIN`` for query.

    The query is prefixed with ``EXPLAIN`` and executed; the first column
    of every returned row is concatenated and the first ``rows=<n>``
    figure found in that text is returned (presumably the estimate of the
    top-level plan node -- confirm against the PostgreSQL docs).

    Parameters
    ----------
    query : str
        SQL query whose result size should be estimated.

    Returns
    -------
    int
        Approximate number of rows.

    Raises
    ------
    ValueError
        If the ``EXPLAIN`` output contains no ``rows=<n>`` estimate.
    """
    sql = "EXPLAIN " + query
    with self.execute_sql_query(sql) as cur:
        plan = ''.join(row[0] for row in cur.fetchall())

    # Require at least one digit: with ``\d*`` an empty match would reach
    # int('') and fail with an uninformative error.
    match = re.search(r'rows=(\d+)', plan)
    if match is None:
        raise ValueError(
            "EXPLAIN output contains no row estimate: %r" % (plan,))
    return int(match.group(1))

def __getstate__(self):
# Drop connection_pool from state as it cannot be pickled
state = dict(self.__dict__)
Expand Down
21 changes: 14 additions & 7 deletions Orange/data/sql/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,13 +264,20 @@ def _count_rows(self):
def approx_len(self, get_exact=False):
    """Return a cheap estimate of the number of rows in the table.

    If ``self._cached__len__`` is set, it is returned directly. Otherwise
    the backend is asked for an estimate via ``count_approx``. When the
    backend raises ``NotImplementedError``, ``len(self)`` is returned
    instead.

    Parameters
    ----------
    get_exact : bool
        If true and the backend produced an estimate, also start a
        background thread that calls ``len(self)`` (presumably so that a
        later call can use the cached exact length -- confirm in
        ``__len__``).

    Returns
    -------
    int
        Cached length, backend estimate, or ``len(self)``.
    """
    if self._cached__len__ is not None:
        return self._cached__len__

    try:
        query = self._sql_query(["*"])
        estimate = self.backend.count_approx(query)
    except NotImplementedError:
        # Backend cannot estimate; fall back to the exact length.
        estimate = None
    else:
        if get_exact:
            threading.Thread(target=len, args=(self,)).start()

    if estimate is None:
        estimate = len(self)

    return estimate

_X = None
_Y = None
Expand Down

0 comments on commit c494da4

Please sign in to comment.