Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure sqldata thread safety. #760

Merged
merged 2 commits into from
Apr 8, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 25 additions & 17 deletions coverage/sqldata.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,18 @@
import sqlite3
import sys

from coverage import env
from coverage.backward import iitems
from coverage.data import filename_suffix
from coverage.debug import NoDebugging, SimpleReprMixin
from coverage.files import PathAliases
from coverage.misc import CoverageException, file_be_gone

if env.PY2:
from thread import get_ident as get_thread_id
else:
from threading import get_ident as get_thread_id


# Schema versions:
# 1: Released in 5.0a2
Expand Down Expand Up @@ -84,7 +90,7 @@ def __init__(self, basename=None, suffix=None, warn=None, debug=None):

self._choose_filename()
self._file_map = {}
self._db = None
self._dbs = {}
self._pid = os.getpid()

# Are we in sync with the data file?
Expand All @@ -103,35 +109,36 @@ def _choose_filename(self):
self.filename += "." + suffix

def _reset(self):
if self._db is not None:
self._db.close()
self._db = None
if self._dbs:
for db in self._dbs.values():
db.close()
self._dbs = {}
self._file_map = {}
self._have_used = False
self._current_context_id = None

def _create_db(self):
if self._debug.should('dataio'):
self._debug.write("Creating data file {!r}".format(self.filename))
self._db = Sqlite(self.filename, self._debug)
with self._db:
self._dbs[get_thread_id()] = Sqlite(self.filename, self._debug)
with self._dbs[get_thread_id()] as db:
for stmt in SCHEMA.split(';'):
stmt = " ".join(stmt.strip().split())
if stmt:
self._db.execute(stmt)
self._db.execute("insert into coverage_schema (version) values (?)", (SCHEMA_VERSION,))
self._db.execute(
db.execute(stmt)
db.execute("insert into coverage_schema (version) values (?)", (SCHEMA_VERSION,))
db.execute(
"insert into meta (has_lines, has_arcs, sys_argv) values (?, ?, ?)",
(self._has_lines, self._has_arcs, str(getattr(sys, 'argv', None)))
)

def _open_db(self):
if self._debug.should('dataio'):
self._debug.write("Opening data file {!r}".format(self.filename))
self._db = Sqlite(self.filename, self._debug)
with self._db:
self._dbs[get_thread_id()] = Sqlite(self.filename, self._debug)
with self._dbs[get_thread_id()] as db:
try:
schema_version, = self._db.execute("select version from coverage_schema").fetchone()
schema_version, = db.execute("select version from coverage_schema").fetchone()
except Exception as exc:
raise CoverageException(
"Data file {!r} doesn't seem to be a coverage data file: {}".format(
Expand All @@ -146,22 +153,23 @@ def _open_db(self):
)
)

for row in self._db.execute("select has_lines, has_arcs from meta"):
for row in db.execute("select has_lines, has_arcs from meta"):
self._has_lines, self._has_arcs = row

for path, id in self._db.execute("select path, id from file"):
for path, id in db.execute("select path, id from file"):
self._file_map[path] = id

def _connect(self):
if self._db is None:
if get_thread_id() not in self._dbs:
if os.path.exists(self.filename):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@strichter I'll merge this today, but I'm wondering about a race condition on the creation of the database. There's a window here between checking if the file exists, and writing the initial metadata into the database. Is there a reason we don't see failures due to that race?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call! I think that coverage starts collecting so early that it will always hit the initialization case before any threads are spawned. This is at least true in our setup where we run coverage as a shell script. In other words the first time this condition hits, we are in the main thread.

self._open_db()
else:
self._create_db()
return self._db
return self._dbs[get_thread_id()]

def __nonzero__(self):
if self._db is None and not os.path.exists(self.filename):
if (get_thread_id() not in self._dbs and
not os.path.exists(self.filename)):
return False
try:
with self._connect() as con:
Expand Down