Skip to content

Commit

Permalink
up2k: fix a mostly-harmless race
Browse files Browse the repository at this point in the history
as each chunk is written to the file, httpcli calls
up2k.confirm_chunk to register the chunk as completed, and the reply
indicates whether that was the final outstanding chunk, in which case
httpcli closes the file descriptors since there's nothing more to write

the issue is that the final chunk is registered as completed before the
file descriptors are closed, meaning there could be writes that haven't
finished flushing to disk yet

if the client decides to issue another handshake during this window,
up2k sees that all chunks are complete and calls up2k.finish_upload
even as some threads might still be flushing the final writes to disk

so the conditions to hit this bug were as follows (all must be true):
* multiprocessing is disabled
* there is a reverse-proxy
* a client has several idle connections and reuses one of those
* the server's filesystem is EXTREMELY slow, to the point where
   closing a file takes over 30 seconds

the fix is to stop handshakes from being processed while a file is
being closed, which is unfortunately a small bottleneck in that it
prohibits initiating another upload while one is being finalized, but
the required complexity to handle this better is probably not worth it
(a separate mutex for each upload session or something like that)

this issue is mostly harmless, partially because it is super tricky to
hit (only aware of it happening synthetically), and because there is
usually no harmful consequences; the worst-case is if this were to
happen exactly as the server OS decides to crash, which would make the
file appear to be fully uploaded even though it's missing some data
(all extremely unlikely, but not impossible)

there is no performance impact; if anything it should now accept
new tcp connections slightly faster thanks to more granular locking
  • Loading branch information
9001 committed Feb 13, 2024
1 parent 7c8e368 commit 6f8a588
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 8 deletions.
15 changes: 9 additions & 6 deletions copyparty/httpcli.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def __init__(self, conn: "HttpConn") -> None:

self.t0 = time.time()
self.conn = conn
self.mutex = conn.mutex # mypy404
self.u2mutex = conn.u2mutex # mypy404
self.s = conn.s
self.sr = conn.sr
self.ip = conn.addr[0]
Expand Down Expand Up @@ -1988,8 +1988,11 @@ def handle_post_json(self) -> bool:
except:
raise Pebkac(500, min_ex())

x = self.conn.hsrv.broker.ask("up2k.handle_json", body, self.u2fh.aps)
ret = x.get()
# not to protect u2fh, but to prevent handshakes while files are closing
with self.u2mutex:
x = self.conn.hsrv.broker.ask("up2k.handle_json", body, self.u2fh.aps)
ret = x.get()

if self.is_vproxied:
if "purl" in ret:
ret["purl"] = self.args.SR + ret["purl"]
Expand Down Expand Up @@ -2094,7 +2097,7 @@ def handle_post_binary(self) -> bool:
f = None
fpool = not self.args.no_fpool and sprs
if fpool:
with self.mutex:
with self.u2mutex:
try:
f = self.u2fh.pop(path)
except:
Expand Down Expand Up @@ -2137,7 +2140,7 @@ def handle_post_binary(self) -> bool:
if not fpool:
f.close()
else:
with self.mutex:
with self.u2mutex:
self.u2fh.put(path, f)
except:
# maybe busted handle (eg. disk went full)
Expand All @@ -2156,7 +2159,7 @@ def handle_post_binary(self) -> bool:
return False

if not num_left and fpool:
with self.mutex:
with self.u2mutex:
self.u2fh.close(path)

if not num_left and not self.args.nw:
Expand Down
2 changes: 1 addition & 1 deletion copyparty/httpconn.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def __init__(
self.addr = addr
self.hsrv = hsrv

self.mutex: threading.Lock = hsrv.mutex # mypy404
self.u2mutex: threading.Lock = hsrv.u2mutex # mypy404
self.args: argparse.Namespace = hsrv.args # mypy404
self.E: EnvParams = self.args.E
self.asrv: AuthSrv = hsrv.asrv # mypy404
Expand Down
3 changes: 2 additions & 1 deletion copyparty/httpsrv.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def __init__(self, broker: "BrokerCli", nid: Optional[int]) -> None:
self.bound: set[tuple[str, int]] = set()
self.name = "hsrv" + nsuf
self.mutex = threading.Lock()
self.u2mutex = threading.Lock()
self.stopping = False

self.tp_nthr = 0 # actual
Expand Down Expand Up @@ -220,7 +221,7 @@ def stop_threads(self, n: int) -> None:
def periodic(self) -> None:
while True:
time.sleep(2 if self.tp_ncli or self.ncli else 10)
with self.mutex:
with self.u2mutex, self.mutex:
self.u2fh.clean()
if self.tp_q:
self.tp_ncli = max(self.ncli, self.tp_ncli - 2)
Expand Down
1 change: 1 addition & 0 deletions tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ def __init__(self, args, asrv, log, buf):
self.log_func = log
self.log_src = "a"
self.mutex = threading.Lock()
self.u2mutex = threading.Lock()
self.nbyte = 0
self.nid = None
self.nreq = -1
Expand Down

0 comments on commit 6f8a588

Please sign in to comment.