From 8d87f7e1aa8b0aacadcd62c6620d6edfcbf6d8c2 Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Sat, 20 Nov 2021 22:37:19 +0530
Subject: [PATCH 01/54] Asynchronous `handle_event` and `LocalExecutor` thread

---
 examples/web_scraper.py           |   2 +-
 proxy/core/acceptor/acceptor.py   |  76 +++++--------
 proxy/core/acceptor/local.py      | 182 ++++++++++++++++++++++++++++++
 proxy/core/acceptor/threadless.py |  70 +++++-----
 proxy/core/acceptor/work.py       |   2 +-
 proxy/core/base/tcp_server.py     |   2 +-
 proxy/core/base/tcp_tunnel.py     |   4 +-
 proxy/http/handler.py             |   2 +-
 8 files changed, 257 insertions(+), 83 deletions(-)
 create mode 100644 proxy/core/acceptor/local.py

diff --git a/examples/web_scraper.py b/examples/web_scraper.py
index 0cd638ade7..1b18ef4b91 100644
--- a/examples/web_scraper.py
+++ b/examples/web_scraper.py
@@ -44,7 +44,7 @@ def get_events(self) -> Dict[socket.socket, int]:
         """Return sockets and events (read or write) that we are interested in."""
         return {}
 
-    def handle_events(
+    async def handle_events(
         self,
         readables: Readables,
         writables: Writables,
diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py
index 164fdba085..ef7b81902b 100644
--- a/proxy/core/acceptor/acceptor.py
+++ b/proxy/core/acceptor/acceptor.py
@@ -13,10 +13,10 @@
        acceptor
        pre
 """
+import queue
 import socket
 import logging
 import argparse
-import threading
 import selectors
 import multiprocessing
 import multiprocessing.synchronize
@@ -24,14 +24,13 @@
 from multiprocessing import connection
 from multiprocessing.reduction import recv_handle
 
-from typing import List, Optional, Tuple
+from typing import List, Optional
 
-from proxy.core.acceptor.executors import ThreadlessPool
+from ...common.logger import Logger
 
 from ..event import EventQueue
 
-from ...common.utils import is_threadless
-from ...common.logger import Logger
+from .local import LocalExecutor
 
 logger = logging.getLogger(__name__)
@@ -101,8 +100,7 @@ def __init__(
         # File descriptor used to accept new work
         # Currently, a socket fd is assumed.
         self.sock: Optional[socket.socket] = None
-        # Incremented every time work() is called
-        self._total: int = 0
+        self._local: Optional[LocalExecutor] = None
 
     def run_once(self) -> None:
         if self.selector is not None:
@@ -115,10 +113,9 @@ def run_once(self) -> None:
                 locked = True
                 for _, mask in events:
                     if mask & selectors.EVENT_READ:
-                        if self.sock is not None:
+                        if self.sock is not None and self._local is not None:
                             conn, addr = self.sock.accept()
-                            addr = None if addr == '' else addr
-                            self._work(conn, addr)
+                            self._local.evq.put((conn, addr))
             except BlockingIOError:
                 pass
             finally:
@@ -142,6 +139,7 @@ def run(self) -> None:
             type=socket.SOCK_STREAM,
         )
         try:
+            self._start_local()
             self.selector.register(self.sock, selectors.EVENT_READ)
             while not self.running.is_set():
                 self.run_once()
@@ -149,44 +147,26 @@ def run(self) -> None:
             pass
         finally:
             self.selector.unregister(self.sock)
+            self._stop_local()
             self.sock.close()
             logger.debug('Acceptor#%d shutdown', self.idd)
 
-    def _work(self, conn: socket.socket, addr: Optional[Tuple[str, int]]) -> None:
-        if is_threadless(self.flags.threadless, self.flags.threaded):
-            # Index of worker to which this work should be dispatched
-            # Use round-robin strategy by default.
-            #
-            # By default all acceptors will start sending work to
-            # 1st workers. To randomize, we offset index by idd.
-            index = (self._total + self.idd) % self.flags.num_workers
-            thread = threading.Thread(
-                target=ThreadlessPool.delegate,
-                args=(
-                    self.executor_pids[index],
-                    self.executor_queues[index],
-                    self.executor_locks[index],
-                    conn,
-                    addr,
-                    self.flags.unix_socket_path,
-                ),
-            )
-            thread.start()
-            logger.debug(
-                'Dispatched work#{0}.{1} to worker#{2}'.format(
-                    self.idd, self._total, index,
-                ),
-            )
-        else:
-            _, thread = ThreadlessPool.start_threaded_work(
-                self.flags,
-                conn, addr,
-                event_queue=self.event_queue,
-                publisher_id=self.__class__.__name__,
-            )
-            logger.debug(
-                'Started work#{0}.{1} in thread#{2}'.format(
-                    self.idd, self._total, thread.ident,
-                ),
-            )
-        self._total += 1
+    def _start_local(self) -> None:
+        assert self.sock
+        self._local = LocalExecutor(
+            self.idd,
+            self.flags,
+            self.sock,
+            queue.Queue(),
+            self.executor_queues,
+            self.executor_pids,
+            self.executor_locks,
+            self.event_queue,
+        )
+        self._local.setDaemon(True)
+        self._local.start()
+
+    def _stop_local(self) -> None:
+        if self._local is not None:
+            self._local.evq.put(False)
+            self._local.join()
diff --git a/proxy/core/acceptor/local.py b/proxy/core/acceptor/local.py
new file mode 100644
index 0000000000..104a72ddf8
--- /dev/null
+++ b/proxy/core/acceptor/local.py
@@ -0,0 +1,182 @@
+# -*- coding: utf-8 -*-
+"""
+    proxy.py
+    ~~~~~~~~
+    ⚡⚡⚡ Fast, Lightweight, Pluggable, TLS interception capable proxy server focused on
+    Network monitoring, controls & Application development, testing, debugging.
+
+    :copyright: (c) 2013-present by Abhinav Singh and contributors.
+    :license: BSD, see LICENSE for more details.
+"""
+import os
+import queue
+import socket
+import logging
+import argparse
+import selectors
+import threading
+import contextlib
+import multiprocessing.synchronize
+
+from multiprocessing import connection
+from typing import Optional, Tuple, List, Dict, Generator
+
+from ...common.utils import is_threadless
+from ...common.types import Readables, Writables
+from ...common.constants import DEFAULT_SELECTOR_SELECT_TIMEOUT
+
+from ..event import EventQueue, eventNames
+from ..connection import TcpClientConnection
+
+from .executors import ThreadlessPool
+from .work import Work
+
+logger = logging.getLogger(__name__)
+
+
+class LocalExecutor(threading.Thread):
+    """Listens for READ_EVENT over a queue, accepts and initializes work."""
+
+    def __init__(
+        self,
+        idd: int,
+        flags: argparse.Namespace,
+        sock: socket.socket,
+        evq: queue.Queue,
+        executor_queues: List[connection.Connection],
+        executor_pids: List[int],
+        executor_locks: List[multiprocessing.synchronize.Lock],
+        event_queue: Optional[EventQueue] = None,
+    ) -> None:
+        super().__init__()
+        # Index assigned by `AcceptorPool`
+        self.idd = idd
+        self.sock = sock
+        self.evq = evq
+        self.flags = flags
+        self.executor_queues = executor_queues
+        self.executor_pids = executor_pids
+        self.executor_locks = executor_locks
+        self.event_queue = event_queue
+        # Incremented every time work() is called
+        self._total: int = 0
+        self._selector: Optional[selectors.DefaultSelector] = None
+        self._works: Dict[int, Work] = {}
+
+    @contextlib.contextmanager
+    def selected_events(self) -> Generator[
+        Tuple[Readables, Writables],
+        None, None,
+    ]:
+        assert self._selector is not None
+        events: Dict[socket.socket, int] = {}
+        for work in self._works.values():
+            worker_events = work.get_events()
+            events.update(worker_events)
+            for fd in worker_events:
+                # Can throw ValueError: Invalid file descriptor: -1
+                #
+                # A guard within Work classes may not help here due to
+                # asynchronous nature. Hence, threadless will handle
+                # ValueError exceptions raised by selector.register
+                # for invalid fd.
+                self._selector.register(fd, worker_events[fd])
+        ev = self._selector.select(timeout=DEFAULT_SELECTOR_SELECT_TIMEOUT)
+        readables = []
+        writables = []
+        for key, mask in ev:
+            if mask & selectors.EVENT_READ:
+                readables.append(key.fileobj)
+            if mask & selectors.EVENT_WRITE:
+                writables.append(key.fileobj)
+        yield (readables, writables)
+        for fd in events:
+            self._selector.unregister(fd)
+
+    def run_once(self) -> bool:
+        try:
+            payload = self.evq.get(block=True, timeout=0.1)
+            if isinstance(payload, bool) and payload is False:
+                return True
+            assert isinstance(payload, tuple)
+            conn, addr = payload
+            addr = None if addr == '' else addr
+            self.dispatch(conn, addr)
+        except queue.Empty:
+            pass
+        return False
+
+    def run(self) -> None:
+        self._selector = selectors.DefaultSelector()
+        try:
+            while 1:
+                if self.run_once():
+                    break
+        except KeyboardInterrupt:
+            pass
+
+    def work(self, conn: socket.socket, addr: Optional[Tuple[str, int]]) -> None:
+        fileno = conn.fileno()
+        self._works[fileno] = self.flags.work_klass(
+            TcpClientConnection(conn=conn, addr=addr),
+            flags=self.flags,
+            event_queue=self.event_queue,
+        )
+        self._works[fileno].publish_event(
+            event_name=eventNames.WORK_STARTED,
+            event_payload={'fileno': fileno, 'addr': addr},
+            publisher_id=self.__class__.__name__,
+        )
+        try:
+            self._works[fileno].initialize()
+        except Exception as e:
+            logger.exception(
+                'Exception occurred during initialization',
+                exc_info=e,
+            )
+            self._cleanup(fileno)
+
+    def _cleanup(self, work_id: int) -> None:
+        # TODO: HttpProtocolHandler.shutdown can call flush which may block
+        self._works[work_id].shutdown()
+        del self._works[work_id]
+        os.close(work_id)
+
+    def dispatch(self, conn: socket.socket, addr: Optional[Tuple[str, int]]) -> None:
+        if is_threadless(self.flags.threadless, self.flags.threaded):
+            # Index of worker to which this work should be dispatched
+            # Use round-robin strategy by default.
+            #
+            # By default all acceptors will start sending work to
+            # 1st workers. To randomize, we offset index by idd.
+            index = (self._total + self.idd) % self.flags.num_workers
+            thread = threading.Thread(
+                target=ThreadlessPool.delegate,
+                args=(
+                    self.executor_pids[index],
+                    self.executor_queues[index],
+                    self.executor_locks[index],
+                    conn,
+                    addr,
+                    self.flags.unix_socket_path,
+                ),
+            )
+            thread.start()
+            logger.debug(
+                'Dispatched work#{0}.{1} to worker#{2}'.format(
+                    self.idd, self._total, index,
+                ),
+            )
+        else:
+            _, thread = ThreadlessPool.start_threaded_work(
+                self.flags,
+                conn, addr,
+                event_queue=self.event_queue,
+                publisher_id=self.__class__.__name__,
+            )
+            logger.debug(
+                'Started work#{0}.{1} in thread#{2}'.format(
+                    self.idd, self._total, thread.ident,
+                ),
+            )
+        self._total += 1
diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py
index b0c4246af8..e94b2a09dc 100644
--- a/proxy/core/acceptor/threadless.py
+++ b/proxy/core/acceptor/threadless.py
@@ -12,11 +12,11 @@
     acceptor
 """
-import argparse
 import os
 import socket
 import logging
 import asyncio
+import argparse
 import selectors
 import contextlib
 import multiprocessing
@@ -78,12 +78,13 @@ def __init__(
 
     @contextlib.contextmanager
     def selected_events(self) -> Generator[
-        Tuple[Readables, Writables],
+        Dict[int, Tuple[Readables, Writables]],
         None, None,
     ]:
         assert self.selector is not None
         events: Dict[socket.socket, int] = {}
-        for work in self.works.values():
+        for work_id in self.works:
+            work = self.works[work_id]
             worker_events = work.get_events()
             events.update(worker_events)
             for fd in worker_events:
@@ -93,26 +94,21 @@ def selected_events(self) -> Generator[
                 # asynchronous nature. Hence, threadless will handle
                 # ValueError exceptions raised by selector.register
                 # for invalid fd.
-                self.selector.register(fd, worker_events[fd])
+                self.selector.register(fd, worker_events[fd], data=work_id)
         ev = self.selector.select(timeout=DEFAULT_SELECTOR_SELECT_TIMEOUT)
-        readables = []
-        writables = []
+        # Keys are work_id and values are 2-tuple indicating readables & writables
+        work_by_ids: Dict[int, Tuple[Readables, Writables]] = {}
         for key, mask in ev:
+            if key.data not in work_by_ids:
+                work_by_ids[key.data] = ([], [])
             if mask & selectors.EVENT_READ:
-                readables.append(key.fileobj)
+                work_by_ids[key.data][0].append(key.fileobj)
             if mask & selectors.EVENT_WRITE:
-                writables.append(key.fileobj)
-        yield (readables, writables)
+                work_by_ids[key.data][1].append(key.fileobj)
+        yield work_by_ids
         for fd in events:
             self.selector.unregister(fd)
 
-    async def handle_events(
-        self, fileno: int,
-        readables: Readables,
-        writables: Writables
-    ) -> bool:
-        return self.works[fileno].handle_events(readables, writables)
-
     # TODO: Use correct future typing annotations
     async def wait_for_tasks(
         self, tasks: Dict[int, Any]
@@ -178,10 +174,23 @@ def cleanup(self, work_id: int) -> None:
         del self.works[work_id]
         os.close(work_id)
 
+    def _prepare_tasks(self, work_by_ids: Dict[int, Tuple[Readables, Writables]]) -> Dict[int, Any]:
+        tasks = {}
+        assert self.loop
+        for work_id in work_by_ids:
+            readables, writables = work_by_ids[work_id]
+            tasks[work_id] = self.loop.create_task(
+                self.works[work_id].handle_events(
+                    readables,
+                    writables,
+                ),
+            )
+        return tasks
+
     def run_once(self) -> None:
         assert self.loop is not None
-        with self.selected_events() as (readables, writables):
-            if len(readables) == 0 and len(writables) == 0:
+        with self.selected_events() as work_by_ids:
+            if len(work_by_ids) == 0:
                 # Remove and shutdown inactive connections
                 self.cleanup_inactive()
                 return
@@ -196,16 +205,15 @@ def run_once(self) -> None:
             #
             # TODO: Only send readable / writables that client originally
             # registered.
-            tasks = {}
-            for fileno in self.works:
-                tasks[fileno] = self.loop.create_task(
-                    self.handle_events(fileno, readables, writables),
-                )
-            # Accepted client connection from Acceptor
-            if self.client_queue in readables:
-                self.accept_client()
             # Wait for Threadless.handle_events to complete
-            self.loop.run_until_complete(self.wait_for_tasks(tasks))
+            if self.client_queue.fileno() in work_by_ids:
+                self.accept_client()
+                del work_by_ids[self.client_queue.fileno()]
+            self.loop.run_until_complete(
+                self.wait_for_tasks(
+                    self._prepare_tasks(work_by_ids),
+                ),
+            )
 
             # Remove and shutdown inactive workers
             self.cleanup_inactive()
@@ -216,7 +224,11 @@ def run(self) -> None:
         )
         try:
             self.selector = selectors.DefaultSelector()
-            self.selector.register(self.client_queue, selectors.EVENT_READ)
+            self.selector.register(
+                self.client_queue.fileno(),
+                selectors.EVENT_READ,
+                data=self.client_queue.fileno(),
+            )
             self.loop = asyncio.get_event_loop_policy().get_event_loop()
             while not self.running.is_set():
                 # logger.debug('Working on {0} works'.format(len(self.works)))
@@ -225,7 +237,7 @@ def run(self) -> None:
             pass
         finally:
             assert self.selector is not None
-            self.selector.unregister(self.client_queue)
+            self.selector.unregister(self.client_queue.fileno())
             self.client_queue.close()
             assert self.loop is not None
             self.loop.close()
diff --git a/proxy/core/acceptor/work.py b/proxy/core/acceptor/work.py
index ea05a8056b..486133164a 100644
--- a/proxy/core/acceptor/work.py
+++ b/proxy/core/acceptor/work.py
@@ -48,7 +48,7 @@ def get_events(self) -> Dict[socket.socket, int]:
         return {}  # pragma: no cover
 
     @abstractmethod
-    def handle_events(
+    async def handle_events(
         self,
         readables: Readables,
         writables: Writables,
diff --git a/proxy/core/base/tcp_server.py b/proxy/core/base/tcp_server.py
index 4db61463cd..03e5b9f8be 100644
--- a/proxy/core/base/tcp_server.py
+++ b/proxy/core/base/tcp_server.py
@@ -79,7 +79,7 @@ def get_events(self) -> Dict[socket.socket, int]:
             events[self.work.connection] = selectors.EVENT_WRITE
         return events
 
-    def handle_events(
+    async def handle_events(
         self,
         readables: Readables,
         writables: Writables,
diff --git a/proxy/core/base/tcp_tunnel.py b/proxy/core/base/tcp_tunnel.py
index 9e4b7c156f..5eaf1cecc5 100644
--- a/proxy/core/base/tcp_tunnel.py
+++ b/proxy/core/base/tcp_tunnel.py
@@ -80,13 +80,13 @@ def get_events(self) -> Dict[socket.socket, int]:
             ev[self.upstream.connection] = selectors.EVENT_WRITE
         return ev
 
-    def handle_events(
+    async def handle_events(
         self,
         readables: Readables,
         writables: Writables,
     ) -> bool:
         # Handle client events
-        do_shutdown: bool = super().handle_events(readables, writables)
+        do_shutdown: bool = await super().handle_events(readables, writables)
         if do_shutdown:
             return do_shutdown
         # Handle server events
diff --git a/proxy/http/handler.py b/proxy/http/handler.py
index 0af90ad53a..59b5c5294d 100644
--- a/proxy/http/handler.py
+++ b/proxy/http/handler.py
@@ -170,7 +170,7 @@ def get_events(self) -> Dict[socket.socket, int]:
         return events
 
     # We override super().handle_events and never call it
-    def handle_events(
+    async def handle_events(
        self,
         readables: Readables,
         writables: Writables,
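
PATCH 01 above replaces the acceptor's inline `_work()` dispatch with a queue handoff: the acceptor now only calls `accept()` and pushes `(conn, addr)` tuples onto `LocalExecutor.evq`, while the executor thread drains the queue and dispatches, with `False` as the shutdown sentinel. A minimal self-contained sketch of that producer/consumer shape (class and variable names outside the patch are illustrative only):

```python
import queue
import threading


class QueueConsumerSketch(threading.Thread):
    """Drains (conn, addr) tuples from a queue; `False` signals shutdown."""

    def __init__(self) -> None:
        super().__init__(daemon=True)
        self.evq: queue.Queue = queue.Queue()

    def run_once(self) -> bool:
        try:
            payload = self.evq.get(block=True, timeout=0.1)
            if payload is False:
                return True     # producer asked us to stop
            conn, addr = payload
            conn.close()        # a real executor dispatches work here
        except queue.Empty:
            pass
        return False

    def run(self) -> None:
        while not self.run_once():
            pass
```

The producer side is symmetric: one `evq.put((conn, addr))` per accepted socket, then `evq.put(False)` followed by `join()` at shutdown, exactly as `_start_local()` and `_stop_local()` do above.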

From dbaf18a057ecd3d95d89994784b672e9726fde59 Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Sat, 20 Nov 2021 23:43:32 +0530
Subject: [PATCH 02/54] Bail out on first task completion

---
 proxy/core/acceptor/threadless.py | 55 +++++++++++++++++--------------
 1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py
index e94b2a09dc..c4bb8fa112 100644
--- a/proxy/core/acceptor/threadless.py
+++ b/proxy/core/acceptor/threadless.py
@@ -23,7 +23,7 @@
 from multiprocessing import connection
 from multiprocessing.reduction import recv_handle
 
-from typing import Dict, Optional, Tuple, List, Generator, Any
+from typing import Dict, Optional, Tuple, List, Generator, Set
 
 from .work import Work
 
@@ -32,7 +32,7 @@
 from ...common.logger import Logger
 from ...common.types import Readables, Writables
-from ...common.constants import DEFAULT_TIMEOUT, DEFAULT_SELECTOR_SELECT_TIMEOUT
+from ...common.constants import DEFAULT_SELECTOR_SELECT_TIMEOUT
 
 logger = logging.getLogger(__name__)
@@ -75,6 +75,7 @@ def __init__(
         self.works: Dict[int, Work] = {}
         self.selector: Optional[selectors.DefaultSelector] = None
         self.loop: Optional[asyncio.AbstractEventLoop] = None
+        self.unfinished: Set[asyncio.Task] = set()
 
     @contextlib.contextmanager
     def selected_events(self) -> Generator[
@@ -111,22 +112,18 @@ def selected_events(self) -> Generator[
 
     # TODO: Use correct future typing annotations
     async def wait_for_tasks(
-        self, tasks: Dict[int, Any]
+        self,
+        tasks: Set[asyncio.Task],
     ) -> None:
-        for work_id in tasks:
-            # TODO: Resolving one handle_events here can block
-            # resolution of other tasks. This can happen when handle_events
-            # is slow.
-            #
-            # Instead of sequential await, a better option would be to await on
-            # list of async handle_events. This will allow all handlers to run
-            # concurrently without blocking each other.
-            try:
-                teardown = await asyncio.wait_for(tasks[work_id], DEFAULT_TIMEOUT)
-                if teardown:
-                    self.cleanup(work_id)
-            except asyncio.TimeoutError:
-                self.cleanup(work_id)
+        assert len(tasks) > 0
+        finished, self.unfinished = await asyncio.wait(
+            tasks,
+            timeout=DEFAULT_SELECTOR_SELECT_TIMEOUT,
+            return_when=asyncio.FIRST_COMPLETED,
+        )
+        for f in finished:
+            if f.result():
+                self.cleanup(int(f.get_name()))
 
     def fromfd(self, fileno: int) -> socket.socket:
         return socket.fromfd(
@@ -174,17 +171,22 @@ def cleanup(self, work_id: int) -> None:
         del self.works[work_id]
         os.close(work_id)
 
-    def _prepare_tasks(self, work_by_ids: Dict[int, Tuple[Readables, Writables]]) -> Dict[int, Any]:
-        tasks = {}
+    def _prepare_tasks(
+        self,
+        work_by_ids: Dict[int, Tuple[Readables, Writables]],
+    ) -> Set[asyncio.Task]:
+        tasks: Set[asyncio.Task] = set()
         assert self.loop
         for work_id in work_by_ids:
             readables, writables = work_by_ids[work_id]
-            tasks[work_id] = self.loop.create_task(
+            task = self.loop.create_task(
                 self.works[work_id].handle_events(
                     readables,
                     writables,
                 ),
             )
+            task.set_name(work_id)
+            tasks.add(task)
         return tasks
 
     def run_once(self) -> None:
@@ -209,11 +211,14 @@ def run_once(self) -> None:
             if self.client_queue.fileno() in work_by_ids:
                 self.accept_client()
                 del work_by_ids[self.client_queue.fileno()]
-            self.loop.run_until_complete(
-                self.wait_for_tasks(
-                    self._prepare_tasks(work_by_ids),
-                ),
-            )
+            self.unfinished.update(self._prepare_tasks(work_by_ids))
+            num_works = len(self.unfinished)
+            if num_works > 0:
+                logger.debug('Executing {0} works'.format(num_works))
+                self.loop.run_until_complete(self.wait_for_tasks(self.unfinished))
+                logger.debug('Done executing works, {0} pending'.format(
+                    len(self.unfinished),
+                ))
 
             # Remove and shutdown inactive workers
             self.cleanup_inactive()
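
The heart of PATCH 02 is the new `wait_for_tasks`: instead of awaiting each `handle_events` task to completion sequentially, it awaits the whole set with `return_when=asyncio.FIRST_COMPLETED`, tears down whatever finished, and carries `self.unfinished` into the next tick. A runnable sketch of that bail-out loop under the same convention (work id stored in the task name; sleeps stand in for real work):

```python
import asyncio


async def work(i: int) -> bool:
    await asyncio.sleep(0.05 * (i + 1))
    return True     # True means "tear this work down"


async def main() -> None:
    unfinished = set()
    for i in range(3):
        task = asyncio.ensure_future(work(i))
        task.set_name(i)    # mirrors task.set_name(work_id) in the patch
        unfinished.add(task)
    while unfinished:
        # Bail out as soon as any one task completes; the rest stay pending
        finished, unfinished = await asyncio.wait(
            unfinished,
            timeout=0.1,
            return_when=asyncio.FIRST_COMPLETED,
        )
        for f in finished:
            if f.result():
                print('cleanup work#{0}'.format(f.get_name()))


asyncio.run(main())
```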

From 40e38cb69355b2c3dd16b3f647fef3b1e7c4cded Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Sat, 20 Nov 2021 23:48:40 +0530
Subject: [PATCH 03/54] mypy

---
 proxy/core/acceptor/local.py      | 4 ++--
 proxy/core/acceptor/threadless.py | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/proxy/core/acceptor/local.py b/proxy/core/acceptor/local.py
index 104a72ddf8..f500cb6686 100644
--- a/proxy/core/acceptor/local.py
+++ b/proxy/core/acceptor/local.py
@@ -19,7 +19,7 @@
 import multiprocessing.synchronize
 
 from multiprocessing import connection
-from typing import Optional, Tuple, List, Dict, Generator
+from typing import Optional, Tuple, List, Dict, Generator, Any
 
 from ...common.utils import is_threadless
 from ...common.types import Readables, Writables
@@ -42,7 +42,7 @@ def __init__(
         idd: int,
         flags: argparse.Namespace,
         sock: socket.socket,
-        evq: queue.Queue,
+        evq: queue.Queue[Any],
         executor_queues: List[connection.Connection],
         executor_pids: List[int],
         executor_locks: List[multiprocessing.synchronize.Lock],
diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py
index c4bb8fa112..baeb8dddf2 100644
--- a/proxy/core/acceptor/threadless.py
+++ b/proxy/core/acceptor/threadless.py
@@ -75,7 +75,7 @@ def __init__(
         self.works: Dict[int, Work] = {}
         self.selector: Optional[selectors.DefaultSelector] = None
         self.loop: Optional[asyncio.AbstractEventLoop] = None
-        self.unfinished: Set[asyncio.Task] = set()
+        self.unfinished: Set[asyncio.Task[bool]] = set()
 
     @contextlib.contextmanager
     def selected_events(self) -> Generator[
@@ -113,7 +113,7 @@ def selected_events(self) -> Generator[
     # TODO: Use correct future typing annotations
     async def wait_for_tasks(
         self,
-        tasks: Set[asyncio.Task],
+        tasks: Set[asyncio.Task[bool]],
     ) -> None:
         assert len(tasks) > 0
         finished, self.unfinished = await asyncio.wait(
@@ -174,8 +174,8 @@ def cleanup(self, work_id: int) -> None:
     def _prepare_tasks(
         self,
         work_by_ids: Dict[int, Tuple[Readables, Writables]],
-    ) -> Set[asyncio.Task]:
-        tasks: Set[asyncio.Task] = set()
+    ) -> Set[asyncio.Task[bool]]:
+        tasks: Set[asyncio.Task[bool]] = set()
         assert self.loop
         for work_id in work_by_ids:
             readables, writables = work_by_ids[work_id]

From e0fd4ded64579fdf1d5f6ba42fe5aa6eae4f4d10 Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Sun, 21 Nov 2021 01:38:00 +0530
Subject: [PATCH 04/54] Add `helper/benchmark.sh` and fix threaded which must
 now use asyncio (reduced performance of threaded)

---
 README.md                         | 85 +++++++++++++++++++++----------
 helper/benchmark.sh               | 43 ++++++++++++++++
 helper/monitor_open_files.sh      |  2 +-
 proxy/core/acceptor/local.py      |  2 +-
 proxy/core/acceptor/threadless.py |  3 +-
 proxy/http/handler.py             |  7 +--
 6 files changed, 108 insertions(+), 34 deletions(-)
 create mode 100755 helper/benchmark.sh

diff --git a/README.md b/README.md
index 6da3423661..f4ff04ddea 100644
--- a/README.md
+++ b/README.md
@@ -109,6 +109,7 @@
 - [Setup Local Environment](#setup-local-environment)
   - [Setup Git Hooks](#setup-git-hooks)
   - [Sending a Pull Request](#sending-a-pull-request)
+- [Benchmarks](#benchmarks)
 - [Flags](#flags)
 - [Changelog](#changelog)
   - [v2.x](#v2x)
@@ -126,36 +127,56 @@
 
   ```console
   # On Macbook Pro 2019 / 2.4 GHz 8-Core Intel Core i9 / 32 GB RAM
-  ❯ hey -n 10000 -c 100 http://localhost:8899/http-route-example
-
-  Summary:
-    Total:        0.3248 secs
-    Slowest:      0.1007 secs
-    Fastest:      0.0002 secs
-    Average:      0.0028 secs
-    Requests/sec: 30784.7958
-
-  Total data:   190000 bytes
-  Size/request: 19 bytes
-
-  Response time histogram:
-    0.000 [1]    |
-    0.010 [9533] |■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
-    0.020 [384]  |■■
-
-  Latency distribution:
-    10% in 0.0004 secs
-    25% in 0.0007 secs
-    50% in 0.0013 secs
-    75% in 0.0029 secs
-    90% in 0.0057 secs
-    95% in 0.0097 secs
-    99% in 0.0185 secs
-
-  Status code distribution:
-    [200] 10000 responses
+  ❯ ./helper/benchmark.sh
+  CONCURRENCY: 100 workers, TOTAL REQUESTS: 100000 req, QPS: 5000 req/sec, TIMEOUT: 1 sec
+
+  Summary:
+    Total:        3.1560 secs
+    Slowest:      0.0375 secs
+    Fastest:      0.0006 secs
+    Average:      0.0031 secs
+    Requests/sec: 31685.9140
+
+  Total data:   1900000 bytes
+  Size/request: 19 bytes
+
+  Response time histogram:
+    0.001 [1]     |
+    0.004 [91680] |■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
+    0.008 [7929]  |■■■
+    0.012 [263]   |
+    0.015 [29]    |
+    0.019 [8]     |
+    0.023 [23]    |
+    0.026 [15]    |
+    0.030 [27]    |
+    0.034 [16]    |
+    0.037 [9]     |
+
+
+  Latency distribution:
+    10% in 0.0022 secs
+    25% in 0.0025 secs
+    50% in 0.0029 secs
+    75% in 0.0034 secs
+    90% in 0.0041 secs
+    95% in 0.0048 secs
+    99% in 0.0066 secs
+
+  Details (average, fastest, slowest):
+    DNS+dialup: 0.0000 secs, 0.0006 secs, 0.0375 secs
+    DNS-lookup: 0.0000 secs, 0.0000 secs, 0.0000 secs
+    req write:  0.0000 secs, 0.0000 secs, 0.0046 secs
+    resp wait:  0.0030 secs, 0.0006 secs, 0.0320 secs
+    resp read:  0.0000 secs, 0.0000 secs, 0.0029 secs
+
+  Status code distribution:
+    [200] 100000 responses
   ```
 
+  PS: `proxy.py` and benchmark tools are running on the same machine during the above load test.
+  Checkout the repo and try it for yourself. See [Benchmarks](#benchmarks) for more details.
+
 - Lightweight
   - Uses only `~5-20MB` RAM
   - No external dependency other than standard Python library
@@ -1977,6 +1998,14 @@
 Every pull request is tested using GitHub actions. See [GitHub
 workflow](https://github.com/abhinavsingh/proxy.py/tree/develop/.github/workflows)
 for list of tests.
 
+# Benchmarks
+
+Simply run the following command from repo root to start benchmark
+
+```console
+❯ ./helper/benchmark.sh
+```
+
 # Flags
 
 ```console
diff --git a/helper/benchmark.sh b/helper/benchmark.sh
new file mode 100755
index 0000000000..f8bc64fc8d
--- /dev/null
+++ b/helper/benchmark.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# proxy.py
+# ~~~~~~~~
+# ⚡⚡⚡ Fast, Lightweight, Programmable, TLS interception capable
+# proxy server for Application debugging, testing and development.
+#
+# :copyright: (c) 2013-present by Abhinav Singh and contributors.
+# :license: BSD, see LICENSE for more details.
+#
+TIMEOUT=1
+QPS=5000
+CONCURRENCY=100
+MILLION=100000
+OPEN_FILE_LIMIT=65536
+BACKLOG=OPEN_FILE_LIMIT
+PID_FILE=/tmp/proxy.pid
+
+ulimit -n $OPEN_FILE_LIMIT
+
+# time python -m \
+#   proxy \
+#   --enable-web-server \
+#   --plugin proxy.plugin.WebServerPlugin \
+#   --backlog $BACKLOG \
+#   --open-file-limit $OPEN_FILE_LIMIT \
+#   --pid-file $PID_FILE \
+#   --log-file /dev/null
+
+PID=$(cat $PID_FILE)
+if [[ -z "$PID" ]]; then
+  echo "Either pid file doesn't exist or no pid found in the pid file"
+  exit 1
+fi
+ADDR=$(lsof -Pan -p $PID -i | grep -v COMMAND | awk '{ print $9 }')
+
+echo "CONCURRENCY: $CONCURRENCY workers, TOTAL REQUESTS: $MILLION req, QPS: $QPS req/sec, TIMEOUT: $TIMEOUT sec"
+hey \
+  -n $MILLION \
+  -c $CONCURRENCY \
+  -q $QPS \
+  -t $TIMEOUT \
+  http://$ADDR/http-route-example
diff --git a/helper/monitor_open_files.sh b/helper/monitor_open_files.sh
index 7bfa48c631..7a8caa0eb1 100755
--- a/helper/monitor_open_files.sh
+++ b/helper/monitor_open_files.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-
+#
 # proxy.py
 # ~~~~~~~~
 # ⚡⚡⚡ Fast, Lightweight, Programmable, TLS interception capable
diff --git a/proxy/core/acceptor/local.py b/proxy/core/acceptor/local.py
index f500cb6686..aa13e95c98 100644
--- a/proxy/core/acceptor/local.py
+++ b/proxy/core/acceptor/local.py
@@ -109,7 +109,7 @@ def run_once(self) -> bool:
     def run(self) -> None:
         self._selector = selectors.DefaultSelector()
         try:
-            while 1:
+            while True:
                 if self.run_once():
                     break
         except KeyboardInterrupt:
diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py
index baeb8dddf2..779dad95a7 100644
--- a/proxy/core/acceptor/threadless.py
+++ b/proxy/core/acceptor/threadless.py
@@ -118,7 +118,8 @@ async def wait_for_tasks(
         assert len(tasks) > 0
         finished, self.unfinished = await asyncio.wait(
             tasks,
-            timeout=DEFAULT_SELECTOR_SELECT_TIMEOUT,
+            timeout=(self.flags.num_workers / 2) *
+            DEFAULT_SELECTOR_SELECT_TIMEOUT,
             return_when=asyncio.FIRST_COMPLETED,
         )
         for f in finished:
diff --git a/proxy/http/handler.py b/proxy/http/handler.py
index 59b5c5294d..f3d2ac6c26 100644
--- a/proxy/http/handler.py
+++ b/proxy/http/handler.py
@@ -16,6 +16,7 @@
 import time
 import errno
 import socket
+import asyncio
 import logging
 import selectors
 import contextlib
@@ -324,7 +325,7 @@ def run(self) -> None:
                         'between client and server connection, tearing down...',
                     )
                     break
-                teardown = self._run_once()
+                teardown = asyncio.run(self._run_once())
                 if teardown:
                     break
         except KeyboardInterrupt:  # pragma: no cover
@@ -378,9 +379,9 @@ def _selected_events(self) -> SelectedEventsGeneratorType:
             for fd in events:
                 self.selector.unregister(fd)
 
-    def _run_once(self) -> bool:
+    async def _run_once(self) -> bool:
         with self._selected_events() as (readables, writables):
-            teardown = self.handle_events(readables, writables)
+            teardown = await self.handle_events(readables, writables)
             if teardown:
                 return True
         return False

From f5998dd460b153a5f96046832a04043adc7a2a6e Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Sun, 21 Nov 2021 14:07:20 +0530
Subject: [PATCH 05/54] Print open file diff from `benchmark.sh`

---
 helper/benchmark.sh | 40 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/helper/benchmark.sh b/helper/benchmark.sh
index f8bc64fc8d..4bc73c5acf 100755
--- a/helper/benchmark.sh
+++ b/helper/benchmark.sh
@@ -8,10 +8,33 @@
 # :copyright: (c) 2013-present by Abhinav Singh and contributors.
 # :license: BSD, see LICENSE for more details.
 #
+usage() {
+  echo "Usage: ./helper/benchmark.sh"
+  echo "You must run this script from proxy.py repo root."
+}
+
+DIRNAME=$(dirname "$0")
+if [ "$DIRNAME" != "./helper" ]; then
+  usage
+  exit 1
+fi
+
+BASENAME=$(basename "$0")
+if [ "$BASENAME" != "benchmark.sh" ]; then
+  usage
+  exit 1
+fi
+
+PWD=$(pwd)
+if [ $(basename $PWD) != "proxy.py" ]; then
+  usage
+  exit 1
+fi
+
 TIMEOUT=1
-QPS=5000
+QPS=8000
 CONCURRENCY=100
-MILLION=100000
+TOTAL_REQUESTS=1000000
 OPEN_FILE_LIMIT=65536
 BACKLOG=OPEN_FILE_LIMIT
 PID_FILE=/tmp/proxy.pid
@@ -34,10 +57,19 @@ if [[ -z "$PID" ]]; then
 fi
 ADDR=$(lsof -Pan -p $PID -i | grep -v COMMAND | awk '{ print $9 }')
 
-echo "CONCURRENCY: $CONCURRENCY workers, TOTAL REQUESTS: $MILLION req, QPS: $QPS req/sec, TIMEOUT: $TIMEOUT sec"
+PRE_RUN_OPEN_FILES=$(./helper/monitor_open_files.sh)
+
+echo "CONCURRENCY: $CONCURRENCY workers, TOTAL REQUESTS: $TOTAL_REQUESTS req, QPS: $QPS req/sec, TIMEOUT: $TIMEOUT sec"
 hey \
-  -n $MILLION \
+  -n $TOTAL_REQUESTS \
   -c $CONCURRENCY \
   -q $QPS \
   -t $TIMEOUT \
   http://$ADDR/http-route-example
+
+POST_RUN_OPEN_FILES=$(./helper/monitor_open_files.sh)
+
+echo $output
+
+echo "Open files diff:"
+diff <( echo "$PRE_RUN_OPEN_FILES" ) <( echo "$POST_RUN_OPEN_FILES" )

From d5ef5fdcb2638e86925f6d707f8c822b284854e5 Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Sun, 21 Nov 2021 14:08:39 +0530
Subject: [PATCH 06/54] Add `--local-executor` flag, disabled by default for
 now until tests are updated

---
 proxy/common/constants.py       |  1 +
 proxy/core/acceptor/acceptor.py | 78 ++++++++++++++++++++++++++++++---
 2 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/proxy/common/constants.py b/proxy/common/constants.py
index bf5773f3f5..e679a2e8ef 100644
--- a/proxy/common/constants.py
+++ b/proxy/common/constants.py
@@ -100,6 +100,7 @@ def _env_threadless_compliant() -> bool:
 DEFAULT_STATIC_SERVER_DIR = os.path.join(PROXY_PY_DIR, "public")
 DEFAULT_MIN_COMPRESSION_LIMIT = 20  # In bytes
 DEFAULT_THREADLESS = _env_threadless_compliant()
+DEFAULT_LOCAL_EXECUTOR = False
 DEFAULT_TIMEOUT = 10.0
 DEFAULT_VERSION = False
 DEFAULT_HTTP_PORT = 80
diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py
index ef7b81902b..14b9a86886 100644
--- a/proxy/core/acceptor/acceptor.py
+++ b/proxy/core/acceptor/acceptor.py
@@ -18,23 +18,40 @@
 import logging
 import argparse
 import selectors
+import threading
 import multiprocessing
 import multiprocessing.synchronize
 
 from multiprocessing import connection
 from multiprocessing.reduction import recv_handle
 
-from typing import List, Optional
+from typing import List, Optional, Tuple
 
+from ...common.flag import flags
+from ...common.utils import is_threadless
 from ...common.logger import Logger
+from ...common.constants import DEFAULT_LOCAL_EXECUTOR
 
 from ..event import EventQueue
 
 from .local import LocalExecutor
+from .executors import ThreadlessPool
 
 logger = logging.getLogger(__name__)
 
+flags.add_argument(
+    '--local-executor',
+    action='store_true',
+    default=DEFAULT_LOCAL_EXECUTOR,
+    help='Default: ' + ('True' if DEFAULT_LOCAL_EXECUTOR else 'False') + '. ' +
+    'Disabled by default. When enabled acceptors will make use of ' +
+    'local (same process) executor instead of distributing load across ' +
+    'remote (other process) executors. Enable this option to achieve CPU affinity between ' +
+    'acceptors and executors, instead of using underlying OS kernel scheduling algorithm.',
+)
+
 
 class Acceptor(multiprocessing.Process):
     """Work acceptor process.
@@ -100,6 +117,8 @@ def __init__(
         # File descriptor used to accept new work
         # Currently, a socket fd is assumed.
         self.sock: Optional[socket.socket] = None
+        # Internals
+        self._total: Optional[int] = None
         self._local: Optional[LocalExecutor] = None
 
     def run_once(self) -> None:
@@ -113,9 +132,14 @@ def run_once(self) -> None:
                 locked = True
                 for _, mask in events:
                     if mask & selectors.EVENT_READ:
-                        if self.sock is not None and self._local is not None:
+                        if self.sock is not None:
                             conn, addr = self.sock.accept()
-                            self._local.evq.put((conn, addr))
+                            if self.flags.local_executor:
+                                assert self._local
+                                self._local.evq.put((conn, addr))
+                            else:
+                                addr = None if addr == '' else addr
+                                self._work(conn, addr)
             except BlockingIOError:
                 pass
             finally:
@@ -139,7 +163,8 @@ def run(self) -> None:
             type=socket.SOCK_STREAM,
         )
         try:
-            self._start_local()
+            if self.flags.local_executor:
+                self._start_local()
             self.selector.register(self.sock, selectors.EVENT_READ)
             while not self.running.is_set():
                 self.run_once()
@@ -147,7 +172,8 @@ def run(self) -> None:
             pass
         finally:
             self.selector.unregister(self.sock)
-            self._stop_local()
+            if self.flags.local_executor:
+                self._stop_local()
             self.sock.close()
             logger.debug('Acceptor#%d shutdown', self.idd)
 
@@ -163,10 +189,50 @@ def _start_local(self) -> None:
             self.executor_locks,
             self.event_queue,
         )
-        self._local.setDaemon(True)
+        self._local.daemon = True
         self._local.start()
 
     def _stop_local(self) -> None:
         if self._local is not None:
             self._local.evq.put(False)
             self._local.join()
+
+    def _work(self, conn: socket.socket, addr: Optional[Tuple[str, int]]) -> None:
+        self._total = self._total or 0
+        if is_threadless(self.flags.threadless, self.flags.threaded):
+            # Index of worker to which this work should be dispatched
+            # Use round-robin strategy by default.
+            #
+            # By default all acceptors will start sending work to
+            # 1st workers. To randomize, we offset index by idd.
+            index = (self._total + self.idd) % self.flags.num_workers
+            thread = threading.Thread(
+                target=ThreadlessPool.delegate,
+                args=(
+                    self.executor_pids[index],
+                    self.executor_queues[index],
+                    self.executor_locks[index],
+                    conn,
+                    addr,
+                    self.flags.unix_socket_path,
+                ),
+            )
+            thread.start()
+            logger.debug(
+                'Dispatched work#{0}.{1} to worker#{2}'.format(
+                    self.idd, self._total, index,
+                ),
+            )
+        else:
+            _, thread = ThreadlessPool.start_threaded_work(
+                self.flags,
+                conn, addr,
+                event_queue=self.event_queue,
+                publisher_id=self.__class__.__name__,
+            )
+            logger.debug(
+                'Started work#{0}.{1} in thread#{2}'.format(
+                    self.idd, self._total, thread.ident,
+                ),
+            )
+        self._total += 1
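
With PATCH 06, the new behavior becomes opt-in: `--local-executor` keeps dispatch inside the acceptor process, while the default path still round-robins work across the remote executor pool via `_work()`. A hypothetical invocation combining the new flag with the web-server flags referenced in `helper/benchmark.sh`:

```console
❯ python -m proxy --local-executor --enable-web-server --plugin proxy.plugin.WebServerPlugin
```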

From aad066b4deea1186c123bcabf4a0c77756574609 Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Sun, 21 Nov 2021 14:09:28 +0530
Subject: [PATCH 07/54] Async `handle_readables` and `handle_writables` for
 `HttpProtocolHandlerPlugin` interface (doesn't impact proxy/web plugins for
 now)

---
 proxy/core/base/tcp_server.py |  8 ++++----
 proxy/http/handler.py         | 16 ++++++++--------
 proxy/http/plugin.py          |  4 ++--
 proxy/http/proxy/server.py    |  4 ++--
 proxy/http/server/web.py      |  4 ++--
 5 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/proxy/core/base/tcp_server.py b/proxy/core/base/tcp_server.py
index 03e5b9f8be..2911d454de 100644
--- a/proxy/core/base/tcp_server.py
+++ b/proxy/core/base/tcp_server.py
@@ -85,9 +85,9 @@ async def handle_events(
         writables: Writables,
     ) -> bool:
         """Return True to shutdown work."""
-        teardown = self.handle_writables(
+        teardown = await self.handle_writables(
             writables,
-        ) or self.handle_readables(readables)
+        ) or await self.handle_readables(readables)
         if teardown:
             logger.debug(
                 'Shutting down client {0} connection'.format(
@@ -96,7 +96,7 @@ async def handle_events(
             )
         return teardown
 
-    def handle_writables(self, writables: Writables) -> bool:
+    async def handle_writables(self, writables: Writables) -> bool:
         teardown = False
         if self.work.connection in writables and self.work.has_buffer():
             logger.debug(
@@ -109,7 +109,7 @@ def handle_writables(self, writables: Writables) -> bool:
                 self.must_flush_before_shutdown = False
         return teardown
 
-    def handle_readables(self, readables: Readables) -> bool:
+    async def handle_readables(self, readables: Readables) -> bool:
         teardown = False
         if self.work.connection in readables:
             data = self.work.recv(self.flags.client_recvbuf_size)
diff --git a/proxy/http/handler.py b/proxy/http/handler.py
index f3d2ac6c26..7c462814dd 100644
--- a/proxy/http/handler.py
+++ b/proxy/http/handler.py
@@ -178,24 +178,24 @@ async def handle_events(
     ) -> bool:
         """Returns True if proxy must tear down."""
         # Flush buffer for ready to write sockets
-        teardown = self.handle_writables(writables)
+        teardown = await self.handle_writables(writables)
         if teardown:
             return True
 
         # Invoke plugin.write_to_descriptors
         for plugin in self.plugins.values():
-            teardown = plugin.write_to_descriptors(writables)
+            teardown = await plugin.write_to_descriptors(writables)
             if teardown:
                 return True
 
         # Read from ready to read sockets
-        teardown = self.handle_readables(readables)
+        teardown = await self.handle_readables(readables)
         if teardown:
             return True
 
         # Invoke plugin.read_from_descriptors
         for plugin in self.plugins.values():
-            teardown = plugin.read_from_descriptors(readables)
+            teardown = await plugin.read_from_descriptors(readables)
             if teardown:
                 return True
 
@@ -249,7 +249,7 @@ def handle_data(self, data: memoryview) -> Optional[bool]:
                 return True
         return False
 
-    def handle_writables(self, writables: Writables) -> bool:
+    async def handle_writables(self, writables: Writables) -> bool:
         if self.work.connection in writables and self.work.has_buffer():
             logger.debug('Client is ready for writes, flushing buffer')
             self.last_activity = time.time()
@@ -266,7 +266,7 @@ def handle_writables(self, writables: Writables) -> bool:
             try:
                 # Call super() for client flush
-                teardown = super().handle_writables(writables)
+                teardown = await super().handle_writables(writables)
                 if teardown:
                     return True
             except BrokenPipeError:
@@ -279,12 +279,12 @@ def handle_writables(self, writables: Writables) -> bool:
                 return True
         return False
 
-    def handle_readables(self, readables: Readables) -> bool:
+    async def handle_readables(self, readables: Readables) -> bool:
         if self.work.connection in readables:
             logger.debug('Client is ready for reads, reading')
             self.last_activity = time.time()
             try:
-                teardown = super().handle_readables(readables)
+                teardown = await super().handle_readables(readables)
                 if teardown:
                     return teardown
             except ssl.SSLWantReadError:  # Try again later
diff --git a/proxy/http/plugin.py b/proxy/http/plugin.py
index f5d49ef192..56f02141a6 100644
--- a/proxy/http/plugin.py
+++ b/proxy/http/plugin.py
@@ -79,7 +79,7 @@ def get_descriptors(
         return [], []  # pragma: no cover
 
     @abstractmethod
-    def write_to_descriptors(self, w: Writables) -> bool:
+    async def write_to_descriptors(self, w: Writables) -> bool:
         """Implementations must now write/flush data over the socket.
 
         Note that buffer management is in-build into the connection classes.
@@ -90,7 +90,7 @@ def write_to_descriptors(self, w: Writables) -> bool:
         return False  # pragma: no cover
 
     @abstractmethod
-    def read_from_descriptors(self, r: Readables) -> bool:
+    async def read_from_descriptors(self, r: Readables) -> bool:
         """Implementations must now read data over the socket."""
         return False  # pragma: no cover
diff --git a/proxy/http/proxy/server.py b/proxy/http/proxy/server.py
index e7c1e9d34f..c2f79e03a1 100644
--- a/proxy/http/proxy/server.py
+++ b/proxy/http/proxy/server.py
@@ -218,7 +218,7 @@ def _close_and_release(self) -> bool:
             self.upstream = None
         return True
 
-    def write_to_descriptors(self, w: Writables) -> bool:
+    async def write_to_descriptors(self, w: Writables) -> bool:
         if (self.upstream and self.upstream.connection not in w) or not self.upstream:
             # Currently, we just call write/read block of each plugins. It is
             # plugins responsibility to ignore this callback, if passed descriptors
@@ -251,7 +251,7 @@ def write_to_descriptors(self, w: Writables) -> bool:
             return self._close_and_release()
         return False
 
-    def read_from_descriptors(self, r: Readables) -> bool:
+    async def read_from_descriptors(self, r: Readables) -> bool:
         if (
             self.upstream and not
             self.upstream.closed and
diff --git a/proxy/http/server/web.py b/proxy/http/server/web.py
index d9ff773d88..194e77e591 100644
--- a/proxy/http/server/web.py
+++ b/proxy/http/server/web.py
@@ -245,14 +245,14 @@ def get_descriptors(
             w.extend(w1)
         return r, w
 
-    def write_to_descriptors(self, w: Writables) -> bool:
+    async def write_to_descriptors(self, w: Writables) -> bool:
         for plugin in self.plugins.values():
             teardown = plugin.write_to_descriptors(w)
             if teardown:
                 return True
         return False
 
-    def read_from_descriptors(self, r: Readables) -> bool:
+    async def read_from_descriptors(self, r: Readables) -> bool:
         for plugin in self.plugins.values():
             teardown = plugin.read_from_descriptors(r)
             if teardown:
                 return True

From b25b0413b5278054f920f70c7285eeb23eb76bc1 Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Sun, 21 Nov 2021 15:19:22 +0530
Subject: [PATCH 08/54] Async `get_events`

---
 examples/web_scraper.py           |  2 +-
 helper/benchmark.sh               |  4 ++-
 proxy/core/acceptor/acceptor.py   | 13 ++++---
 proxy/core/acceptor/local.py      | 60 ++-----------------------
 proxy/core/acceptor/threadless.py | 19 +++++----
 proxy/core/acceptor/work.py       |  2 +-
 proxy/core/base/tcp_server.py     |  8 +++--
 proxy/core/base/tcp_tunnel.py     |  4 +--
 proxy/http/handler.py             | 31 +++++++---------
 9 files changed, 43 insertions(+), 100 deletions(-)

diff --git a/examples/web_scraper.py b/examples/web_scraper.py
index 1b18ef4b91..9bc348f376 100644
--- a/examples/web_scraper.py
+++ b/examples/web_scraper.py
@@ -40,7 +40,7 @@ class WebScraper(Work):
     only PUBSUB protocol.
     """
 
-    def get_events(self) -> Dict[socket.socket, int]:
+    async def get_events(self) -> Dict[socket.socket, int]:
         """Return sockets and events (read or write) that we are interested in."""
         return {}
diff --git a/helper/benchmark.sh b/helper/benchmark.sh
index 4bc73c5acf..b012e9d0ad 100755
--- a/helper/benchmark.sh
+++ b/helper/benchmark.sh
@@ -34,7 +34,7 @@ fi
 TIMEOUT=1
 QPS=8000
 CONCURRENCY=100
-TOTAL_REQUESTS=1000000
+TOTAL_REQUESTS=100000
 OPEN_FILE_LIMIT=65536
 BACKLOG=OPEN_FILE_LIMIT
 PID_FILE=/tmp/proxy.pid
@@ -73,3 +73,5 @@
 
 echo "Open files diff:"
 diff <( echo "$PRE_RUN_OPEN_FILES" ) <( echo "$POST_RUN_OPEN_FILES" )
+
+# while true; do netstat -ant | grep .8899 | awk '{print $6}' | sort | uniq -c | sort -n; sleep 1; done
diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py
index 14b9a86886..c11bf05ff2 100644
--- a/proxy/core/acceptor/acceptor.py
+++ b/proxy/core/acceptor/acceptor.py
@@ -134,6 +134,8 @@ def run_once(self) -> None:
                     if mask & selectors.EVENT_READ:
                         if self.sock is not None:
                             conn, addr = self.sock.accept()
+                            logging.debug(
+                                'Accepting new work#{0}'.format(conn.fileno()))
                             if self.flags.local_executor:
                                 assert self._local
                                 self._local.evq.put((conn, addr))
@@ -219,20 +221,21 @@ def _work(self, conn: socket.socket, addr: Optional[Tuple[str, int]]) -> None:
             )
             thread.start()
             logger.debug(
-                'Dispatched work#{0}.{1} to worker#{2}'.format(
-                    self.idd, self._total, index,
+                'Dispatched work#{0}.{1}.{2} to worker#{3}'.format(
+                    conn.fileno(), self.idd, self._total, index,
                 ),
             )
         else:
             _, thread = ThreadlessPool.start_threaded_work(
                 self.flags,
-                conn, addr,
+                conn,
+                addr,
                 event_queue=self.event_queue,
                 publisher_id=self.__class__.__name__,
             )
             logger.debug(
-                'Started work#{0}.{1} in thread#{2}'.format(
-                    self.idd, self._total, thread.ident,
+                'Started work#{0}.{1}.{2} in thread#{3}'.format(
+                    conn.fileno(), self.idd, self._total, thread.ident,
                 ),
             )
         self._total += 1
diff --git a/proxy/core/acceptor/local.py b/proxy/core/acceptor/local.py
index aa13e95c98..3017053c53 100644
--- a/proxy/core/acceptor/local.py
+++ b/proxy/core/acceptor/local.py
@@ -15,18 +15,14 @@
 import argparse
 import selectors
 import threading
-import contextlib
 import multiprocessing.synchronize
 
 from multiprocessing import connection
-from typing import Optional, Tuple, List, Dict, Generator, Any
+from typing import Optional, Tuple, List, Dict, Any
 
 from ...common.utils import is_threadless
-from ...common.types import Readables, Writables
-from ...common.constants import DEFAULT_SELECTOR_SELECT_TIMEOUT
 
-from ..event import EventQueue, eventNames
-from ..connection import TcpClientConnection
+from ..event import EventQueue
 
 from .executors import ThreadlessPool
 from .work import Work
@@ -63,36 +59,6 @@ def __init__(
         self._selector: Optional[selectors.DefaultSelector] = None
         self._works: Dict[int, Work] = {}
 
-    @contextlib.contextmanager
-    def selected_events(self) -> Generator[
-        Tuple[Readables, Writables],
-        None, None,
-    ]:
-        assert self._selector is not None
-        events: Dict[socket.socket, int] = {}
-        for work in self._works.values():
-            worker_events = work.get_events()
-            events.update(worker_events)
-            for fd in worker_events:
-                # Can throw ValueError: Invalid file descriptor: -1
-                #
-                # A guard within Work classes may not help here due to
-                # asynchronous nature. Hence, threadless will handle
-                # ValueError exceptions raised by selector.register
-                # for invalid fd.
-                self._selector.register(fd, worker_events[fd])
-        ev = self._selector.select(timeout=DEFAULT_SELECTOR_SELECT_TIMEOUT)
-        readables = []
-        writables = []
-        for key, mask in ev:
-            if mask & selectors.EVENT_READ:
-                readables.append(key.fileobj)
-            if mask & selectors.EVENT_WRITE:
-                writables.append(key.fileobj)
-        yield (readables, writables)
-        for fd in events:
-            self._selector.unregister(fd)
-
     def run_once(self) -> bool:
         try:
             payload = self.evq.get(block=True, timeout=0.1)
@@ -115,29 +81,7 @@ def run(self) -> None:
         except KeyboardInterrupt:
             pass
 
-    def work(self, conn: socket.socket, addr: Optional[Tuple[str, int]]) -> None:
-        fileno = conn.fileno()
-        self._works[fileno] = self.flags.work_klass(
-            TcpClientConnection(conn=conn, addr=addr),
-            flags=self.flags,
-            event_queue=self.event_queue,
-        )
-        self._works[fileno].publish_event(
-            event_name=eventNames.WORK_STARTED,
-            event_payload={'fileno': fileno, 'addr': addr},
-            publisher_id=self.__class__.__name__,
-        )
-        try:
-            self._works[fileno].initialize()
-        except Exception as e:
-            logger.exception(
-                'Exception occurred during initialization',
-                exc_info=e,
-            )
-            self._cleanup(fileno)
-
     def _cleanup(self, work_id: int) -> None:
-        # TODO: HttpProtocolHandler.shutdown can call flush which may block
         self._works[work_id].shutdown()
         del self._works[work_id]
         os.close(work_id)
diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py
index 779dad95a7..5f54676069 100644
--- a/proxy/core/acceptor/threadless.py
+++ b/proxy/core/acceptor/threadless.py
@@ -23,7 +23,7 @@
 from multiprocessing import connection
 from multiprocessing.reduction import recv_handle
 
-from typing import Dict, Optional, Tuple, List, Generator, Set
+from typing import Dict, Optional, Tuple, List, AsyncGenerator, Set
 
 from .work import Work
 
@@ -77,16 +77,15 @@ def __init__(
         self.loop: Optional[asyncio.AbstractEventLoop] = None
         self.unfinished: Set[asyncio.Task[bool]] = set()
 
-    @contextlib.contextmanager
-    def selected_events(self) -> Generator[
-        Dict[int, Tuple[Readables, Writables]],
-        None, None,
+    @contextlib.asynccontextmanager
+    async def selected_events(self) -> AsyncGenerator[
+        Dict[int, Tuple[Readables, Writables]], None,
     ]:
         assert self.selector is not None
         events: Dict[socket.socket, int] = {}
         for work_id in self.works:
             work = self.works[work_id]
-            worker_events = work.get_events()
+            worker_events = await work.get_events()
             events.update(worker_events)
             for fd in worker_events:
                 # Can throw ValueError: Invalid file descriptor: -1
@@ -190,9 +189,9 @@ def _prepare_tasks(
             tasks.add(task)
         return tasks
 
-    def run_once(self) -> None:
+    async def run_once(self) -> None:
         assert self.loop is not None
-        with self.selected_events() as work_by_ids:
+        async with self.selected_events() as work_by_ids:
             if len(work_by_ids) == 0:
                 # Remove and shutdown inactive connections
                 self.cleanup_inactive()
                 return
@@ -216,7 +215,7 @@ async def run_once(self) -> None:
             num_works = len(self.unfinished)
             if num_works > 0:
                 logger.debug('Executing {0} works'.format(num_works))
-                self.loop.run_until_complete(self.wait_for_tasks(self.unfinished))
+                await self.wait_for_tasks(self.unfinished)
                 logger.debug('Done executing works, {0} pending'.format(
                     len(self.unfinished),
                 ))
@@ -239,7 +238,7 @@ def run(self) -> None:
             self.loop = asyncio.get_event_loop_policy().get_event_loop()
             while not self.running.is_set():
                 # logger.debug('Working on {0} works'.format(len(self.works)))
-                self.run_once()
+                self.loop.run_until_complete(self.run_once())
         except KeyboardInterrupt:
             pass
         finally:
diff --git a/proxy/core/acceptor/work.py b/proxy/core/acceptor/work.py
index 486133164a..d50fc9e0e7 100644
--- a/proxy/core/acceptor/work.py
+++ b/proxy/core/acceptor/work.py
@@ -43,7 +43,7 @@ def __init__(
         self.work = work
 
     @abstractmethod
-    def get_events(self) -> Dict[socket.socket, int]:
+    async def get_events(self) -> Dict[socket.socket, int]:
         """Return sockets and events (read or write) that we are interested in."""
         return {}  # pragma: no cover
diff --git a/proxy/core/base/tcp_server.py b/proxy/core/base/tcp_server.py
index 03e5b9f8be..b11644d683 100644
--- a/proxy/core/base/tcp_server.py
+++ b/proxy/core/base/tcp_server.py
@@ -52,11 +52,13 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         self.must_flush_before_shutdown = False
         if self.flags.unix_socket_path:  # pragma: no cover
             logger.debug(
-                'Connection accepted from {0}'.format(self.work.address),
+                'Work#{0} accepted from {1}'.format(
+                    self.work.connection.fileno(), self.work.address),
             )
         else:
             logger.debug(
-                'Connection accepted from {0}'.format(self.work.address),
+                'Work#{0} accepted from {1}'.format(
+                    self.work.connection.fileno(), self.work.address),
             )
 
     @abstractmethod
@@ -64,7 +66,7 @@ def handle_data(self, data: memoryview) -> Optional[bool]:
         """Optionally return True to close client connection."""
         pass  # pragma: no cover
 
-    def get_events(self) -> Dict[socket.socket, int]:
+    async def get_events(self) -> Dict[socket.socket, int]:
         events = {}
         # We always want to read from client
         # Register for EVENT_READ events
diff --git a/proxy/core/base/tcp_tunnel.py b/proxy/core/base/tcp_tunnel.py
index 5eaf1cecc5..ec9b1ccb7a 100644
--- a/proxy/core/base/tcp_tunnel.py
+++ b/proxy/core/base/tcp_tunnel.py
@@ -65,9 +65,9 @@ def shutdown(self) -> None:
             self.upstream.close()
         super().shutdown()
 
-    def get_events(self) -> Dict[socket.socket, int]:
+    async def get_events(self) -> Dict[socket.socket, int]:
         # Get default client events
-        ev: Dict[socket.socket, int] = super().get_events()
+        ev: Dict[socket.socket, int] = await super().get_events()
         # Read from server if we are connected
         if self.upstream and self.upstream._conn is not None:
             ev[self.upstream.connection] = selectors.EVENT_READ
diff --git a/proxy/http/handler.py b/proxy/http/handler.py
index 7c462814dd..9d59f26220 100644
--- a/proxy/http/handler.py
+++ b/proxy/http/handler.py
@@ -21,7 +21,7 @@
 import selectors
 import contextlib
 
-from typing import Tuple, List, Union, Optional, Generator, Dict, Any
+from typing import Tuple, List, Union, Optional, AsyncGenerator, Dict, Any
 
 from .plugin import HttpProtocolHandlerPlugin
 from .parser import HttpParser, httpParserStates, httpParserTypes
@@ -65,9 +65,9 @@
     'data sent or received by the client.',
 )
 
-SelectedEventsGeneratorType = Generator[
+SelectedEventsGeneratorType = AsyncGenerator[
     Tuple[Readables, Writables],
-    None, None,
+    None,
 ]
@@ -125,19 +125,16 @@ def shutdown(self) -> None:
         # Flush pending buffer in threaded mode only.
         # For threadless mode, BaseTcpServerHandler implements
         # the must_flush_before_shutdown logic automagically.
-        if self.selector:
+        if self.selector and self.work.has_buffer():
             self._flush()
-
         # Invoke plugin.on_client_connection_close
         for plugin in self.plugins.values():
             plugin.on_client_connection_close()
-
         logger.debug(
             'Closing client connection %r '
             'at address %s has buffer %s' %
             (self.work.connection, self.work.address, self.work.has_buffer()),
         )
-
         conn = self.work.connection
         # Unwrap if wrapped before shutdown.
         if self._encryption_enabled() and \
@@ -152,9 +149,9 @@ def shutdown(self) -> None:
         logger.debug('Client connection closed')
         super().shutdown()
 
-    def get_events(self) -> Dict[socket.socket, int]:
+    async def get_events(self) -> Dict[socket.socket, int]:
         # Get default client events
-        events: Dict[socket.socket, int] = super().get_events()
+        events: Dict[socket.socket, int] = await super().get_events()
         # HttpProtocolHandlerPlugin.get_descriptors
         for plugin in self.plugins.values():
             plugin_read_desc, plugin_write_desc = plugin.get_descriptors()
@@ -181,24 +178,20 @@ async def handle_events(
         teardown = await self.handle_writables(writables)
         if teardown:
             return True
-
         # Invoke plugin.write_to_descriptors
         for plugin in self.plugins.values():
             teardown = await plugin.write_to_descriptors(writables)
             if teardown:
                 return True
-
         # Read from ready to read sockets
         teardown = await self.handle_readables(readables)
         if teardown:
             return True
-
         # Invoke plugin.read_from_descriptors
         for plugin in self.plugins.values():
             teardown = await plugin.read_from_descriptors(readables)
             if teardown:
                 return True
-
         return False
@@ -357,10 +354,10 @@ def _optionally_wrap_socket(
             conn = wrap_socket(conn, self.flags.keyfile, self.flags.certfile)
         return conn
 
-    @contextlib.contextmanager
-    def _selected_events(self) -> SelectedEventsGeneratorType:
+    @contextlib.asynccontextmanager
+    async def _selected_events(self) -> SelectedEventsGeneratorType:
         assert self.selector
-        events = self.get_events()
+        events = await self.get_events()
         for fd in events:
             self.selector.register(fd, events[fd])
         ev = self.selector.select(timeout=DEFAULT_SELECTOR_SELECT_TIMEOUT)
@@ -376,7 +373,7 @@ def _selected_events(self) -> SelectedEventsGeneratorType:
             self.selector.unregister(fd)
 
     async def _run_once(self) -> bool:
-        with self._selected_events() as (readables, writables):
+        async with self._selected_events() as (readables, writables):
             teardown = await self.handle_events(readables, writables)
             if teardown:
                 return True
         return False
 
     def _flush(self) -> None:
         assert self.selector
-        if not self.work.has_buffer():
-            return
+        logger.debug('Flushing pending data')
         try:
             self.selector.register(
                 self.work.connection,
                 selectors.EVENT_WRITE,
             )
             while self.work.has_buffer():
+                logging.debug('Waiting for client read ready')
                 ev: List[
                     Tuple[selectors.SelectorKey, int]
                 ] = self.selector.select(timeout=DEFAULT_SELECTOR_SELECT_TIMEOUT)

From f0042dee49b879c68bf5fce1230dffede03915c0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 21 Nov 2021 10:07:09 +0000
Subject: [PATCH 09/54] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 proxy/core/acceptor/acceptor.py   | 3 ++-
 proxy/core/acceptor/threadless.py | 8 +++++---
 proxy/core/base/tcp_server.py     | 6 ++++--
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py
index c11bf05ff2..e5aba2f35a 100644
--- a/proxy/core/acceptor/acceptor.py
+++ b/proxy/core/acceptor/acceptor.py
@@ -135,7 +135,8 @@ def run_once(self) -> None:
                         if self.sock is not None:
                             conn, addr = self.sock.accept()
                             logging.debug(
-                                'Accepting new work#{0}'.format(conn.fileno()))
+                                'Accepting new work#{0}'.format(conn.fileno()),
+                            )
                             if self.flags.local_executor:
                                 assert self._local
                                 self._local.evq.put((conn, addr))
diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py
index 5f54676069..a3e2e9f48e 100644
--- a/proxy/core/acceptor/threadless.py
+++ b/proxy/core/acceptor/threadless.py
@@ -216,9 +216,11 @@ async def run_once(self) -> None:
             if num_works > 0:
                 logger.debug('Executing {0} works'.format(num_works))
                 await self.wait_for_tasks(self.unfinished)
-                logger.debug('Done executing works, {0} pending'.format(
-                    len(self.unfinished),
-                ))
+                logger.debug(
+                    'Done executing works, {0} pending'.format(
+                        len(self.unfinished),
+                    ),
+                )
 
             # Remove and shutdown inactive workers
             self.cleanup_inactive()
diff --git a/proxy/core/base/tcp_server.py b/proxy/core/base/tcp_server.py
index b11644d683..2aa0f0574d 100644
--- a/proxy/core/base/tcp_server.py
+++ b/proxy/core/base/tcp_server.py
@@ -53,12 +53,14 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         if self.flags.unix_socket_path:  # pragma: no cover
             logger.debug(
                 'Work#{0} accepted from {1}'.format(
-                    self.work.connection.fileno(), self.work.address),
+                    self.work.connection.fileno(), self.work.address,
+                ),
             )
         else:
             logger.debug(
                 'Work#{0} accepted from {1}'.format(
-                    self.work.connection.fileno(), self.work.address),
+                    self.work.connection.fileno(), self.work.address,
+                ),
             )

From 2d311a610d329a99fc0df2bb57e0a8273275d99b Mon Sep 17 00:00:00 2001
From: Abhinav Singh
Date: Sun, 21 Nov 2021 19:46:17 +0530
Subject: [PATCH 10/54] Address tests after async changes

---
 proxy/common/flag.py                               |  10 +-
 tests/assertions.py                                |  26 ++
 .../exceptions/test_http_proxy_auth_failed.py      |  62 ++-
 tests/http/test_http_proxy.py                      |  50 +--
 .../http/test_http_proxy_tls_interception.py       |  60 ++-
 tests/http/test_protocol_handler.py                | 279 +++++++------
 tests/http/test_web_server.py                      | 391 ++++++++++--------
 tests/plugin/test_http_proxy_plugins.py            | 113 +++--
 ...ttp_proxy_plugins_with_tls_interception.py      | 132 +++---
 9 files changed, 601 insertions(+), 522 deletions(-)
 create mode 100644 tests/assertions.py

diff --git a/proxy/common/flag.py b/proxy/common/flag.py
index 1c52438149..14348e663a 100644
--- a/proxy/common/flag.py
+++ b/proxy/common/flag.py
@@ -333,9 +333,17 @@ def initialize(
         )
         args.pid_file = cast(
             Optional[str], opts.get(
-                'pid_file', args.pid_file,
+                'pid_file',
+                args.pid_file,
             ),
         )
+        args.local_executor = cast(
+            bool,
+            opts.get(
+                'local_executor',
+                args.local_executor,
+            )
+        )
 
         args.proxy_py_data_dir = DEFAULT_DATA_DIRECTORY_PATH
         os.makedirs(args.proxy_py_data_dir, exist_ok=True)
diff --git a/tests/assertions.py b/tests/assertions.py
new file mode 100644
index 0000000000..85d6ca6f3f
--- /dev/null
+++ b/tests/assertions.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+"""
+    proxy.py
+    ~~~~~~~~
+    ⚡⚡⚡ Fast, Lightweight, Pluggable, TLS interception capable proxy server focused on
+    Network monitoring, controls & Application development, testing, debugging.
+
+    :copyright: (c) 2013-present by Abhinav Singh and contributors.
+    :license: BSD, see LICENSE for more details.
+""" +from typing import Any + + +class Assertions: + + def assertTrue(self, obj: Any) -> None: + assert obj + + def assertFalse(self, obj: Any) -> None: + assert not obj + + def assertEqual(self, obj1: Any, obj2: Any) -> None: + assert obj1 == obj2 + + def assertNotEqual(self, obj1: Any, obj2: Any) -> None: + assert obj1 != obj2 diff --git a/tests/http/exceptions/test_http_proxy_auth_failed.py b/tests/http/exceptions/test_http_proxy_auth_failed.py index a65fc045ff..6fa9606c0b 100644 --- a/tests/http/exceptions/test_http_proxy_auth_failed.py +++ b/tests/http/exceptions/test_http_proxy_auth_failed.py @@ -8,9 +8,10 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. """ +import pytest import selectors -import unittest -from unittest import mock + +from pytest_mock import MockerFixture from proxy.common.flag import FlagParser from proxy.http.exception.proxy_auth_failed import ProxyAuthenticationFailed @@ -18,33 +19,32 @@ from proxy.core.connection import TcpClientConnection from proxy.common.utils import build_http_request +from ...assertions import Assertions + -class TestHttpProxyAuthFailed(unittest.TestCase): +class TestHttpProxyAuthFailed(Assertions): - @mock.patch('selectors.DefaultSelector') - @mock.patch('socket.fromfd') - def setUp( - self, - mock_fromfd: mock.Mock, - mock_selector: mock.Mock, - ) -> None: - self.mock_fromfd = mock_fromfd - self.mock_selector = mock_selector + @pytest.fixture(autouse=True) + def setUp(self, mocker: MockerFixture) -> None: + self.mock_fromfd = mocker.patch('socket.fromfd') + self.mock_selector = mocker.patch('selectors.DefaultSelector') + self.mock_server_conn = mocker.patch( + 'proxy.http.proxy.server.TcpServerConnection') self.fileno = 10 self._addr = ('127.0.0.1', 54382) self.flags = FlagParser.initialize( ["--basic-auth", "user:pass"], threaded=True, ) - self._conn = mock_fromfd.return_value + self._conn = self.mock_fromfd.return_value self.protocol_handler = HttpProtocolHandler( TcpClientConnection(self._conn, self._addr), flags=self.flags, ) self.protocol_handler.initialize() - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_proxy_auth_fails_without_cred(self, mock_server_conn: mock.Mock) -> None: + @pytest.mark.asyncio + async def test_proxy_auth_fails_without_cred(self) -> None: self._conn.recv.return_value = build_http_request( b'GET', b'http://upstream.host/not-found.html', headers={ @@ -62,17 +62,16 @@ def test_proxy_auth_fails_without_cred(self, mock_server_conn: mock.Mock) -> Non selectors.EVENT_READ, )], ] - - self.protocol_handler._run_once() - mock_server_conn.assert_not_called() + await self.protocol_handler._run_once() + self.mock_server_conn.assert_not_called() self.assertEqual(self.protocol_handler.work.has_buffer(), True) self.assertEqual( self.protocol_handler.work.buffer[0], ProxyAuthenticationFailed.RESPONSE_PKT, ) self._conn.send.assert_not_called() - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_proxy_auth_fails_with_invalid_cred(self, mock_server_conn: mock.Mock) -> None: + @pytest.mark.asyncio + async def test_proxy_auth_fails_with_invalid_cred(self) -> None: self._conn.recv.return_value = build_http_request( b'GET', b'http://upstream.host/not-found.html', headers={ @@ -91,17 +90,16 @@ def test_proxy_auth_fails_with_invalid_cred(self, mock_server_conn: mock.Mock) - selectors.EVENT_READ, )], ] - - self.protocol_handler._run_once() - mock_server_conn.assert_not_called() + await self.protocol_handler._run_once() + 
self.mock_server_conn.assert_not_called() self.assertEqual(self.protocol_handler.work.has_buffer(), True) self.assertEqual( self.protocol_handler.work.buffer[0], ProxyAuthenticationFailed.RESPONSE_PKT, ) self._conn.send.assert_not_called() - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_proxy_auth_works_with_valid_cred(self, mock_server_conn: mock.Mock) -> None: + @pytest.mark.asyncio + async def test_proxy_auth_works_with_valid_cred(self) -> None: self._conn.recv.return_value = build_http_request( b'GET', b'http://upstream.host/not-found.html', headers={ @@ -120,13 +118,12 @@ def test_proxy_auth_works_with_valid_cred(self, mock_server_conn: mock.Mock) -> selectors.EVENT_READ, )], ] - - self.protocol_handler._run_once() - mock_server_conn.assert_called_once() + await self.protocol_handler._run_once() + self.mock_server_conn.assert_called_once() self.assertEqual(self.protocol_handler.work.has_buffer(), False) - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_proxy_auth_works_with_mixed_case_basic_string(self, mock_server_conn: mock.Mock) -> None: + @pytest.mark.asyncio + async def test_proxy_auth_works_with_mixed_case_basic_string(self) -> None: self._conn.recv.return_value = build_http_request( b'GET', b'http://upstream.host/not-found.html', headers={ @@ -145,7 +142,6 @@ def test_proxy_auth_works_with_mixed_case_basic_string(self, mock_server_conn: m selectors.EVENT_READ, )], ] - - self.protocol_handler._run_once() - mock_server_conn.assert_called_once() + await self.protocol_handler._run_once() + self.mock_server_conn.assert_called_once() self.assertEqual(self.protocol_handler.work.has_buffer(), False) diff --git a/tests/http/test_http_proxy.py b/tests/http/test_http_proxy.py index fde5da11b0..058159b4bd 100644 --- a/tests/http/test_http_proxy.py +++ b/tests/http/test_http_proxy.py @@ -8,9 +8,10 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. 
""" -import unittest +import pytest import selectors -from unittest import mock + +from pytest_mock import MockerFixture from proxy.common.constants import DEFAULT_HTTP_PORT from proxy.common.flag import FlagParser @@ -21,27 +22,24 @@ from proxy.common.utils import build_http_request -class TestHttpProxyPlugin(unittest.TestCase): +class TestHttpProxyPlugin: - @mock.patch('selectors.DefaultSelector') - @mock.patch('socket.fromfd') - def setUp( - self, - mock_fromfd: mock.Mock, - mock_selector: mock.Mock, - ) -> None: - self.mock_fromfd = mock_fromfd - self.mock_selector = mock_selector + @pytest.fixture(autouse=True) + def setUp(self, mocker: MockerFixture) -> None: + self.mock_server_conn = mocker.patch( + 'proxy.http.proxy.server.TcpServerConnection') + self.mock_selector = mocker.patch('selectors.DefaultSelector') + self.mock_fromfd = mocker.patch('socket.fromfd') self.fileno = 10 self._addr = ('127.0.0.1', 54382) self.flags = FlagParser.initialize(threaded=True) - self.plugin = mock.MagicMock() + self.plugin = mocker.MagicMock() self.flags.plugins = { b'HttpProtocolHandlerPlugin': [HttpProxyPlugin], b'HttpProxyBasePlugin': [self.plugin], } - self._conn = mock_fromfd.return_value + self._conn = self.mock_fromfd.return_value self.protocol_handler = HttpProtocolHandler( TcpClientConnection(self._conn, self._addr), flags=self.flags, @@ -51,11 +49,8 @@ def setUp( def test_proxy_plugin_initialized(self) -> None: self.plugin.assert_called() - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_proxy_plugin_on_and_before_upstream_connection( - self, - mock_server_conn: mock.Mock, - ) -> None: + @pytest.mark.asyncio + async def test_proxy_plugin_on_and_before_upstream_connection(self) -> None: self.plugin.return_value.write_to_descriptors.return_value = False self.plugin.return_value.read_from_descriptors.return_value = False self.plugin.return_value.before_upstream_connection.side_effect = lambda r: r @@ -80,16 +75,15 @@ def test_proxy_plugin_on_and_before_upstream_connection( )], ] - self.protocol_handler._run_once() - mock_server_conn.assert_called_with('upstream.host', DEFAULT_HTTP_PORT) + await self.protocol_handler._run_once() + + self.mock_server_conn.assert_called_with( + 'upstream.host', DEFAULT_HTTP_PORT) self.plugin.return_value.before_upstream_connection.assert_called() self.plugin.return_value.handle_client_request.assert_called() - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_proxy_plugin_before_upstream_connection_can_teardown( - self, - mock_server_conn: mock.Mock, - ) -> None: + @pytest.mark.asyncio + async def test_proxy_plugin_before_upstream_connection_can_teardown(self) -> None: self.plugin.return_value.write_to_descriptors.return_value = False self.plugin.return_value.read_from_descriptors.return_value = False self.plugin.return_value.before_upstream_connection.side_effect = HttpProtocolException() @@ -112,8 +106,8 @@ def test_proxy_plugin_before_upstream_connection_can_teardown( )], ] - self.protocol_handler._run_once() - mock_server_conn.assert_not_called() + await self.protocol_handler._run_once() + self.mock_server_conn.assert_not_called() self.plugin.return_value.before_upstream_connection.assert_called() def test_proxy_plugin_plugins_can_teardown_from_write_to_descriptors(self) -> None: diff --git a/tests/http/test_http_proxy_tls_interception.py b/tests/http/test_http_proxy_tls_interception.py index dda98a925e..e22acb494c 100644 --- a/tests/http/test_http_proxy_tls_interception.py +++ 
b/tests/http/test_http_proxy_tls_interception.py @@ -8,13 +8,14 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. """ +import ssl import uuid -import unittest import socket -import ssl +import pytest import selectors from typing import Any +from pytest_mock import MockerFixture from unittest import mock from proxy.common.constants import DEFAULT_CA_FILE @@ -24,39 +25,26 @@ from proxy.common.utils import build_http_request, bytes_ from proxy.common.flag import FlagParser +from ..assertions import Assertions + + +class TestHttpProxyTlsInterception(Assertions): -class TestHttpProxyTlsInterception(unittest.TestCase): - - @mock.patch('ssl.wrap_socket') - @mock.patch('ssl.create_default_context') - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - @mock.patch('proxy.http.proxy.server.gen_public_key') - @mock.patch('proxy.http.proxy.server.gen_csr') - @mock.patch('proxy.http.proxy.server.sign_csr') - @mock.patch('selectors.DefaultSelector') - @mock.patch('socket.fromfd') - def test_e2e( - self, - mock_fromfd: mock.Mock, - mock_selector: mock.Mock, - mock_sign_csr: mock.Mock, - mock_gen_csr: mock.Mock, - mock_gen_public_key: mock.Mock, - mock_server_conn: mock.Mock, - mock_ssl_context: mock.Mock, - mock_ssl_wrap: mock.Mock, - ) -> None: + @pytest.mark.asyncio + async def test_e2e(self, mocker: MockerFixture) -> None: host, port = uuid.uuid4().hex, 443 netloc = '{0}:{1}'.format(host, port) - self.mock_fromfd = mock_fromfd - self.mock_selector = mock_selector - self.mock_sign_csr = mock_sign_csr - self.mock_gen_csr = mock_gen_csr - self.mock_gen_public_key = mock_gen_public_key - self.mock_server_conn = mock_server_conn - self.mock_ssl_context = mock_ssl_context - self.mock_ssl_wrap = mock_ssl_wrap + self.mock_fromfd = mocker.patch('socket.fromfd') + self.mock_selector = mocker.patch('selectors.DefaultSelector') + self.mock_sign_csr = mocker.patch('proxy.http.proxy.server.sign_csr') + self.mock_gen_csr = mocker.patch('proxy.http.proxy.server.gen_csr') + self.mock_gen_public_key = mocker.patch( + 'proxy.http.proxy.server.gen_public_key') + self.mock_server_conn = mocker.patch( + 'proxy.http.proxy.server.TcpServerConnection') + self.mock_ssl_context = mocker.patch('ssl.create_default_context') + self.mock_ssl_wrap = mocker.patch('ssl.wrap_socket') self.mock_sign_csr.return_value = True self.mock_gen_csr.return_value = True @@ -95,7 +83,7 @@ def mock_connection() -> Any: b'HttpProtocolHandlerPlugin': [self.plugin, HttpProxyPlugin], b'HttpProxyBasePlugin': [self.proxy_plugin], } - self._conn = mock_fromfd.return_value + self._conn = self.mock_fromfd.return_value self.protocol_handler = HttpProtocolHandler( TcpClientConnection(self._conn, self._addr), flags=self.flags, @@ -121,9 +109,11 @@ def mock_connection() -> Any: self._conn.recv.return_value = connect_request # Prepare mocked HttpProtocolHandlerPlugin + async def asyncReturnBool(val: bool): + return val self.plugin.return_value.get_descriptors.return_value = ([], []) - self.plugin.return_value.write_to_descriptors.return_value = False - self.plugin.return_value.read_from_descriptors.return_value = False + self.plugin.return_value.write_to_descriptors.return_value = asyncReturnBool(False) + self.plugin.return_value.read_from_descriptors.return_value = asyncReturnBool(False) self.plugin.return_value.on_client_data.side_effect = lambda raw: raw self.plugin.return_value.on_request_complete.return_value = False self.plugin.return_value.on_response_chunk.side_effect = lambda chunk: 
chunk @@ -148,7 +138,7 @@ def mock_connection() -> Any: )], ] - self.protocol_handler._run_once() + await self.protocol_handler._run_once() # Assert our mocked plugins invocations self.plugin.return_value.get_descriptors.assert_called() diff --git a/tests/http/test_protocol_handler.py b/tests/http/test_protocol_handler.py index c37740a4f3..1f5f73fc63 100644 --- a/tests/http/test_protocol_handler.py +++ b/tests/http/test_protocol_handler.py @@ -8,12 +8,13 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. """ -import unittest -import selectors import base64 +import pytest +import selectors -from typing import cast from unittest import mock +from pytest_mock import MockerFixture +from typing import cast, Any from proxy.common.plugins import Plugins from proxy.common.flag import FlagParser @@ -27,19 +28,33 @@ from proxy.http.exception import ProxyAuthenticationFailed, ProxyConnectionFailed from proxy.http import HttpProtocolHandler +from ..assertions import Assertions -class TestHttpProtocolHandler(unittest.TestCase): - @mock.patch('selectors.DefaultSelector') - @mock.patch('socket.fromfd') - def setUp( - self, - mock_fromfd: mock.Mock, - mock_selector: mock.Mock, - ) -> None: +def mock_selector_for_client_read(self: Any) -> None: + self.mock_selector.return_value.select.return_value = [ + ( + selectors.SelectorKey( + fileobj=self._conn, + fd=self._conn.fileno, + events=selectors.EVENT_READ, + data=None, + ), + selectors.EVENT_READ, + ), + ] + + +class TestHttpProtocolHandlerWithoutServerMock(Assertions): + + @pytest.fixture(autouse=True) + def setUp(self, mocker: MockerFixture) -> None: + self.mock_fromfd = mocker.patch('socket.fromfd') + self.mock_selector = mocker.patch('selectors.DefaultSelector') + self.fileno = 10 self._addr = ('127.0.0.1', 54382) - self._conn = mock_fromfd.return_value + self._conn = self.mock_fromfd.return_value self.http_server_port = 65535 self.flags = FlagParser.initialize(threaded=True) @@ -48,20 +63,88 @@ def setUp( bytes_(PLUGIN_WEB_SERVER), ]) - self.mock_selector = mock_selector self.protocol_handler = HttpProtocolHandler( - TcpClientConnection(self._conn, self._addr), flags=self.flags, + TcpClientConnection(self._conn, self._addr), + flags=self.flags, + ) + self.protocol_handler.initialize() + + @pytest.mark.asyncio + async def test_proxy_connection_failed(self) -> None: + mock_selector_for_client_read(self) + self._conn.recv.return_value = CRLF.join([ + b'GET http://unknown.domain HTTP/1.1', + b'Host: unknown.domain', + CRLF, + ]) + await self.protocol_handler._run_once() + self.assertEqual( + self.protocol_handler.work.buffer[0], + ProxyConnectionFailed.RESPONSE_PKT, + ) + + @pytest.mark.asyncio + async def test_proxy_authentication_failed(self) -> None: + self._conn = self.mock_fromfd.return_value + mock_selector_for_client_read(self) + flags = FlagParser.initialize( + auth_code=base64.b64encode(b'user:pass'), + threaded=True, + ) + flags.plugins = Plugins.load([ + bytes_(PLUGIN_HTTP_PROXY), + bytes_(PLUGIN_WEB_SERVER), + bytes_(PLUGIN_PROXY_AUTH), + ]) + self.protocol_handler = HttpProtocolHandler( + TcpClientConnection(self._conn, self._addr), flags=flags, ) self.protocol_handler.initialize() + self._conn.recv.return_value = CRLF.join([ + b'GET http://abhinavsingh.com HTTP/1.1', + b'Host: abhinavsingh.com', + CRLF, + ]) + await self.protocol_handler._run_once() + self.assertEqual( + self.protocol_handler.work.buffer[0], + ProxyAuthenticationFailed.RESPONSE_PKT, + ) + + +class 
TestHttpProtocolHandler(Assertions): + + @pytest.fixture(autouse=True) + def setUp(self, mocker: MockerFixture) -> None: + self.mock_fromfd = mocker.patch('socket.fromfd') + self.mock_selector = mocker.patch('selectors.DefaultSelector') + self.mock_server_connection = mocker.patch( + 'proxy.http.proxy.server.TcpServerConnection') + + self.fileno = 10 + self._addr = ('127.0.0.1', 54382) + self._conn = self.mock_fromfd.return_value + + self.http_server_port = 65535 + self.flags = FlagParser.initialize(threaded=True) + self.flags.plugins = Plugins.load([ + bytes_(PLUGIN_HTTP_PROXY), + bytes_(PLUGIN_WEB_SERVER), + ]) - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_http_get(self, mock_server_connection: mock.Mock) -> None: - server = mock_server_connection.return_value + self.protocol_handler = HttpProtocolHandler( + TcpClientConnection(self._conn, self._addr), + flags=self.flags, + ) + self.protocol_handler.initialize() + + @pytest.mark.asyncio + async def test_http_get(self) -> None: + server = self.mock_server_connection.return_value server.connect.return_value = True server.buffer_size.return_value = 0 - self.mock_selector_for_client_read_read_server_write( - self.mock_selector, server, - ) + + self.mock_selector_for_client_read_and_server_write(server) # Send request line assert self.http_server_port is not None @@ -69,7 +152,9 @@ def test_http_get(self, mock_server_connection: mock.Mock) -> None: b'GET http://localhost:%d HTTP/1.1' % self.http_server_port ) + CRLF - self.protocol_handler._run_once() + + await self.protocol_handler._run_once() + self.assertEqual( self.protocol_handler.request.state, httpParserStates.LINE_RCVD, @@ -88,14 +173,15 @@ def test_http_get(self, mock_server_connection: mock.Mock) -> None: b'Proxy-Connection: Keep-Alive', CRLF, ]) - self.assert_data_queued(mock_server_connection, server) - self.protocol_handler._run_once() + await self.assert_data_queued(server) + await self.protocol_handler._run_once() server.flush.assert_called_once() - def assert_tunnel_response( - self, mock_server_connection: mock.Mock, server: mock.Mock, + async def assert_tunnel_response( + self, + server: mock.Mock, ) -> None: - self.protocol_handler._run_once() + await self.protocol_handler._run_once() self.assertTrue( cast( HttpProxyPlugin, @@ -106,7 +192,7 @@ def assert_tunnel_response( self.protocol_handler.work.buffer[0], HttpProxyPlugin.PROXY_TUNNEL_ESTABLISHED_RESPONSE_PKT, ) - mock_server_connection.assert_called_once() + self.mock_server_connection.assert_called_once() server.connect.assert_called_once() server.queue.assert_not_called() server.closed = False @@ -117,9 +203,9 @@ def assert_tunnel_response( assert parser.code is not None self.assertEqual(int(parser.code), 200) - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_http_tunnel(self, mock_server_connection: mock.Mock) -> None: - server = mock_server_connection.return_value + @pytest.mark.asyncio + async def test_http_tunnel(self) -> None: + server = self.mock_server_connection.return_value server.connect.return_value = True def has_buffer() -> bool: @@ -181,74 +267,22 @@ def has_buffer() -> bool: b'Proxy-Connection: Keep-Alive', CRLF, ]) - self.assert_tunnel_response(mock_server_connection, server) + await self.assert_tunnel_response(server) # Dispatch tunnel established response to client - self.protocol_handler._run_once() - self.assert_data_queued_to_server(server) + await self.protocol_handler._run_once() + await self.assert_data_queued_to_server(server) - 
self.protocol_handler._run_once() + await self.protocol_handler._run_once() self.assertEqual(server.queue.call_count, 1) server.flush.assert_called_once() - def test_proxy_connection_failed(self) -> None: - self.mock_selector_for_client_read(self.mock_selector) - self._conn.recv.return_value = CRLF.join([ - b'GET http://unknown.domain HTTP/1.1', - b'Host: unknown.domain', - CRLF, - ]) - self.protocol_handler._run_once() - self.assertEqual( - self.protocol_handler.work.buffer[0], - ProxyConnectionFailed.RESPONSE_PKT, - ) + @pytest.mark.asyncio + async def test_authenticated_proxy_http_get(self) -> None: + self._conn = self.mock_fromfd.return_value + mock_selector_for_client_read(self) - @mock.patch('selectors.DefaultSelector') - @mock.patch('socket.fromfd') - def test_proxy_authentication_failed( - self, - mock_fromfd: mock.Mock, - mock_selector: mock.Mock, - ) -> None: - self._conn = mock_fromfd.return_value - self.mock_selector_for_client_read(mock_selector) - flags = FlagParser.initialize( - auth_code=base64.b64encode(b'user:pass'), - threaded=True, - ) - flags.plugins = Plugins.load([ - bytes_(PLUGIN_HTTP_PROXY), - bytes_(PLUGIN_WEB_SERVER), - bytes_(PLUGIN_PROXY_AUTH), - ]) - self.protocol_handler = HttpProtocolHandler( - TcpClientConnection(self._conn, self._addr), flags=flags, - ) - self.protocol_handler.initialize() - self._conn.recv.return_value = CRLF.join([ - b'GET http://abhinavsingh.com HTTP/1.1', - b'Host: abhinavsingh.com', - CRLF, - ]) - self.protocol_handler._run_once() - self.assertEqual( - self.protocol_handler.work.buffer[0], - ProxyAuthenticationFailed.RESPONSE_PKT, - ) - - @mock.patch('selectors.DefaultSelector') - @mock.patch('socket.fromfd') - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_authenticated_proxy_http_get( - self, mock_server_connection: mock.Mock, - mock_fromfd: mock.Mock, - mock_selector: mock.Mock, - ) -> None: - self._conn = mock_fromfd.return_value - self.mock_selector_for_client_read(mock_selector) - - server = mock_server_connection.return_value + server = self.mock_server_connection.return_value server.connect.return_value = True server.buffer_size.return_value = 0 @@ -268,14 +302,14 @@ def test_authenticated_proxy_http_get( assert self.http_server_port is not None self._conn.recv.return_value = b'GET http://localhost:%d HTTP/1.1' % self.http_server_port - self.protocol_handler._run_once() + await self.protocol_handler._run_once() self.assertEqual( self.protocol_handler.request.state, httpParserStates.INITIALIZED, ) self._conn.recv.return_value = CRLF - self.protocol_handler._run_once() + await self.protocol_handler._run_once() self.assertEqual( self.protocol_handler.request.state, httpParserStates.LINE_RCVD, @@ -290,23 +324,15 @@ def test_authenticated_proxy_http_get( b'Proxy-Authorization: Basic dXNlcjpwYXNz', CRLF, ]) - self.assert_data_queued(mock_server_connection, server) - - @mock.patch('selectors.DefaultSelector') - @mock.patch('socket.fromfd') - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_authenticated_proxy_http_tunnel( - self, mock_server_connection: mock.Mock, - mock_fromfd: mock.Mock, - mock_selector: mock.Mock, - ) -> None: - server = mock_server_connection.return_value + await self.assert_data_queued(server) + + @pytest.mark.asyncio + async def test_authenticated_proxy_http_tunnel(self) -> None: + server = self.mock_server_connection.return_value server.connect.return_value = True server.buffer_size.return_value = 0 - self._conn = mock_fromfd.return_value - 
self.mock_selector_for_client_read_read_server_write( - mock_selector, server, - ) + self._conn = self.mock_fromfd.return_value + self.mock_selector_for_client_read_and_server_write(server) flags = FlagParser.initialize( auth_code=base64.b64encode(b'user:pass'), @@ -331,17 +357,17 @@ def test_authenticated_proxy_http_tunnel( b'Proxy-Authorization: Basic dXNlcjpwYXNz', CRLF, ]) - self.assert_tunnel_response(mock_server_connection, server) + await self.assert_tunnel_response(server) self.protocol_handler.work.flush() - self.assert_data_queued_to_server(server) + await self.assert_data_queued_to_server(server) - self.protocol_handler._run_once() + await self.protocol_handler._run_once() server.flush.assert_called_once() - def mock_selector_for_client_read_read_server_write( - self, mock_selector: mock.Mock, server: mock.Mock, + def mock_selector_for_client_read_and_server_write( + self, server: mock.Mock, ) -> None: - mock_selector.return_value.select.side_effect = [ + self.mock_selector.return_value.select.side_effect = [ [ ( selectors.SelectorKey( @@ -377,15 +403,15 @@ def mock_selector_for_client_read_read_server_write( ], ] - def assert_data_queued( - self, mock_server_connection: mock.Mock, server: mock.Mock, + async def assert_data_queued( + self, server: mock.Mock, ) -> None: - self.protocol_handler._run_once() + await self.protocol_handler._run_once() self.assertEqual( self.protocol_handler.request.state, httpParserStates.COMPLETE, ) - mock_server_connection.assert_called_once() + self.mock_server_connection.assert_called_once() server.connect.assert_called_once() server.closed = False assert self.http_server_port is not None @@ -401,7 +427,7 @@ def assert_data_queued( self.assertEqual(server.queue.call_args_list[0][0][0].tobytes(), pkt) server.buffer_size.return_value = len(pkt) - def assert_data_queued_to_server(self, server: mock.Mock) -> None: + async def assert_data_queued_to_server(self, server: mock.Mock) -> None: assert self.http_server_port is not None self.assertEqual( self._conn.send.call_args[0][0], @@ -416,21 +442,8 @@ def assert_data_queued_to_server(self, server: mock.Mock) -> None: ]) self._conn.recv.return_value = pkt - self.protocol_handler._run_once() + await self.protocol_handler._run_once() server.queue.assert_called_once_with(pkt) server.buffer_size.return_value = len(pkt) server.flush.assert_not_called() - - def mock_selector_for_client_read(self, mock_selector: mock.Mock) -> None: - mock_selector.return_value.select.return_value = [ - ( - selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, - events=selectors.EVENT_READ, - data=None, - ), - selectors.EVENT_READ, - ), - ] diff --git a/tests/http/test_web_server.py b/tests/http/test_web_server.py index be0098128a..d81d67e3c1 100644 --- a/tests/http/test_web_server.py +++ b/tests/http/test_web_server.py @@ -8,81 +8,170 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. 
""" -import gzip import os +import gzip +import pytest import tempfile -import unittest import selectors -from unittest import mock + +from typing import Any +from pytest_mock import MockerFixture +# from unittest import mock from proxy.common.plugins import Plugins from proxy.common.flag import FlagParser from proxy.core.connection import TcpClientConnection from proxy.http import HttpProtocolHandler from proxy.http.parser import HttpParser, httpParserStates, httpParserTypes -from proxy.common.utils import build_http_response, build_http_request, bytes_, text_ +from proxy.common.utils import build_http_response, build_http_request, bytes_ from proxy.common.constants import CRLF, PLUGIN_HTTP_PROXY, PLUGIN_PAC_FILE, PLUGIN_WEB_SERVER, PROXY_PY_DIR from proxy.http.server import HttpWebServerPlugin +from ..assertions import Assertions + + +PAC_FILE_PATH = os.path.join( + os.path.dirname(PROXY_PY_DIR), + 'helper', + 'proxy.pac', +) -class TestWebServerPlugin(unittest.TestCase): +PAC_FILE_CONTENT = b'function FindProxyForURL(url, host) { return "PROXY localhost:8899; DIRECT"; }' - @mock.patch('selectors.DefaultSelector') - @mock.patch('socket.fromfd') - def setUp(self, mock_fromfd: mock.Mock, mock_selector: mock.Mock) -> None: + +def test_on_client_connection_called_on_teardown(mocker: MockerFixture) -> None: + plugin = mocker.MagicMock() + mock_fromfd = mocker.patch('socket.fromfd') + flags = FlagParser.initialize(threaded=True) + flags.plugins = {b'HttpProtocolHandlerPlugin': [plugin]} + _conn = mock_fromfd.return_value + _addr = ('127.0.0.1', 54382) + protocol_handler = HttpProtocolHandler( + TcpClientConnection(_conn, _addr), + flags=flags, + ) + protocol_handler.initialize() + plugin.assert_called() + mock_run_once = mocker.patch.object(protocol_handler, '_run_once') + mock_run_once.return_value = True + protocol_handler.run() + assert _conn.closed + plugin.return_value.on_client_connection_close.assert_called() + + +def mock_selector_for_client_read(self: Any) -> None: + self.mock_selector.return_value.select.return_value = [ + ( + selectors.SelectorKey( + fileobj=self._conn, + fd=self._conn.fileno, + events=selectors.EVENT_READ, + data=None, + ), + selectors.EVENT_READ, + ), + ] + + # @mock.patch('socket.fromfd') + # def test_on_client_connection_called_on_teardown( + # self, mock_fromfd: mock.Mock, + # ) -> None: + # flags = FlagParser.initialize(threaded=True) + # plugin = mock.MagicMock() + # flags.plugins = {b'HttpProtocolHandlerPlugin': [plugin]} + # self._conn = mock_fromfd.return_value + # self.protocol_handler = HttpProtocolHandler( + # TcpClientConnection(self._conn, self._addr), + # flags=flags, + # ) + # self.protocol_handler.initialize() + # plugin.assert_called() + # with mock.patch.object(self.protocol_handler, '_run_once') as mock_run_once: + # mock_run_once.return_value = True + # self.protocol_handler.run() + # self.assertTrue(self._conn.closed) + # plugin.return_value.on_client_connection_close.assert_called() + + # @mock.patch('socket.fromfd') + # def test_on_client_connection_called_on_teardown( + # self, mock_fromfd: mock.Mock, + # ) -> None: + # flags = FlagParser.initialize(threaded=True) + # plugin = mock.MagicMock() + # flags.plugins = {b'HttpProtocolHandlerPlugin': [plugin]} + # self._conn = mock_fromfd.return_value + # self.protocol_handler = HttpProtocolHandler( + # TcpClientConnection(self._conn, self._addr), + # flags=flags, + # ) + # self.protocol_handler.initialize() + # plugin.assert_called() + # with mock.patch.object(self.protocol_handler, '_run_once') 
as mock_run_once: + # mock_run_once.return_value = True + # self.protocol_handler.run() + # self.assertTrue(self._conn.closed) + # plugin.return_value.on_client_connection_close.assert_called() + + # @mock.patch('socket.fromfd') + # def test_on_client_connection_called_on_teardown( + # self, mock_fromfd: mock.Mock, + # ) -> None: + # flags = FlagParser.initialize(threaded=True) + # plugin = mock.MagicMock() + # flags.plugins = {b'HttpProtocolHandlerPlugin': [plugin]} + # self._conn = mock_fromfd.return_value + # self.protocol_handler = HttpProtocolHandler( + # TcpClientConnection(self._conn, self._addr), + # flags=flags, + # ) + # self.protocol_handler.initialize() + # plugin.assert_called() + # with mock.patch.object(self.protocol_handler, '_run_once') as mock_run_once: + # mock_run_once.return_value = True + # self.protocol_handler.run() + # self.assertTrue(self._conn.closed) + # plugin.return_value.on_client_connection_close.assert_called() + + +class TestWebServerPluginWithPacFilePlugin(Assertions): + + @pytest.fixture(autouse=True, params=[ + PAC_FILE_PATH, + PAC_FILE_CONTENT, + ]) + def setUp(self, request: Any, mocker: MockerFixture) -> None: + self.mock_fromfd = mocker.patch('socket.fromfd') + self.mock_selector = mocker.patch('selectors.DefaultSelector') self.fileno = 10 self._addr = ('127.0.0.1', 54382) - self._conn = mock_fromfd.return_value - self.mock_selector = mock_selector - self.flags = FlagParser.initialize(threaded=True) + self._conn = self.mock_fromfd.return_value + self.pac_file = request.param + if isinstance(self.pac_file, str): + with open(self.pac_file, 'rb') as f: + self.expected_response = f.read() + else: + self.expected_response = PAC_FILE_CONTENT + self.flags = FlagParser.initialize( + pac_file=self.pac_file, threaded=True) self.flags.plugins = Plugins.load([ bytes_(PLUGIN_HTTP_PROXY), bytes_(PLUGIN_WEB_SERVER), + bytes_(PLUGIN_PAC_FILE), ]) self.protocol_handler = HttpProtocolHandler( TcpClientConnection(self._conn, self._addr), flags=self.flags, ) self.protocol_handler.initialize() + self._conn.recv.return_value = CRLF.join([ + b'GET / HTTP/1.1', + CRLF, + ]) + mock_selector_for_client_read(self) - @mock.patch('selectors.DefaultSelector') - @mock.patch('socket.fromfd') - def test_pac_file_served_from_disk( - self, mock_fromfd: mock.Mock, mock_selector: mock.Mock, - ) -> None: - pac_file = os.path.join( - os.path.dirname(PROXY_PY_DIR), - 'helper', - 'proxy.pac', - ) - self._conn = mock_fromfd.return_value - self.mock_selector_for_client_read(mock_selector) - self.init_and_make_pac_file_request(pac_file) - self.protocol_handler._run_once() - self.assertEqual( - self.protocol_handler.request.state, - httpParserStates.COMPLETE, - ) - with open(pac_file, 'rb') as f: - self._conn.send.called_once_with( - build_http_response( - 200, reason=b'OK', headers={ - b'Content-Type': b'application/x-ns-proxy-autoconfig', - b'Connection': b'close', - }, body=f.read(), - ), - ) - - @mock.patch('selectors.DefaultSelector') - @mock.patch('socket.fromfd') - def test_pac_file_served_from_buffer( - self, mock_fromfd: mock.Mock, mock_selector: mock.Mock, - ) -> None: - self._conn = mock_fromfd.return_value - self.mock_selector_for_client_read(mock_selector) - pac_file_content = b'function FindProxyForURL(url, host) { return "PROXY localhost:8899; DIRECT"; }' - self.init_and_make_pac_file_request(text_(pac_file_content)) - self.protocol_handler._run_once() + @pytest.mark.asyncio + async def test_pac_file_served_from_disk(self) -> None: + await self.protocol_handler._run_once() 
self.assertEqual( self.protocol_handler.request.state, httpParserStates.COMPLETE, @@ -92,28 +181,34 @@ def test_pac_file_served_from_buffer( 200, reason=b'OK', headers={ b'Content-Type': b'application/x-ns-proxy-autoconfig', b'Connection': b'close', - }, body=pac_file_content, + }, body=self.expected_response, ), ) - @mock.patch('selectors.DefaultSelector') - @mock.patch('socket.fromfd') - def test_default_web_server_returns_404( - self, mock_fromfd: mock.Mock, mock_selector: mock.Mock, - ) -> None: - self._conn = mock_fromfd.return_value - mock_selector.return_value.select.return_value = [ - ( - selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, - events=selectors.EVENT_READ, - data=None, - ), - selectors.EVENT_READ, - ), - ] - flags = FlagParser.initialize(threaded=True) + +class TestStaticWebServerPlugin(Assertions): + + @pytest.fixture(autouse=True) + def setUp(self, mocker: MockerFixture) -> None: + self.mock_fromfd = mocker.patch('socket.fromfd') + self.mock_selector = mocker.patch('selectors.DefaultSelector') + self.fileno = 10 + self._addr = ('127.0.0.1', 54382) + self._conn = self.mock_fromfd.return_value + # Setup a static directory + self.static_server_dir = os.path.join(tempfile.gettempdir(), 'static') + self.index_file_path = os.path.join( + self.static_server_dir, 'index.html') + self.html_file_content = b'''
<html>
<head></head>
<body><h1>Proxy.py Testing</h1></body>
</html>
''' + os.makedirs(self.static_server_dir, exist_ok=True) + with open(self.index_file_path, 'wb') as f: + f.write(self.html_file_content) + # + flags = FlagParser.initialize( + enable_static_server=True, + static_server_dir=self.static_server_dir, + threaded=True, + ) flags.plugins = Plugins.load([ bytes_(PLUGIN_HTTP_PROXY), bytes_(PLUGIN_WEB_SERVER), @@ -123,39 +218,13 @@ def test_default_web_server_returns_404( flags=flags, ) self.protocol_handler.initialize() - self._conn.recv.return_value = CRLF.join([ - b'GET /hello HTTP/1.1', - CRLF, - ]) - self.protocol_handler._run_once() - self.assertEqual( - self.protocol_handler.request.state, - httpParserStates.COMPLETE, - ) - self.assertEqual( - self.protocol_handler.work.buffer[0], - HttpWebServerPlugin.DEFAULT_404_RESPONSE, - ) - - @mock.patch('selectors.DefaultSelector') - @mock.patch('socket.fromfd') - def test_static_web_server_serves( - self, mock_fromfd: mock.Mock, mock_selector: mock.Mock, - ) -> None: - # Setup a static directory - static_server_dir = os.path.join(tempfile.gettempdir(), 'static') - index_file_path = os.path.join(static_server_dir, 'index.html') - html_file_content = b'''
<html>
<head></head>
<body><h1>Proxy.py Testing</h1></body>
</html>
''' - os.makedirs(static_server_dir, exist_ok=True) - with open(index_file_path, 'wb') as f: - f.write(html_file_content) - self._conn = mock_fromfd.return_value + @pytest.mark.asyncio + async def test_static_web_server_serves(self) -> None: self._conn.recv.return_value = build_http_request( b'GET', b'/index.html', ) - - mock_selector.return_value.select.side_effect = [ + self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( fileobj=self._conn, @@ -175,29 +244,12 @@ def test_static_web_server_serves( selectors.EVENT_WRITE, )], ] + await self.protocol_handler._run_once() + await self.protocol_handler._run_once() - flags = FlagParser.initialize( - enable_static_server=True, - static_server_dir=static_server_dir, - threaded=True, - ) - flags.plugins = Plugins.load([ - bytes_(PLUGIN_HTTP_PROXY), - bytes_(PLUGIN_WEB_SERVER), - ]) - - self.protocol_handler = HttpProtocolHandler( - TcpClientConnection(self._conn, self._addr), - flags=flags, - ) - self.protocol_handler.initialize() - - self.protocol_handler._run_once() - self.protocol_handler._run_once() - - self.assertEqual(mock_selector.return_value.select.call_count, 2) + self.assertEqual(self.mock_selector.return_value.select.call_count, 2) self.assertEqual(self._conn.send.call_count, 1) - encoded_html_file_content = gzip.compress(html_file_content) + encoded_html_file_content = gzip.compress(self.html_file_content) # parse response and verify response = HttpParser(httpParserTypes.RESPONSE_PARSER) @@ -212,21 +264,15 @@ def test_static_web_server_serves( bytes_(len(encoded_html_file_content)), ) assert response.body - self.assertEqual(gzip.decompress(response.body), html_file_content) + self.assertEqual(gzip.decompress(response.body), + self.html_file_content) - @mock.patch('selectors.DefaultSelector') - @mock.patch('socket.fromfd') - def test_static_web_server_serves_404( - self, - mock_fromfd: mock.Mock, - mock_selector: mock.Mock, - ) -> None: - self._conn = mock_fromfd.return_value + @pytest.mark.asyncio + async def test_static_web_server_serves_404(self) -> None: self._conn.recv.return_value = build_http_request( b'GET', b'/not-found.html', ) - - mock_selector.return_value.select.side_effect = [ + self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( fileobj=self._conn, @@ -247,67 +293,41 @@ def test_static_web_server_serves_404( )], ] - flags = FlagParser.initialize(enable_static_server=True, threaded=True) - flags.plugins = Plugins.load([ - bytes_(PLUGIN_HTTP_PROXY), - bytes_(PLUGIN_WEB_SERVER), - ]) - - self.protocol_handler = HttpProtocolHandler( - TcpClientConnection(self._conn, self._addr), - flags=flags, - ) - self.protocol_handler.initialize() - - self.protocol_handler._run_once() - self.protocol_handler._run_once() + await self.protocol_handler._run_once() + await self.protocol_handler._run_once() - self.assertEqual(mock_selector.return_value.select.call_count, 2) + self.assertEqual(self.mock_selector.return_value.select.call_count, 2) self.assertEqual(self._conn.send.call_count, 1) self.assertEqual( self._conn.send.call_args[0][0], HttpWebServerPlugin.DEFAULT_404_RESPONSE, ) - @mock.patch('socket.fromfd') - def test_on_client_connection_called_on_teardown( - self, mock_fromfd: mock.Mock, - ) -> None: - flags = FlagParser.initialize(threaded=True) - plugin = mock.MagicMock() - flags.plugins = {b'HttpProtocolHandlerPlugin': [plugin]} - self._conn = mock_fromfd.return_value - self.protocol_handler = HttpProtocolHandler( - TcpClientConnection(self._conn, self._addr), - 
flags=flags, - ) - self.protocol_handler.initialize() - plugin.assert_called() - with mock.patch.object(self.protocol_handler, '_run_once') as mock_run_once: - mock_run_once.return_value = True - self.protocol_handler.run() - self.assertTrue(self._conn.closed) - plugin.return_value.on_client_connection_close.assert_called() - def init_and_make_pac_file_request(self, pac_file: str) -> None: - flags = FlagParser.initialize(pac_file=pac_file, threaded=True) - flags.plugins = Plugins.load([ +class TestWebServerPlugin(Assertions): + + @pytest.fixture(autouse=True) + def setUp(self, mocker: MockerFixture) -> None: + self.mock_fromfd = mocker.patch('socket.fromfd') + self.mock_selector = mocker.patch('selectors.DefaultSelector') + self.fileno = 10 + self._addr = ('127.0.0.1', 54382) + self._conn = self.mock_fromfd.return_value + self.flags = FlagParser.initialize(threaded=True) + self.flags.plugins = Plugins.load([ bytes_(PLUGIN_HTTP_PROXY), bytes_(PLUGIN_WEB_SERVER), - bytes_(PLUGIN_PAC_FILE), ]) self.protocol_handler = HttpProtocolHandler( TcpClientConnection(self._conn, self._addr), - flags=flags, + flags=self.flags, ) self.protocol_handler.initialize() - self._conn.recv.return_value = CRLF.join([ - b'GET / HTTP/1.1', - CRLF, - ]) - def mock_selector_for_client_read(self, mock_selector: mock.Mock) -> None: - mock_selector.return_value.select.return_value = [ + @pytest.mark.asyncio + async def test_default_web_server_returns_404(self) -> None: + self._conn = self.mock_fromfd.return_value + self.mock_selector.return_value.select.return_value = [ ( selectors.SelectorKey( fileobj=self._conn, @@ -318,3 +338,26 @@ def mock_selector_for_client_read(self, mock_selector: mock.Mock) -> None: selectors.EVENT_READ, ), ] + flags = FlagParser.initialize(threaded=True) + flags.plugins = Plugins.load([ + bytes_(PLUGIN_HTTP_PROXY), + bytes_(PLUGIN_WEB_SERVER), + ]) + self.protocol_handler = HttpProtocolHandler( + TcpClientConnection(self._conn, self._addr), + flags=flags, + ) + self.protocol_handler.initialize() + self._conn.recv.return_value = CRLF.join([ + b'GET /hello HTTP/1.1', + CRLF, + ]) + await self.protocol_handler._run_once() + self.assertEqual( + self.protocol_handler.request.state, + httpParserStates.COMPLETE, + ) + self.assertEqual( + self.protocol_handler.work.buffer[0], + HttpWebServerPlugin.DEFAULT_404_RESPONSE, + ) diff --git a/tests/plugin/test_http_proxy_plugins.py b/tests/plugin/test_http_proxy_plugins.py index 864f48597a..abf90da918 100644 --- a/tests/plugin/test_http_proxy_plugins.py +++ b/tests/plugin/test_http_proxy_plugins.py @@ -8,14 +8,15 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. 
""" -import unittest -import selectors import json +import pytest +import selectors -from urllib import parse as urlparse -from unittest import mock -from typing import cast from pathlib import Path +from unittest import mock +from typing import cast, Any +from urllib import parse as urlparse +from pytest_mock import MockerFixture from proxy.common.flag import FlagParser from proxy.core.connection import TcpClientConnection @@ -24,21 +25,22 @@ from proxy.http.proxy import HttpProxyPlugin from proxy.common.utils import build_http_request, bytes_, build_http_response from proxy.common.constants import PROXY_AGENT_HEADER_VALUE, DEFAULT_HTTP_PORT - from proxy.plugin import ProposedRestApiPlugin, RedirectToCustomServerPlugin from .utils import get_plugin_by_test_name +from ..assertions import Assertions + -class TestHttpProxyPluginExamples(unittest.TestCase): +class TestHttpProxyPluginExamples(Assertions): + + @pytest.fixture(autouse=True) + def setUp(self, request: Any, mocker: MockerFixture) -> None: + self.mock_fromfd = mocker.patch('socket.fromfd') + self.mock_selector = mocker.patch('selectors.DefaultSelector') + self.mock_server_conn = mocker.patch( + 'proxy.http.proxy.server.TcpServerConnection') - @mock.patch('selectors.DefaultSelector') - @mock.patch('socket.fromfd') - def setUp( - self, - mock_fromfd: mock.Mock, - mock_selector: mock.Mock, - ) -> None: self.fileno = 10 self._addr = ('127.0.0.1', 54382) adblock_json_path = Path( @@ -53,26 +55,22 @@ def setUp( ) self.plugin = mock.MagicMock() - self.mock_fromfd = mock_fromfd - self.mock_selector = mock_selector - - plugin = get_plugin_by_test_name(self._testMethodName) + plugin = get_plugin_by_test_name(request.param) self.flags.plugins = { b'HttpProtocolHandlerPlugin': [HttpProxyPlugin], b'HttpProxyBasePlugin': [plugin], } - self._conn = mock_fromfd.return_value + self._conn = self.mock_fromfd.return_value self.protocol_handler = HttpProtocolHandler( TcpClientConnection(self._conn, self._addr), flags=self.flags, ) self.protocol_handler.initialize() - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_modify_post_data_plugin( - self, mock_server_conn: mock.Mock, - ) -> None: + @pytest.mark.asyncio + @pytest.mark.parametrize("setUp", ['test_modify_post_data_plugin'], indirect=True) + async def test_modify_post_data_plugin(self) -> None: original = b'{"key": "value"}' modified = b'{"key": "modified"}' @@ -97,9 +95,10 @@ def test_modify_post_data_plugin( )], ] - self.protocol_handler._run_once() - mock_server_conn.assert_called_with('httpbin.org', DEFAULT_HTTP_PORT) - mock_server_conn.return_value.queue.assert_called_with( + await self.protocol_handler._run_once() + self.mock_server_conn.assert_called_with( + 'httpbin.org', DEFAULT_HTTP_PORT) + self.mock_server_conn.return_value.queue.assert_called_with( build_http_request( b'POST', b'/post', headers={ @@ -112,10 +111,9 @@ def test_modify_post_data_plugin( ), ) - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_proposed_rest_api_plugin( - self, mock_server_conn: mock.Mock, - ) -> None: + @pytest.mark.asyncio + @pytest.mark.parametrize("setUp", ['test_proposed_rest_api_plugin'], indirect=True) + async def test_proposed_rest_api_plugin(self) -> None: path = b'/v1/users/' self._conn.recv.return_value = build_http_request( b'GET', b'http://%s%s' % ( @@ -136,9 +134,9 @@ def test_proposed_rest_api_plugin( selectors.EVENT_READ, )], ] - self.protocol_handler._run_once() + await self.protocol_handler._run_once() - mock_server_conn.assert_not_called() + 
self.mock_server_conn.assert_not_called() self.assertEqual( self.protocol_handler.work.buffer[0].tobytes(), build_http_response( @@ -152,10 +150,9 @@ def test_proposed_rest_api_plugin( ), ) - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_redirect_to_custom_server_plugin( - self, mock_server_conn: mock.Mock, - ) -> None: + @pytest.mark.asyncio + @pytest.mark.parametrize("setUp", ['test_redirect_to_custom_server_plugin'], indirect=True) + async def test_redirect_to_custom_server_plugin(self) -> None: request = build_http_request( b'GET', b'http://example.org/get', headers={ @@ -174,13 +171,13 @@ def test_redirect_to_custom_server_plugin( selectors.EVENT_READ, )], ] - self.protocol_handler._run_once() + await self.protocol_handler._run_once() upstream = urlparse.urlsplit( RedirectToCustomServerPlugin.UPSTREAM_SERVER, ) - mock_server_conn.assert_called_with('localhost', 8899) - mock_server_conn.return_value.queue.assert_called_with( + self.mock_server_conn.assert_called_with('localhost', 8899) + self.mock_server_conn.return_value.queue.assert_called_with( build_http_request( b'GET', upstream.path, headers={ @@ -190,10 +187,9 @@ def test_redirect_to_custom_server_plugin( ), ) - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_filter_by_upstream_host_plugin( - self, mock_server_conn: mock.Mock, - ) -> None: + @pytest.mark.asyncio + @pytest.mark.parametrize("setUp", ['test_filter_by_upstream_host_plugin'], indirect=True) + async def test_filter_by_upstream_host_plugin(self) -> None: request = build_http_request( b'GET', b'http://facebook.com/', headers={ @@ -212,9 +208,9 @@ def test_filter_by_upstream_host_plugin( selectors.EVENT_READ, )], ] - self.protocol_handler._run_once() + await self.protocol_handler._run_once() - mock_server_conn.assert_not_called() + self.mock_server_conn.assert_not_called() self.assertEqual( self.protocol_handler.work.buffer[0].tobytes(), build_http_response( @@ -226,10 +222,9 @@ def test_filter_by_upstream_host_plugin( ), ) - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_man_in_the_middle_plugin( - self, mock_server_conn: mock.Mock, - ) -> None: + @pytest.mark.asyncio + @pytest.mark.parametrize("setUp", ['test_man_in_the_middle_plugin'], indirect=True) + async def test_man_in_the_middle_plugin(self) -> None: request = build_http_request( b'GET', b'http://super.secure/', headers={ @@ -238,7 +233,7 @@ def test_man_in_the_middle_plugin( ) self._conn.recv.return_value = request - server = mock_server_conn.return_value + server = self.mock_server_conn.return_value server.connect.return_value = True def has_buffer() -> bool: @@ -281,8 +276,9 @@ def closed() -> bool: ] # Client read - self.protocol_handler._run_once() - mock_server_conn.assert_called_with('super.secure', DEFAULT_HTTP_PORT) + await self.protocol_handler._run_once() + self.mock_server_conn.assert_called_with( + 'super.secure', DEFAULT_HTTP_PORT) server.connect.assert_called_once() queued_request = \ build_http_request( @@ -295,7 +291,7 @@ def closed() -> bool: server.queue.assert_called_once_with(queued_request) # Server write - self.protocol_handler._run_once() + await self.protocol_handler._run_once() server.flush.assert_called_once() # Server read @@ -304,7 +300,7 @@ def closed() -> bool: httpStatusCodes.OK, reason=b'OK', body=b'Original Response From Upstream', ) - self.protocol_handler._run_once() + await self.protocol_handler._run_once() self.assertEqual( self.protocol_handler.work.buffer[0].tobytes(), build_http_response( @@ -313,10 
+309,9 @@ def closed() -> bool: ), ) - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - def test_filter_by_url_regex_plugin( - self, mock_server_conn: mock.Mock, - ) -> None: + @pytest.mark.asyncio + @pytest.mark.parametrize("setUp", ['test_filter_by_url_regex_plugin'], indirect=True) + async def test_filter_by_url_regex_plugin(self) -> None: request = build_http_request( b'GET', b'http://www.facebook.com/tr/', headers={ @@ -335,7 +330,7 @@ def test_filter_by_url_regex_plugin( selectors.EVENT_READ, )], ] - self.protocol_handler._run_once() + await self.protocol_handler._run_once() self.assertEqual( self.protocol_handler.work.buffer[0].tobytes(), diff --git a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py index d076f1558a..2aa18d7720 100644 --- a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py +++ b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py @@ -8,12 +8,12 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. """ -import unittest +import ssl import socket +import pytest import selectors -import ssl -from unittest import mock +from pytest_mock import MockerFixture from typing import Any, cast from proxy.common.flag import FlagParser @@ -22,39 +22,27 @@ from proxy.http import httpMethods, httpStatusCodes, HttpProtocolHandler from proxy.http.proxy import HttpProxyPlugin +from proxy.http.parser import HttpParser from .utils import get_plugin_by_test_name +from ..assertions import Assertions + -class TestHttpProxyPluginExamplesWithTlsInterception(unittest.TestCase): - - @mock.patch('ssl.wrap_socket') - @mock.patch('ssl.create_default_context') - @mock.patch('proxy.http.proxy.server.TcpServerConnection') - @mock.patch('proxy.http.proxy.server.gen_public_key') - @mock.patch('proxy.http.proxy.server.gen_csr') - @mock.patch('proxy.http.proxy.server.sign_csr') - @mock.patch('selectors.DefaultSelector') - @mock.patch('socket.fromfd') - def setUp( - self, - mock_fromfd: mock.Mock, - mock_selector: mock.Mock, - mock_sign_csr: mock.Mock, - mock_gen_csr: mock.Mock, - mock_gen_public_key: mock.Mock, - mock_server_conn: mock.Mock, - mock_ssl_context: mock.Mock, - mock_ssl_wrap: mock.Mock, - ) -> None: - self.mock_fromfd = mock_fromfd - self.mock_selector = mock_selector - self.mock_sign_csr = mock_sign_csr - self.mock_gen_csr = mock_gen_csr - self.mock_gen_public_key = mock_gen_public_key - self.mock_server_conn = mock_server_conn - self.mock_ssl_context = mock_ssl_context - self.mock_ssl_wrap = mock_ssl_wrap +class TestHttpProxyPluginExamplesWithTlsInterception(Assertions): + + @pytest.fixture(autouse=True) + def setUp(self, request: Any, mocker: MockerFixture) -> None: + self.mock_fromfd = mocker.patch('socket.fromfd') + self.mock_selector = mocker.patch('selectors.DefaultSelector') + self.mock_sign_csr = mocker.patch('proxy.http.proxy.server.sign_csr') + self.mock_gen_csr = mocker.patch('proxy.http.proxy.server.gen_csr') + self.mock_gen_public_key = mocker.patch( + 'proxy.http.proxy.server.gen_public_key') + self.mock_server_conn = mocker.patch( + 'proxy.http.proxy.server.TcpServerConnection') + self.mock_ssl_context = mocker.patch('ssl.create_default_context') + self.mock_ssl_wrap = mocker.patch('ssl.wrap_socket') self.mock_sign_csr.return_value = True self.mock_gen_csr.return_value = True @@ -68,16 +56,16 @@ def setUp( ca_signing_key_file='ca-signing-key.pem', threaded=True, ) - self.plugin = mock.MagicMock() + self.plugin 
= mocker.MagicMock() - plugin = get_plugin_by_test_name(self._testMethodName) + plugin = get_plugin_by_test_name(request.param) self.flags.plugins = { b'HttpProtocolHandlerPlugin': [HttpProxyPlugin], b'HttpProxyBasePlugin': [plugin], } - self._conn = mock.MagicMock(spec=socket.socket) - mock_fromfd.return_value = self._conn + self._conn = mocker.MagicMock(spec=socket.socket) + self.mock_fromfd.return_value = self._conn self.protocol_handler = HttpProtocolHandler( TcpClientConnection(self._conn, self._addr), flags=self.flags, ) @@ -85,9 +73,9 @@ def setUp( self.server = self.mock_server_conn.return_value - self.server_ssl_connection = mock.MagicMock(spec=ssl.SSLSocket) + self.server_ssl_connection = mocker.MagicMock(spec=ssl.SSLSocket) self.mock_ssl_context.return_value.wrap_socket.return_value = self.server_ssl_connection - self.client_ssl_connection = mock.MagicMock(spec=ssl.SSLSocket) + self.client_ssl_connection = mocker.MagicMock(spec=ssl.SSLSocket) self.mock_ssl_wrap.return_value = self.client_ssl_connection def has_buffer() -> bool: @@ -106,10 +94,10 @@ def mock_connection() -> Any: lambda x, y: TcpServerConnection.wrap(self.server, x, y) self.server.has_buffer.side_effect = has_buffer - type(self.server).closed = mock.PropertyMock(side_effect=closed) + type(self.server).closed = mocker.PropertyMock(side_effect=closed) type( self.server, - ).connection = mock.PropertyMock( + ).connection = mocker.PropertyMock( side_effect=mock_connection, ) @@ -160,7 +148,11 @@ def send(raw: bytes) -> int: self._conn.recv.return_value = build_http_request( httpMethods.CONNECT, b'uni.corn:443', ) - self.protocol_handler._run_once() + + @pytest.mark.asyncio + @pytest.mark.parametrize("setUp", ['test_modify_post_data_plugin'], indirect=True) + async def test_modify_post_data_plugin(self) -> None: + await self.protocol_handler._run_once() self.assertEqual(self.mock_sign_csr.call_count, 1) self.assertEqual(self.mock_gen_csr.call_count, 1) @@ -178,32 +170,54 @@ def send(raw: bytes) -> int: ) self.assertFalse(self.protocol_handler.work.has_buffer()) - def test_modify_post_data_plugin(self) -> None: + # original = b'{"key": "value"}' modified = b'{"key": "modified"}' self.client_ssl_connection.recv.return_value = build_http_request( b'POST', b'/', headers={ b'Host': b'uni.corn', - b'Content-Type': b'application/x-www-form-urlencoded', b'Content-Length': bytes_(len(original)), + b'Content-Type': b'application/x-www-form-urlencoded', }, body=original, ) - self.protocol_handler._run_once() - self.server.queue.assert_called_with( - build_http_request( - b'POST', b'/', - headers={ - b'Host': b'uni.corn', - b'Content-Length': bytes_(len(modified)), - b'Content-Type': b'application/json', - }, - body=modified, - ), - ) + await self.protocol_handler._run_once() + self.server.queue.assert_called_once() + # pkt = build_http_request( + # b'POST', b'/', + # headers={ + # b'Host': b'uni.corn', + # b'Content-Length': bytes_(len(modified)), + # b'Content-Type': b'application/json', + # }, + # body=modified, + # ) + response = HttpParser.response( + self.server.queue.call_args_list[0][0][0].tobytes()) + self.assertEqual(response.body, modified) + + @pytest.mark.asyncio + @pytest.mark.parametrize("setUp", ['test_man_in_the_middle_plugin'], indirect=True) + async def test_man_in_the_middle_plugin(self) -> None: + await self.protocol_handler._run_once() + + self.assertEqual(self.mock_sign_csr.call_count, 1) + self.assertEqual(self.mock_gen_csr.call_count, 1) + self.assertEqual(self.mock_gen_public_key.call_count, 1) - def 
test_man_in_the_middle_plugin(self) -> None: + self.mock_server_conn.assert_called_once_with('uni.corn', 443) + self.server.connect.assert_called() + self.assertEqual( + self.protocol_handler.work.connection, + self.client_ssl_connection, + ) + self.assertEqual(self.server.connection, self.server_ssl_connection) + self._conn.send.assert_called_with( + HttpProxyPlugin.PROXY_TUNNEL_ESTABLISHED_RESPONSE_PKT, + ) + self.assertFalse(self.protocol_handler.work.has_buffer()) + # request = build_http_request( b'GET', b'/', headers={ @@ -213,11 +227,11 @@ def test_man_in_the_middle_plugin(self) -> None: self.client_ssl_connection.recv.return_value = request # Client read - self.protocol_handler._run_once() + await self.protocol_handler._run_once() self.server.queue.assert_called_once_with(request) # Server write - self.protocol_handler._run_once() + await self.protocol_handler._run_once() self.server.flush.assert_called_once() # Server read @@ -226,7 +240,7 @@ def test_man_in_the_middle_plugin(self) -> None: httpStatusCodes.OK, reason=b'OK', body=b'Original Response From Upstream', ) - self.protocol_handler._run_once() + await self.protocol_handler._run_once() self.assertEqual( self.protocol_handler.work.buffer[0].tobytes(), build_http_response( From 416d8524d20ad39b04cfca98be1409408c4d1d82 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 21 Nov 2021 20:01:39 +0530 Subject: [PATCH 11/54] mypy and flake8 --- .../exceptions/test_http_proxy_auth_failed.py | 12 ++--- tests/http/test_http_proxy.py | 8 +-- .../http/test_http_proxy_tls_interception.py | 4 +- tests/http/test_protocol_handler.py | 20 +++---- tests/http/test_web_server.py | 20 +++---- tests/plugin/test_http_proxy_plugins.py | 52 ++++++++++++++----- ...ttp_proxy_plugins_with_tls_interception.py | 20 ++++--- 7 files changed, 84 insertions(+), 52 deletions(-) diff --git a/tests/http/exceptions/test_http_proxy_auth_failed.py b/tests/http/exceptions/test_http_proxy_auth_failed.py index 6fa9606c0b..8a1bfc8d48 100644 --- a/tests/http/exceptions/test_http_proxy_auth_failed.py +++ b/tests/http/exceptions/test_http_proxy_auth_failed.py @@ -24,8 +24,8 @@ class TestHttpProxyAuthFailed(Assertions): - @pytest.fixture(autouse=True) - def setUp(self, mocker: MockerFixture) -> None: + @pytest.fixture(autouse=True) # type: ignore[misc] + def _setUp(self, mocker: MockerFixture) -> None: self.mock_fromfd = mocker.patch('socket.fromfd') self.mock_selector = mocker.patch('selectors.DefaultSelector') self.mock_server_conn = mocker.patch( @@ -43,7 +43,7 @@ def setUp(self, mocker: MockerFixture) -> None: ) self.protocol_handler.initialize() - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_proxy_auth_fails_without_cred(self) -> None: self._conn.recv.return_value = build_http_request( b'GET', b'http://upstream.host/not-found.html', @@ -70,7 +70,7 @@ async def test_proxy_auth_fails_without_cred(self) -> None: ) self._conn.send.assert_not_called() - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_proxy_auth_fails_with_invalid_cred(self) -> None: self._conn.recv.return_value = build_http_request( b'GET', b'http://upstream.host/not-found.html', @@ -98,7 +98,7 @@ async def test_proxy_auth_fails_with_invalid_cred(self) -> None: ) self._conn.send.assert_not_called() - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_proxy_auth_works_with_valid_cred(self) -> None: self._conn.recv.return_value = build_http_request( b'GET', b'http://upstream.host/not-found.html', @@ -122,7 
+122,7 @@ async def test_proxy_auth_works_with_valid_cred(self) -> None: self.mock_server_conn.assert_called_once() self.assertEqual(self.protocol_handler.work.has_buffer(), False) - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_proxy_auth_works_with_mixed_case_basic_string(self) -> None: self._conn.recv.return_value = build_http_request( b'GET', b'http://upstream.host/not-found.html', diff --git a/tests/http/test_http_proxy.py b/tests/http/test_http_proxy.py index 058159b4bd..d691f855cd 100644 --- a/tests/http/test_http_proxy.py +++ b/tests/http/test_http_proxy.py @@ -24,8 +24,8 @@ class TestHttpProxyPlugin: - @pytest.fixture(autouse=True) - def setUp(self, mocker: MockerFixture) -> None: + @pytest.fixture(autouse=True) # type: ignore[misc] + def _setUp(self, mocker: MockerFixture) -> None: self.mock_server_conn = mocker.patch( 'proxy.http.proxy.server.TcpServerConnection') self.mock_selector = mocker.patch('selectors.DefaultSelector') @@ -49,7 +49,7 @@ def setUp(self, mocker: MockerFixture) -> None: def test_proxy_plugin_initialized(self) -> None: self.plugin.assert_called() - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_proxy_plugin_on_and_before_upstream_connection(self) -> None: self.plugin.return_value.write_to_descriptors.return_value = False self.plugin.return_value.read_from_descriptors.return_value = False @@ -82,7 +82,7 @@ async def test_proxy_plugin_on_and_before_upstream_connection(self) -> None: self.plugin.return_value.before_upstream_connection.assert_called() self.plugin.return_value.handle_client_request.assert_called() - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_proxy_plugin_before_upstream_connection_can_teardown(self) -> None: self.plugin.return_value.write_to_descriptors.return_value = False self.plugin.return_value.read_from_descriptors.return_value = False diff --git a/tests/http/test_http_proxy_tls_interception.py b/tests/http/test_http_proxy_tls_interception.py index e22acb494c..bfe2e6454c 100644 --- a/tests/http/test_http_proxy_tls_interception.py +++ b/tests/http/test_http_proxy_tls_interception.py @@ -30,7 +30,7 @@ class TestHttpProxyTlsInterception(Assertions): - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_e2e(self, mocker: MockerFixture) -> None: host, port = uuid.uuid4().hex, 443 netloc = '{0}:{1}'.format(host, port) @@ -109,7 +109,7 @@ def mock_connection() -> Any: self._conn.recv.return_value = connect_request # Prepare mocked HttpProtocolHandlerPlugin - async def asyncReturnBool(val: bool): + async def asyncReturnBool(val: bool) -> bool: return val self.plugin.return_value.get_descriptors.return_value = ([], []) self.plugin.return_value.write_to_descriptors.return_value = asyncReturnBool(False) diff --git a/tests/http/test_protocol_handler.py b/tests/http/test_protocol_handler.py index 1f5f73fc63..6cb838c15b 100644 --- a/tests/http/test_protocol_handler.py +++ b/tests/http/test_protocol_handler.py @@ -47,8 +47,8 @@ def mock_selector_for_client_read(self: Any) -> None: class TestHttpProtocolHandlerWithoutServerMock(Assertions): - @pytest.fixture(autouse=True) - def setUp(self, mocker: MockerFixture) -> None: + @pytest.fixture(autouse=True) # type: ignore[misc] + def _setUp(self, mocker: MockerFixture) -> None: self.mock_fromfd = mocker.patch('socket.fromfd') self.mock_selector = mocker.patch('selectors.DefaultSelector') @@ -69,7 +69,7 @@ def setUp(self, mocker: MockerFixture) -> None: ) 
self.protocol_handler.initialize() - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_proxy_connection_failed(self) -> None: mock_selector_for_client_read(self) self._conn.recv.return_value = CRLF.join([ @@ -83,7 +83,7 @@ async def test_proxy_connection_failed(self) -> None: ProxyConnectionFailed.RESPONSE_PKT, ) - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_proxy_authentication_failed(self) -> None: self._conn = self.mock_fromfd.return_value mock_selector_for_client_read(self) @@ -114,8 +114,8 @@ async def test_proxy_authentication_failed(self) -> None: class TestHttpProtocolHandler(Assertions): - @pytest.fixture(autouse=True) - def setUp(self, mocker: MockerFixture) -> None: + @pytest.fixture(autouse=True) # type: ignore[misc] + def _setUp(self, mocker: MockerFixture) -> None: self.mock_fromfd = mocker.patch('socket.fromfd') self.mock_selector = mocker.patch('selectors.DefaultSelector') self.mock_server_connection = mocker.patch( @@ -138,7 +138,7 @@ def setUp(self, mocker: MockerFixture) -> None: ) self.protocol_handler.initialize() - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_http_get(self) -> None: server = self.mock_server_connection.return_value server.connect.return_value = True @@ -203,7 +203,7 @@ async def assert_tunnel_response( assert parser.code is not None self.assertEqual(int(parser.code), 200) - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_http_tunnel(self) -> None: server = self.mock_server_connection.return_value server.connect.return_value = True @@ -277,7 +277,7 @@ def has_buffer() -> bool: self.assertEqual(server.queue.call_count, 1) server.flush.assert_called_once() - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_authenticated_proxy_http_get(self) -> None: self._conn = self.mock_fromfd.return_value mock_selector_for_client_read(self) @@ -326,7 +326,7 @@ async def test_authenticated_proxy_http_get(self) -> None: ]) await self.assert_data_queued(server) - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_authenticated_proxy_http_tunnel(self) -> None: server = self.mock_server_connection.return_value server.connect.return_value = True diff --git a/tests/http/test_web_server.py b/tests/http/test_web_server.py index d81d67e3c1..259763b09a 100644 --- a/tests/http/test_web_server.py +++ b/tests/http/test_web_server.py @@ -138,8 +138,8 @@ class TestWebServerPluginWithPacFilePlugin(Assertions): @pytest.fixture(autouse=True, params=[ PAC_FILE_PATH, PAC_FILE_CONTENT, - ]) - def setUp(self, request: Any, mocker: MockerFixture) -> None: + ]) # type: ignore[misc] + def _setUp(self, request: Any, mocker: MockerFixture) -> None: self.mock_fromfd = mocker.patch('socket.fromfd') self.mock_selector = mocker.patch('selectors.DefaultSelector') self.fileno = 10 @@ -169,7 +169,7 @@ def setUp(self, request: Any, mocker: MockerFixture) -> None: ]) mock_selector_for_client_read(self) - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_pac_file_served_from_disk(self) -> None: await self.protocol_handler._run_once() self.assertEqual( @@ -188,8 +188,8 @@ async def test_pac_file_served_from_disk(self) -> None: class TestStaticWebServerPlugin(Assertions): - @pytest.fixture(autouse=True) - def setUp(self, mocker: MockerFixture) -> None: + @pytest.fixture(autouse=True) # type: ignore[misc] + def _setUp(self, mocker: MockerFixture) -> None: self.mock_fromfd = 
mocker.patch('socket.fromfd') self.mock_selector = mocker.patch('selectors.DefaultSelector') self.fileno = 10 @@ -219,7 +219,7 @@ def setUp(self, mocker: MockerFixture) -> None: ) self.protocol_handler.initialize() - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_static_web_server_serves(self) -> None: self._conn.recv.return_value = build_http_request( b'GET', b'/index.html', @@ -267,7 +267,7 @@ async def test_static_web_server_serves(self) -> None: self.assertEqual(gzip.decompress(response.body), self.html_file_content) - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_static_web_server_serves_404(self) -> None: self._conn.recv.return_value = build_http_request( b'GET', b'/not-found.html', @@ -306,8 +306,8 @@ async def test_static_web_server_serves_404(self) -> None: class TestWebServerPlugin(Assertions): - @pytest.fixture(autouse=True) - def setUp(self, mocker: MockerFixture) -> None: + @pytest.fixture(autouse=True) # type: ignore[misc] + def _setUp(self, mocker: MockerFixture) -> None: self.mock_fromfd = mocker.patch('socket.fromfd') self.mock_selector = mocker.patch('selectors.DefaultSelector') self.fileno = 10 @@ -324,7 +324,7 @@ def setUp(self, mocker: MockerFixture) -> None: ) self.protocol_handler.initialize() - @pytest.mark.asyncio + @pytest.mark.asyncio # type: ignore[misc] async def test_default_web_server_returns_404(self) -> None: self._conn = self.mock_fromfd.return_value self.mock_selector.return_value.select.return_value = [ diff --git a/tests/plugin/test_http_proxy_plugins.py b/tests/plugin/test_http_proxy_plugins.py index abf90da918..e9a0835b47 100644 --- a/tests/plugin/test_http_proxy_plugins.py +++ b/tests/plugin/test_http_proxy_plugins.py @@ -34,8 +34,8 @@ class TestHttpProxyPluginExamples(Assertions): - @pytest.fixture(autouse=True) - def setUp(self, request: Any, mocker: MockerFixture) -> None: + @pytest.fixture(autouse=True) # type: ignore[misc] + def _setUp(self, request: Any, mocker: MockerFixture) -> None: self.mock_fromfd = mocker.patch('socket.fromfd') self.mock_selector = mocker.patch('selectors.DefaultSelector') self.mock_server_conn = mocker.patch( @@ -68,8 +68,12 @@ def setUp(self, request: Any, mocker: MockerFixture) -> None: ) self.protocol_handler.initialize() - @pytest.mark.asyncio - @pytest.mark.parametrize("setUp", ['test_modify_post_data_plugin'], indirect=True) + @pytest.mark.asyncio # type: ignore[misc] + @pytest.mark.parametrize( + "_setUp", + ('test_modify_post_data_plugin'), + indirect=True, + ) # type: ignore[misc] async def test_modify_post_data_plugin(self) -> None: original = b'{"key": "value"}' modified = b'{"key": "modified"}' @@ -111,8 +115,12 @@ async def test_modify_post_data_plugin(self) -> None: ), ) - @pytest.mark.asyncio - @pytest.mark.parametrize("setUp", ['test_proposed_rest_api_plugin'], indirect=True) + @pytest.mark.asyncio # type: ignore[misc] + @pytest.mark.parametrize( + "_setUp", + ('test_proposed_rest_api_plugin'), + indirect=True, + ) # type: ignore[misc] async def test_proposed_rest_api_plugin(self) -> None: path = b'/v1/users/' self._conn.recv.return_value = build_http_request( @@ -150,8 +158,12 @@ async def test_proposed_rest_api_plugin(self) -> None: ), ) - @pytest.mark.asyncio - @pytest.mark.parametrize("setUp", ['test_redirect_to_custom_server_plugin'], indirect=True) + @pytest.mark.asyncio # type: ignore[misc] + @pytest.mark.parametrize( + "_setUp", + ('test_redirect_to_custom_server_plugin'), + indirect=True, + ) # type: ignore[misc] async def 
test_redirect_to_custom_server_plugin(self) -> None: request = build_http_request( b'GET', b'http://example.org/get', @@ -187,8 +199,12 @@ async def test_redirect_to_custom_server_plugin(self) -> None: ), ) - @pytest.mark.asyncio - @pytest.mark.parametrize("setUp", ['test_filter_by_upstream_host_plugin'], indirect=True) + @pytest.mark.asyncio # type: ignore[misc] + @pytest.mark.parametrize( + "_setUp", + ('test_filter_by_upstream_host_plugin'), + indirect=True, + ) # type: ignore[misc] async def test_filter_by_upstream_host_plugin(self) -> None: request = build_http_request( b'GET', b'http://facebook.com/', @@ -222,8 +238,12 @@ async def test_filter_by_upstream_host_plugin(self) -> None: ), ) - @pytest.mark.asyncio - @pytest.mark.parametrize("setUp", ['test_man_in_the_middle_plugin'], indirect=True) + @pytest.mark.asyncio # type: ignore[misc] + @pytest.mark.parametrize( + "_setUp", + ('test_man_in_the_middle_plugin'), + indirect=True, + ) # type: ignore[misc] async def test_man_in_the_middle_plugin(self) -> None: request = build_http_request( b'GET', b'http://super.secure/', @@ -309,8 +329,12 @@ def closed() -> bool: ), ) - @pytest.mark.asyncio - @pytest.mark.parametrize("setUp", ['test_filter_by_url_regex_plugin'], indirect=True) + @pytest.mark.asyncio # type: ignore[misc] + @pytest.mark.parametrize( + "_setUp", + ('test_filter_by_url_regex_plugin'), + indirect=True, + ) # type: ignore[misc] async def test_filter_by_url_regex_plugin(self) -> None: request = build_http_request( b'GET', b'http://www.facebook.com/tr/', diff --git a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py index 2aa18d7720..45994bdc43 100644 --- a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py +++ b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py @@ -31,8 +31,8 @@ class TestHttpProxyPluginExamplesWithTlsInterception(Assertions): - @pytest.fixture(autouse=True) - def setUp(self, request: Any, mocker: MockerFixture) -> None: + @pytest.fixture(autouse=True) # type: ignore[misc] + def _setUp(self, request: Any, mocker: MockerFixture) -> None: self.mock_fromfd = mocker.patch('socket.fromfd') self.mock_selector = mocker.patch('selectors.DefaultSelector') self.mock_sign_csr = mocker.patch('proxy.http.proxy.server.sign_csr') @@ -149,8 +149,12 @@ def send(raw: bytes) -> int: httpMethods.CONNECT, b'uni.corn:443', ) - @pytest.mark.asyncio - @pytest.mark.parametrize("setUp", ['test_modify_post_data_plugin'], indirect=True) + @pytest.mark.asyncio # type: ignore[misc] + @pytest.mark.parametrize( + "_setUp", + ('test_modify_post_data_plugin'), + indirect=True, + ) # type: ignore[misc] async def test_modify_post_data_plugin(self) -> None: await self.protocol_handler._run_once() @@ -197,8 +201,12 @@ async def test_modify_post_data_plugin(self) -> None: self.server.queue.call_args_list[0][0][0].tobytes()) self.assertEqual(response.body, modified) - @pytest.mark.asyncio - @pytest.mark.parametrize("setUp", ['test_man_in_the_middle_plugin'], indirect=True) + @pytest.mark.asyncio # type: ignore[misc] + @pytest.mark.parametrize( + "_setUp", + ('test_man_in_the_middle_plugin'), + indirect=True, + ) # type: ignore[misc] async def test_man_in_the_middle_plugin(self) -> None: await self.protocol_handler._run_once() From c71f91f51e1916276cf679411b8179a1840910ca Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 21 Nov 2021 20:04:31 +0530 Subject: [PATCH 12/54] spelldoc --- docs/conf.py | 1 + proxy/core/acceptor/local.py | 4 ++++ 2 files 
changed, 5 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index ec96d7c5dd..0cc5afdea2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -274,4 +274,5 @@ (_py_class_role, 'UUID'), (_py_class_role, 'WebsocketFrame'), (_py_class_role, 'Url'), + (_py_class_role, '_asyncio.Task'), ] diff --git a/proxy/core/acceptor/local.py b/proxy/core/acceptor/local.py index 3017053c53..6aa3308bca 100644 --- a/proxy/core/acceptor/local.py +++ b/proxy/core/acceptor/local.py @@ -7,6 +7,10 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. + + .. spelling:: + + acceptor """ import os import queue From 94f9ef8189542043e498b587c7602bcbf728a203 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 21 Nov 2021 20:08:50 +0530 Subject: [PATCH 13/54] `check.py` and trailing comma --- Makefile | 4 +--- README.md | 14 +++++++---- check.py | 24 ++++++++++--------- proxy/common/_compat.py | 18 ++++++++++---- proxy/common/flag.py | 2 +- proxy/core/acceptor/acceptor.py | 3 ++- proxy/core/acceptor/threadless.py | 8 ++++--- proxy/core/base/tcp_server.py | 6 +++-- .../exceptions/test_http_proxy_auth_failed.py | 3 ++- tests/http/test_http_proxy.py | 6 +++-- .../http/test_http_proxy_tls_interception.py | 6 +++-- tests/http/test_protocol_handler.py | 3 ++- tests/http/test_web_server.py | 22 ++++++++++------- tests/integration/test_integration.py | 13 +++++++++- tests/plugin/test_http_proxy_plugins.py | 9 ++++--- ...ttp_proxy_plugins_with_tls_interception.py | 13 ++++++---- 16 files changed, 102 insertions(+), 52 deletions(-) diff --git a/Makefile b/Makefile index 4532c00ed8..306cc95f13 100644 --- a/Makefile +++ b/Makefile @@ -27,9 +27,7 @@ endif .PHONY: container container-run container-release .PHONY: devtools dashboard dashboard-clean -all: - echo $(IMAGE_TAG) - # lib-test +all: lib-test https-certificates: # Generate server key diff --git a/README.md b/README.md index f4ff04ddea..779c27a68f 100644 --- a/README.md +++ b/README.md @@ -2011,8 +2011,8 @@ Simply run the following command from repo root to start benchmark ```console ❯ proxy -h usage: -m [-h] [--enable-events] [--enable-conn-pool] [--threadless] - [--threaded] [--num-workers NUM_WORKERS] [--backlog BACKLOG] - [--hostname HOSTNAME] [--port PORT] + [--threaded] [--num-workers NUM_WORKERS] [--local-executor] + [--backlog BACKLOG] [--hostname HOSTNAME] [--port PORT] [--unix-socket-path UNIX_SOCKET_PATH] [--num-acceptors NUM_ACCEPTORS] [--version] [--log-level LOG_LEVEL] [--log-file LOG_FILE] [--log-format LOG_FORMAT] @@ -2038,7 +2038,7 @@ usage: -m [-h] [--enable-events] [--enable-conn-pool] [--threadless] [--filtered-url-regex-config FILTERED_URL_REGEX_CONFIG] [--cloudflare-dns-mode CLOUDFLARE_DNS_MODE] -proxy.py v2.3.2 +proxy.py v2.3.2.dev184+gc71f91f.d20211121 options: -h, --help show this help message and exit @@ -2055,6 +2055,13 @@ options: handle each client connection. --num-workers NUM_WORKERS Defaults to number of CPU cores. + --local-executor Default: False. Disabled by default. When enabled + acceptors will make use of local (same process) + executor instead of distributing load across remote + (other process) executors. Enable this option to + achieve CPU affinity between acceptors and executors, + instead of using underlying OS kernel scheduling + algorithm. --backlog BACKLOG Default: 100. Maximum number of pending connections to proxy server --hostname HOSTNAME Default: ::1. Server IP address. @@ -2181,7 +2188,6 @@ options: Proxy.py not working? 
Report at: https://github.com/abhinavsingh/proxy.py/issues/new ``` - # Changelog ## v2.x diff --git a/check.py b/check.py index 71bb51cf63..771e3dbccb 100644 --- a/check.py +++ b/check.py @@ -53,16 +53,18 @@ sys.exit(1) # Update README.md flags section to match current library --help output -# lib_help = subprocess.check_output( -# ['python', '-m', 'proxy', '-h'] -# ) -# with open('README.md', 'rb+') as f: -# c = f.read() -# pre_flags, post_flags = c.split(b'# Flags') -# help_text, post_changelog = post_flags.split(b'# Changelog') -# f.seek(0) -# f.write(pre_flags + b'# Flags\n\n```console\n\xe2\x9d\xaf proxy -h\n' + lib_help + b'```' + -# b'\n# Changelog' + post_changelog) +lib_help = subprocess.check_output( + ['python', '-m', 'proxy', '-h'], +) +with open('README.md', 'rb+') as f: + c = f.read() + pre_flags, post_flags = c.split(b'# Flags') + help_text, post_changelog = post_flags.split(b'# Changelog') + f.seek(0) + f.write( + pre_flags + b'# Flags\n\n```console\n\xe2\x9d\xaf proxy -h\n' + lib_help + b'```' + + b'\n# Changelog' + post_changelog, + ) # Version is also hardcoded in README.md flags section readme_version_cmd = 'cat README.md | grep "proxy.py v" | tail -2 | head -1 | cut -d " " -f 2 | cut -c2-' @@ -72,7 +74,7 @@ # Doesn't contain "v" prefix readme_version = readme_version_output.decode().strip() -if readme_version != lib_version[1:].split('-')[0]: +if readme_version != lib_version: print( 'Version mismatch found. {0} (readme) vs {1} (lib).'.format( readme_version, lib_version, diff --git a/proxy/common/_compat.py b/proxy/common/_compat.py index eaaddfb9d5..41bf67b804 100644 --- a/proxy/common/_compat.py +++ b/proxy/common/_compat.py @@ -1,9 +1,19 @@ -"""Compatibility code for using Proxy.py across various versions of Python. +# -*- coding: utf-8 -*- +""" + proxy.py + ~~~~~~~~ + ⚡⚡⚡ Fast, Lightweight, Pluggable, TLS interception capable proxy server focused on + Network monitoring, controls & Application development, testing, debugging. + + :copyright: (c) 2013-present by Abhinav Singh and contributors. + :license: BSD, see LICENSE for more details. + + Compatibility code for using Proxy.py across various versions of Python. -.. spelling:: + .. 
spelling:: - compat - py + compat + py """ import platform diff --git a/proxy/common/flag.py b/proxy/common/flag.py index 14348e663a..d4b7da733d 100644 --- a/proxy/common/flag.py +++ b/proxy/common/flag.py @@ -342,7 +342,7 @@ def initialize( opts.get( 'local_executor', args.local_executor, - ) + ), ) args.proxy_py_data_dir = DEFAULT_DATA_DIRECTORY_PATH diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py index c11bf05ff2..e5aba2f35a 100644 --- a/proxy/core/acceptor/acceptor.py +++ b/proxy/core/acceptor/acceptor.py @@ -135,7 +135,8 @@ def run_once(self) -> None: if self.sock is not None: conn, addr = self.sock.accept() logging.debug( - 'Accepting new work#{0}'.format(conn.fileno())) + 'Accepting new work#{0}'.format(conn.fileno()), + ) if self.flags.local_executor: assert self._local self._local.evq.put((conn, addr)) diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 5f54676069..a3e2e9f48e 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -216,9 +216,11 @@ async def run_once(self) -> None: if num_works > 0: logger.debug('Executing {0} works'.format(num_works)) await self.wait_for_tasks(self.unfinished) - logger.debug('Done executing works, {0} pending'.format( - len(self.unfinished), - )) + logger.debug( + 'Done executing works, {0} pending'.format( + len(self.unfinished), + ), + ) # Remove and shutdown inactive workers self.cleanup_inactive() diff --git a/proxy/core/base/tcp_server.py b/proxy/core/base/tcp_server.py index b11644d683..2aa0f0574d 100644 --- a/proxy/core/base/tcp_server.py +++ b/proxy/core/base/tcp_server.py @@ -53,12 +53,14 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: if self.flags.unix_socket_path: # pragma: no cover logger.debug( 'Work#{0} accepted from {1}'.format( - self.work.connection.fileno(), self.work.address), + self.work.connection.fileno(), self.work.address, + ), ) else: logger.debug( 'Work#{0} accepted from {1}'.format( - self.work.connection.fileno(), self.work.address), + self.work.connection.fileno(), self.work.address, + ), ) @abstractmethod diff --git a/tests/http/exceptions/test_http_proxy_auth_failed.py b/tests/http/exceptions/test_http_proxy_auth_failed.py index 8a1bfc8d48..8092b1b317 100644 --- a/tests/http/exceptions/test_http_proxy_auth_failed.py +++ b/tests/http/exceptions/test_http_proxy_auth_failed.py @@ -29,7 +29,8 @@ def _setUp(self, mocker: MockerFixture) -> None: self.mock_fromfd = mocker.patch('socket.fromfd') self.mock_selector = mocker.patch('selectors.DefaultSelector') self.mock_server_conn = mocker.patch( - 'proxy.http.proxy.server.TcpServerConnection') + 'proxy.http.proxy.server.TcpServerConnection', + ) self.fileno = 10 self._addr = ('127.0.0.1', 54382) diff --git a/tests/http/test_http_proxy.py b/tests/http/test_http_proxy.py index d691f855cd..2696e0df4e 100644 --- a/tests/http/test_http_proxy.py +++ b/tests/http/test_http_proxy.py @@ -27,7 +27,8 @@ class TestHttpProxyPlugin: @pytest.fixture(autouse=True) # type: ignore[misc] def _setUp(self, mocker: MockerFixture) -> None: self.mock_server_conn = mocker.patch( - 'proxy.http.proxy.server.TcpServerConnection') + 'proxy.http.proxy.server.TcpServerConnection', + ) self.mock_selector = mocker.patch('selectors.DefaultSelector') self.mock_fromfd = mocker.patch('socket.fromfd') @@ -78,7 +79,8 @@ async def test_proxy_plugin_on_and_before_upstream_connection(self) -> None: await self.protocol_handler._run_once() self.mock_server_conn.assert_called_with( - 'upstream.host', 
DEFAULT_HTTP_PORT) + 'upstream.host', DEFAULT_HTTP_PORT, + ) self.plugin.return_value.before_upstream_connection.assert_called() self.plugin.return_value.handle_client_request.assert_called() diff --git a/tests/http/test_http_proxy_tls_interception.py b/tests/http/test_http_proxy_tls_interception.py index bfe2e6454c..28d7047111 100644 --- a/tests/http/test_http_proxy_tls_interception.py +++ b/tests/http/test_http_proxy_tls_interception.py @@ -40,9 +40,11 @@ async def test_e2e(self, mocker: MockerFixture) -> None: self.mock_sign_csr = mocker.patch('proxy.http.proxy.server.sign_csr') self.mock_gen_csr = mocker.patch('proxy.http.proxy.server.gen_csr') self.mock_gen_public_key = mocker.patch( - 'proxy.http.proxy.server.gen_public_key') + 'proxy.http.proxy.server.gen_public_key', + ) self.mock_server_conn = mocker.patch( - 'proxy.http.proxy.server.TcpServerConnection') + 'proxy.http.proxy.server.TcpServerConnection', + ) self.mock_ssl_context = mocker.patch('ssl.create_default_context') self.mock_ssl_wrap = mocker.patch('ssl.wrap_socket') diff --git a/tests/http/test_protocol_handler.py b/tests/http/test_protocol_handler.py index 6cb838c15b..dbf8a030df 100644 --- a/tests/http/test_protocol_handler.py +++ b/tests/http/test_protocol_handler.py @@ -119,7 +119,8 @@ def _setUp(self, mocker: MockerFixture) -> None: self.mock_fromfd = mocker.patch('socket.fromfd') self.mock_selector = mocker.patch('selectors.DefaultSelector') self.mock_server_connection = mocker.patch( - 'proxy.http.proxy.server.TcpServerConnection') + 'proxy.http.proxy.server.TcpServerConnection', + ) self.fileno = 10 self._addr = ('127.0.0.1', 54382) diff --git a/tests/http/test_web_server.py b/tests/http/test_web_server.py index 259763b09a..e58e351f4b 100644 --- a/tests/http/test_web_server.py +++ b/tests/http/test_web_server.py @@ -135,10 +135,12 @@ def mock_selector_for_client_read(self: Any) -> None: class TestWebServerPluginWithPacFilePlugin(Assertions): - @pytest.fixture(autouse=True, params=[ - PAC_FILE_PATH, - PAC_FILE_CONTENT, - ]) # type: ignore[misc] + @pytest.fixture( + autouse=True, params=[ + PAC_FILE_PATH, + PAC_FILE_CONTENT, + ], + ) # type: ignore[misc] def _setUp(self, request: Any, mocker: MockerFixture) -> None: self.mock_fromfd = mocker.patch('socket.fromfd') self.mock_selector = mocker.patch('selectors.DefaultSelector') @@ -152,7 +154,8 @@ def _setUp(self, request: Any, mocker: MockerFixture) -> None: else: self.expected_response = PAC_FILE_CONTENT self.flags = FlagParser.initialize( - pac_file=self.pac_file, threaded=True) + pac_file=self.pac_file, threaded=True, + ) self.flags.plugins = Plugins.load([ bytes_(PLUGIN_HTTP_PROXY), bytes_(PLUGIN_WEB_SERVER), @@ -198,7 +201,8 @@ def _setUp(self, mocker: MockerFixture) -> None: # Setup a static directory self.static_server_dir = os.path.join(tempfile.gettempdir(), 'static') self.index_file_path = os.path.join( - self.static_server_dir, 'index.html') + self.static_server_dir, 'index.html', + ) self.html_file_content = b'''

<html>
<head><title>Proxy.py Testing</title></head>
</html>
''' os.makedirs(self.static_server_dir, exist_ok=True) with open(self.index_file_path, 'wb') as f: @@ -264,8 +268,10 @@ async def test_static_web_server_serves(self) -> None: bytes_(len(encoded_html_file_content)), ) assert response.body - self.assertEqual(gzip.decompress(response.body), - self.html_file_content) + self.assertEqual( + gzip.decompress(response.body), + self.html_file_content, + ) @pytest.mark.asyncio # type: ignore[misc] async def test_static_web_server_serves_404(self) -> None: diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py index c1195b9871..1756ecd3eb 100644 --- a/tests/integration/test_integration.py +++ b/tests/integration/test_integration.py @@ -1,4 +1,15 @@ -"""Test the simplest proxy use scenario for smoke.""" +# -*- coding: utf-8 -*- +""" + proxy.py + ~~~~~~~~ + ⚡⚡⚡ Fast, Lightweight, Pluggable, TLS interception capable proxy server focused on + Network monitoring, controls & Application development, testing, debugging. + + :copyright: (c) 2013-present by Abhinav Singh and contributors. + :license: BSD, see LICENSE for more details. + + Test the simplest proxy use scenario for smoke. +""" from pathlib import Path from subprocess import check_output, Popen from typing import Generator diff --git a/tests/plugin/test_http_proxy_plugins.py b/tests/plugin/test_http_proxy_plugins.py index e9a0835b47..b3e3ae7efb 100644 --- a/tests/plugin/test_http_proxy_plugins.py +++ b/tests/plugin/test_http_proxy_plugins.py @@ -39,7 +39,8 @@ def _setUp(self, request: Any, mocker: MockerFixture) -> None: self.mock_fromfd = mocker.patch('socket.fromfd') self.mock_selector = mocker.patch('selectors.DefaultSelector') self.mock_server_conn = mocker.patch( - 'proxy.http.proxy.server.TcpServerConnection') + 'proxy.http.proxy.server.TcpServerConnection', + ) self.fileno = 10 self._addr = ('127.0.0.1', 54382) @@ -101,7 +102,8 @@ async def test_modify_post_data_plugin(self) -> None: await self.protocol_handler._run_once() self.mock_server_conn.assert_called_with( - 'httpbin.org', DEFAULT_HTTP_PORT) + 'httpbin.org', DEFAULT_HTTP_PORT, + ) self.mock_server_conn.return_value.queue.assert_called_with( build_http_request( b'POST', b'/post', @@ -298,7 +300,8 @@ def closed() -> bool: # Client read await self.protocol_handler._run_once() self.mock_server_conn.assert_called_with( - 'super.secure', DEFAULT_HTTP_PORT) + 'super.secure', DEFAULT_HTTP_PORT, + ) server.connect.assert_called_once() queued_request = \ build_http_request( diff --git a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py index 45994bdc43..64c9ed5789 100644 --- a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py +++ b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py @@ -38,9 +38,11 @@ def _setUp(self, request: Any, mocker: MockerFixture) -> None: self.mock_sign_csr = mocker.patch('proxy.http.proxy.server.sign_csr') self.mock_gen_csr = mocker.patch('proxy.http.proxy.server.gen_csr') self.mock_gen_public_key = mocker.patch( - 'proxy.http.proxy.server.gen_public_key') + 'proxy.http.proxy.server.gen_public_key', + ) self.mock_server_conn = mocker.patch( - 'proxy.http.proxy.server.TcpServerConnection') + 'proxy.http.proxy.server.TcpServerConnection', + ) self.mock_ssl_context = mocker.patch('ssl.create_default_context') self.mock_ssl_wrap = mocker.patch('ssl.wrap_socket') @@ -151,7 +153,7 @@ def send(raw: bytes) -> int: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( - "_setUp", 
+ '_setUp', ('test_modify_post_data_plugin'), indirect=True, ) # type: ignore[misc] @@ -198,12 +200,13 @@ async def test_modify_post_data_plugin(self) -> None: # body=modified, # ) response = HttpParser.response( - self.server.queue.call_args_list[0][0][0].tobytes()) + self.server.queue.call_args_list[0][0][0].tobytes(), + ) self.assertEqual(response.body, modified) @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( - "_setUp", + '_setUp', ('test_man_in_the_middle_plugin'), indirect=True, ) # type: ignore[misc] From ae089f444b5ea79b91e6d2b9f220d8cb9ca20e59 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 21 Nov 2021 20:11:26 +0530 Subject: [PATCH 14/54] Rename to `_assertions.py` --- README.md | 3 ++- tests/{assertions.py => _assertions.py} | 0 tests/http/exceptions/test_http_proxy_auth_failed.py | 2 +- tests/http/test_http_proxy_tls_interception.py | 2 +- tests/http/test_protocol_handler.py | 2 +- tests/http/test_web_server.py | 2 +- tests/plugin/test_http_proxy_plugins.py | 2 +- tests/plugin/test_http_proxy_plugins_with_tls_interception.py | 2 +- 8 files changed, 8 insertions(+), 7 deletions(-) rename tests/{assertions.py => _assertions.py} (100%) diff --git a/README.md b/README.md index 779c27a68f..68c3979a51 100644 --- a/README.md +++ b/README.md @@ -2038,7 +2038,7 @@ usage: -m [-h] [--enable-events] [--enable-conn-pool] [--threadless] [--filtered-url-regex-config FILTERED_URL_REGEX_CONFIG] [--cloudflare-dns-mode CLOUDFLARE_DNS_MODE] -proxy.py v2.3.2.dev184+gc71f91f.d20211121 +proxy.py v2.3.2.dev185+g94f9ef8.d20211121 options: -h, --help show this help message and exit @@ -2210,3 +2210,4 @@ https://github.com/abhinavsingh/proxy.py/issues/new - Single threaded server. For detailed changelog refer to release PRs or commit history. + history. 
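The rename below moves `tests/assertions.py` to `tests/_assertions.py`; the leading underscore keeps pytest from collecting the shared helper as a test module while it stays importable from the test packages. The helper's body is not shown in this series, but judging from the call sites above (`assertEqual`, `assertFalse`, and friends) it is a thin mixin over plain `assert`; a minimal sketch, with the method names assumed from those call sites, would be:

```python
# Hypothetical reconstruction of tests/_assertions.py -- not part of this
# patch series. It maps unittest-style assertion calls onto bare asserts so
# that the fixture-based pytest classes keep their existing call sites.
from typing import Any


class Assertions:

    def assertTrue(self, obj: Any) -> None:
        assert obj

    def assertFalse(self, obj: Any) -> None:
        assert not obj

    def assertEqual(self, first: Any, second: Any) -> None:
        assert first == second

    def assertNotEqual(self, first: Any, second: Any) -> None:
        assert first != second
```

Because the methods reduce to bare `assert` statements, pytest still rewrites the expressions and reports full failure introspection, which is why the helper never needs to subclass `unittest.TestCase`.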
diff --git a/tests/assertions.py b/tests/_assertions.py similarity index 100% rename from tests/assertions.py rename to tests/_assertions.py diff --git a/tests/http/exceptions/test_http_proxy_auth_failed.py b/tests/http/exceptions/test_http_proxy_auth_failed.py index 8092b1b317..eaee01dd61 100644 --- a/tests/http/exceptions/test_http_proxy_auth_failed.py +++ b/tests/http/exceptions/test_http_proxy_auth_failed.py @@ -19,7 +19,7 @@ from proxy.core.connection import TcpClientConnection from proxy.common.utils import build_http_request -from ...assertions import Assertions +from ..._assertions import Assertions class TestHttpProxyAuthFailed(Assertions): diff --git a/tests/http/test_http_proxy_tls_interception.py b/tests/http/test_http_proxy_tls_interception.py index 28d7047111..54f20540b2 100644 --- a/tests/http/test_http_proxy_tls_interception.py +++ b/tests/http/test_http_proxy_tls_interception.py @@ -25,7 +25,7 @@ from proxy.common.utils import build_http_request, bytes_ from proxy.common.flag import FlagParser -from ..assertions import Assertions +from .._assertions import Assertions class TestHttpProxyTlsInterception(Assertions): diff --git a/tests/http/test_protocol_handler.py b/tests/http/test_protocol_handler.py index dbf8a030df..d70e236745 100644 --- a/tests/http/test_protocol_handler.py +++ b/tests/http/test_protocol_handler.py @@ -28,7 +28,7 @@ from proxy.http.exception import ProxyAuthenticationFailed, ProxyConnectionFailed from proxy.http import HttpProtocolHandler -from ..assertions import Assertions +from .._assertions import Assertions def mock_selector_for_client_read(self: Any) -> None: diff --git a/tests/http/test_web_server.py b/tests/http/test_web_server.py index e58e351f4b..3bb4b4bced 100644 --- a/tests/http/test_web_server.py +++ b/tests/http/test_web_server.py @@ -27,7 +27,7 @@ from proxy.common.constants import CRLF, PLUGIN_HTTP_PROXY, PLUGIN_PAC_FILE, PLUGIN_WEB_SERVER, PROXY_PY_DIR from proxy.http.server import HttpWebServerPlugin -from ..assertions import Assertions +from .._assertions import Assertions PAC_FILE_PATH = os.path.join( diff --git a/tests/plugin/test_http_proxy_plugins.py b/tests/plugin/test_http_proxy_plugins.py index b3e3ae7efb..07cdb95600 100644 --- a/tests/plugin/test_http_proxy_plugins.py +++ b/tests/plugin/test_http_proxy_plugins.py @@ -29,7 +29,7 @@ from .utils import get_plugin_by_test_name -from ..assertions import Assertions +from .._assertions import Assertions class TestHttpProxyPluginExamples(Assertions): diff --git a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py index 64c9ed5789..074eb514a0 100644 --- a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py +++ b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py @@ -26,7 +26,7 @@ from .utils import get_plugin_by_test_name -from ..assertions import Assertions +from .._assertions import Assertions class TestHttpProxyPluginExamplesWithTlsInterception(Assertions): From 2a727c807dfbef68fa21927e1a6a5cc27edb9e98 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 21 Nov 2021 20:20:35 +0530 Subject: [PATCH 15/54] Add missing `pytest-mock` and `pytest-asyncio` deps --- requirements-testing.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements-testing.txt b/requirements-testing.txt index 6beae2c4a6..6eae0fb128 100644 --- a/requirements-testing.txt +++ b/requirements-testing.txt @@ -5,6 +5,8 @@ flake8==4.0.1 pytest==6.2.5 pytest-cov==3.0.0 pytest-xdist == 2.4.0 +pytest-mock==3.6.1 
+pytest-asyncio==0.16.0 autopep8==1.6.0 mypy==0.910 py-spy==0.3.10 From c03c817879579a0df7e4be202790eded3cd36325 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 21 Nov 2021 20:41:22 +0530 Subject: [PATCH 16/54] Add `pytest-mock` to `pylint` deps --- README.md | 2 +- tests/{_assertions.py => assertions_test.py} | 0 tests/http/exceptions/test_http_proxy_auth_failed.py | 2 +- tests/http/test_http_proxy_tls_interception.py | 2 +- tests/http/test_protocol_handler.py | 2 +- tests/http/test_web_server.py | 2 +- tests/plugin/test_http_proxy_plugins.py | 2 +- tests/plugin/test_http_proxy_plugins_with_tls_interception.py | 2 +- tox.ini | 1 + 9 files changed, 8 insertions(+), 7 deletions(-) rename tests/{_assertions.py => assertions_test.py} (100%) diff --git a/README.md b/README.md index 68c3979a51..2671133bcc 100644 --- a/README.md +++ b/README.md @@ -2038,7 +2038,7 @@ usage: -m [-h] [--enable-events] [--enable-conn-pool] [--threadless] [--filtered-url-regex-config FILTERED_URL_REGEX_CONFIG] [--cloudflare-dns-mode CLOUDFLARE_DNS_MODE] -proxy.py v2.3.2.dev185+g94f9ef8.d20211121 +proxy.py v2.3.2.dev189+g2a727c8.d20211121 options: -h, --help show this help message and exit diff --git a/tests/_assertions.py b/tests/assertions_test.py similarity index 100% rename from tests/_assertions.py rename to tests/assertions_test.py diff --git a/tests/http/exceptions/test_http_proxy_auth_failed.py b/tests/http/exceptions/test_http_proxy_auth_failed.py index eaee01dd61..193b6b3cb2 100644 --- a/tests/http/exceptions/test_http_proxy_auth_failed.py +++ b/tests/http/exceptions/test_http_proxy_auth_failed.py @@ -19,7 +19,7 @@ from proxy.core.connection import TcpClientConnection from proxy.common.utils import build_http_request -from ..._assertions import Assertions +from ...assertions_test import Assertions class TestHttpProxyAuthFailed(Assertions): diff --git a/tests/http/test_http_proxy_tls_interception.py b/tests/http/test_http_proxy_tls_interception.py index 54f20540b2..3637821be5 100644 --- a/tests/http/test_http_proxy_tls_interception.py +++ b/tests/http/test_http_proxy_tls_interception.py @@ -25,7 +25,7 @@ from proxy.common.utils import build_http_request, bytes_ from proxy.common.flag import FlagParser -from .._assertions import Assertions +from ..assertions_test import Assertions class TestHttpProxyTlsInterception(Assertions): diff --git a/tests/http/test_protocol_handler.py b/tests/http/test_protocol_handler.py index d70e236745..df22d1bc6f 100644 --- a/tests/http/test_protocol_handler.py +++ b/tests/http/test_protocol_handler.py @@ -28,7 +28,7 @@ from proxy.http.exception import ProxyAuthenticationFailed, ProxyConnectionFailed from proxy.http import HttpProtocolHandler -from .._assertions import Assertions +from ..assertions_test import Assertions def mock_selector_for_client_read(self: Any) -> None: diff --git a/tests/http/test_web_server.py b/tests/http/test_web_server.py index 3bb4b4bced..75ffdd246a 100644 --- a/tests/http/test_web_server.py +++ b/tests/http/test_web_server.py @@ -27,7 +27,7 @@ from proxy.common.constants import CRLF, PLUGIN_HTTP_PROXY, PLUGIN_PAC_FILE, PLUGIN_WEB_SERVER, PROXY_PY_DIR from proxy.http.server import HttpWebServerPlugin -from .._assertions import Assertions +from ..assertions_test import Assertions PAC_FILE_PATH = os.path.join( diff --git a/tests/plugin/test_http_proxy_plugins.py b/tests/plugin/test_http_proxy_plugins.py index 07cdb95600..9021220d8b 100644 --- a/tests/plugin/test_http_proxy_plugins.py +++ b/tests/plugin/test_http_proxy_plugins.py @@ -29,7 +29,7 @@ 
from .utils import get_plugin_by_test_name -from .._assertions import Assertions +from ..assertions_test import Assertions class TestHttpProxyPluginExamples(Assertions): diff --git a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py index 074eb514a0..2ba654da3e 100644 --- a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py +++ b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py @@ -26,7 +26,7 @@ from .utils import get_plugin_by_test_name -from .._assertions import Assertions +from ..assertions_test import Assertions class TestHttpProxyPluginExamplesWithTlsInterception(Assertions): diff --git a/tox.ini b/tox.ini index 5e315c8241..daa25f9bb3 100644 --- a/tox.ini +++ b/tox.ini @@ -228,6 +228,7 @@ deps = pre-commit pylint >= 2.5.3 pylint-pytest < 1.1.0 + pytest-mock >= 3.6.1 -r docs/requirements.in -r requirements-tunnel.txt isolated_build = true From 78b59aafe0fb91029c0504aea92ffc6f477a23d5 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 21 Nov 2021 21:12:31 +0530 Subject: [PATCH 17/54] Correct use of `parameterize` and add `PT007` to flake8 ignores --- .flake8 | 1 + README.md | 2 +- proxy/core/connection/connection.py | 1 + .../http/exceptions/test_http_proxy_auth_failed.py | 2 +- tests/http/test_http_proxy_tls_interception.py | 2 +- tests/http/test_protocol_handler.py | 2 +- tests/http/test_web_server.py | 2 +- tests/plugin/test_http_proxy_plugins.py | 14 +++++++------- ...est_http_proxy_plugins_with_tls_interception.py | 11 ++++++----- tests/{assertions_test.py => test_assertions.py} | 0 10 files changed, 20 insertions(+), 17 deletions(-) rename tests/{assertions_test.py => test_assertions.py} (100%) diff --git a/.flake8 b/.flake8 index 1bffdb4268..cf75431f6f 100644 --- a/.flake8 +++ b/.flake8 @@ -65,6 +65,7 @@ extend-ignore = N802 # FIXME: func name should be lowercase N816 # FIXME: mixed case var name P101 # FIXME: format string with unindexed params + PT007 # FIXME: wrong values type in @pytest.mark.parametrize, expected tuple PT009 # FIXME: pytest encourages use of `assert` PT018 # FIXME: use multiple `assert`s instead of one complex Q000 # FIXME: inconsistent double quotes use when single ones are needed diff --git a/README.md b/README.md index 2671133bcc..934c2893c2 100644 --- a/README.md +++ b/README.md @@ -2038,7 +2038,7 @@ usage: -m [-h] [--enable-events] [--enable-conn-pool] [--threadless] [--filtered-url-regex-config FILTERED_URL_REGEX_CONFIG] [--cloudflare-dns-mode CLOUDFLARE_DNS_MODE] -proxy.py v2.3.2.dev189+g2a727c8.d20211121 +proxy.py v2.3.2.dev190+gc03c817.d20211121 options: -h, --help show this help message and exit diff --git a/proxy/core/connection/connection.py b/proxy/core/connection/connection.py index 65b01769bd..0c44604d04 100644 --- a/proxy/core/connection/connection.py +++ b/proxy/core/connection/connection.py @@ -77,6 +77,7 @@ def has_buffer(self) -> bool: return len(self.buffer) > 0 def queue(self, mv: memoryview) -> None: + assert isinstance(mv, memoryview) self.buffer.append(mv) def flush(self) -> int: diff --git a/tests/http/exceptions/test_http_proxy_auth_failed.py b/tests/http/exceptions/test_http_proxy_auth_failed.py index 193b6b3cb2..2aacc62661 100644 --- a/tests/http/exceptions/test_http_proxy_auth_failed.py +++ b/tests/http/exceptions/test_http_proxy_auth_failed.py @@ -19,7 +19,7 @@ from proxy.core.connection import TcpClientConnection from proxy.common.utils import build_http_request -from ...assertions_test import Assertions +from 
...test_assertions import Assertions class TestHttpProxyAuthFailed(Assertions): diff --git a/tests/http/test_http_proxy_tls_interception.py b/tests/http/test_http_proxy_tls_interception.py index 3637821be5..16ad094466 100644 --- a/tests/http/test_http_proxy_tls_interception.py +++ b/tests/http/test_http_proxy_tls_interception.py @@ -25,7 +25,7 @@ from proxy.common.utils import build_http_request, bytes_ from proxy.common.flag import FlagParser -from ..assertions_test import Assertions +from ..test_assertions import Assertions class TestHttpProxyTlsInterception(Assertions): diff --git a/tests/http/test_protocol_handler.py b/tests/http/test_protocol_handler.py index df22d1bc6f..c043734ffc 100644 --- a/tests/http/test_protocol_handler.py +++ b/tests/http/test_protocol_handler.py @@ -28,7 +28,7 @@ from proxy.http.exception import ProxyAuthenticationFailed, ProxyConnectionFailed from proxy.http import HttpProtocolHandler -from ..assertions_test import Assertions +from ..test_assertions import Assertions def mock_selector_for_client_read(self: Any) -> None: diff --git a/tests/http/test_web_server.py b/tests/http/test_web_server.py index 75ffdd246a..46818c5870 100644 --- a/tests/http/test_web_server.py +++ b/tests/http/test_web_server.py @@ -27,7 +27,7 @@ from proxy.common.constants import CRLF, PLUGIN_HTTP_PROXY, PLUGIN_PAC_FILE, PLUGIN_WEB_SERVER, PROXY_PY_DIR from proxy.http.server import HttpWebServerPlugin -from ..assertions_test import Assertions +from ..test_assertions import Assertions PAC_FILE_PATH = os.path.join( diff --git a/tests/plugin/test_http_proxy_plugins.py b/tests/plugin/test_http_proxy_plugins.py index 9021220d8b..bdc8094372 100644 --- a/tests/plugin/test_http_proxy_plugins.py +++ b/tests/plugin/test_http_proxy_plugins.py @@ -29,7 +29,7 @@ from .utils import get_plugin_by_test_name -from ..assertions_test import Assertions +from ..test_assertions import Assertions class TestHttpProxyPluginExamples(Assertions): @@ -72,7 +72,7 @@ def _setUp(self, request: Any, mocker: MockerFixture) -> None: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( "_setUp", - ('test_modify_post_data_plugin'), + [('test_modify_post_data_plugin')], indirect=True, ) # type: ignore[misc] async def test_modify_post_data_plugin(self) -> None: @@ -120,7 +120,7 @@ async def test_modify_post_data_plugin(self) -> None: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( "_setUp", - ('test_proposed_rest_api_plugin'), + [('test_proposed_rest_api_plugin')], indirect=True, ) # type: ignore[misc] async def test_proposed_rest_api_plugin(self) -> None: @@ -163,7 +163,7 @@ async def test_proposed_rest_api_plugin(self) -> None: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( "_setUp", - ('test_redirect_to_custom_server_plugin'), + [('test_redirect_to_custom_server_plugin')], indirect=True, ) # type: ignore[misc] async def test_redirect_to_custom_server_plugin(self) -> None: @@ -204,7 +204,7 @@ async def test_redirect_to_custom_server_plugin(self) -> None: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( "_setUp", - ('test_filter_by_upstream_host_plugin'), + [('test_filter_by_upstream_host_plugin')], indirect=True, ) # type: ignore[misc] async def test_filter_by_upstream_host_plugin(self) -> None: @@ -243,7 +243,7 @@ async def test_filter_by_upstream_host_plugin(self) -> None: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( "_setUp", - ('test_man_in_the_middle_plugin'), + [('test_man_in_the_middle_plugin')], indirect=True, ) # 
type: ignore[misc] async def test_man_in_the_middle_plugin(self) -> None: @@ -335,7 +335,7 @@ def closed() -> bool: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( "_setUp", - ('test_filter_by_url_regex_plugin'), + [('test_filter_by_url_regex_plugin')], indirect=True, ) # type: ignore[misc] async def test_filter_by_url_regex_plugin(self) -> None: diff --git a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py index 2ba654da3e..2527ad39a0 100644 --- a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py +++ b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py @@ -26,7 +26,7 @@ from .utils import get_plugin_by_test_name -from ..assertions_test import Assertions +from ..test_assertions import Assertions class TestHttpProxyPluginExamplesWithTlsInterception(Assertions): @@ -154,7 +154,7 @@ def send(raw: bytes) -> int: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( '_setUp', - ('test_modify_post_data_plugin'), + [('test_modify_post_data_plugin')], indirect=True, ) # type: ignore[misc] async def test_modify_post_data_plugin(self) -> None: @@ -207,7 +207,7 @@ async def test_modify_post_data_plugin(self) -> None: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( '_setUp', - ('test_man_in_the_middle_plugin'), + [('test_man_in_the_middle_plugin')], indirect=True, ) # type: ignore[misc] async def test_man_in_the_middle_plugin(self) -> None: @@ -246,11 +246,12 @@ async def test_man_in_the_middle_plugin(self) -> None: self.server.flush.assert_called_once() # Server read - self.server.recv.return_value = \ + self.server.recv.return_value = memoryview( build_http_response( httpStatusCodes.OK, reason=b'OK', body=b'Original Response From Upstream', - ) + ), + ) await self.protocol_handler._run_once() self.assertEqual( self.protocol_handler.work.buffer[0].tobytes(), diff --git a/tests/assertions_test.py b/tests/test_assertions.py similarity index 100% rename from tests/assertions_test.py rename to tests/test_assertions.py From 55d3a29a60e851e29813af03fadef213f539c894 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 21 Nov 2021 21:33:40 +0530 Subject: [PATCH 18/54] Fix mypy hints broken for `< Python3.9` --- README.md | 3 ++- check.py | 2 +- proxy/common/_compat.py | 4 ++-- proxy/core/acceptor/threadless.py | 12 ++++++++---- proxy/core/connection/connection.py | 1 - 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 934c2893c2..89ea9b3f16 100644 --- a/README.md +++ b/README.md @@ -2038,7 +2038,7 @@ usage: -m [-h] [--enable-events] [--enable-conn-pool] [--threadless] [--filtered-url-regex-config FILTERED_URL_REGEX_CONFIG] [--cloudflare-dns-mode CLOUDFLARE_DNS_MODE] -proxy.py v2.3.2.dev190+gc03c817.d20211121 +proxy.py v2.3.2.dev191+g78b59aa.d20211121 options: -h, --help show this help message and exit @@ -2188,6 +2188,7 @@ options: Proxy.py not working? 
Report at: https://github.com/abhinavsingh/proxy.py/issues/new ``` + # Changelog ## v2.x diff --git a/check.py b/check.py index 771e3dbccb..038758a30f 100644 --- a/check.py +++ b/check.py @@ -63,7 +63,7 @@ f.seek(0) f.write( pre_flags + b'# Flags\n\n```console\n\xe2\x9d\xaf proxy -h\n' + lib_help + b'```' + - b'\n# Changelog' + post_changelog, + b'\n\n# Changelog' + post_changelog, ) # Version is also hardcoded in README.md flags section diff --git a/proxy/common/_compat.py b/proxy/common/_compat.py index 41bf67b804..c3ec75e411 100644 --- a/proxy/common/_compat.py +++ b/proxy/common/_compat.py @@ -12,8 +12,8 @@ .. spelling:: - compat - py + compat + py """ import platform diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index a3e2e9f48e..1740a3a948 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -75,7 +75,11 @@ def __init__( self.works: Dict[int, Work] = {} self.selector: Optional[selectors.DefaultSelector] = None self.loop: Optional[asyncio.AbstractEventLoop] = None - self.unfinished: Set[asyncio.Task[bool]] = set() + # If we remove single quotes for typing hint below, + # runtime exceptions will occur for < Python 3.9. + # + # Ref https://github.com/abhinavsingh/proxy.py/runs/4279055360?check_suite_focus=true + self.unfinished: Set['asyncio.Task[bool]'] = set() @contextlib.asynccontextmanager async def selected_events(self) -> AsyncGenerator[ @@ -112,7 +116,7 @@ async def selected_events(self) -> AsyncGenerator[ # TODO: Use correct future typing annotations async def wait_for_tasks( self, - tasks: Set[asyncio.Task[bool]], + tasks: Set['asyncio.Task[bool]'], ) -> None: assert len(tasks) > 0 finished, self.unfinished = await asyncio.wait( @@ -174,8 +178,8 @@ def cleanup(self, work_id: int) -> None: def _prepare_tasks( self, work_by_ids: Dict[int, Tuple[Readables, Writables]], - ) -> Set[asyncio.Task[bool]]: - tasks: Set[asyncio.Task[bool]] = set() + ) -> Set['asyncio.Task[bool]']: + tasks: Set['asyncio.Task[bool]'] = set() assert self.loop for work_id in work_by_ids: readables, writables = work_by_ids[work_id] diff --git a/proxy/core/connection/connection.py b/proxy/core/connection/connection.py index 0c44604d04..65b01769bd 100644 --- a/proxy/core/connection/connection.py +++ b/proxy/core/connection/connection.py @@ -77,7 +77,6 @@ def has_buffer(self) -> bool: return len(self.buffer) > 0 def queue(self, mv: memoryview) -> None: - assert isinstance(mv, memoryview) self.buffer.append(mv) def flush(self) -> int: From 87ff9215a1b6d6c4f52f7894c62456f60051f679 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 21 Nov 2021 22:30:57 +0530 Subject: [PATCH 19/54] Remove usage of `asynccontextmanager` which is not available for all Python versions that `proxy.py` supports --- proxy/common/backports.py | 81 +++++++++++++++++++++++++++++++ proxy/common/utils.py | 69 -------------------------- proxy/core/acceptor/threadless.py | 21 ++++---- proxy/http/handler.py | 25 +++++----- proxy/http/server/web.py | 2 +- 5 files changed, 106 insertions(+), 92 deletions(-) create mode 100644 proxy/common/backports.py diff --git a/proxy/common/backports.py b/proxy/common/backports.py new file mode 100644 index 0000000000..347c06a462 --- /dev/null +++ b/proxy/common/backports.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +""" + proxy.py + ~~~~~~~~ + ⚡⚡⚡ Fast, Lightweight, Pluggable, TLS interception capable proxy server focused on + Network monitoring, controls & Application development, testing, debugging. 
+ + :copyright: (c) 2013-present by Abhinav Singh and contributors. + :license: BSD, see LICENSE for more details. +""" +import time + +from typing import Any + + +class cached_property: + """Decorator for read-only properties evaluated only once within TTL period. + It can be used to create a cached property like this:: + + import random + + # the class containing the property must be a new-style class + class MyClass: + # create property whose value is cached for ten minutes + @cached_property(ttl=600) + def randint(self): + # will only be evaluated every 10 min. at maximum. + return random.randint(0, 100) + + The value is cached in the '_cached_properties' attribute of the object instance that + has the property getter method wrapped by this decorator. The '_cached_properties' + attribute value is a dictionary which has a key for every property of the + object which is wrapped by this decorator. Each entry in the cache is + created only when the property is accessed for the first time and is a + two-element tuple with the last computed property value and the last time + it was updated in seconds since the epoch. + + The default time-to-live (TTL) is 300 seconds (5 minutes). Set the TTL to + zero for the cached value to never expire. + + To expire a cached property value manually just do:: + del instance._cached_properties[] + + Adopted from https://wiki.python.org/moin/PythonDecoratorLibrary#Cached_Properties + © 2011 Christopher Arndt, MIT License. + + NOTE: We need this function only because Python in-built are only available + for 3.8+. Hence, we must get rid of this function once proxy.py no longer + support version older than 3.8. + + .. spelling:: + + getter + Arndt + """ + + def __init__(self, ttl: float = 300.0): + self.ttl = ttl + + def __call__(self, fget: Any, doc: Any = None) -> 'cached_property': + self.fget = fget + self.__doc__ = doc or fget.__doc__ + self.__name__ = fget.__name__ + self.__module__ = fget.__module__ + return self + + def __get__(self, inst: Any, owner: Any) -> Any: + now = time.time() + try: + value, last_update = inst._cached_properties[self.__name__] + if self.ttl > 0 and now - last_update > self.ttl: # noqa: WPS333 + raise AttributeError + except (KeyError, AttributeError): + value = self.fget(inst) + try: + cache = inst._cached_properties + except AttributeError: + cache, inst._cached_properties = {}, {} + finally: + cache[self.__name__] = (value, now) + return value diff --git a/proxy/common/utils.py b/proxy/common/utils.py index f289fc49b2..b25b680522 100644 --- a/proxy/common/utils.py +++ b/proxy/common/utils.py @@ -17,7 +17,6 @@ """ import sys import ssl -import time import socket import logging import functools @@ -291,71 +290,3 @@ def set_open_file_limit(soft_limit: int) -> None: logger.debug( 'Open file soft limit set to %d', soft_limit, ) - - -class cached_property: - """Decorator for read-only properties evaluated only once within TTL period. - It can be used to create a cached property like this:: - - import random - - # the class containing the property must be a new-style class - class MyClass: - # create property whose value is cached for ten minutes - @cached_property(ttl=600) - def randint(self): - # will only be evaluated every 10 min. at maximum. - return random.randint(0, 100) - - The value is cached in the '_cached_properties' attribute of the object instance that - has the property getter method wrapped by this decorator. 
The '_cached_properties' - attribute value is a dictionary which has a key for every property of the - object which is wrapped by this decorator. Each entry in the cache is - created only when the property is accessed for the first time and is a - two-element tuple with the last computed property value and the last time - it was updated in seconds since the epoch. - - The default time-to-live (TTL) is 300 seconds (5 minutes). Set the TTL to - zero for the cached value to never expire. - - To expire a cached property value manually just do:: - del instance._cached_properties[] - - Adopted from https://wiki.python.org/moin/PythonDecoratorLibrary#Cached_Properties - © 2011 Christopher Arndt, MIT License. - - NOTE: We need this function only because Python in-built are only available - for 3.8+. Hence, we must get rid of this function once proxy.py no longer - support version older than 3.8. - - .. spelling:: - - getter - Arndt - """ - - def __init__(self, ttl: float = 300.0): - self.ttl = ttl - - def __call__(self, fget: Any, doc: Any = None) -> 'cached_property': - self.fget = fget - self.__doc__ = doc or fget.__doc__ - self.__name__ = fget.__name__ - self.__module__ = fget.__module__ - return self - - def __get__(self, inst: Any, owner: Any) -> Any: - now = time.time() - try: - value, last_update = inst._cached_properties[self.__name__] - if self.ttl > 0 and now - last_update > self.ttl: # noqa: WPS333 - raise AttributeError - except (KeyError, AttributeError): - value = self.fget(inst) - try: - cache = inst._cached_properties - except AttributeError: - cache, inst._cached_properties = {}, {} - finally: - cache[self.__name__] = (value, now) - return value diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 1740a3a948..932488ffae 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -18,12 +18,11 @@ import asyncio import argparse import selectors -import contextlib import multiprocessing from multiprocessing import connection from multiprocessing.reduction import recv_handle -from typing import Dict, Optional, Tuple, List, AsyncGenerator, Set +from typing import Dict, Optional, Tuple, List, Set from .work import Work @@ -81,9 +80,9 @@ def __init__( # Ref https://github.com/abhinavsingh/proxy.py/runs/4279055360?check_suite_focus=true self.unfinished: Set['asyncio.Task[bool]'] = set() - @contextlib.asynccontextmanager - async def selected_events(self) -> AsyncGenerator[ - Dict[int, Tuple[Readables, Writables]], None, + async def _selected_events(self) -> Tuple[ + Dict[socket.socket, int], + Dict[int, Tuple[Readables, Writables]] ]: assert self.selector is not None events: Dict[socket.socket, int] = {} @@ -109,9 +108,7 @@ async def selected_events(self) -> AsyncGenerator[ work_by_ids[key.data][0].append(key.fileobj) if mask & selectors.EVENT_WRITE: work_by_ids[key.data][1].append(key.fileobj) - yield work_by_ids - for fd in events: - self.selector.unregister(fd) + return (events, work_by_ids) # TODO: Use correct future typing annotations async def wait_for_tasks( @@ -195,11 +192,17 @@ def _prepare_tasks( async def run_once(self) -> None: assert self.loop is not None - async with self.selected_events() as work_by_ids: + events, work_by_ids = await self._selected_events() + try: if len(work_by_ids) == 0: # Remove and shutdown inactive connections self.cleanup_inactive() return + finally: + assert self.selector + for fd in events: + self.selector.unregister(fd) + # Note that selector from now on is idle, # until all the 
logic below completes.
         #
diff --git a/proxy/http/handler.py b/proxy/http/handler.py
index 9d59f26220..e83c740b91 100644
--- a/proxy/http/handler.py
+++ b/proxy/http/handler.py
@@ -19,9 +19,8 @@
 import asyncio
 import logging
 import selectors
-import contextlib
 
-from typing import Tuple, List, Union, Optional, AsyncGenerator, Dict, Any
+from typing import Tuple, List, Union, Optional, Dict, Any
 
 from .plugin import HttpProtocolHandlerPlugin
 from .parser import HttpParser, httpParserStates, httpParserTypes
@@ -65,11 +64,6 @@
     'data sent or received by the client.',
 )
 
-SelectedEventsGeneratorType = AsyncGenerator[
-    Tuple[Readables, Writables],
-    None,
-]
-
 
 class HttpProtocolHandler(BaseTcpServerHandler):
     """HTTP, HTTPS, HTTP2, WebSockets protocol handler.
@@ -354,8 +348,10 @@ def _optionally_wrap_socket(
             conn = wrap_socket(conn, self.flags.keyfile, self.flags.certfile)
         return conn
 
-    @contextlib.asynccontextmanager
-    async def _selected_events(self) -> SelectedEventsGeneratorType:
+    # FIXME: Returning events is only necessary because we cannot use an async context manager
+    # for < Python 3.8. For this reason, this method is no longer a context manager and the
+    # caller is responsible for unregistering the descriptors.
+    async def _selected_events(self) -> Tuple[Dict[socket.socket, int], Readables, Writables]:
         assert self.selector
         events = await self.get_events()
         for fd in events:
@@ -368,16 +364,19 @@ async def _selected_events(self) -> SelectedEventsGeneratorType:
             readables.append(key.fileobj)
         if mask & selectors.EVENT_WRITE:
             writables.append(key.fileobj)
-        yield (readables, writables)
-        for fd in events:
-            self.selector.unregister(fd)
+        return (events, readables, writables)
 
     async def _run_once(self) -> bool:
-        async with self._selected_events() as (readables, writables):
+        events, readables, writables = await self._selected_events()
+        try:
             teardown = await self.handle_events(readables, writables)
             if teardown:
                 return True
-        return False
+            return False
+        finally:
+            assert self.selector
+            for fd in events:
+                self.selector.unregister(fd)
 
     def _flush(self) -> None:
         assert self.selector
diff --git a/proxy/http/server/web.py b/proxy/http/server/web.py
index 194e77e591..8570515c6c 100644
--- a/proxy/http/server/web.py
+++ b/proxy/http/server/web.py
@@ -25,7 +25,7 @@
 from ...common.constants import DEFAULT_ENABLE_STATIC_SERVER, DEFAULT_ENABLE_WEB_SERVER
 from ...common.constants import DEFAULT_MIN_COMPRESSION_LIMIT, DEFAULT_WEB_ACCESS_LOG_FORMAT
 from ...common.utils import bytes_, text_, build_http_response, build_websocket_handshake_response
-from ...common.utils import cached_property
+from ...common.backports import cached_property
 from ...common.types import Readables, Writables
 from ...common.flag import flags

From 9436e99f8b16baddac99859b03cb339719afd837 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 21 Nov 2021 17:01:43 +0000
Subject: [PATCH 20/54] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 proxy/core/acceptor/threadless.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py
index 932488ffae..bb77aada85 100644
--- a/proxy/core/acceptor/threadless.py
+++ b/proxy/core/acceptor/threadless.py
@@ -82,7 +82,7 @@ def __init__(
 
     async def _selected_events(self) -> Tuple[
         Dict[socket.socket, int],
-        Dict[int, Tuple[Readables, Writables]]
+        Dict[int, Tuple[Readables, Writables]],
     ]:
         assert self.selector is not None
events: Dict[socket.socket, int] = {} From 39eebc49914aa8b557866cf547eb9156f6526377 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 21 Nov 2021 22:48:47 +0530 Subject: [PATCH 21/54] Fix for pre-python-3.9 versions --- README.md | 3 ++- proxy/common/backports.py | 1 + proxy/core/acceptor/local.py | 5 +++-- proxy/core/acceptor/threadless.py | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 89ea9b3f16..788dfb6a72 100644 --- a/README.md +++ b/README.md @@ -2038,7 +2038,7 @@ usage: -m [-h] [--enable-events] [--enable-conn-pool] [--threadless] [--filtered-url-regex-config FILTERED_URL_REGEX_CONFIG] [--cloudflare-dns-mode CLOUDFLARE_DNS_MODE] -proxy.py v2.3.2.dev191+g78b59aa.d20211121 +proxy.py v2.3.2.dev193+g87ff921.d20211121 options: -h, --help show this help message and exit @@ -2212,3 +2212,4 @@ https://github.com/abhinavsingh/proxy.py/issues/new For detailed changelog refer to release PRs or commit history. history. + history. diff --git a/proxy/common/backports.py b/proxy/common/backports.py index 347c06a462..4734287073 100644 --- a/proxy/common/backports.py +++ b/proxy/common/backports.py @@ -50,6 +50,7 @@ def randint(self): .. spelling:: + backports getter Arndt """ diff --git a/proxy/core/acceptor/local.py b/proxy/core/acceptor/local.py index 6aa3308bca..b3fba6ade2 100644 --- a/proxy/core/acceptor/local.py +++ b/proxy/core/acceptor/local.py @@ -22,7 +22,8 @@ import multiprocessing.synchronize from multiprocessing import connection -from typing import Optional, Tuple, List, Dict, Any +from typing import Optional, Tuple, List, Dict +from typing import Any # noqa: W0611 from ...common.utils import is_threadless @@ -42,7 +43,7 @@ def __init__( idd: int, flags: argparse.Namespace, sock: socket.socket, - evq: queue.Queue[Any], + evq: 'queue.Queue[Any]', executor_queues: List[connection.Connection], executor_pids: List[int], executor_locks: List[multiprocessing.synchronize.Lock], diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 932488ffae..bb77aada85 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -82,7 +82,7 @@ def __init__( async def _selected_events(self) -> Tuple[ Dict[socket.socket, int], - Dict[int, Tuple[Readables, Writables]] + Dict[int, Tuple[Readables, Writables]], ]: assert self.selector is not None events: Dict[socket.socket, int] = {} From d89160c30ed661c4c574429a955a4e7d4b875d7d Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Sun, 21 Nov 2021 23:16:36 +0530 Subject: [PATCH 22/54] `AsyncTask` apis `set_name` and `get_name` are not available on all supported versions --- proxy/core/acceptor/local.py | 2 +- proxy/core/acceptor/threadless.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/proxy/core/acceptor/local.py b/proxy/core/acceptor/local.py index b3fba6ade2..60e2b1d785 100644 --- a/proxy/core/acceptor/local.py +++ b/proxy/core/acceptor/local.py @@ -23,7 +23,7 @@ from multiprocessing import connection from typing import Optional, Tuple, List, Dict -from typing import Any # noqa: W0611 +from typing import Any # noqa: W0611 pylint: disable=unused-import from ...common.utils import is_threadless diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index bb77aada85..36f6cf4e08 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -122,9 +122,10 @@ async def wait_for_tasks( DEFAULT_SELECTOR_SELECT_TIMEOUT, return_when=asyncio.FIRST_COMPLETED, ) - 
for f in finished: - if f.result(): - self.cleanup(int(f.get_name())) + for task in finished: + if task.result(): + self.cleanup(task._work_id) # type: ignore + # self.cleanup(int(task.get_name())) def fromfd(self, fileno: int) -> socket.socket: return socket.fromfd( @@ -186,7 +187,8 @@ def _prepare_tasks( writables, ), ) - task.set_name(work_id) + task._work_id = work_id # type: ignore + # task.set_name(work_id) tasks.add(task) return tasks From 0543b54dba0521ccb22adb1ae2734300c3c259b6 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 00:27:25 +0530 Subject: [PATCH 23/54] Install setuptools via `lib-dep` until we recommend editable install --- .gitignore | 1 - .vscode/settings.json | 1 + Makefile | 3 ++- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 217ce78fd2..78ec3ad8e0 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,6 @@ .mypy_cache .hypothesis .tox -.python-version coverage.xml proxy.py.iml diff --git a/.vscode/settings.json b/.vscode/settings.json index 05ba4c5890..e956ff2ea9 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -39,6 +39,7 @@ "python.linting.pylintEnabled": true, "python.linting.pylintArgs": ["--generate-members"], "python.linting.flake8Enabled": true, + "python.linting.flake8Args": ["--config", ".flake8"], "python.linting.mypyEnabled": true, "python.formatting.provider": "autopep8", "autoDocstring.docstringFormat": "sphinx" diff --git a/Makefile b/Makefile index 306cc95f13..bc51729d9d 100644 --- a/Makefile +++ b/Makefile @@ -92,7 +92,8 @@ lib-dep: -r requirements.txt \ -r requirements-testing.txt \ -r requirements-release.txt \ - -r requirements-tunnel.txt \ + -r requirements-tunnel.txt && \ + pip install "setuptools>=42" lib-lint: python -m tox -e lint From cbd2be2b23f7c802660b242f3b63c445d0127a6f Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 00:30:42 +0530 Subject: [PATCH 24/54] Deprecate support for `Python 3.6` --- .github/workflows/test-library.yml | 1 - README.md | 10 +++++++--- docs/conf.py | 2 +- setup.cfg | 3 +-- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test-library.yml b/.github/workflows/test-library.yml index 181a4adcb9..a1c7f4c114 100644 --- a/.github/workflows/test-library.yml +++ b/.github/workflows/test-library.yml @@ -429,7 +429,6 @@ jobs: # NOTE: to improve the responsiveness. It's nice to see the most # NOTE: important results first. 
- '3.10' - - 3.6 - 3.9 - 3.8 - 3.7 diff --git a/README.md b/README.md index 788dfb6a72..229ffae8ac 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ [![iOS, iOS Simulator](https://img.shields.io/static/v1?label=tested%20with&message=iOS%20%F0%9F%93%B1%20%7C%20iOS%20Simulator%20%F0%9F%93%B1&color=darkgreen&style=for-the-badge)](https://abhinavsingh.com/proxy-py-a-lightweight-single-file-http-proxy-server-in-python/) [![pypi version](https://img.shields.io/pypi/v/proxy.py)](https://pypi.org/project/proxy.py/) -[![Python 3.x](https://img.shields.io/static/v1?label=Python&message=3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9%20%7C%203.10&color=blue)](https://www.python.org/) +[![Python 3.x](https://img.shields.io/static/v1?label=Python&message=3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10&color=blue)](https://www.python.org/) [![Checked with mypy](https://img.shields.io/static/v1?label=MyPy&message=checked&color=blue)](http://mypy-lang.org/) [![lib](https://github.com/abhinavsingh/proxy.py/actions/workflows/test-library.yml/badge.svg)](https://github.com/abhinavsingh/proxy.py/actions/workflows/test-library.yml) [![codecov](https://codecov.io/gh/abhinavsingh/proxy.py/branch/develop/graph/badge.svg?token=Zh9J7b4la2)](https://codecov.io/gh/abhinavsingh/proxy.py) @@ -1709,7 +1709,7 @@ If threadless works for you, consider sending a PR by editing `_env_threadless_c ``` Hence a Python version that understands typing annotations is required. -Make sure you are using `Python 3.6+`. +Make sure you are using `Python 3.7+`. Verify the version before running `proxy.py`: @@ -1722,7 +1722,7 @@ All `typing` annotations can be replaced with `comment-only` annotations. Exampl >>> ################^^^^^^^^^^^ ``` -It will enable `proxy.py` to run on Python `pre-3.6`, even on `2.7`. +It will enable `proxy.py` to run on Python `pre-3.7`, even on `2.7`. However, as all future versions of Python will support `typing` annotations, this has not been considered. @@ -2191,6 +2191,10 @@ https://github.com/abhinavsingh/proxy.py/issues/new # Changelog +## v2.4.0 + +- No longer support `Python 3.6` due to `asyncio.run` usage in the core. + ## v2.x - No longer ~~a single file module~~. 
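For illustration only (not part of this patch): the ``python_requires = >= 3.7``
bump in setup.cfg below guards installs, while an equivalent runtime guard for a
script embedding proxy.py is a one-liner:

    import sys

    # Refuse to start on interpreters older than Python 3.7; this mirrors
    # the python_requires constraint declared in setup.cfg.
    if sys.version_info < (3, 7):
        raise SystemExit('proxy.py requires Python 3.7+')
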
diff --git a/docs/conf.py b/docs/conf.py index 0cc5afdea2..f641ffb358 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,5 @@ # pylint: disable=invalid-name -# Requires Python 3.6+ +# Requires Python 3.7+ # Ref: https://www.sphinx-doc.org/en/master/usage/configuration.html """Configuration for the Sphinx documentation generator.""" diff --git a/setup.cfg b/setup.cfg index db77dbb005..702f3a3896 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,7 +49,6 @@ classifiers = Programming Language :: Python :: Implementation Programming Language :: Python :: 3 :: Only Programming Language :: Python :: 3 - Programming Language :: Python :: 3.6 Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 @@ -88,7 +87,7 @@ keywords = Python3 [options] -python_requires = >= 3.6 +python_requires = >= 3.7 packages = find: include_package_data = True zip_safe = False From 0c30899463aad3ae0a7f9cac5b8cd24e0f5556c7 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 03:40:23 +0530 Subject: [PATCH 25/54] Use recommendation suggested here https://github.com/abhinavsingh/proxy.py/pull/769\#discussion_r753840929 --- proxy/core/acceptor/acceptor.py | 1 + proxy/core/acceptor/local.py | 6 ++---- proxy/proxy.py | 11 +++++++---- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py index e5aba2f35a..e0b18a40ae 100644 --- a/proxy/core/acceptor/acceptor.py +++ b/proxy/core/acceptor/acceptor.py @@ -137,6 +137,7 @@ def run_once(self) -> None: logging.debug( 'Accepting new work#{0}'.format(conn.fileno()), ) + addr = None if addr == '' else addr if self.flags.local_executor: assert self._local self._local.evq.put((conn, addr)) diff --git a/proxy/core/acceptor/local.py b/proxy/core/acceptor/local.py index 60e2b1d785..093b1f1015 100644 --- a/proxy/core/acceptor/local.py +++ b/proxy/core/acceptor/local.py @@ -19,6 +19,7 @@ import argparse import selectors import threading +import contextlib import multiprocessing.synchronize from multiprocessing import connection @@ -65,16 +66,13 @@ def __init__( self._works: Dict[int, Work] = {} def run_once(self) -> bool: - try: + with contextlib.suppress(queue.Empty): payload = self.evq.get(block=True, timeout=0.1) if isinstance(payload, bool) and payload is False: return True assert isinstance(payload, tuple) conn, addr = payload - addr = None if addr == '' else addr self.dispatch(conn, addr) - except queue.Empty: - pass return False def run(self) -> None: diff --git a/proxy/proxy.py b/proxy/proxy.py index c05c6cf0ac..80431fa6fe 100644 --- a/proxy/proxy.py +++ b/proxy/proxy.py @@ -114,15 +114,18 @@ class Proxy: - """Context manager to control AcceptorPool, ExecutorPool & EventingCore lifecycle. + """Proxy is a context manager to control proxy.py library core. By default, AcceptorPool is started with :class:`~proxy.http.handler.HttpProtocolHandler` work class. By definition, it expects HTTP traffic to flow between clients and server. - Optionally, it also initializes the eventing core, a multi-process safe - pubsub system queue which can be used to build various patterns - for message sharing and/or signaling. + In ``--threadless`` mode, ThreadlessPool is also started, which is + responsible for handling work accepted by :class:`~proxy.core.acceptor.Acceptor`. + + Optionally, Proxy class also initializes the EventManager. + A multi-process safe pubsub system which can be used to build various + patterns for message sharing and/or signaling. 
""" def __init__(self, input_args: Optional[List[str]] = None, **opts: Any) -> None: From 5348b0af15c08e3b4acf77be86cdccf8a43b43a3 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 03:49:09 +0530 Subject: [PATCH 26/54] Address recommendation here https://github.com/abhinavsingh/proxy.py/pull/769\#discussion_r753841906 --- proxy/http/handler.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/proxy/http/handler.py b/proxy/http/handler.py index e83c740b91..69f4385e17 100644 --- a/proxy/http/handler.py +++ b/proxy/http/handler.py @@ -369,10 +369,7 @@ async def _selected_events(self) -> Tuple[Dict[socket.socket, int], Readables, W async def _run_once(self) -> bool: events, readables, writables = await self._selected_events() try: - teardown = await self.handle_events(readables, writables) - if teardown: - return True - return False + return await self.handle_events(readables, writables) finally: assert self.selector for fd in events: From 5dd092638722a5436f82faf4cc18013c94196f1f Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 03:56:14 +0530 Subject: [PATCH 27/54] Make `Threadless` agnostic of `multiprocessing.Process` --- proxy/core/acceptor/executors.py | 17 +++++++++++------ proxy/core/acceptor/threadless.py | 3 +-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/proxy/core/acceptor/executors.py b/proxy/core/acceptor/executors.py index 3d40841b96..8144f38865 100644 --- a/proxy/core/acceptor/executors.py +++ b/proxy/core/acceptor/executors.py @@ -97,6 +97,7 @@ def __init__( self.work_locks: List[multiprocessing.synchronize.Lock] = [] # List of threadless workers self._workers: List[Threadless] = [] + self._processes: List[multiprocessing.Process] = [] def __enter__(self) -> 'ThreadlessPool': self.setup() @@ -189,18 +190,22 @@ def _start_worker(self, index: int) -> None: event_queue=self.event_queue, ) self._workers.append(w) - w.start() - assert w.pid - self.work_pids.append(w.pid) - logger.debug('Started threadless#%d process#%d', index, w.pid) + p = multiprocessing.Process(target=w.run) + p.daemon = True + self._processes.append(p) + p.start() + assert p.pid + self.work_pids.append(p.pid) + logger.debug('Started threadless#%d process#%d', index, p.pid) def _shutdown_workers(self) -> None: """Pop a running threadless worker and clean it up.""" for index in range(self.flags.num_workers): self._workers[index].running.set() for index in range(self.flags.num_workers): - pid = self._workers[index].pid - self._workers[index].join() + pid = self.work_pids[index] + self._processes.pop().join() + self._workers.pop() self.work_pids.pop() self.work_queues.pop().close() logger.debug('Stopped threadless process#%d', pid) diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 36f6cf4e08..e97d3730e8 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -36,7 +36,7 @@ logger = logging.getLogger(__name__) -class Threadless(multiprocessing.Process): +class Threadless: """Work executor process. Threadless process provides an event loop, which is shared across @@ -56,7 +56,6 @@ class Threadless(multiprocessing.Process): implements :class:`~proxy.core.acceptor.work.Work` protocol. It expects a client connection as work payload and hooks into the threadless event loop to handle the client connection. 
- """ def __init__( From 3b8019d7a097bcc86d5972efcad8d8dd4468f869 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 04:03:45 +0530 Subject: [PATCH 28/54] Acceptors must dispatch to local executor in non-blocking fashion --- proxy/core/acceptor/acceptor.py | 8 ++++---- proxy/core/acceptor/local.py | 14 ++++++-------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py index e0b18a40ae..603a70038b 100644 --- a/proxy/core/acceptor/acceptor.py +++ b/proxy/core/acceptor/acceptor.py @@ -138,12 +138,13 @@ def run_once(self) -> None: 'Accepting new work#{0}'.format(conn.fileno()), ) addr = None if addr == '' else addr + work = (conn, addr) if self.flags.local_executor: assert self._local - self._local.evq.put((conn, addr)) + self._local.work_queue.put_nowait(work) else: addr = None if addr == '' else addr - self._work(conn, addr) + self._work(*work) except BlockingIOError: pass finally: @@ -186,7 +187,6 @@ def _start_local(self) -> None: self._local = LocalExecutor( self.idd, self.flags, - self.sock, queue.Queue(), self.executor_queues, self.executor_pids, @@ -198,7 +198,7 @@ def _start_local(self) -> None: def _stop_local(self) -> None: if self._local is not None: - self._local.evq.put(False) + self._local.work_queue.put(False) self._local.join() def _work(self, conn: socket.socket, addr: Optional[Tuple[str, int]]) -> None: diff --git a/proxy/core/acceptor/local.py b/proxy/core/acceptor/local.py index 093b1f1015..ec3832069d 100644 --- a/proxy/core/acceptor/local.py +++ b/proxy/core/acceptor/local.py @@ -43,8 +43,7 @@ def __init__( self, idd: int, flags: argparse.Namespace, - sock: socket.socket, - evq: 'queue.Queue[Any]', + work_queue: 'queue.Queue[Any]', executor_queues: List[connection.Connection], executor_pids: List[int], executor_locks: List[multiprocessing.synchronize.Lock], @@ -53,8 +52,7 @@ def __init__( super().__init__() # Index assigned by `AcceptorPool` self.idd = idd - self.sock = sock - self.evq = evq + self.work_queue = work_queue self.flags = flags self.executor_queues = executor_queues self.executor_pids = executor_pids @@ -67,11 +65,11 @@ def __init__( def run_once(self) -> bool: with contextlib.suppress(queue.Empty): - payload = self.evq.get(block=True, timeout=0.1) - if isinstance(payload, bool) and payload is False: + work = self.work_queue.get(block=True, timeout=0.1) + if isinstance(work, bool) and work is False: return True - assert isinstance(payload, tuple) - conn, addr = payload + assert isinstance(work, tuple) + conn, addr = work self.dispatch(conn, addr) return False From 3e7ddec74d9a7b42b8e03969d49bfa690c0c52be Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 04:13:56 +0530 Subject: [PATCH 29/54] No daemon for executor processes and fix shutdown logic --- proxy/core/acceptor/acceptor.py | 33 +++++++++++++++++--------------- proxy/core/acceptor/executors.py | 4 ++-- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py index 603a70038b..5edfe6a0f9 100644 --- a/proxy/core/acceptor/acceptor.py +++ b/proxy/core/acceptor/acceptor.py @@ -121,6 +121,23 @@ def __init__( self._total: Optional[int] = None self._local: Optional[LocalExecutor] = None + def accept(self, events: List[Tuple[selectors.SelectorKey, int]]) -> None: + for _, mask in events: + if mask & selectors.EVENT_READ: + if self.sock is not None: + conn, addr = self.sock.accept() + logging.debug( + 'Accepting new 
work#{0}'.format(conn.fileno()), + ) + addr = None if addr == '' else addr + work = (conn, addr) + if self.flags.local_executor: + assert self._local + self._local.work_queue.put_nowait(work) + else: + addr = None if addr == '' else addr + self._work(*work) + def run_once(self) -> None: if self.selector is not None: events = self.selector.select(timeout=1) @@ -130,21 +147,7 @@ def run_once(self) -> None: try: if self.lock.acquire(block=False): locked = True - for _, mask in events: - if mask & selectors.EVENT_READ: - if self.sock is not None: - conn, addr = self.sock.accept() - logging.debug( - 'Accepting new work#{0}'.format(conn.fileno()), - ) - addr = None if addr == '' else addr - work = (conn, addr) - if self.flags.local_executor: - assert self._local - self._local.work_queue.put_nowait(work) - else: - addr = None if addr == '' else addr - self._work(*work) + self.accept(events) except BlockingIOError: pass finally: diff --git a/proxy/core/acceptor/executors.py b/proxy/core/acceptor/executors.py index 8144f38865..eee48404a7 100644 --- a/proxy/core/acceptor/executors.py +++ b/proxy/core/acceptor/executors.py @@ -191,7 +191,7 @@ def _start_worker(self, index: int) -> None: ) self._workers.append(w) p = multiprocessing.Process(target=w.run) - p.daemon = True + # p.daemon = True self._processes.append(p) p.start() assert p.pid @@ -203,7 +203,7 @@ def _shutdown_workers(self) -> None: for index in range(self.flags.num_workers): self._workers[index].running.set() for index in range(self.flags.num_workers): - pid = self.work_pids[index] + pid = self.work_pids[-1] self._processes.pop().join() self._workers.pop() self.work_pids.pop() From 1c746e642167328fdd9e2ed81c6a73f4d114edac Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 04:32:44 +0530 Subject: [PATCH 30/54] Only return fds from `_selected_events` not all events data --- proxy/core/acceptor/threadless.py | 41 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index e97d3730e8..09edd57b53 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -80,15 +80,15 @@ def __init__( self.unfinished: Set['asyncio.Task[bool]'] = set() async def _selected_events(self) -> Tuple[ - Dict[socket.socket, int], + List[socket.socket], Dict[int, Tuple[Readables, Writables]], ]: + """For each work, collects events they are interested in. + Calls select for events of interest. """ assert self.selector is not None - events: Dict[socket.socket, int] = {} + fds: List[socket.socket] = [] for work_id in self.works: - work = self.works[work_id] - worker_events = await work.get_events() - events.update(worker_events) + worker_events = await self.works[work_id].get_events() for fd in worker_events: # Can throw ValueError: Invalid file descriptor: -1 # @@ -96,18 +96,26 @@ async def _selected_events(self) -> Tuple[ # asynchronous nature. Hence, threadless will handle # ValueError exceptions raised by selector.register # for invalid fd. 
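# Illustration (annotation, not part of this patch): the tolerance for
# ValueError described in the comment above amounts to a guard like the
# following, skipping fds that turned invalid (-1) between get_events()
# and register():
#
#     try:
#         self.selector.register(fd, events=worker_events[fd], data=work_id)
#         work_fds.append(fd)
#     except ValueError:
#         # fd already closed by the owning work; the inactive-work
#         # cleanup pass will reap it.
#         pass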
- self.selector.register(fd, worker_events[fd], data=work_id) - ev = self.selector.select(timeout=DEFAULT_SELECTOR_SELECT_TIMEOUT) - # Keys are work_id and values are 2-tuple indicating readables & writables + self.selector.register( + fd, events=worker_events[fd], + data=work_id, + ) + fds.append(fd) + selected = self.selector.select( + timeout=DEFAULT_SELECTOR_SELECT_TIMEOUT, + ) + # Keys are work_id and values are 2-tuple indicating + # readables & writables that work_id is interested in + # and are ready for IO. work_by_ids: Dict[int, Tuple[Readables, Writables]] = {} - for key, mask in ev: + for key, mask in selected: if key.data not in work_by_ids: work_by_ids[key.data] = ([], []) if mask & selectors.EVENT_READ: work_by_ids[key.data][0].append(key.fileobj) if mask & selectors.EVENT_WRITE: work_by_ids[key.data][1].append(key.fileobj) - return (events, work_by_ids) + return (fds, work_by_ids) # TODO: Use correct future typing annotations async def wait_for_tasks( @@ -193,7 +201,7 @@ def _prepare_tasks( async def run_once(self) -> None: assert self.loop is not None - events, work_by_ids = await self._selected_events() + fds, work_by_ids = await self._selected_events() try: if len(work_by_ids) == 0: # Remove and shutdown inactive connections @@ -201,21 +209,12 @@ async def run_once(self) -> None: return finally: assert self.selector - for fd in events: + for fd in fds: self.selector.unregister(fd) - - # Note that selector from now on is idle, - # until all the logic below completes. - # - # This is where one process per CPU architecture shines, - # as other threadless processes can continue process work - # within their context. - # # Invoke Threadless.handle_events # # TODO: Only send readable / writables that client originally # registered. - # Wait for Threadless.handle_events to complete if self.client_queue.fileno() in work_by_ids: self.accept_client() del work_by_ids[self.client_queue.fileno()] From 1f4c176d8d1876c660ffad9005d529eaebab5aff Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 05:03:39 +0530 Subject: [PATCH 31/54] Refactor logic --- proxy/core/acceptor/threadless.py | 77 ++++++++++++++++--------------- 1 file changed, 40 insertions(+), 37 deletions(-) diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 09edd57b53..4450f0ea31 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -78,15 +78,17 @@ def __init__( # # Ref https://github.com/abhinavsingh/proxy.py/runs/4279055360?check_suite_focus=true self.unfinished: Set['asyncio.Task[bool]'] = set() + self.wait_timeout: float = (self.flags.num_workers / 2) * DEFAULT_SELECTOR_SELECT_TIMEOUT async def _selected_events(self) -> Tuple[ List[socket.socket], Dict[int, Tuple[Readables, Writables]], + bool, ]: """For each work, collects events they are interested in. Calls select for events of interest. """ assert self.selector is not None - fds: List[socket.socket] = [] + work_fds: List[socket.socket] = [] for work_id in self.works: worker_events = await self.works[work_id].get_events() for fd in worker_events: @@ -100,7 +102,7 @@ async def _selected_events(self) -> Tuple[ fd, events=worker_events[fd], data=work_id, ) - fds.append(fd) + work_fds.append(fd) selected = self.selector.select( timeout=DEFAULT_SELECTOR_SELECT_TIMEOUT, ) @@ -108,25 +110,28 @@ async def _selected_events(self) -> Tuple[ # readables & writables that work_id is interested in # and are ready for IO. 
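# Illustration with assumed values (not from this patch): if works with
# ids 4 and 7 are both ready, the mapping built below might look like
#
#     {4: ([sock_a], []), 7: ([sock_b], [sock_b])}
#
# i.e. work 4 has one readable ready while work 7 can both read and write.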
work_by_ids: Dict[int, Tuple[Readables, Writables]] = {} + client_queue_fileno = self.client_queue.fileno() + new_work_available = False for key, mask in selected: + if key.fileobj == client_queue_fileno: + assert mask & selectors.EVENT_READ + new_work_available = True + continue if key.data not in work_by_ids: work_by_ids[key.data] = ([], []) if mask & selectors.EVENT_READ: work_by_ids[key.data][0].append(key.fileobj) if mask & selectors.EVENT_WRITE: work_by_ids[key.data][1].append(key.fileobj) - return (fds, work_by_ids) + return (work_fds, work_by_ids, new_work_available) - # TODO: Use correct future typing annotations async def wait_for_tasks( self, - tasks: Set['asyncio.Task[bool]'], + pending: Set['asyncio.Task[bool]'], ) -> None: - assert len(tasks) > 0 finished, self.unfinished = await asyncio.wait( - tasks, - timeout=(self.flags.num_workers / 2) * - DEFAULT_SELECTOR_SELECT_TIMEOUT, + pending, + timeout=self.wait_timeout, return_when=asyncio.FIRST_COMPLETED, ) for task in finished: @@ -166,6 +171,10 @@ def accept_client(self) -> None: ) self.cleanup(fileno) + # TODO: Use cached property to avoid execution repeatedly + # within a second interval. Note that our selector timeout + # is 0.1 second which can unnecessarily result in cleanup + # checks within a second boundary. def cleanup_inactive(self) -> None: inactive_works: List[int] = [] for work_id in self.works: @@ -174,25 +183,21 @@ def cleanup_inactive(self) -> None: for work_id in inactive_works: self.cleanup(work_id) + # TODO: HttpProtocolHandler.shutdown can call flush which may block def cleanup(self, work_id: int) -> None: - # TODO: HttpProtocolHandler.shutdown can call flush which may block self.works[work_id].shutdown() del self.works[work_id] os.close(work_id) - def _prepare_tasks( + def _create_tasks( self, work_by_ids: Dict[int, Tuple[Readables, Writables]], ) -> Set['asyncio.Task[bool]']: - tasks: Set['asyncio.Task[bool]'] = set() assert self.loop + tasks: Set['asyncio.Task[bool]'] = set() for work_id in work_by_ids: - readables, writables = work_by_ids[work_id] task = self.loop.create_task( - self.works[work_id].handle_events( - readables, - writables, - ), + self.works[work_id].handle_events(*work_by_ids[work_id]), ) task._work_id = work_id # type: ignore # task.set_name(work_id) @@ -201,33 +206,31 @@ def _prepare_tasks( async def run_once(self) -> None: assert self.loop is not None - fds, work_by_ids = await self._selected_events() + work_fds, work_by_ids, new_work_available = await self._selected_events() try: + # Accept new work if available + # + # TODO: We must use a work klass to handle + # client_queue fd itself a.k.a. accept_client + # will become handle_readables. + if new_work_available: + self.accept_client() if len(work_by_ids) == 0: - # Remove and shutdown inactive connections self.cleanup_inactive() return finally: assert self.selector - for fd in fds: - self.selector.unregister(fd) + for wfd in work_fds: + self.selector.unregister(wfd) # Invoke Threadless.handle_events - # - # TODO: Only send readable / writables that client originally - # registered. 
- if self.client_queue.fileno() in work_by_ids: - self.accept_client() - del work_by_ids[self.client_queue.fileno()] - self.unfinished.update(self._prepare_tasks(work_by_ids)) - num_works = len(self.unfinished) - if num_works > 0: - logger.debug('Executing {0} works'.format(num_works)) - await self.wait_for_tasks(self.unfinished) - logger.debug( - 'Done executing works, {0} pending'.format( - len(self.unfinished), - ), - ) + self.unfinished.update(self._create_tasks(work_by_ids)) + logger.debug('Executing {0} works'.format(len(self.unfinished))) + await self.wait_for_tasks(self.unfinished) + logger.debug( + 'Done executing works, {0} pending'.format( + len(self.unfinished), + ), + ) # Remove and shutdown inactive workers self.cleanup_inactive() From 91741a7f9bbf16426ab9ac35896ad7aee50f0832 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 05:07:11 +0530 Subject: [PATCH 32/54] Prefix private methods with `_` --- proxy/core/acceptor/threadless.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 4450f0ea31..4290c22f10 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -125,7 +125,7 @@ async def _selected_events(self) -> Tuple[ work_by_ids[key.data][1].append(key.fileobj) return (work_fds, work_by_ids, new_work_available) - async def wait_for_tasks( + async def _wait_for_tasks( self, pending: Set['asyncio.Task[bool]'], ) -> None: @@ -136,16 +136,16 @@ async def wait_for_tasks( ) for task in finished: if task.result(): - self.cleanup(task._work_id) # type: ignore + self._cleanup(task._work_id) # type: ignore # self.cleanup(int(task.get_name())) - def fromfd(self, fileno: int) -> socket.socket: + def _fromfd(self, fileno: int) -> socket.socket: return socket.fromfd( fileno, family=socket.AF_INET if self.flags.hostname.version == 4 else socket.AF_INET6, type=socket.SOCK_STREAM, ) - def accept_client(self) -> None: + def _accept_client(self) -> None: # Acceptor will not send address for # unix socket domain environments. addr = None @@ -153,7 +153,7 @@ def accept_client(self) -> None: addr = self.client_queue.recv() fileno = recv_handle(self.client_queue) self.works[fileno] = self.flags.work_klass( - TcpClientConnection(conn=self.fromfd(fileno), addr=addr), + TcpClientConnection(conn=self._fromfd(fileno), addr=addr), flags=self.flags, event_queue=self.event_queue, ) @@ -169,22 +169,22 @@ def accept_client(self) -> None: 'Exception occurred during initialization', exc_info=e, ) - self.cleanup(fileno) + self._cleanup(fileno) # TODO: Use cached property to avoid execution repeatedly # within a second interval. Note that our selector timeout # is 0.1 second which can unnecessarily result in cleanup # checks within a second boundary. 
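# Sketch of the TODO above (an assumption, not implemented by this series):
# remember the last sweep and skip scans repeated within the same window.
#
#     now = time.time()
#     if now - self._last_cleanup_at < 1.0:  # _last_cleanup_at is hypothetical
#         return
#     self._last_cleanup_at = now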
- def cleanup_inactive(self) -> None: + def _cleanup_inactive(self) -> None: inactive_works: List[int] = [] for work_id in self.works: if self.works[work_id].is_inactive(): inactive_works.append(work_id) for work_id in inactive_works: - self.cleanup(work_id) + self._cleanup(work_id) # TODO: HttpProtocolHandler.shutdown can call flush which may block - def cleanup(self, work_id: int) -> None: + def _cleanup(self, work_id: int) -> None: self.works[work_id].shutdown() del self.works[work_id] os.close(work_id) @@ -204,7 +204,7 @@ def _create_tasks( tasks.add(task) return tasks - async def run_once(self) -> None: + async def _run_once(self) -> None: assert self.loop is not None work_fds, work_by_ids, new_work_available = await self._selected_events() try: @@ -214,9 +214,9 @@ async def run_once(self) -> None: # client_queue fd itself a.k.a. accept_client # will become handle_readables. if new_work_available: - self.accept_client() + self._accept_client() if len(work_by_ids) == 0: - self.cleanup_inactive() + self._cleanup_inactive() return finally: assert self.selector @@ -225,14 +225,14 @@ async def run_once(self) -> None: # Invoke Threadless.handle_events self.unfinished.update(self._create_tasks(work_by_ids)) logger.debug('Executing {0} works'.format(len(self.unfinished))) - await self.wait_for_tasks(self.unfinished) + await self._wait_for_tasks(self.unfinished) logger.debug( 'Done executing works, {0} pending'.format( len(self.unfinished), ), ) # Remove and shutdown inactive workers - self.cleanup_inactive() + self._cleanup_inactive() def run(self) -> None: Logger.setup( @@ -249,7 +249,7 @@ def run(self) -> None: self.loop = asyncio.get_event_loop_policy().get_event_loop() while not self.running.is_set(): # logger.debug('Working on {0} works'.format(len(self.works))) - self.loop.run_until_complete(self.run_once()) + self.loop.run_until_complete(self._run_once()) except KeyboardInterrupt: pass finally: From 3044758b44f00784d0ca6e78112821e9222a1fd1 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 05:09:17 +0530 Subject: [PATCH 33/54] `work_queue` and not `client_queue` --- proxy/core/acceptor/executors.py | 2 +- proxy/core/acceptor/threadless.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/proxy/core/acceptor/executors.py b/proxy/core/acceptor/executors.py index eee48404a7..5312feb082 100644 --- a/proxy/core/acceptor/executors.py +++ b/proxy/core/acceptor/executors.py @@ -185,7 +185,7 @@ def _start_worker(self, index: int) -> None: pipe = multiprocessing.Pipe() self.work_queues.append(pipe[0]) w = Threadless( - client_queue=pipe[1], + work_queue=pipe[1], flags=self.flags, event_queue=self.event_queue, ) diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 4290c22f10..53dc550598 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -60,12 +60,12 @@ class Threadless: def __init__( self, - client_queue: connection.Connection, + work_queue: connection.Connection, flags: argparse.Namespace, event_queue: Optional[EventQueue] = None, ) -> None: super().__init__() - self.client_queue = client_queue + self.work_queue = work_queue self.flags = flags self.event_queue = event_queue @@ -110,7 +110,7 @@ async def _selected_events(self) -> Tuple[ # readables & writables that work_id is interested in # and are ready for IO. 
work_by_ids: Dict[int, Tuple[Readables, Writables]] = {} - client_queue_fileno = self.client_queue.fileno() + client_queue_fileno = self.work_queue.fileno() new_work_available = False for key, mask in selected: if key.fileobj == client_queue_fileno: @@ -150,8 +150,8 @@ def _accept_client(self) -> None: # unix socket domain environments. addr = None if not self.flags.unix_socket_path: - addr = self.client_queue.recv() - fileno = recv_handle(self.client_queue) + addr = self.work_queue.recv() + fileno = recv_handle(self.work_queue) self.works[fileno] = self.flags.work_klass( TcpClientConnection(conn=self._fromfd(fileno), addr=addr), flags=self.flags, @@ -242,9 +242,9 @@ def run(self) -> None: try: self.selector = selectors.DefaultSelector() self.selector.register( - self.client_queue.fileno(), + self.work_queue.fileno(), selectors.EVENT_READ, - data=self.client_queue.fileno(), + data=self.work_queue.fileno(), ) self.loop = asyncio.get_event_loop_policy().get_event_loop() while not self.running.is_set(): @@ -254,7 +254,7 @@ def run(self) -> None: pass finally: assert self.selector is not None - self.selector.unregister(self.client_queue.fileno()) - self.client_queue.close() + self.selector.unregister(self.work_queue.fileno()) + self.work_queue.close() assert self.loop is not None self.loop.close() From 5b04f42346d5e953167bd3747c17b15680cc6ae3 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 05:54:22 +0530 Subject: [PATCH 34/54] Turn `Threadless` into an abstract executor. Introduce `RemoteExecutor` --- proxy/core/acceptor/__init__.py | 4 ++ proxy/core/acceptor/executors.py | 6 +-- proxy/core/acceptor/remote.py | 60 +++++++++++++++++++++ proxy/core/acceptor/threadless.py | 86 +++++++++++++++---------------- 4 files changed, 109 insertions(+), 47 deletions(-) create mode 100644 proxy/core/acceptor/remote.py diff --git a/proxy/core/acceptor/__init__.py b/proxy/core/acceptor/__init__.py index 73da204d72..577e2022f5 100644 --- a/proxy/core/acceptor/__init__.py +++ b/proxy/core/acceptor/__init__.py @@ -19,6 +19,8 @@ from .pool import AcceptorPool from .work import Work from .threadless import Threadless +from .remote import RemoteExecutor +from .local import LocalExecutor from .executors import ThreadlessPool from .listener import Listener @@ -27,6 +29,8 @@ 'AcceptorPool', 'Work', 'Threadless', + 'RemoteExecutor', + 'LocalExecutor', 'ThreadlessPool', 'Listener', ] diff --git a/proxy/core/acceptor/executors.py b/proxy/core/acceptor/executors.py index 5312feb082..918c4d05fd 100644 --- a/proxy/core/acceptor/executors.py +++ b/proxy/core/acceptor/executors.py @@ -24,7 +24,7 @@ from typing import Any, Optional, List, Tuple from .work import Work -from .threadless import Threadless +from .remote import RemoteExecutor from ..connection import TcpClientConnection from ..event import EventQueue, eventNames @@ -96,7 +96,7 @@ def __init__( self.work_pids: List[int] = [] self.work_locks: List[multiprocessing.synchronize.Lock] = [] # List of threadless workers - self._workers: List[Threadless] = [] + self._workers: List[RemoteExecutor] = [] self._processes: List[multiprocessing.Process] = [] def __enter__(self) -> 'ThreadlessPool': @@ -184,7 +184,7 @@ def _start_worker(self, index: int) -> None: self.work_locks.append(multiprocessing.Lock()) pipe = multiprocessing.Pipe() self.work_queues.append(pipe[0]) - w = Threadless( + w = RemoteExecutor( work_queue=pipe[1], flags=self.flags, event_queue=self.event_queue, diff --git a/proxy/core/acceptor/remote.py b/proxy/core/acceptor/remote.py new file 
mode 100644 index 0000000000..a4a0f929a9 --- /dev/null +++ b/proxy/core/acceptor/remote.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +""" + proxy.py + ~~~~~~~~ + ⚡⚡⚡ Fast, Lightweight, Pluggable, TLS interception capable proxy server focused on + Network monitoring, controls & Application development, testing, debugging. + + :copyright: (c) 2013-present by Abhinav Singh and contributors. + :license: BSD, see LICENSE for more details. +""" +import logging + +from typing import Optional + +from multiprocessing.reduction import recv_handle + +from ..connection import TcpClientConnection +from ..event import eventNames + +from .threadless import Threadless + +logger = logging.getLogger(__name__) + + +class RemoteExecutor(Threadless): + """RemoteExecutor receives work over a Connection object. + RemoteExecutor uses ``recv_handle`` to accept incoming work. + """ + + def work_queue_fileno(self) -> Optional[int]: + return self.work_queue.fileno() + + def close_work_queue(self) -> None: + return super().close_work_queue() + + def receive_from_work_queue(self) -> None: + # Acceptor will not send address for + # unix socket domain environments. + addr = None + if not self.flags.unix_socket_path: + addr = self.work_queue.recv() + fileno = recv_handle(self.work_queue) + self.works[fileno] = self.flags.work_klass( + TcpClientConnection(conn=self._fromfd(fileno), addr=addr), + flags=self.flags, + event_queue=self.event_queue, + ) + self.works[fileno].publish_event( + event_name=eventNames.WORK_STARTED, + event_payload={'fileno': fileno, 'addr': addr}, + publisher_id=self.__class__.__name__, + ) + try: + self.works[fileno].initialize() + except Exception as e: + logger.exception( + 'Exception occurred during initialization', + exc_info=e, + ) + self._cleanup(fileno) diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 53dc550598..41e1c83383 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -20,23 +20,23 @@ import selectors import multiprocessing -from multiprocessing import connection -from multiprocessing.reduction import recv_handle -from typing import Dict, Optional, Tuple, List, Set +from abc import abstractmethod, ABC +from typing import Dict, Optional, Tuple, List, Set, Generic, TypeVar from .work import Work -from ..connection import TcpClientConnection -from ..event import EventQueue, eventNames +from ..event import EventQueue from ...common.logger import Logger from ...common.types import Readables, Writables from ...common.constants import DEFAULT_SELECTOR_SELECT_TIMEOUT +T = TypeVar("T") + logger = logging.getLogger(__name__) -class Threadless: +class Threadless(ABC, Generic[T]): """Work executor process. 
Threadless process provides an event loop, which is shared across @@ -60,7 +60,7 @@ class Threadless: def __init__( self, - work_queue: connection.Connection, + work_queue: T, flags: argparse.Namespace, event_queue: Optional[EventQueue] = None, ) -> None: @@ -78,7 +78,26 @@ def __init__( # # Ref https://github.com/abhinavsingh/proxy.py/runs/4279055360?check_suite_focus=true self.unfinished: Set['asyncio.Task[bool]'] = set() - self.wait_timeout: float = (self.flags.num_workers / 2) * DEFAULT_SELECTOR_SELECT_TIMEOUT + self.wait_timeout: float = ( + self.flags.num_workers / 2) * DEFAULT_SELECTOR_SELECT_TIMEOUT + + @abstractmethod + def receive_from_work_queue(self) -> None: + """Work queue is ready to receive new work.""" + raise NotImplementedError() + + @abstractmethod + def work_queue_fileno(self) -> Optional[int]: + """If work queue must be selected before calling + ``receive_from_work_queue`` then implementation must + return work queue fd.""" + raise NotImplementedError() + + @abstractmethod + def close_work_queue(self) -> None: + """If an fd was selectable for work queue, make sure + to close the work queue fd here.""" + raise NotImplementedError() async def _selected_events(self) -> Tuple[ List[socket.socket], @@ -110,10 +129,12 @@ async def _selected_events(self) -> Tuple[ # readables & writables that work_id is interested in # and are ready for IO. work_by_ids: Dict[int, Tuple[Readables, Writables]] = {} - client_queue_fileno = self.work_queue.fileno() new_work_available = False + wqfileno = self.work_queue_fileno() + if wqfileno is None: + new_work_available = True for key, mask in selected: - if key.fileobj == client_queue_fileno: + if wqfileno is not None and key.fileobj == wqfileno: assert mask & selectors.EVENT_READ new_work_available = True continue @@ -145,32 +166,6 @@ def _fromfd(self, fileno: int) -> socket.socket: type=socket.SOCK_STREAM, ) - def _accept_client(self) -> None: - # Acceptor will not send address for - # unix socket domain environments. - addr = None - if not self.flags.unix_socket_path: - addr = self.work_queue.recv() - fileno = recv_handle(self.work_queue) - self.works[fileno] = self.flags.work_klass( - TcpClientConnection(conn=self._fromfd(fileno), addr=addr), - flags=self.flags, - event_queue=self.event_queue, - ) - self.works[fileno].publish_event( - event_name=eventNames.WORK_STARTED, - event_payload={'fileno': fileno, 'addr': addr}, - publisher_id=self.__class__.__name__, - ) - try: - self.works[fileno].initialize() - except Exception as e: - logger.exception( - 'Exception occurred during initialization', - exc_info=e, - ) - self._cleanup(fileno) - # TODO: Use cached property to avoid execution repeatedly # within a second interval. Note that our selector timeout # is 0.1 second which can unnecessarily result in cleanup @@ -214,7 +209,7 @@ async def _run_once(self) -> None: # client_queue fd itself a.k.a. accept_client # will become handle_readables. 
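# Sketch of the TODO above (an assumption, not part of this series): if the
# work queue fd were itself wrapped in a Work subclass, the special case
# below would collapse into the normal dispatch path, roughly:
#
#     class WorkQueueHandler(Work):
#         async def handle_events(self, readables, writables):
#             if readables:
#                 self.executor.receive_from_work_queue()  # hypothetical backref
#             return False  # the work queue never tears down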
if new_work_available: - self._accept_client() + self.receive_from_work_queue() if len(work_by_ids) == 0: self._cleanup_inactive() return @@ -239,13 +234,15 @@ def run(self) -> None: self.flags.log_file, self.flags.log_level, self.flags.log_format, ) + wqfileno = self.work_queue_fileno() try: self.selector = selectors.DefaultSelector() - self.selector.register( - self.work_queue.fileno(), - selectors.EVENT_READ, - data=self.work_queue.fileno(), - ) + if wqfileno is not None: + self.selector.register( + wqfileno, + selectors.EVENT_READ, + data=wqfileno, + ) self.loop = asyncio.get_event_loop_policy().get_event_loop() while not self.running.is_set(): # logger.debug('Working on {0} works'.format(len(self.works))) @@ -254,7 +251,8 @@ def run(self) -> None: pass finally: assert self.selector is not None - self.selector.unregister(self.work_queue.fileno()) - self.work_queue.close() + if wqfileno is not None: + self.selector.unregister(wqfileno) + self.close_work_queue() assert self.loop is not None self.loop.close() From 36dcbc1dd48f32848118aa1253fc9ef7b34b8b7b Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 05:58:47 +0530 Subject: [PATCH 35/54] Make `LocalExecutor` agnostic of `threading.Thread` --- proxy/core/acceptor/acceptor.py | 10 ++++++---- proxy/core/acceptor/local.py | 2 +- proxy/core/acceptor/remote.py | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py index 5edfe6a0f9..e2a6f386af 100644 --- a/proxy/core/acceptor/acceptor.py +++ b/proxy/core/acceptor/acceptor.py @@ -120,6 +120,7 @@ def __init__( # Internals self._total: Optional[int] = None self._local: Optional[LocalExecutor] = None + self._lthread: Optional[threading.Thread] = None def accept(self, events: List[Tuple[selectors.SelectorKey, int]]) -> None: for _, mask in events: @@ -196,13 +197,14 @@ def _start_local(self) -> None: self.executor_locks, self.event_queue, ) - self._local.daemon = True - self._local.start() + self._lthread = threading.Thread(target=self._local.run) + self._lthread.daemon = True + self._lthread.start() def _stop_local(self) -> None: - if self._local is not None: + if self._lthread is not None and self._local is not None: self._local.work_queue.put(False) - self._local.join() + self._lthread.join() def _work(self, conn: socket.socket, addr: Optional[Tuple[str, int]]) -> None: self._total = self._total or 0 diff --git a/proxy/core/acceptor/local.py b/proxy/core/acceptor/local.py index ec3832069d..00f173b3e0 100644 --- a/proxy/core/acceptor/local.py +++ b/proxy/core/acceptor/local.py @@ -36,7 +36,7 @@ logger = logging.getLogger(__name__) -class LocalExecutor(threading.Thread): +class LocalExecutor: """Listens for READ_EVENT over a queue, accepts and initializes work.""" def __init__( diff --git a/proxy/core/acceptor/remote.py b/proxy/core/acceptor/remote.py index a4a0f929a9..a9ef47df59 100644 --- a/proxy/core/acceptor/remote.py +++ b/proxy/core/acceptor/remote.py @@ -31,7 +31,7 @@ def work_queue_fileno(self) -> Optional[int]: return self.work_queue.fileno() def close_work_queue(self) -> None: - return super().close_work_queue() + self.work_queue.close() def receive_from_work_queue(self) -> None: # Acceptor will not send address for From 271652c18a620eb3e06a311d03bdd43d809df909 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 07:10:30 +0530 Subject: [PATCH 36/54] `LocalExecutor` now implements `Threadless` --- Makefile | 1 + helper/benchmark.sh | 2 +- proxy/core/acceptor/acceptor.py | 10 
+-- proxy/core/acceptor/local.py | 118 ++++++------------------------ proxy/core/acceptor/remote.py | 41 +++++------ proxy/core/acceptor/threadless.py | 85 +++++++++++++++------ proxy/proxy.py | 30 ++++---- 7 files changed, 126 insertions(+), 161 deletions(-) diff --git a/Makefile b/Makefile index bc51729d9d..77b52b2c0f 100644 --- a/Makefile +++ b/Makefile @@ -136,6 +136,7 @@ lib-profile: --disable-http-proxy \ --enable-web-server \ --plugin proxy.plugin.WebServerPlugin \ + --local-executor \ --log-file /dev/null devtools: diff --git a/helper/benchmark.sh b/helper/benchmark.sh index b012e9d0ad..ac32f541a1 100755 --- a/helper/benchmark.sh +++ b/helper/benchmark.sh @@ -34,7 +34,7 @@ fi TIMEOUT=1 QPS=8000 CONCURRENCY=100 -TOTAL_REQUESTS=100000 +TOTAL_REQUESTS=1000000 OPEN_FILE_LIMIT=65536 BACKLOG=OPEN_FILE_LIMIT PID_FILE=/tmp/proxy.pid diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py index e2a6f386af..07e7ebd967 100644 --- a/proxy/core/acceptor/acceptor.py +++ b/proxy/core/acceptor/acceptor.py @@ -189,13 +189,9 @@ def run(self) -> None: def _start_local(self) -> None: assert self.sock self._local = LocalExecutor( - self.idd, - self.flags, - queue.Queue(), - self.executor_queues, - self.executor_pids, - self.executor_locks, - self.event_queue, + work_queue=queue.Queue(), + flags=self.flags, + event_queue=self.event_queue, ) self._lthread = threading.Thread(target=self._local.run) self._lthread.daemon = True diff --git a/proxy/core/acceptor/local.py b/proxy/core/acceptor/local.py index 00f173b3e0..d6e9ef2d65 100644 --- a/proxy/core/acceptor/local.py +++ b/proxy/core/acceptor/local.py @@ -12,116 +12,44 @@ acceptor """ -import os import queue -import socket import logging -import argparse -import selectors -import threading +import asyncio import contextlib -import multiprocessing.synchronize -from multiprocessing import connection -from typing import Optional, Tuple, List, Dict -from typing import Any # noqa: W0611 pylint: disable=unused-import +from typing import Optional +from typing import Any # noqa: W0611, F401 pylint: disable=unused-import -from ...common.utils import is_threadless - -from ..event import EventQueue - -from .executors import ThreadlessPool -from .work import Work +from .threadless import Threadless logger = logging.getLogger(__name__) -class LocalExecutor: +class LocalExecutor(Threadless['queue.Queue[Any]']): """Listens for READ_EVENT over a queue, accepts and initializes work.""" - def __init__( - self, - idd: int, - flags: argparse.Namespace, - work_queue: 'queue.Queue[Any]', - executor_queues: List[connection.Connection], - executor_pids: List[int], - executor_locks: List[multiprocessing.synchronize.Lock], - event_queue: Optional[EventQueue] = None, - ) -> None: - super().__init__() - # Index assigned by `AcceptorPool` - self.idd = idd - self.work_queue = work_queue - self.flags = flags - self.executor_queues = executor_queues - self.executor_pids = executor_pids - self.executor_locks = executor_locks - self.event_queue = event_queue - # Incremented every time work() is called - self._total: int = 0 - self._selector: Optional[selectors.DefaultSelector] = None - self._works: Dict[int, Work] = {} + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self._loop: Optional[asyncio.AbstractEventLoop] = None + + @property + def loop(self) -> Optional[asyncio.AbstractEventLoop]: + if self._loop is None: + self._loop = asyncio.new_event_loop() + return self._loop + + def work_queue_fileno(self) -> 
Optional[int]: + return None - def run_once(self) -> bool: + def receive_from_work_queue(self) -> bool: with contextlib.suppress(queue.Empty): - work = self.work_queue.get(block=True, timeout=0.1) + work = self.work_queue.get(block=False) if isinstance(work, bool) and work is False: return True assert isinstance(work, tuple) conn, addr = work - self.dispatch(conn, addr) + # NOTE: Here we are assuming to receive a connection object + # and not a fileno because we are a LocalExecutor. + fileno = conn.fileno() + self.work_on_tcp_conn(fileno=fileno, addr=addr, conn=conn) return False - - def run(self) -> None: - self._selector = selectors.DefaultSelector() - try: - while True: - if self.run_once(): - break - except KeyboardInterrupt: - pass - - def _cleanup(self, work_id: int) -> None: - self._works[work_id].shutdown() - del self._works[work_id] - os.close(work_id) - - def dispatch(self, conn: socket.socket, addr: Optional[Tuple[str, int]]) -> None: - if is_threadless(self.flags.threadless, self.flags.threaded): - # Index of worker to which this work should be dispatched - # Use round-robin strategy by default. - # - # By default all acceptors will start sending work to - # 1st workers. To randomize, we offset index by idd. - index = (self._total + self.idd) % self.flags.num_workers - thread = threading.Thread( - target=ThreadlessPool.delegate, - args=( - self.executor_pids[index], - self.executor_queues[index], - self.executor_locks[index], - conn, - addr, - self.flags.unix_socket_path, - ), - ) - thread.start() - logger.debug( - 'Dispatched work#{0}.{1} to worker#{2}'.format( - self.idd, self._total, index, - ), - ) - else: - _, thread = ThreadlessPool.start_threaded_work( - self.flags, - conn, addr, - event_queue=self.event_queue, - publisher_id=self.__class__.__name__, - ) - logger.debug( - 'Started work#{0}.{1} in thread#{2}'.format( - self.idd, self._total, thread.ident, - ), - ) - self._total += 1 diff --git a/proxy/core/acceptor/remote.py b/proxy/core/acceptor/remote.py index a9ef47df59..747304d56a 100644 --- a/proxy/core/acceptor/remote.py +++ b/proxy/core/acceptor/remote.py @@ -8,53 +8,46 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. """ +import asyncio import logging -from typing import Optional +from typing import Optional, Any +from multiprocessing import connection from multiprocessing.reduction import recv_handle -from ..connection import TcpClientConnection -from ..event import eventNames - from .threadless import Threadless logger = logging.getLogger(__name__) -class RemoteExecutor(Threadless): +class RemoteExecutor(Threadless[connection.Connection]): """RemoteExecutor receives work over a Connection object. RemoteExecutor uses ``recv_handle`` to accept incoming work. """ + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self._loop: Optional[asyncio.AbstractEventLoop] = None + + @property + def loop(self) -> Optional[asyncio.AbstractEventLoop]: + if self._loop is None: + self._loop = asyncio.get_event_loop_policy().get_event_loop() + return self._loop + def work_queue_fileno(self) -> Optional[int]: return self.work_queue.fileno() def close_work_queue(self) -> None: self.work_queue.close() - def receive_from_work_queue(self) -> None: + def receive_from_work_queue(self) -> bool: # Acceptor will not send address for # unix socket domain environments. 
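# For context (annotation, not part of this commit): the sending side in
# ThreadlessPool.delegate pairs with this receive roughly as
#
#     if not unix_socket_path:
#         work_queue.send(addr)
#     send_handle(work_queue, conn.fileno(), pid)  # multiprocessing.reduction
#     conn.close()
#
# so the recv()/recv_handle() order below must mirror it.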
addr = None if not self.flags.unix_socket_path: addr = self.work_queue.recv() fileno = recv_handle(self.work_queue) - self.works[fileno] = self.flags.work_klass( - TcpClientConnection(conn=self._fromfd(fileno), addr=addr), - flags=self.flags, - event_queue=self.event_queue, - ) - self.works[fileno].publish_event( - event_name=eventNames.WORK_STARTED, - event_payload={'fileno': fileno, 'addr': addr}, - publisher_id=self.__class__.__name__, - ) - try: - self.works[fileno].initialize() - except Exception as e: - logger.exception( - 'Exception occurred during initialization', - exc_info=e, - ) - self._cleanup(fileno) + self.work_on_tcp_conn(fileno=fileno, addr=addr) + return False diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 41e1c83383..834bdff754 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -13,6 +13,7 @@ acceptor """ import os +import ssl import socket import logging import asyncio @@ -20,17 +21,18 @@ import selectors import multiprocessing -from abc import abstractmethod, ABC -from typing import Dict, Optional, Tuple, List, Set, Generic, TypeVar - -from .work import Work - -from ..event import EventQueue +from abc import abstractmethod, ABC, abstractproperty +from typing import Dict, Optional, Tuple, List, Set, Generic, TypeVar, Union from ...common.logger import Logger from ...common.types import Readables, Writables from ...common.constants import DEFAULT_SELECTOR_SELECT_TIMEOUT +from ..connection import TcpClientConnection +from ..event import eventNames, EventQueue + +from .work import Work + T = TypeVar("T") logger = logging.getLogger(__name__) @@ -72,18 +74,25 @@ def __init__( self.running = multiprocessing.Event() self.works: Dict[int, Work] = {} self.selector: Optional[selectors.DefaultSelector] = None - self.loop: Optional[asyncio.AbstractEventLoop] = None # If we remove single quotes for typing hint below, # runtime exceptions will occur for < Python 3.9. # # Ref https://github.com/abhinavsingh/proxy.py/runs/4279055360?check_suite_focus=true self.unfinished: Set['asyncio.Task[bool]'] = set() - self.wait_timeout: float = ( - self.flags.num_workers / 2) * DEFAULT_SELECTOR_SELECT_TIMEOUT + self.wait_timeout: float = DEFAULT_SELECTOR_SELECT_TIMEOUT + + @abstractproperty + @property + def loop(self) -> Optional[asyncio.AbstractEventLoop]: + raise NotImplementedError() @abstractmethod - def receive_from_work_queue(self) -> None: - """Work queue is ready to receive new work.""" + def receive_from_work_queue(self) -> bool: + """Work queue is ready to receive new work. + + Receive it and call ``work_on_tcp_conn``. + + Return True to teardown the loop.""" raise NotImplementedError() @abstractmethod @@ -93,11 +102,39 @@ def work_queue_fileno(self) -> Optional[int]: return work queue fd.""" raise NotImplementedError() - @abstractmethod def close_work_queue(self) -> None: - """If an fd was selectable for work queue, make sure - to close the work queue fd here.""" - raise NotImplementedError() + """Only called if ``work_queue_fileno`` returns an integer. 
+ If an fd is selectable for work queue, make sure + to close the work queue fd now.""" + pass # pragma: no cover + + def work_on_tcp_conn( + self, + fileno: int, + addr: Optional[Tuple[str, int]] = None, + conn: Optional[Union[ssl.SSLSocket, socket.socket]] = None, + ) -> None: + self.works[fileno] = self.flags.work_klass( + TcpClientConnection( + conn=conn or self._fromfd(fileno), + addr=addr, + ), + flags=self.flags, + event_queue=self.event_queue, + ) + self.works[fileno].publish_event( + event_name=eventNames.WORK_STARTED, + event_payload={'fileno': fileno, 'addr': addr}, + publisher_id=self.__class__.__name__, + ) + try: + self.works[fileno].initialize() + except Exception as e: + logger.exception( + 'Exception occurred during initialization', + exc_info=e, + ) + self._cleanup(fileno) async def _selected_events(self) -> Tuple[ List[socket.socket], @@ -182,7 +219,8 @@ def _cleanup_inactive(self) -> None: def _cleanup(self, work_id: int) -> None: self.works[work_id].shutdown() del self.works[work_id] - os.close(work_id) + if self.work_queue_fileno() is not None: + os.close(work_id) def _create_tasks( self, @@ -199,7 +237,7 @@ def _create_tasks( tasks.add(task) return tasks - async def _run_once(self) -> None: + async def _run_once(self) -> bool: assert self.loop is not None work_fds, work_by_ids, new_work_available = await self._selected_events() try: @@ -209,10 +247,12 @@ async def _run_once(self) -> None: # client_queue fd itself a.k.a. accept_client # will become handle_readables. if new_work_available: - self.receive_from_work_queue() + teardown = self.receive_from_work_queue() + if teardown: + return teardown if len(work_by_ids) == 0: self._cleanup_inactive() - return + return False finally: assert self.selector for wfd in work_fds: @@ -228,6 +268,7 @@ async def _run_once(self) -> None: ) # Remove and shutdown inactive workers self._cleanup_inactive() + return False def run(self) -> None: Logger.setup( @@ -243,10 +284,12 @@ def run(self) -> None: selectors.EVENT_READ, data=wqfileno, ) - self.loop = asyncio.get_event_loop_policy().get_event_loop() + assert self.loop while not self.running.is_set(): # logger.debug('Working on {0} works'.format(len(self.works))) - self.loop.run_until_complete(self._run_once()) + teardown = self.loop.run_until_complete(self._run_once()) + if teardown: + break except KeyboardInterrupt: pass finally: diff --git a/proxy/proxy.py b/proxy/proxy.py index 80431fa6fe..1af5e2d762 100644 --- a/proxy/proxy.py +++ b/proxy/proxy.py @@ -116,12 +116,14 @@ class Proxy: """Proxy is a context manager to control proxy.py library core. - By default, AcceptorPool is started with + By default, :class:`~proxy.core.pool.AcceptorPool` is started with :class:`~proxy.http.handler.HttpProtocolHandler` work class. By definition, it expects HTTP traffic to flow between clients and server. - In ``--threadless`` mode, ThreadlessPool is also started, which is - responsible for handling work accepted by :class:`~proxy.core.acceptor.Acceptor`. + In ``--threadless`` mode and without ``--local-executor``, + a :class:`~proxy.core.executors.ThreadlessPool` is also started. + Executor pool receives newly accepted work by :class:`~proxy.core.acceptor.Acceptor` + and creates an instance of work class for processing the received work. Optionally, Proxy class also initializes the EventManager. 
A multi-process safe pubsub system which can be used to build various @@ -172,17 +174,18 @@ def setup(self) -> None: event_queue = self.event_manager.queue \ if self.event_manager is not None \ else None - self.executors = ThreadlessPool( - flags=self.flags, - event_queue=event_queue, - ) - self.executors.setup() + if not self.flags.local_executor: + self.executors = ThreadlessPool( + flags=self.flags, + event_queue=event_queue, + ) + self.executors.setup() self.acceptors = AcceptorPool( flags=self.flags, listener=self.listener, - executor_queues=self.executors.work_queues, - executor_pids=self.executors.work_pids, - executor_locks=self.executors.work_locks, + executor_queues=self.executors.work_queues if self.executors else [], + executor_pids=self.executors.work_pids if self.executors else [], + executor_locks=self.executors.work_locks if self.executors else [], event_queue=event_queue, ) self.acceptors.setup() @@ -191,8 +194,9 @@ def setup(self) -> None: def shutdown(self) -> None: assert self.acceptors self.acceptors.shutdown() - assert self.executors - self.executors.shutdown() + if not self.flags.local_executor: + assert self.executors + self.executors.shutdown() if self.flags.enable_events: assert self.event_manager is not None self.event_manager.shutdown() From 194ce587c3c996d4931c7aefa2acb5393f26e8a8 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 21:33:01 +0530 Subject: [PATCH 37/54] `get_events` and `get_descriptors` now must return int and not sock. `Threadless` now avoids repeated register/unregister and instead make use of `selectors.modify` --- Makefile | 3 + examples/web_scraper.py | 3 +- helper/benchmark.sh | 4 +- proxy/core/acceptor/acceptor.py | 13 +- proxy/core/acceptor/threadless.py | 111 +++++++++++------ proxy/core/acceptor/work.py | 3 +- proxy/core/base/tcp_server.py | 15 ++- proxy/core/base/tcp_tunnel.py | 17 ++- proxy/core/base/tcp_upstream.py | 12 +- proxy/http/handler.py | 116 +++++++++--------- proxy/http/plugin.py | 4 +- proxy/http/proxy/plugin.py | 5 +- proxy/http/proxy/server.py | 23 ++-- proxy/http/server/plugin.py | 5 +- proxy/http/server/web.py | 4 +- .../exceptions/test_http_proxy_auth_failed.py | 16 +-- tests/http/test_http_proxy.py | 8 +- 17 files changed, 192 insertions(+), 170 deletions(-) diff --git a/Makefile b/Makefile index 77b52b2c0f..bdf2052344 100644 --- a/Makefile +++ b/Makefile @@ -127,6 +127,7 @@ lib-coverage: $(OPEN) htmlcov/index.html lib-profile: + ulimit -n 65536 && \ sudo py-spy record \ -o profile.svg \ -t -F -s -- \ @@ -137,6 +138,8 @@ lib-profile: --enable-web-server \ --plugin proxy.plugin.WebServerPlugin \ --local-executor \ + --backlog 65536 \ + --open-file-limit 65536 --log-file /dev/null devtools: diff --git a/examples/web_scraper.py b/examples/web_scraper.py index 9bc348f376..4b925876c5 100644 --- a/examples/web_scraper.py +++ b/examples/web_scraper.py @@ -9,7 +9,6 @@ :license: BSD, see LICENSE for more details. """ import time -import socket from typing import Dict @@ -40,7 +39,7 @@ class WebScraper(Work): only PUBSUB protocol. 
""" - async def get_events(self) -> Dict[socket.socket, int]: + async def get_events(self) -> Dict[int, int]: """Return sockets and events (read or write) that we are interested in.""" return {} diff --git a/helper/benchmark.sh b/helper/benchmark.sh index ac32f541a1..653605c43c 100755 --- a/helper/benchmark.sh +++ b/helper/benchmark.sh @@ -32,9 +32,9 @@ if [ $(basename $PWD) != "proxy.py" ]; then fi TIMEOUT=1 -QPS=8000 +QPS=20000 CONCURRENCY=100 -TOTAL_REQUESTS=1000000 +TOTAL_REQUESTS=100000 OPEN_FILE_LIMIT=65536 BACKLOG=OPEN_FILE_LIMIT PID_FILE=/tmp/proxy.pid diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py index 07e7ebd967..923bde34a6 100644 --- a/proxy/core/acceptor/acceptor.py +++ b/proxy/core/acceptor/acceptor.py @@ -26,6 +26,7 @@ from multiprocessing.reduction import recv_handle from typing import List, Optional, Tuple +from typing import Any from ...common.flag import flags from ...common.utils import is_threadless @@ -119,6 +120,7 @@ def __init__( self.sock: Optional[socket.socket] = None # Internals self._total: Optional[int] = None + self._local_work_queue: Optional['queue.Queue[Any]'] = None self._local: Optional[LocalExecutor] = None self._lthread: Optional[threading.Thread] = None @@ -133,8 +135,8 @@ def accept(self, events: List[Tuple[selectors.SelectorKey, int]]) -> None: addr = None if addr == '' else addr work = (conn, addr) if self.flags.local_executor: - assert self._local - self._local.work_queue.put_nowait(work) + assert self._local_work_queue + self._local_work_queue.put_nowait(work) else: addr = None if addr == '' else addr self._work(*work) @@ -188,8 +190,9 @@ def run(self) -> None: def _start_local(self) -> None: assert self.sock + self._local_work_queue = queue.Queue() self._local = LocalExecutor( - work_queue=queue.Queue(), + work_queue=self._local_work_queue, flags=self.flags, event_queue=self.event_queue, ) @@ -198,8 +201,8 @@ def _start_local(self) -> None: self._lthread.start() def _stop_local(self) -> None: - if self._lthread is not None and self._local is not None: - self._local.work_queue.put(False) + if self._lthread is not None and self._local_work_queue is not None: + self._local_work_queue.put(False) self._lthread.join() def _work(self, conn: socket.socket, addr: Optional[Tuple[str, int]]) -> None: diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 834bdff754..28ef7aec89 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -79,6 +79,12 @@ def __init__( # # Ref https://github.com/abhinavsingh/proxy.py/runs/4279055360?check_suite_focus=true self.unfinished: Set['asyncio.Task[bool]'] = set() + self.registered_events_by_work_ids: Dict[ + # work_id + int, + # fileno, mask + Dict[int, int] + ] = {} self.wait_timeout: float = DEFAULT_SELECTOR_SELECT_TIMEOUT @abstractproperty @@ -137,28 +143,51 @@ def work_on_tcp_conn( self._cleanup(fileno) async def _selected_events(self) -> Tuple[ - List[socket.socket], Dict[int, Tuple[Readables, Writables]], bool, ]: """For each work, collects events they are interested in. Calls select for events of interest. """ assert self.selector is not None - work_fds: List[socket.socket] = [] for work_id in self.works: worker_events = await self.works[work_id].get_events() - for fd in worker_events: - # Can throw ValueError: Invalid file descriptor: -1 - # - # A guard within Work classes may not help here due to - # asynchronous nature. 
Hence, threadless will handle - # ValueError exceptions raised by selector.register - # for invalid fd. - self.selector.register( - fd, events=worker_events[fd], - data=work_id, - ) - work_fds.append(fd) + # NOTE: Current assumption is that multiple works will not + # be interested in the same fd. Descriptors of interests + # returned by work must be unique. + # + # TODO: Ideally we must diff and unregister socks not + # returned of interest within this _select_events call + # but exists in registered_socks_by_work_ids + for fileno in worker_events: + if work_id not in self.registered_events_by_work_ids: + self.registered_events_by_work_ids[work_id] = {} + mask = worker_events[fileno] + if fileno in self.registered_events_by_work_ids[work_id]: + oldmask = self.registered_events_by_work_ids[work_id][fileno] + if mask != oldmask: + self.selector.modify( + fileno, events=mask, + data=work_id, + ) + self.registered_events_by_work_ids[work_id][fileno] = mask + logger.debug( + 'fd#{0} modified for mask#{1} by work#{2}'.format( + fileno, mask, work_id)) + else: + # Can throw ValueError: Invalid file descriptor: -1 + # + # A guard within Work classes may not help here due to + # asynchronous nature. Hence, threadless will handle + # ValueError exceptions raised by selector.register + # for invalid fd. + self.selector.register( + fileno, events=mask, + data=work_id, + ) + self.registered_events_by_work_ids[work_id][fileno] = mask + logger.debug( + 'fd#{0} registered for mask#{1} by work#{2}'.format( + fileno, mask, work_id)) selected = self.selector.select( timeout=DEFAULT_SELECTOR_SELECT_TIMEOUT, ) @@ -181,7 +210,7 @@ async def _selected_events(self) -> Tuple[ work_by_ids[key.data][0].append(key.fileobj) if mask & selectors.EVENT_WRITE: work_by_ids[key.data][1].append(key.fileobj) - return (work_fds, work_by_ids, new_work_available) + return (work_by_ids, new_work_available) async def _wait_for_tasks( self, @@ -217,6 +246,15 @@ def _cleanup_inactive(self) -> None: # TODO: HttpProtocolHandler.shutdown can call flush which may block def _cleanup(self, work_id: int) -> None: + if work_id in self.registered_events_by_work_ids: + assert self.selector + for fileno in self.registered_events_by_work_ids[work_id]: + logger.debug( + 'fd#{0} unregistered by work#{1}'.format( + fileno, work_id)) + self.selector.unregister(fileno) + self.registered_events_by_work_ids[work_id].clear() + del self.registered_events_by_work_ids[work_id] self.works[work_id].shutdown() del self.works[work_id] if self.work_queue_fileno() is not None: @@ -239,33 +277,28 @@ def _create_tasks( async def _run_once(self) -> bool: assert self.loop is not None - work_fds, work_by_ids, new_work_available = await self._selected_events() - try: - # Accept new work if available - # - # TODO: We must use a work klass to handle - # client_queue fd itself a.k.a. accept_client - # will become handle_readables. - if new_work_available: - teardown = self.receive_from_work_queue() - if teardown: - return teardown - if len(work_by_ids) == 0: - self._cleanup_inactive() - return False - finally: - assert self.selector - for wfd in work_fds: - self.selector.unregister(wfd) + work_by_ids, new_work_available = await self._selected_events() + # Accept new work if available + # + # TODO: We must use a work klass to handle + # client_queue fd itself a.k.a. accept_client + # will become handle_readables. 
+ if new_work_available: + teardown = self.receive_from_work_queue() + if teardown: + return teardown + if len(work_by_ids) == 0: + self._cleanup_inactive() + return False # Invoke Threadless.handle_events self.unfinished.update(self._create_tasks(work_by_ids)) - logger.debug('Executing {0} works'.format(len(self.unfinished))) + # logger.debug('Executing {0} works'.format(len(self.unfinished))) await self._wait_for_tasks(self.unfinished) - logger.debug( - 'Done executing works, {0} pending'.format( - len(self.unfinished), - ), - ) + # logger.debug( + # 'Done executing works, {0} pending, {1} registered'.format( + # len(self.unfinished), len(self.registered_events_by_work_ids), + # ), + # ) # Remove and shutdown inactive workers self._cleanup_inactive() return False diff --git a/proxy/core/acceptor/work.py b/proxy/core/acceptor/work.py index d50fc9e0e7..11b5deecc6 100644 --- a/proxy/core/acceptor/work.py +++ b/proxy/core/acceptor/work.py @@ -13,7 +13,6 @@ acceptor """ import argparse -import socket from abc import ABC, abstractmethod from uuid import uuid4, UUID @@ -43,7 +42,7 @@ def __init__( self.work = work @abstractmethod - async def get_events(self) -> Dict[socket.socket, int]: + async def get_events(self) -> Dict[int, int]: """Return sockets and events (read or write) that we are interested in.""" return {} # pragma: no cover diff --git a/proxy/core/base/tcp_server.py b/proxy/core/base/tcp_server.py index 2aa0f0574d..4f21e53983 100644 --- a/proxy/core/base/tcp_server.py +++ b/proxy/core/base/tcp_server.py @@ -12,7 +12,6 @@ tcp """ -import socket import logging import selectors @@ -68,19 +67,19 @@ def handle_data(self, data: memoryview) -> Optional[bool]: """Optionally return True to close client connection.""" pass # pragma: no cover - async def get_events(self) -> Dict[socket.socket, int]: + async def get_events(self) -> Dict[int, int]: events = {} # We always want to read from client # Register for EVENT_READ events if self.must_flush_before_shutdown is False: - events[self.work.connection] = selectors.EVENT_READ + events[self.work.connection.fileno()] = selectors.EVENT_READ # If there is pending buffer for client # also register for EVENT_WRITE events if self.work.has_buffer(): - if self.work.connection in events: - events[self.work.connection] |= selectors.EVENT_WRITE + if self.work.connection.fileno() in events: + events[self.work.connection.fileno()] |= selectors.EVENT_WRITE else: - events[self.work.connection] = selectors.EVENT_WRITE + events[self.work.connection.fileno()] = selectors.EVENT_WRITE return events async def handle_events( @@ -102,7 +101,7 @@ async def handle_events( async def handle_writables(self, writables: Writables) -> bool: teardown = False - if self.work.connection in writables and self.work.has_buffer(): + if self.work.connection.fileno() in writables and self.work.has_buffer(): logger.debug( 'Flushing buffer to client {0}'.format(self.work.address), ) @@ -115,7 +114,7 @@ async def handle_writables(self, writables: Writables) -> bool: async def handle_readables(self, readables: Readables) -> bool: teardown = False - if self.work.connection in readables: + if self.work.connection.fileno() in readables: data = self.work.recv(self.flags.client_recvbuf_size) if data is None: logger.debug( diff --git a/proxy/core/base/tcp_tunnel.py b/proxy/core/base/tcp_tunnel.py index ec9b1ccb7a..3c17ce8e31 100644 --- a/proxy/core/base/tcp_tunnel.py +++ b/proxy/core/base/tcp_tunnel.py @@ -12,7 +12,6 @@ tcp """ -import socket import logging import selectors @@ -65,19 +64,19 @@ 
def shutdown(self) -> None: self.upstream.close() super().shutdown() - async def get_events(self) -> Dict[socket.socket, int]: + async def get_events(self) -> Dict[int, int]: # Get default client events - ev: Dict[socket.socket, int] = await super().get_events() + ev: Dict[int, int] = await super().get_events() # Read from server if we are connected if self.upstream and self.upstream._conn is not None: - ev[self.upstream.connection] = selectors.EVENT_READ + ev[self.upstream.connection.fileno()] = selectors.EVENT_READ # If there is pending buffer for server # also register for EVENT_WRITE events if self.upstream and self.upstream.has_buffer(): - if self.upstream.connection in ev: - ev[self.upstream.connection] |= selectors.EVENT_WRITE + if self.upstream.connection.fileno() in ev: + ev[self.upstream.connection.fileno()] |= selectors.EVENT_WRITE else: - ev[self.upstream.connection] = selectors.EVENT_WRITE + ev[self.upstream.connection.fileno()] = selectors.EVENT_WRITE return ev async def handle_events( @@ -90,7 +89,7 @@ async def handle_events( if do_shutdown: return do_shutdown # Handle server events - if self.upstream and self.upstream.connection in readables: + if self.upstream and self.upstream.connection.fileno() in readables: data = self.upstream.recv() if data is None: # Server closed connection @@ -98,7 +97,7 @@ async def handle_events( return True # tunnel data to client self.work.queue(data) - if self.upstream and self.upstream.connection in writables: + if self.upstream and self.upstream.connection.fileno() in writables: self.upstream.flush() return False diff --git a/proxy/core/base/tcp_upstream.py b/proxy/core/base/tcp_upstream.py index e04ec5e90d..acbd9eab39 100644 --- a/proxy/core/base/tcp_upstream.py +++ b/proxy/core/base/tcp_upstream.py @@ -8,12 +8,10 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. 
""" -from abc import ABC, abstractmethod - import ssl -import socket import logging +from abc import ABC, abstractmethod from typing import Tuple, List, Optional, Any from ...common.types import Readables, Writables @@ -67,13 +65,13 @@ def handle_upstream_data(self, raw: memoryview) -> None: def initialize_upstream(self, addr: str, port: int) -> None: self.upstream = TcpServerConnection(addr, port) - def get_descriptors(self) -> Tuple[List[socket.socket], List[socket.socket]]: + def get_descriptors(self) -> Tuple[List[int], List[int]]: if not self.upstream: return [], [] - return [self.upstream.connection], [self.upstream.connection] if self.upstream.has_buffer() else [] + return [self.upstream.connection.fileno()], [self.upstream.connection.fileno()] if self.upstream.has_buffer() else [] def read_from_descriptors(self, r: Readables) -> bool: - if self.upstream and self.upstream.connection in r: + if self.upstream and self.upstream.connection.fileno() in r: try: raw = self.upstream.recv(self.server_recvbuf_size) if raw is not None: @@ -90,7 +88,7 @@ def read_from_descriptors(self, r: Readables) -> bool: return False def write_to_descriptors(self, w: Writables) -> bool: - if self.upstream and self.upstream.connection in w and self.upstream.has_buffer(): + if self.upstream and self.upstream.connection.fileno() in w and self.upstream.has_buffer(): try: self.upstream.flush() except ssl.SSLWantWriteError: diff --git a/proxy/http/handler.py b/proxy/http/handler.py index 69f4385e17..25b9cd649e 100644 --- a/proxy/http/handler.py +++ b/proxy/http/handler.py @@ -143,22 +143,22 @@ def shutdown(self) -> None: logger.debug('Client connection closed') super().shutdown() - async def get_events(self) -> Dict[socket.socket, int]: + async def get_events(self) -> Dict[int, int]: # Get default client events - events: Dict[socket.socket, int] = await super().get_events() + events: Dict[int, int] = await super().get_events() # HttpProtocolHandlerPlugin.get_descriptors for plugin in self.plugins.values(): plugin_read_desc, plugin_write_desc = plugin.get_descriptors() - for r in plugin_read_desc: - if r not in events: - events[r] = selectors.EVENT_READ + for rfileno in plugin_read_desc: + if rfileno not in events: + events[rfileno] = selectors.EVENT_READ else: - events[r] |= selectors.EVENT_READ - for w in plugin_write_desc: - if w not in events: - events[w] = selectors.EVENT_WRITE + events[rfileno] |= selectors.EVENT_READ + for wfileno in plugin_write_desc: + if wfileno not in events: + events[wfileno] = selectors.EVENT_WRITE else: - events[w] |= selectors.EVENT_WRITE + events[wfileno] |= selectors.EVENT_WRITE return events # We override super().handle_events and never call it @@ -237,7 +237,7 @@ def handle_data(self, data: memoryview) -> Optional[bool]: return False async def handle_writables(self, writables: Writables) -> bool: - if self.work.connection in writables and self.work.has_buffer(): + if self.work.connection.fileno() in writables and self.work.has_buffer(): logger.debug('Client is ready for writes, flushing buffer') self.last_activity = time.time() @@ -267,7 +267,7 @@ async def handle_writables(self, writables: Writables) -> bool: return False async def handle_readables(self, readables: Readables) -> bool: - if self.work.connection in readables: + if self.work.connection.fileno() in readables: logger.debug('Client is ready for reads, reading') self.last_activity = time.time() try: @@ -293,40 +293,6 @@ async def handle_readables(self, readables: Readables) -> bool: return True return False - ## - # 
run() is here to maintain backward compatibility for threaded mode - ## - - def run(self) -> None: - """run() method is not used when in --threadless mode. - - This is here just to maintain backward compatibility with threaded mode. - """ - try: - self.initialize() - while True: - # Teardown if client buffer is empty and connection is inactive - if self.is_inactive(): - logger.debug( - 'Client buffer is empty and maximum inactivity has reached ' - 'between client and server connection, tearing down...', - ) - break - teardown = asyncio.run(self._run_once()) - if teardown: - break - except KeyboardInterrupt: # pragma: no cover - pass - except ssl.SSLError as e: - logger.exception('ssl.SSLError', exc_info=e) - except Exception as e: - logger.exception( - 'Exception while handling connection %r' % - self.work.connection, exc_info=e, - ) - finally: - self.shutdown() - ## # Internal methods ## @@ -351,7 +317,7 @@ def _optionally_wrap_socket( # FIXME: Returning events is only necessary because we cannot use async context manager # for < Python 3.8. As a reason, this method is no longer a context manager and caller # is responsible for unregistering the descriptors. - async def _selected_events(self) -> Tuple[Dict[socket.socket, int], Readables, Writables]: + async def _selected_events(self) -> Tuple[Dict[int, int], Readables, Writables]: assert self.selector events = await self.get_events() for fd in events: @@ -366,15 +332,6 @@ async def _selected_events(self) -> Tuple[Dict[socket.socket, int], Readables, W writables.append(key.fileobj) return (events, readables, writables) - async def _run_once(self) -> bool: - events, readables, writables = await self._selected_events() - try: - return await self.handle_events(readables, writables) - finally: - assert self.selector - for fd in events: - self.selector.unregister(fd) - def _flush(self) -> None: assert self.selector logger.debug('Flushing pending data') @@ -398,3 +355,50 @@ def _flush(self) -> None: def _connection_inactive_for(self) -> float: return time.time() - self.last_activity + + ## + # run() and _run_once() are here to maintain backward compatibility + # with threaded mode. These methods are only called when running + # in threaded mode. + ## + + def run(self) -> None: + """run() method is not used when in --threadless mode. + + This is here just to maintain backward compatibility with threaded mode. + """ + try: + self.initialize() + while True: + # Teardown if client buffer is empty and connection is inactive + if self.is_inactive(): + logger.debug( + 'Client buffer is empty and maximum inactivity has reached ' + 'between client and server connection, tearing down...', + ) + break + teardown = asyncio.run(self._run_once()) + if teardown: + break + except KeyboardInterrupt: # pragma: no cover + pass + except ssl.SSLError as e: + logger.exception('ssl.SSLError', exc_info=e) + except Exception as e: + logger.exception( + 'Exception while handling connection %r' % + self.work.connection, exc_info=e, + ) + finally: + self.shutdown() + + async def _run_once(self) -> bool: + events, readables, writables = await self._selected_events() + try: + return await self.handle_events(readables, writables) + finally: + assert self.selector + # TODO: Like Threadless we should not unregister + # work fds repeatedly. 
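# [editor's note, not part of this patch] "Like Threadless" refers to
# the bookkeeping this patch adds in threadless.py: cache each
# registered (fileno, mask) pair, call ``selector.modify`` when only
# the mask changes, and unregister solely during cleanup. A sketch of
# that shape, with ``registered`` a hypothetical Dict[int, int] cache:
#
#     old = registered.get(fd)
#     if old is None:
#         self.selector.register(fd, events=mask)
#     elif old != mask:
#         self.selector.modify(fd, events=mask)
#     registered[fd] = mask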
+ for fd in events: + self.selector.unregister(fd) diff --git a/proxy/http/plugin.py b/proxy/http/plugin.py index 56f02141a6..d5510b5c2b 100644 --- a/proxy/http/plugin.py +++ b/proxy/http/plugin.py @@ -71,9 +71,7 @@ def name(self) -> str: return self.__class__.__name__ @abstractmethod - def get_descriptors( - self, - ) -> Tuple[List[socket.socket], List[socket.socket]]: + def get_descriptors(self) -> Tuple[List[int], List[int]]: """Implementations must return a list of descriptions that they wish to read from and write into.""" return [], [] # pragma: no cover diff --git a/proxy/http/proxy/plugin.py b/proxy/http/proxy/plugin.py index 94eed56934..81392d1d63 100644 --- a/proxy/http/proxy/plugin.py +++ b/proxy/http/proxy/plugin.py @@ -12,7 +12,6 @@ http """ -import socket import argparse from abc import ABC @@ -61,9 +60,7 @@ def name(self) -> str: # Since 3.4.0 # # @abstractmethod - def get_descriptors( - self, - ) -> Tuple[List[socket.socket], List[socket.socket]]: + def get_descriptors(self) -> Tuple[List[int], List[int]]: return [], [] # pragma: no cover # @abstractmethod diff --git a/proxy/http/proxy/server.py b/proxy/http/proxy/server.py index c2f79e03a1..8998780206 100644 --- a/proxy/http/proxy/server.py +++ b/proxy/http/proxy/server.py @@ -177,28 +177,24 @@ def tls_interception_enabled(self) -> bool: self.flags.ca_signing_key_file is not None and \ self.flags.ca_cert_file is not None - def get_descriptors( - self, - ) -> Tuple[List[socket.socket], List[socket.socket]]: + def get_descriptors(self) -> Tuple[List[int], List[int]]: if not self.request.has_host(): return [], [] - - r: List[socket.socket] = [] - w: List[socket.socket] = [] + r: List[int] = [] + w: List[int] = [] if ( self.upstream and not self.upstream.closed and self.upstream.connection ): - r.append(self.upstream.connection) + r.append(self.upstream.connection.fileno()) if ( self.upstream and not self.upstream.closed and self.upstream.has_buffer() and self.upstream.connection ): - w.append(self.upstream.connection) - + w.append(self.upstream.connection.fileno()) # TODO(abhinavsingh): We need to keep a mapping of plugin and # descriptors registered by them, so that within write/read blocks # we can invoke the right plugin callbacks. @@ -206,7 +202,6 @@ def get_descriptors( plugin_read_desc, plugin_write_desc = plugin.get_descriptors() r.extend(plugin_read_desc) w.extend(plugin_write_desc) - return r, w def _close_and_release(self) -> bool: @@ -219,7 +214,7 @@ def _close_and_release(self) -> bool: return True async def write_to_descriptors(self, w: Writables) -> bool: - if (self.upstream and self.upstream.connection not in w) or not self.upstream: + if (self.upstream and self.upstream.connection.fileno() not in w) or not self.upstream: # Currently, we just call write/read block of each plugins. It is # plugins responsibility to ignore this callback, if passed descriptors # doesn't contain the descriptor they registered. 
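# [editor's note, not part of this patch] The recurring edit in this
# file is a single contract change: descriptor lists and event maps are
# now keyed by raw ``fileno()`` ints rather than socket objects, which
# is what lets Threadless register a descriptor once and later merely
# ``selector.modify`` it. A hedged sketch of a conforming hook, with
# ``upstream`` standing in for any TcpServerConnection-like object:

import selectors
from typing import Dict, List, Tuple


def get_descriptors(upstream) -> Tuple[List[int], List[int]]:
    # Read interest whenever connected; write interest only while a
    # buffer is pending. Members are plain ints, unique per work.
    if not upstream or upstream.closed or not upstream.connection:
        return [], []
    fd = upstream.connection.fileno()
    return [fd], [fd] if upstream.has_buffer() else []


def to_events(r: List[int], w: List[int]) -> Dict[int, int]:
    # Merge read/write fd lists into the Dict[int, int] mask shape
    # that ``get_events`` now returns.
    ev: Dict[int, int] = {fd: selectors.EVENT_READ for fd in r}
    for fd in w:
        ev[fd] = ev.get(fd, 0) | selectors.EVENT_WRITE
    return ev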
@@ -230,7 +225,7 @@ async def write_to_descriptors(self, w: Writables) -> bool: elif self.request.has_host() and \ self.upstream and not self.upstream.closed and \ self.upstream.has_buffer() and \ - self.upstream.connection in w: + self.upstream.connection.fileno() in w: logger.debug('Server is write ready, flushing buffer') try: self.upstream.flush() @@ -255,7 +250,7 @@ async def read_from_descriptors(self, r: Readables) -> bool: if ( self.upstream and not self.upstream.closed and - self.upstream.connection not in r + self.upstream.connection.fileno() not in r ) or not self.upstream: # Currently, we just call write/read block of each plugins. It is # plugins responsibility to ignore this callback, if passed descriptors @@ -267,7 +262,7 @@ async def read_from_descriptors(self, r: Readables) -> bool: elif self.request.has_host() \ and self.upstream \ and not self.upstream.closed \ - and self.upstream.connection in r: + and self.upstream.connection.fileno() in r: logger.debug('Server is ready for reads, reading...') try: raw = self.upstream.recv(self.flags.server_recvbuf_size) diff --git a/proxy/http/server/plugin.py b/proxy/http/server/plugin.py index 11c2e3ec76..c9a8fe2da0 100644 --- a/proxy/http/server/plugin.py +++ b/proxy/http/server/plugin.py @@ -12,7 +12,6 @@ http """ -import socket import argparse from uuid import UUID @@ -60,9 +59,7 @@ def name(self) -> str: # Since 3.4.0 # # @abstractmethod - def get_descriptors( - self, - ) -> Tuple[List[socket.socket], List[socket.socket]]: + def get_descriptors(self) -> Tuple[List[int], List[int]]: return [], [] # pragma: no cover # @abstractmethod diff --git a/proxy/http/server/web.py b/proxy/http/server/web.py index 8570515c6c..263707bd69 100644 --- a/proxy/http/server/web.py +++ b/proxy/http/server/web.py @@ -235,9 +235,7 @@ def on_request_complete(self) -> Union[socket.socket, bool]: self.client.queue(self.DEFAULT_404_RESPONSE) return True - def get_descriptors( - self, - ) -> Tuple[List[socket.socket], List[socket.socket]]: + def get_descriptors(self) -> Tuple[List[int], List[int]]: r, w = [], [] for plugin in self.plugins.values(): r1, w1 = plugin.get_descriptors() diff --git a/tests/http/exceptions/test_http_proxy_auth_failed.py b/tests/http/exceptions/test_http_proxy_auth_failed.py index 2aacc62661..9b4feb1bc5 100644 --- a/tests/http/exceptions/test_http_proxy_auth_failed.py +++ b/tests/http/exceptions/test_http_proxy_auth_failed.py @@ -55,8 +55,8 @@ async def test_proxy_auth_fails_without_cred(self) -> None: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -83,8 +83,8 @@ async def test_proxy_auth_fails_with_invalid_cred(self) -> None: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -111,8 +111,8 @@ async def test_proxy_auth_works_with_valid_cred(self) -> None: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -135,8 +135,8 @@ async def test_proxy_auth_works_with_mixed_case_basic_string(self) -> None: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - 
fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), diff --git a/tests/http/test_http_proxy.py b/tests/http/test_http_proxy.py index 2696e0df4e..e60d3086c7 100644 --- a/tests/http/test_http_proxy.py +++ b/tests/http/test_http_proxy.py @@ -67,8 +67,8 @@ async def test_proxy_plugin_on_and_before_upstream_connection(self) -> None: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.file(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -99,8 +99,8 @@ async def test_proxy_plugin_before_upstream_connection_can_teardown(self) -> Non self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), From 01d5121800ba65d4b13b9c7c67f7788b43c99b6e Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 22:06:36 +0530 Subject: [PATCH 38/54] Fix `main` tests --- proxy/core/base/tcp_upstream.py | 20 ++++++++++++++------ tests/test_main.py | 8 ++++++-- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/proxy/core/base/tcp_upstream.py b/proxy/core/base/tcp_upstream.py index acbd9eab39..d0ac63567a 100644 --- a/proxy/core/base/tcp_upstream.py +++ b/proxy/core/base/tcp_upstream.py @@ -22,12 +22,13 @@ class TcpUpstreamConnectionHandler(ABC): """:class:`~proxy.core.base.TcpUpstreamConnectionHandler` can - be used to insert an upstream server connection lifecycle within - asynchronous proxy.py lifecycle. + be used to insert an upstream server connection lifecycle. Call `initialize_upstream` to initialize the upstream connection object. Then, directly use ``self.upstream`` object within your class. + See :class:`~proxy.plugin.proxy_pool.ProxyPoolPlugin` for example usage. + .. 
spelling:: tcp @@ -68,17 +69,22 @@ def initialize_upstream(self, addr: str, port: int) -> None: def get_descriptors(self) -> Tuple[List[int], List[int]]: if not self.upstream: return [], [] - return [self.upstream.connection.fileno()], [self.upstream.connection.fileno()] if self.upstream.has_buffer() else [] + return [self.upstream.connection.fileno()], \ + [self.upstream.connection.fileno()] \ + if self.upstream.has_buffer() \ + else [] def read_from_descriptors(self, r: Readables) -> bool: - if self.upstream and self.upstream.connection.fileno() in r: + if self.upstream and \ + self.upstream.connection.fileno() in r: try: raw = self.upstream.recv(self.server_recvbuf_size) if raw is not None: self.total_size += len(raw) self.handle_upstream_data(raw) else: - return True # Teardown because upstream proxy closed the connection + # Teardown because upstream proxy closed the connection + return True except ssl.SSLWantReadError: logger.info('Upstream SSLWantReadError, will retry') return False @@ -88,7 +94,9 @@ def read_from_descriptors(self, r: Readables) -> bool: return False def write_to_descriptors(self, w: Writables) -> bool: - if self.upstream and self.upstream.connection.fileno() in w and self.upstream.has_buffer(): + if self.upstream and \ + self.upstream.connection.fileno() in w and \ + self.upstream.has_buffer(): try: self.upstream.flush() except ssl.SSLWantWriteError: diff --git a/tests/test_main.py b/tests/test_main.py index 1f83153763..28874e822f 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -21,7 +21,7 @@ from proxy.proxy import main, entry_point from proxy.common.utils import bytes_ -from proxy.common.constants import DEFAULT_ENABLE_DASHBOARD, DEFAULT_LOG_LEVEL, DEFAULT_LOG_FILE, DEFAULT_LOG_FORMAT +from proxy.common.constants import DEFAULT_ENABLE_DASHBOARD, DEFAULT_LOCAL_EXECUTOR, DEFAULT_LOG_LEVEL, DEFAULT_LOG_FILE from proxy.common.constants import DEFAULT_TIMEOUT, DEFAULT_DEVTOOLS_WS_PATH, DEFAULT_DISABLE_HTTP_PROXY from proxy.common.constants import DEFAULT_ENABLE_STATIC_SERVER, DEFAULT_ENABLE_EVENTS, DEFAULT_ENABLE_DEVTOOLS from proxy.common.constants import DEFAULT_ENABLE_WEB_SERVER, DEFAULT_THREADLESS, DEFAULT_CERT_FILE, DEFAULT_KEY_FILE @@ -30,7 +30,7 @@ from proxy.common.constants import DEFAULT_NUM_WORKERS, DEFAULT_OPEN_FILE_LIMIT, DEFAULT_IPV6_HOSTNAME from proxy.common.constants import DEFAULT_SERVER_RECVBUF_SIZE, DEFAULT_CLIENT_RECVBUF_SIZE, DEFAULT_WORK_KLASS from proxy.common.constants import PLUGIN_INSPECT_TRAFFIC, PLUGIN_DASHBOARD, PLUGIN_DEVTOOLS_PROTOCOL, PLUGIN_WEB_SERVER -from proxy.common.constants import PLUGIN_HTTP_PROXY, DEFAULT_NUM_ACCEPTORS, PLUGIN_PROXY_AUTH +from proxy.common.constants import PLUGIN_HTTP_PROXY, DEFAULT_NUM_ACCEPTORS, PLUGIN_PROXY_AUTH, DEFAULT_LOG_FORMAT class TestMain(unittest.TestCase): @@ -71,6 +71,7 @@ def mock_default_args(mock_args: mock.Mock) -> None: mock_args.enable_events = DEFAULT_ENABLE_EVENTS mock_args.enable_dashboard = DEFAULT_ENABLE_DASHBOARD mock_args.work_klass = DEFAULT_WORK_KLASS + mock_args.local_executor = DEFAULT_LOCAL_EXECUTOR @mock.patch('os.remove') @mock.patch('os.path.exists') @@ -95,6 +96,7 @@ def test_entry_point( ) -> None: pid_file = os.path.join(tempfile.gettempdir(), 'pid') mock_sleep.side_effect = KeyboardInterrupt() + mock_initialize.return_value.local_executor = False mock_initialize.return_value.enable_events = False mock_initialize.return_value.pid_file = pid_file entry_point() @@ -142,6 +144,7 @@ def test_main_with_no_flags( mock_sleep: mock.Mock, ) -> None: 
mock_sleep.side_effect = KeyboardInterrupt() + mock_initialize.return_value.local_executor = False mock_initialize.return_value.enable_events = False main() mock_event_manager.assert_not_called() @@ -181,6 +184,7 @@ def test_enable_events( mock_sleep: mock.Mock, ) -> None: mock_sleep.side_effect = KeyboardInterrupt() + mock_initialize.return_value.local_executor = False mock_initialize.return_value.enable_events = True main() mock_event_manager.assert_called_once() From 473fe8c7903aeab03ade2f6152b8749314f69832 Mon Sep 17 00:00:00 2001 From: Abhinav Singh <126065+abhinavsingh@users.noreply.github.com> Date: Mon, 22 Nov 2021 22:28:22 +0530 Subject: [PATCH 39/54] Apply suggestions from code review Co-authored-by: Sviatoslav Sydorenko --- .flake8 | 1 - .gitignore | 1 + README.md | 2 -- proxy/core/acceptor/acceptor.py | 3 +-- proxy/core/acceptor/local.py | 4 +++- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.flake8 b/.flake8 index cf75431f6f..1bffdb4268 100644 --- a/.flake8 +++ b/.flake8 @@ -65,7 +65,6 @@ extend-ignore = N802 # FIXME: func name should be lowercase N816 # FIXME: mixed case var name P101 # FIXME: format string with unindexed params - PT007 # FIXME: wrong values type in @pytest.mark.parametrize, expected tuple PT009 # FIXME: pytest encourages use of `assert` PT018 # FIXME: use multiple `assert`s instead of one complex Q000 # FIXME: inconsistent double quotes use when single ones are needed diff --git a/.gitignore b/.gitignore index 78ec3ad8e0..217ce78fd2 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ .mypy_cache .hypothesis .tox +.python-version coverage.xml proxy.py.iml diff --git a/README.md b/README.md index 229ffae8ac..00182ac16e 100644 --- a/README.md +++ b/README.md @@ -2215,5 +2215,3 @@ https://github.com/abhinavsingh/proxy.py/issues/new - Single threaded server. For detailed changelog refer to release PRs or commit history. - history. - history. 
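[editor's note] The acceptor change just below collapses the
empty-string address check into ``addr or None``. This is safe because
``socket.accept()`` reports the peer of an AF_UNIX client as ``''``,
which is falsy, while AF_INET peers arrive as a truthy (host, port)
tuple. A minimal, self-contained illustration of the idiom (not patch
code; ``peer`` is a made-up name):

    def normalize(peer):
        # '' (unix socket) and None both become None; a TCP
        # (host, port) tuple passes through unchanged.
        return peer or None

    assert normalize('') is None
    assert normalize(None) is None
    assert normalize(('127.0.0.1', 54321)) == ('127.0.0.1', 54321)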
diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py index e5aba2f35a..28a8c10aa0 100644 --- a/proxy/core/acceptor/acceptor.py +++ b/proxy/core/acceptor/acceptor.py @@ -141,8 +141,7 @@ def run_once(self) -> None: assert self._local self._local.evq.put((conn, addr)) else: - addr = None if addr == '' else addr - self._work(conn, addr) + self._work(conn, addr or None) except BlockingIOError: pass finally: diff --git a/proxy/core/acceptor/local.py b/proxy/core/acceptor/local.py index 60e2b1d785..4733547ae7 100644 --- a/proxy/core/acceptor/local.py +++ b/proxy/core/acceptor/local.py @@ -23,7 +23,9 @@ from multiprocessing import connection from typing import Optional, Tuple, List, Dict -from typing import Any # noqa: W0611 pylint: disable=unused-import +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from typing import Any from ...common.utils import is_threadless From baff0b95f3855cd3daa817504ae81cd2bddb7b7b Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 22:30:56 +0530 Subject: [PATCH 40/54] Apply code review recommendations manually --- docs/conf.py | 2 +- proxy/core/acceptor/threadless.py | 2 +- proxy/core/base/tcp_server.py | 17 +++++------------ 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index f641ffb358..0cc5afdea2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,5 @@ # pylint: disable=invalid-name -# Requires Python 3.7+ +# Requires Python 3.6+ # Ref: https://www.sphinx-doc.org/en/master/usage/configuration.html """Configuration for the Sphinx documentation generator.""" diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 28ef7aec89..d356b291a3 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -270,7 +270,7 @@ def _create_tasks( task = self.loop.create_task( self.works[work_id].handle_events(*work_by_ids[work_id]), ) - task._work_id = work_id # type: ignore + task._work_id = work_id # type: ignore[attr-defined] # task.set_name(work_id) tasks.add(task) return tasks diff --git a/proxy/core/base/tcp_server.py b/proxy/core/base/tcp_server.py index 4f21e53983..ce1d476116 100644 --- a/proxy/core/base/tcp_server.py +++ b/proxy/core/base/tcp_server.py @@ -49,18 +49,11 @@ class BaseTcpServerHandler(Work): def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) self.must_flush_before_shutdown = False - if self.flags.unix_socket_path: # pragma: no cover - logger.debug( - 'Work#{0} accepted from {1}'.format( - self.work.connection.fileno(), self.work.address, - ), - ) - else: - logger.debug( - 'Work#{0} accepted from {1}'.format( - self.work.connection.fileno(), self.work.address, - ), - ) + logger.debug( + 'Work#%d accepted from %s', + self.work.connection.fileno(), + self.work.address, + ) @abstractmethod def handle_data(self, data: memoryview) -> Optional[bool]: From c5ed3e3a46d44d1bef4b50a166b683d07d61ab71 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Nov 2021 17:03:05 +0000 Subject: [PATCH 41/54] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- proxy/core/acceptor/threadless.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index d356b291a3..e7a3388932 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -33,7 
+33,7 @@ from .work import Work -T = TypeVar("T") +T = TypeVar('T') logger = logging.getLogger(__name__) @@ -83,7 +83,7 @@ def __init__( # work_id int, # fileno, mask - Dict[int, int] + Dict[int, int], ] = {} self.wait_timeout: float = DEFAULT_SELECTOR_SELECT_TIMEOUT @@ -172,7 +172,9 @@ async def _selected_events(self) -> Tuple[ self.registered_events_by_work_ids[work_id][fileno] = mask logger.debug( 'fd#{0} modified for mask#{1} by work#{2}'.format( - fileno, mask, work_id)) + fileno, mask, work_id, + ), + ) else: # Can throw ValueError: Invalid file descriptor: -1 # @@ -187,7 +189,9 @@ async def _selected_events(self) -> Tuple[ self.registered_events_by_work_ids[work_id][fileno] = mask logger.debug( 'fd#{0} registered for mask#{1} by work#{2}'.format( - fileno, mask, work_id)) + fileno, mask, work_id, + ), + ) selected = self.selector.select( timeout=DEFAULT_SELECTOR_SELECT_TIMEOUT, ) @@ -251,7 +255,9 @@ def _cleanup(self, work_id: int) -> None: for fileno in self.registered_events_by_work_ids[work_id]: logger.debug( 'fd#{0} unregistered by work#{1}'.format( - fileno, work_id)) + fileno, work_id, + ), + ) self.selector.unregister(fileno) self.registered_events_by_work_ids[work_id].clear() del self.registered_events_by_work_ids[work_id] From 15966ac6be9a0659b70eef6682d754192afbc4af Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 22:38:39 +0530 Subject: [PATCH 42/54] Revert back `Any` and use `addr or None` --- proxy/core/acceptor/acceptor.py | 4 +--- proxy/core/acceptor/local.py | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py index 923bde34a6..a89e282f5b 100644 --- a/proxy/core/acceptor/acceptor.py +++ b/proxy/core/acceptor/acceptor.py @@ -132,13 +132,11 @@ def accept(self, events: List[Tuple[selectors.SelectorKey, int]]) -> None: logging.debug( 'Accepting new work#{0}'.format(conn.fileno()), ) - addr = None if addr == '' else addr - work = (conn, addr) + work = (conn, addr or None) if self.flags.local_executor: assert self._local_work_queue self._local_work_queue.put_nowait(work) else: - addr = None if addr == '' else addr self._work(*work) def run_once(self) -> None: diff --git a/proxy/core/acceptor/local.py b/proxy/core/acceptor/local.py index fe0d16f7e8..27f4457b62 100644 --- a/proxy/core/acceptor/local.py +++ b/proxy/core/acceptor/local.py @@ -17,9 +17,8 @@ import asyncio import contextlib -from typing import Optional, TYPE_CHECKING -if TYPE_CHECKING: - from typing import Any +from typing import Optional +from typing import Any # noqa: W0611 pylint: disable=unused-import from .threadless import Threadless From cedf02de539970524c5f5373166b76ee6419f298 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 22:54:08 +0530 Subject: [PATCH 43/54] Address `flake8` --- proxy/core/acceptor/executors.py | 2 +- tests/plugin/test_http_proxy_plugins.py | 24 ++++++++++++++----- ...ttp_proxy_plugins_with_tls_interception.py | 8 +++++-- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/proxy/core/acceptor/executors.py b/proxy/core/acceptor/executors.py index 918c4d05fd..c89c4dc0fd 100644 --- a/proxy/core/acceptor/executors.py +++ b/proxy/core/acceptor/executors.py @@ -202,7 +202,7 @@ def _shutdown_workers(self) -> None: """Pop a running threadless worker and clean it up.""" for index in range(self.flags.num_workers): self._workers[index].running.set() - for index in range(self.flags.num_workers): + for _ in range(self.flags.num_workers): pid = self.work_pids[-1] 
self._processes.pop().join() self._workers.pop() diff --git a/tests/plugin/test_http_proxy_plugins.py b/tests/plugin/test_http_proxy_plugins.py index bdc8094372..aa1c4d1a06 100644 --- a/tests/plugin/test_http_proxy_plugins.py +++ b/tests/plugin/test_http_proxy_plugins.py @@ -72,7 +72,9 @@ def _setUp(self, request: Any, mocker: MockerFixture) -> None: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( "_setUp", - [('test_modify_post_data_plugin')], + ( + ('test_modify_post_data_plugin'), + ), indirect=True, ) # type: ignore[misc] async def test_modify_post_data_plugin(self) -> None: @@ -120,7 +122,9 @@ async def test_modify_post_data_plugin(self) -> None: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( "_setUp", - [('test_proposed_rest_api_plugin')], + ( + ('test_proposed_rest_api_plugin'), + ), indirect=True, ) # type: ignore[misc] async def test_proposed_rest_api_plugin(self) -> None: @@ -163,7 +167,9 @@ async def test_proposed_rest_api_plugin(self) -> None: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( "_setUp", - [('test_redirect_to_custom_server_plugin')], + ( + ('test_redirect_to_custom_server_plugin'), + ), indirect=True, ) # type: ignore[misc] async def test_redirect_to_custom_server_plugin(self) -> None: @@ -204,7 +210,9 @@ async def test_redirect_to_custom_server_plugin(self) -> None: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( "_setUp", - [('test_filter_by_upstream_host_plugin')], + ( + ('test_filter_by_upstream_host_plugin'), + ), indirect=True, ) # type: ignore[misc] async def test_filter_by_upstream_host_plugin(self) -> None: @@ -243,7 +251,9 @@ async def test_filter_by_upstream_host_plugin(self) -> None: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( "_setUp", - [('test_man_in_the_middle_plugin')], + ( + ('test_man_in_the_middle_plugin'), + ), indirect=True, ) # type: ignore[misc] async def test_man_in_the_middle_plugin(self) -> None: @@ -335,7 +345,9 @@ def closed() -> bool: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( "_setUp", - [('test_filter_by_url_regex_plugin')], + ( + ('test_filter_by_url_regex_plugin'), + ), indirect=True, ) # type: ignore[misc] async def test_filter_by_url_regex_plugin(self) -> None: diff --git a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py index 2527ad39a0..2401a32d76 100644 --- a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py +++ b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py @@ -154,7 +154,9 @@ def send(raw: bytes) -> int: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( '_setUp', - [('test_modify_post_data_plugin')], + ( + ('test_modify_post_data_plugin'), + ), indirect=True, ) # type: ignore[misc] async def test_modify_post_data_plugin(self) -> None: @@ -207,7 +209,9 @@ async def test_modify_post_data_plugin(self) -> None: @pytest.mark.asyncio # type: ignore[misc] @pytest.mark.parametrize( '_setUp', - [('test_man_in_the_middle_plugin')], + ( + ('test_man_in_the_middle_plugin'), + ), indirect=True, ) # type: ignore[misc] async def test_man_in_the_middle_plugin(self) -> None: From d96d5c269afcd1511cf4b6a6098d702d0684853d Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 23:33:02 +0530 Subject: [PATCH 44/54] Update tests to use `fileno` --- tests/http/test_http_proxy.py | 2 +- .../http/test_http_proxy_tls_interception.py | 6 ++-- tests/http/test_protocol_handler.py | 32 
+++++++++---------- tests/http/test_web_server.py | 24 +++++++------- tests/plugin/test_http_proxy_plugins.py | 32 +++++++++---------- ...ttp_proxy_plugins_with_tls_interception.py | 16 +++++----- 6 files changed, 56 insertions(+), 56 deletions(-) diff --git a/tests/http/test_http_proxy.py b/tests/http/test_http_proxy.py index e60d3086c7..b1bc964059 100644 --- a/tests/http/test_http_proxy.py +++ b/tests/http/test_http_proxy.py @@ -67,7 +67,7 @@ async def test_proxy_plugin_on_and_before_upstream_connection(self) -> None: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn.file(), + fileobj=self._conn.fileno(), fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, diff --git a/tests/http/test_http_proxy_tls_interception.py b/tests/http/test_http_proxy_tls_interception.py index 16ad094466..1fcfa71720 100644 --- a/tests/http/test_http_proxy_tls_interception.py +++ b/tests/http/test_http_proxy_tls_interception.py @@ -131,8 +131,8 @@ async def asyncReturnBool(val: bool) -> bool: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -150,7 +150,7 @@ async def asyncReturnBool(val: bool) -> bool: ) self.plugin.return_value.on_request_complete.assert_called() self.plugin.return_value.read_from_descriptors.assert_called_with([ - self._conn, + self._conn.fileno(), ]) self.proxy_plugin.return_value.before_upstream_connection.assert_called() self.proxy_plugin.return_value.handle_client_request.assert_called() diff --git a/tests/http/test_protocol_handler.py b/tests/http/test_protocol_handler.py index c043734ffc..ca6cce944e 100644 --- a/tests/http/test_protocol_handler.py +++ b/tests/http/test_protocol_handler.py @@ -35,8 +35,8 @@ def mock_selector_for_client_read(self: Any) -> None: self.mock_selector.return_value.select.return_value = [ ( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -217,8 +217,8 @@ def has_buffer() -> bool: [ ( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -228,8 +228,8 @@ def has_buffer() -> bool: [ ( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=0, data=None, ), @@ -239,8 +239,8 @@ def has_buffer() -> bool: [ ( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -250,8 +250,8 @@ def has_buffer() -> bool: [ ( selectors.SelectorKey( - fileobj=server.connection, - fd=server.connection.fileno, + fileobj=server.connection.fileno(), + fd=server.connection.fileno(), events=0, data=None, ), @@ -372,8 +372,8 @@ def mock_selector_for_client_read_and_server_write( [ ( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -383,8 +383,8 @@ def mock_selector_for_client_read_and_server_write( [ ( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=0, data=None, ), @@ -394,8 +394,8 @@ def 
mock_selector_for_client_read_and_server_write( [ ( selectors.SelectorKey( - fileobj=server.connection, - fd=server.connection.fileno, + fileobj=server.connection.fileno(), + fd=server.connection.fileno(), events=0, data=None, ), diff --git a/tests/http/test_web_server.py b/tests/http/test_web_server.py index 46818c5870..54a1d49060 100644 --- a/tests/http/test_web_server.py +++ b/tests/http/test_web_server.py @@ -63,8 +63,8 @@ def mock_selector_for_client_read(self: Any) -> None: self.mock_selector.return_value.select.return_value = [ ( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -231,8 +231,8 @@ async def test_static_web_server_serves(self) -> None: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -240,8 +240,8 @@ async def test_static_web_server_serves(self) -> None: )], [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_WRITE, data=None, ), @@ -281,8 +281,8 @@ async def test_static_web_server_serves_404(self) -> None: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -290,8 +290,8 @@ async def test_static_web_server_serves_404(self) -> None: )], [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_WRITE, data=None, ), @@ -336,8 +336,8 @@ async def test_default_web_server_returns_404(self) -> None: self.mock_selector.return_value.select.return_value = [ ( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), diff --git a/tests/plugin/test_http_proxy_plugins.py b/tests/plugin/test_http_proxy_plugins.py index aa1c4d1a06..7102c3ef7a 100644 --- a/tests/plugin/test_http_proxy_plugins.py +++ b/tests/plugin/test_http_proxy_plugins.py @@ -93,8 +93,8 @@ async def test_modify_post_data_plugin(self) -> None: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -140,8 +140,8 @@ async def test_proposed_rest_api_plugin(self) -> None: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -183,8 +183,8 @@ async def test_redirect_to_custom_server_plugin(self) -> None: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -226,8 +226,8 @@ async def test_filter_by_upstream_host_plugin(self) -> None: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), 
events=selectors.EVENT_READ, data=None, ), @@ -280,8 +280,8 @@ def closed() -> bool: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -289,8 +289,8 @@ def closed() -> bool: )], [( selectors.SelectorKey( - fileobj=server.connection, - fd=server.connection.fileno, + fileobj=server.connection.fileno(), + fd=server.connection.fileno(), events=selectors.EVENT_WRITE, data=None, ), @@ -298,8 +298,8 @@ def closed() -> bool: )], [( selectors.SelectorKey( - fileobj=server.connection, - fd=server.connection.fileno, + fileobj=server.connection.fileno(), + fd=server.connection.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -361,8 +361,8 @@ async def test_filter_by_url_regex_plugin(self) -> None: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), diff --git a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py index 2401a32d76..232b0dd954 100644 --- a/tests/plugin/test_http_proxy_plugins_with_tls_interception.py +++ b/tests/plugin/test_http_proxy_plugins_with_tls_interception.py @@ -106,8 +106,8 @@ def mock_connection() -> Any: self.mock_selector.return_value.select.side_effect = [ [( selectors.SelectorKey( - fileobj=self._conn, - fd=self._conn.fileno, + fileobj=self._conn.fileno(), + fd=self._conn.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -115,8 +115,8 @@ def mock_connection() -> Any: )], [( selectors.SelectorKey( - fileobj=self.client_ssl_connection, - fd=self.client_ssl_connection.fileno, + fileobj=self.client_ssl_connection.fileno(), + fd=self.client_ssl_connection.fileno(), events=selectors.EVENT_READ, data=None, ), @@ -124,8 +124,8 @@ def mock_connection() -> Any: )], [( selectors.SelectorKey( - fileobj=self.server_ssl_connection, - fd=self.server_ssl_connection.fileno, + fileobj=self.server_ssl_connection.fileno(), + fd=self.server_ssl_connection.fileno(), events=selectors.EVENT_WRITE, data=None, ), @@ -133,8 +133,8 @@ def mock_connection() -> Any: )], [( selectors.SelectorKey( - fileobj=self.server_ssl_connection, - fd=self.server_ssl_connection.fileno, + fileobj=self.server_ssl_connection.fileno(), + fd=self.server_ssl_connection.fileno(), events=selectors.EVENT_READ, data=None, ), From 2805841119e22529313d6d9c17fb2644244e2a8c Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Mon, 22 Nov 2021 23:52:18 +0530 Subject: [PATCH 45/54] Fix doc build --- docs/conf.py | 14 +++++++++++--- proxy/core/acceptor/acceptor.py | 2 +- proxy/core/acceptor/threadless.py | 4 ++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 2db59aa9b3..4896ca064f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -241,6 +241,7 @@ nitpicky = True _any_role = 'any' +_py_obj_role = 'py:obj' _py_class_role = 'py:class' nitpick_ignore = [ (_any_role, ''), @@ -256,8 +257,10 @@ (_any_role, 'HttpParser.state'), (_any_role, 'HttpProtocolHandler'), (_any_role, 'multiprocessing.Manager'), - (_any_role, 'work_klass'), (_any_role, 'proxy.core.base.tcp_upstream.TcpUpstreamConnectionHandler'), + (_any_role, 'work_klass'), + (_py_class_role, '_asyncio.Task'), + (_py_class_role, 'asyncio.events.AbstractEventLoop'), (_py_class_role, 'CacheStore'), 
(_py_class_role, 'HttpParser'), (_py_class_role, 'HttpProtocolHandlerPlugin'), @@ -268,12 +271,17 @@ (_py_class_role, 'paramiko.channel.Channel'), (_py_class_role, 'proxy.http.parser.parser.T'), (_py_class_role, 'proxy.plugin.cache.store.base.CacheStore'), + (_py_class_role, 'proxy.core.pool.AcceptorPool'), + (_py_class_role, 'proxy.core.executors.ThreadlessPool'), + (_py_class_role, 'proxy.core.acceptor.threadless.T'), + (_py_class_role, 'queue.Queue[Any]'), (_py_class_role, 'TcpClientConnection'), (_py_class_role, 'TcpServerConnection'), (_py_class_role, 'unittest.case.TestCase'), (_py_class_role, 'unittest.result.TestResult'), (_py_class_role, 'UUID'), - (_py_class_role, 'WebsocketFrame'), (_py_class_role, 'Url'), - (_py_class_role, '_asyncio.Task'), + (_py_class_role, 'WebsocketFrame'), + (_py_class_role, 'Work'), + (_py_obj_role, 'proxy.core.acceptor.threadless.T'), ] diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py index a89e282f5b..481b27e615 100644 --- a/proxy/core/acceptor/acceptor.py +++ b/proxy/core/acceptor/acceptor.py @@ -26,7 +26,7 @@ from multiprocessing.reduction import recv_handle from typing import List, Optional, Tuple -from typing import Any +from typing import Any # noqa: W0611 pylint: disable=unused-import from ...common.flag import flags from ...common.utils import is_threadless diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index e7a3388932..2998bbabc8 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -21,7 +21,7 @@ import selectors import multiprocessing -from abc import abstractmethod, ABC, abstractproperty +from abc import abstractmethod, ABC from typing import Dict, Optional, Tuple, List, Set, Generic, TypeVar, Union from ...common.logger import Logger @@ -87,8 +87,8 @@ def __init__( ] = {} self.wait_timeout: float = DEFAULT_SELECTOR_SELECT_TIMEOUT - @abstractproperty @property + @abstractmethod def loop(self) -> Optional[asyncio.AbstractEventLoop]: raise NotImplementedError() From 7d5e955fbab754d51361ecc78d36d53804b44a6f Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 23 Nov 2021 00:11:13 +0530 Subject: [PATCH 46/54] Fix doc spell, use tear down and not teardown --- proxy/core/acceptor/local.py | 1 + proxy/core/acceptor/remote.py | 4 ++++ proxy/core/acceptor/threadless.py | 4 ++-- proxy/core/base/tcp_upstream.py | 2 +- proxy/http/handler.py | 4 ++-- proxy/http/proxy/server.py | 2 +- proxy/http/server/web.py | 4 ++-- 7 files changed, 13 insertions(+), 8 deletions(-) diff --git a/proxy/core/acceptor/local.py b/proxy/core/acceptor/local.py index 27f4457b62..ab1b70ebba 100644 --- a/proxy/core/acceptor/local.py +++ b/proxy/core/acceptor/local.py @@ -11,6 +11,7 @@ .. spelling:: acceptor + teardown """ import queue import logging diff --git a/proxy/core/acceptor/remote.py b/proxy/core/acceptor/remote.py index 747304d56a..f641086691 100644 --- a/proxy/core/acceptor/remote.py +++ b/proxy/core/acceptor/remote.py @@ -7,6 +7,10 @@ :copyright: (c) 2013-present by Abhinav Singh and contributors. :license: BSD, see LICENSE for more details. + + .. spelling:: + + acceptor """ import asyncio import logging diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 2998bbabc8..7d5f657127 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -98,7 +98,7 @@ def receive_from_work_queue(self) -> bool: Receive it and call ``work_on_tcp_conn``. 
- Return True to teardown the loop.""" + Return True to tear down the loop.""" raise NotImplementedError() @abstractmethod @@ -110,7 +110,7 @@ def work_queue_fileno(self) -> Optional[int]: def close_work_queue(self) -> None: """Only called if ``work_queue_fileno`` returns an integer. - If an fd is selectable for work queue, make sure + If an fd is select-able for work queue, make sure to close the work queue fd now.""" pass # pragma: no cover diff --git a/proxy/core/base/tcp_upstream.py b/proxy/core/base/tcp_upstream.py index d0ac63567a..1de1e4a910 100644 --- a/proxy/core/base/tcp_upstream.py +++ b/proxy/core/base/tcp_upstream.py @@ -83,7 +83,7 @@ def read_from_descriptors(self, r: Readables) -> bool: self.total_size += len(raw) self.handle_upstream_data(raw) else: - # Teardown because upstream proxy closed the connection + # Tear down because upstream proxy closed the connection return True except ssl.SSLWantReadError: logger.info('Upstream SSLWantReadError, will retry') diff --git a/proxy/http/handler.py b/proxy/http/handler.py index 25b9cd649e..81fcd079fb 100644 --- a/proxy/http/handler.py +++ b/proxy/http/handler.py @@ -196,7 +196,7 @@ def handle_data(self, data: memoryview) -> Optional[bool]: try: # HttpProtocolHandlerPlugin.on_client_data - # Can raise HttpProtocolException to teardown the connection + # Can raise HttpProtocolException to tear down the connection for plugin in self.plugins.values(): optional_data = plugin.on_client_data(data) if optional_data is None: @@ -370,7 +370,7 @@ def run(self) -> None: try: self.initialize() while True: - # Teardown if client buffer is empty and connection is inactive + # Tear down if client buffer is empty and connection is inactive if self.is_inactive(): logger.debug( 'Client buffer is empty and maximum inactivity has reached ' diff --git a/proxy/http/proxy/server.py b/proxy/http/proxy/server.py index 8998780206..b8181d663e 100644 --- a/proxy/http/proxy/server.py +++ b/proxy/http/proxy/server.py @@ -440,7 +440,7 @@ def on_response_chunk(self, chunk: List[memoryview]) -> List[memoryview]: # self.access_log() return chunk - # Can return None to teardown connection + # Can return None to tear down connection def on_client_data(self, raw: memoryview) -> Optional[memoryview]: if not self.request.has_host(): return raw diff --git a/proxy/http/server/web.py b/proxy/http/server/web.py index 263707bd69..129c088ffb 100644 --- a/proxy/http/server/web.py +++ b/proxy/http/server/web.py @@ -264,7 +264,7 @@ def on_client_data(self, raw: memoryview) -> Optional[memoryview]: remaining = raw.tobytes() frame = WebsocketFrame() while remaining != b'': - # TODO: Teardown if invalid protocol exception + # TODO: Tear down if invalid protocol exception remaining = frame.parse(remaining) if frame.opcode == websocketOpcodes.CONNECTION_CLOSE: logger.warning( @@ -292,7 +292,7 @@ def on_client_data(self, raw: memoryview) -> Optional[memoryview]: self.route.handle_request(self.pipeline_request) if not self.pipeline_request.is_http_1_1_keep_alive(): logger.error( - 'Pipelined request is not keep-alive, will teardown request...', + 'Pipelined request is not keep-alive, will tear down request...', ) raise HttpProtocolException() self.pipeline_request = None From f36ab2d039b6b021d01d412e7290b6337fee3f38 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 23 Nov 2021 02:45:48 +0530 Subject: [PATCH 47/54] Doc updates --- proxy/core/acceptor/acceptor.py | 31 +++++++------------------------ proxy/core/acceptor/executors.py | 6 ------ proxy/core/acceptor/local.py | 2 +- 
proxy/core/acceptor/remote.py | 9 +++++++-- proxy/core/acceptor/threadless.py | 4 ++-- proxy/proxy.py | 3 +++ 6 files changed, 20 insertions(+), 35 deletions(-) diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py index 481b27e615..3df89e8809 100644 --- a/proxy/core/acceptor/acceptor.py +++ b/proxy/core/acceptor/acceptor.py @@ -57,34 +57,17 @@ class Acceptor(multiprocessing.Process): """Work acceptor process. On start-up, `Acceptor` accepts a file descriptor which will be used to - accept new work. File descriptor is accepted over a `fd_queue` which is - closed immediately after receiving the descriptor. + accept new work. File descriptor is accepted over a `fd_queue`. `Acceptor` goes on to listen for new work over the received server socket. By default, `Acceptor` will spawn a new thread to handle each work. - However, when `--threadless` option is enabled, `Acceptor` process - will also pre-spawns a - :class:`~proxy.core.acceptor.threadless.Threadless` process during - start-up. Accepted work is passed to these - :class:`~proxy.core.acceptor.threadless.Threadless` processes. - `Acceptor` process shares accepted work with a - :class:`~proxy.core.acceptor.threadless.Threadless` process over - it's dedicated pipe. - - TODO(abhinavsingh): Open questions:: - - 1. Instead of starting - :class:`~proxy.core.acceptor.threadless.Threadless` process, - can we work with a - :class:`~proxy.core.acceptor.threadless.Threadless` thread? - 2. What are the performance implications of sharing fds between - threads vs processes? - 3. How much performance degradation happens when acceptor and - threadless processes are running on separate CPU cores? - 4. Can we ensure both acceptor and threadless process are pinned to - the same CPU core? - + However, when `--threadless` option is enabled without `--local-executor`, + `Acceptor` process will also pre-spawns a + :class:`~proxy.core.acceptor.threadless.Threadless` process during start-up. + Accepted work is delegated to these :class:`~proxy.core.acceptor.threadless.Threadless` + processes. `Acceptor` process shares accepted work with a + :class:`~proxy.core.acceptor.threadless.Threadless` process over it's dedicated pipe. """ def __init__( diff --git a/proxy/core/acceptor/executors.py b/proxy/core/acceptor/executors.py index c89c4dc0fd..065e78bded 100644 --- a/proxy/core/acceptor/executors.py +++ b/proxy/core/acceptor/executors.py @@ -76,12 +76,6 @@ class ThreadlessPool: If necessary, start multiple threadless pool with different work classes. - - TODO: We could optimize multiple-work-type scenario - by making Threadless class constructor independent of ``work_klass``. - We could then relay the ``work_klass`` during work delegation. - This will also make ThreadlessPool constructor agnostic - of ``work_klass``. 
""" def __init__( diff --git a/proxy/core/acceptor/local.py b/proxy/core/acceptor/local.py index ab1b70ebba..632497b541 100644 --- a/proxy/core/acceptor/local.py +++ b/proxy/core/acceptor/local.py @@ -27,7 +27,7 @@ class LocalExecutor(Threadless['queue.Queue[Any]']): - """Listens for READ_EVENT over a queue, accepts and initializes work.""" + """A threadless executor implementation which uses a queue to receive new work.""" def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) diff --git a/proxy/core/acceptor/remote.py b/proxy/core/acceptor/remote.py index f641086691..76f8877d21 100644 --- a/proxy/core/acceptor/remote.py +++ b/proxy/core/acceptor/remote.py @@ -26,8 +26,13 @@ class RemoteExecutor(Threadless[connection.Connection]): - """RemoteExecutor receives work over a Connection object. - RemoteExecutor uses ``recv_handle`` to accept incoming work. + """A threadless executor implementation which receives work over a connection. + + NOTE: RemoteExecutor uses ``recv_handle`` to accept file descriptors. + + TODO: Refactor and abstract ``recv_handle`` part so that a threaded + remote executor can also accept work over a connection. Currently, + remote executors must be running in a process. """ def __init__(self, *args: Any, **kwargs: Any) -> None: diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 7d5f657127..9fddd09a9f 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -39,9 +39,9 @@ class Threadless(ABC, Generic[T]): - """Work executor process. + """Work executor base class. - Threadless process provides an event loop, which is shared across + Threadless provides an event loop, which is shared across multiple :class:`~proxy.core.acceptor.work.Work` instances to handle work. diff --git a/proxy/proxy.py b/proxy/proxy.py index 1af5e2d762..0f2346f935 100644 --- a/proxy/proxy.py +++ b/proxy/proxy.py @@ -167,6 +167,7 @@ def setup(self) -> None: # we are listening upon. This is necessary to preserve # the server port when `--port=0` is used. self.flags.port = self.listener._port + # Setup EventManager if self.flags.enable_events: logger.info('Core Event enabled') self.event_manager = EventManager() @@ -174,12 +175,14 @@ def setup(self) -> None: event_queue = self.event_manager.queue \ if self.event_manager is not None \ else None + # Setup remote executors if not self.flags.local_executor: self.executors = ThreadlessPool( flags=self.flags, event_queue=event_queue, ) self.executors.setup() + # Setup acceptors self.acceptors = AcceptorPool( flags=self.flags, listener=self.listener, From f78fe5e650902102d5eafaef702711de2e8f4a25 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 23 Nov 2021 03:03:44 +0530 Subject: [PATCH 48/54] Add back support for `Python 3.6` --- .github/workflows/test-library.yml | 1 + README.md | 6 +++--- proxy/core/acceptor/acceptor.py | 7 +++++++ proxy/core/acceptor/threadless.py | 3 +-- proxy/http/handler.py | 5 +++-- proxy/http/websocket/client.py | 3 +-- setup.cfg | 3 ++- 7 files changed, 18 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test-library.yml b/.github/workflows/test-library.yml index a1c7f4c114..181a4adcb9 100644 --- a/.github/workflows/test-library.yml +++ b/.github/workflows/test-library.yml @@ -429,6 +429,7 @@ jobs: # NOTE: to improve the responsiveness. It's nice to see the most # NOTE: important results first. 
- '3.10' + - 3.6 - 3.9 - 3.8 - 3.7 diff --git a/README.md b/README.md index 00182ac16e..abcb665e31 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ [![iOS, iOS Simulator](https://img.shields.io/static/v1?label=tested%20with&message=iOS%20%F0%9F%93%B1%20%7C%20iOS%20Simulator%20%F0%9F%93%B1&color=darkgreen&style=for-the-badge)](https://abhinavsingh.com/proxy-py-a-lightweight-single-file-http-proxy-server-in-python/) [![pypi version](https://img.shields.io/pypi/v/proxy.py)](https://pypi.org/project/proxy.py/) -[![Python 3.x](https://img.shields.io/static/v1?label=Python&message=3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10&color=blue)](https://www.python.org/) +[![Python 3.x](https://img.shields.io/static/v1?label=Python&message=3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9%20%7C%203.10&color=blue)](https://www.python.org/) [![Checked with mypy](https://img.shields.io/static/v1?label=MyPy&message=checked&color=blue)](http://mypy-lang.org/) [![lib](https://github.com/abhinavsingh/proxy.py/actions/workflows/test-library.yml/badge.svg)](https://github.com/abhinavsingh/proxy.py/actions/workflows/test-library.yml) [![codecov](https://codecov.io/gh/abhinavsingh/proxy.py/branch/develop/graph/badge.svg?token=Zh9J7b4la2)](https://codecov.io/gh/abhinavsingh/proxy.py) @@ -1709,7 +1709,7 @@ If threadless works for you, consider sending a PR by editing `_env_threadless_c ``` Hence a Python version that understands typing annotations is required. -Make sure you are using `Python 3.7+`. +Make sure you are using `Python 3.6+`. Verify the version before running `proxy.py`: @@ -1722,7 +1722,7 @@ All `typing` annotations can be replaced with `comment-only` annotations. Exampl >>> ################^^^^^^^^^^^ ``` -It will enable `proxy.py` to run on Python `pre-3.7`, even on `2.7`. +It will enable `proxy.py` to run on Python `pre-3.6`, even on `2.7`. However, as all future versions of Python will support `typing` annotations, this has not been considered. 
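A note on the event-loop churn in this and the following patches: on the Python versions targeted here, `asyncio.get_event_loop()` only creates a loop on demand in the main thread; inside a worker thread it raises `RuntimeError` unless a loop was explicitly set for that thread. That is why the threaded code paths below end up creating their own loop. A minimal sketch of the failure mode and the portable fix; the `work()` coroutine and `handler()` function are illustrative stand-ins, not proxy.py code:

```
import asyncio
import threading


async def work() -> str:
    return 'done'


def handler() -> None:
    try:
        # In a non-main thread with no loop set, this raises
        # "RuntimeError: There is no current event loop in thread ...".
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # Behaves the same on Python 3.6+ regardless of thread.
        loop = asyncio.new_event_loop()
    try:
        print(loop.run_until_complete(work()))
    finally:
        loop.close()


t = threading.Thread(target=handler)
t.start()
t.join()
```

Patches 50 through 53 below converge on exactly this conclusion, first gating `asyncio.new_event_loop()` behind version checks and finally using it unconditionally for threaded mode.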
diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py index 3df89e8809..861ab3d9a1 100644 --- a/proxy/core/acceptor/acceptor.py +++ b/proxy/core/acceptor/acceptor.py @@ -15,6 +15,7 @@ """ import queue import socket +import asyncio import logging import argparse import selectors @@ -106,6 +107,10 @@ def __init__( self._local_work_queue: Optional['queue.Queue[Any]'] = None self._local: Optional[LocalExecutor] = None self._lthread: Optional[threading.Thread] = None + self._loop: Optional[asyncio.AbstractEventLoop] = None + if not self.flags.local_executor and \ + not is_threadless(self.flags.threadless, self.flags.threaded): + self._loop = asyncio.get_event_loop_policy().get_event_loop() def accept(self, events: List[Tuple[selectors.SelectorKey, int]]) -> None: for _, mask in events: @@ -167,6 +172,8 @@ def run(self) -> None: if self.flags.local_executor: self._stop_local() self.sock.close() + if self._loop: + self._loop.close() logger.debug('Acceptor#%d shutdown', self.idd) def _start_local(self) -> None: diff --git a/proxy/core/acceptor/threadless.py b/proxy/core/acceptor/threadless.py index 9fddd09a9f..fd7ee98add 100644 --- a/proxy/core/acceptor/threadless.py +++ b/proxy/core/acceptor/threadless.py @@ -326,8 +326,7 @@ def run(self) -> None: assert self.loop while not self.running.is_set(): # logger.debug('Working on {0} works'.format(len(self.works))) - teardown = self.loop.run_until_complete(self._run_once()) - if teardown: + if self.loop.run_until_complete(self._run_once()): break except KeyboardInterrupt: pass diff --git a/proxy/http/handler.py b/proxy/http/handler.py index 81fcd079fb..b1d808f7b9 100644 --- a/proxy/http/handler.py +++ b/proxy/http/handler.py @@ -367,6 +367,7 @@ def run(self) -> None: This is here just to maintain backward compatibility with threaded mode. 
""" + loop = asyncio.get_event_loop_policy().get_event_loop() try: self.initialize() while True: @@ -377,8 +378,7 @@ def run(self) -> None: 'between client and server connection, tearing down...', ) break - teardown = asyncio.run(self._run_once()) - if teardown: + if loop.run_until_complete(self._run_once()): break except KeyboardInterrupt: # pragma: no cover pass @@ -391,6 +391,7 @@ def run(self) -> None: ) finally: self.shutdown() + loop.close() async def _run_once(self) -> bool: events, readables, writables = await self._selected_events() diff --git a/proxy/http/websocket/client.py b/proxy/http/websocket/client.py index da29a4a954..f4e25573bb 100644 --- a/proxy/http/websocket/client.py +++ b/proxy/http/websocket/client.py @@ -113,8 +113,7 @@ def run_once(self) -> bool: def run(self) -> None: try: while not self.closed: - teardown = self.run_once() - if teardown: + if self.run_once(): break except KeyboardInterrupt: pass diff --git a/setup.cfg b/setup.cfg index 702f3a3896..db77dbb005 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,6 +49,7 @@ classifiers = Programming Language :: Python :: Implementation Programming Language :: Python :: 3 :: Only Programming Language :: Python :: 3 + Programming Language :: Python :: 3.6 Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 @@ -87,7 +88,7 @@ keywords = Python3 [options] -python_requires = >= 3.7 +python_requires = >= 3.6 packages = find: include_package_data = True zip_safe = False From 265380d2e687a6aa73ad19132d8c0fa7bca81de0 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 23 Nov 2021 03:07:02 +0530 Subject: [PATCH 49/54] Acceptors dont need loop initialization --- proxy/core/acceptor/acceptor.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py index 861ab3d9a1..3df89e8809 100644 --- a/proxy/core/acceptor/acceptor.py +++ b/proxy/core/acceptor/acceptor.py @@ -15,7 +15,6 @@ """ import queue import socket -import asyncio import logging import argparse import selectors @@ -107,10 +106,6 @@ def __init__( self._local_work_queue: Optional['queue.Queue[Any]'] = None self._local: Optional[LocalExecutor] = None self._lthread: Optional[threading.Thread] = None - self._loop: Optional[asyncio.AbstractEventLoop] = None - if not self.flags.local_executor and \ - not is_threadless(self.flags.threadless, self.flags.threaded): - self._loop = asyncio.get_event_loop_policy().get_event_loop() def accept(self, events: List[Tuple[selectors.SelectorKey, int]]) -> None: for _, mask in events: @@ -172,8 +167,6 @@ def run(self) -> None: if self.flags.local_executor: self._stop_local() self.sock.close() - if self._loop: - self._loop.close() logger.debug('Acceptor#%d shutdown', self.idd) def _start_local(self) -> None: From 5684b8c476983c22270e88ded0b326e8ab049727 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 23 Nov 2021 03:27:27 +0530 Subject: [PATCH 50/54] On Python 3.6 `asyncio.new_event_loop()` is necessary --- proxy/http/handler.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/proxy/http/handler.py b/proxy/http/handler.py index b1d808f7b9..6af97db026 100644 --- a/proxy/http/handler.py +++ b/proxy/http/handler.py @@ -12,6 +12,7 @@ http """ +import sys import ssl import time import errno @@ -367,7 +368,11 @@ def run(self) -> None: This is here just to maintain backward compatibility with threaded mode. 
""" - loop = asyncio.get_event_loop_policy().get_event_loop() + if sys.version_info.major == 3 and \ + sys.version_info.minor == 6: + loop = asyncio.new_event_loop() + else: + loop = asyncio.get_event_loop_policy().get_event_loop() try: self.initialize() while True: From 8686093fd63f80e8ca2cbd03f5510555e18adee3 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 23 Nov 2021 03:29:33 +0530 Subject: [PATCH 51/54] Make doc happy --- docs/conf.py | 1 - proxy/core/acceptor/acceptor.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 4896ca064f..68fb0974c3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -246,7 +246,6 @@ nitpick_ignore = [ (_any_role, ''), (_any_role, '__init__'), - (_any_role, '--threadless'), (_any_role, 'Client'), (_any_role, 'event_queue'), (_any_role, 'fd_queue'), diff --git a/proxy/core/acceptor/acceptor.py b/proxy/core/acceptor/acceptor.py index 3df89e8809..3fa011d6ff 100644 --- a/proxy/core/acceptor/acceptor.py +++ b/proxy/core/acceptor/acceptor.py @@ -62,7 +62,7 @@ class Acceptor(multiprocessing.Process): `Acceptor` goes on to listen for new work over the received server socket. By default, `Acceptor` will spawn a new thread to handle each work. - However, when `--threadless` option is enabled without `--local-executor`, + However, when ``--threadless`` option is enabled without ``--local-executor``, `Acceptor` process will also pre-spawns a :class:`~proxy.core.acceptor.threadless.Threadless` process during start-up. Accepted work is delegated to these :class:`~proxy.core.acceptor.threadless.Threadless` From d84d3b44876efcfd3843b5f20ea5f4ac1c241028 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 23 Nov 2021 04:13:41 +0530 Subject: [PATCH 52/54] `--threaded` needs a new event loop for 3.7 too --- proxy/http/handler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/proxy/http/handler.py b/proxy/http/handler.py index 6af97db026..020aa3901a 100644 --- a/proxy/http/handler.py +++ b/proxy/http/handler.py @@ -369,7 +369,8 @@ def run(self) -> None: This is here just to maintain backward compatibility with threaded mode. """ if sys.version_info.major == 3 and \ - sys.version_info.minor == 6: + sys.version_info.minor in (6, 7): + # NOTE: 3.7 on Windows works without new event loop too loop = asyncio.new_event_loop() else: loop = asyncio.get_event_loop_policy().get_event_loop() From 7090764863a7f562e5573c60efa122746d8d08e3 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 23 Nov 2021 05:05:10 +0530 Subject: [PATCH 53/54] Always use `asyncio.new_event_loop()` for threaded mode Added e2e integration tests (subprocess & curl) for all modes. --- proxy/http/handler.py | 8 +------ tests/integration/test_integration.py | 34 +++++++++++++++++---------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/proxy/http/handler.py b/proxy/http/handler.py index 020aa3901a..1551b298f1 100644 --- a/proxy/http/handler.py +++ b/proxy/http/handler.py @@ -12,7 +12,6 @@ http """ -import sys import ssl import time import errno @@ -368,12 +367,7 @@ def run(self) -> None: This is here just to maintain backward compatibility with threaded mode. 
""" - if sys.version_info.major == 3 and \ - sys.version_info.minor in (6, 7): - # NOTE: 3.7 on Windows works without new event loop too - loop = asyncio.new_event_loop() - else: - loop = asyncio.get_event_loop_policy().get_event_loop() + loop = asyncio.new_event_loop() try: self.initialize() while True: diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py index 1756ecd3eb..f7d4ec1d9a 100644 --- a/tests/integration/test_integration.py +++ b/tests/integration/test_integration.py @@ -12,7 +12,7 @@ """ from pathlib import Path from subprocess import check_output, Popen -from typing import Generator +from typing import Generator, Any import pytest @@ -20,32 +20,33 @@ from proxy.common._compat import IS_WINDOWS # noqa: WPS436 -PROXY_PY_PORT = get_available_port() - - # FIXME: Ignore is necessary for as long as pytest hasn't figured out # FIXME: typing for their fixtures. # Refs: # * https://github.com/pytest-dev/pytest/issues/7469#issuecomment-918345196 # * https://github.com/pytest-dev/pytest/issues/3342 @pytest.fixture # type: ignore[misc] -def _proxy_py_instance() -> Generator[None, None, None]: +def _proxy_py_subprocess(request: Any) -> Generator[int, None, None]: """Instantiate proxy.py in a subprocess for testing. + NOTE: Doesn't waits for the proxy to startup. + Ensure instance check in your tests. + After the testing is over, tear it down. """ + port = get_available_port() proxy_cmd = ( 'python', '-m', 'proxy', '--hostname', '127.0.0.1', - '--port', str(PROXY_PY_PORT), + '--port', str(port), '--enable-web-server', - ) + ) + tuple(request.param.split()) proxy_proc = Popen(proxy_cmd) try: - yield + yield port finally: proxy_proc.terminate() - proxy_proc.wait(1) + proxy_proc.wait() # FIXME: Ignore is necessary for as long as pytest hasn't figured out @@ -54,15 +55,22 @@ def _proxy_py_instance() -> Generator[None, None, None]: # * https://github.com/pytest-dev/pytest/issues/7469#issuecomment-918345196 # * https://github.com/pytest-dev/pytest/issues/3342 @pytest.mark.smoke # type: ignore[misc] -@pytest.mark.usefixtures('_proxy_py_instance') # type: ignore[misc] +@pytest.mark.parametrize( + '_proxy_py_subprocess', + ( + ('--threadless'), + ('--threadless --local-executor'), + ('--threaded'), + ), + indirect=True, +) @pytest.mark.xfail( IS_WINDOWS, reason='OSError: [WinError 193] %1 is not a valid Win32 application', raises=OSError, ) # type: ignore[misc] -def test_curl() -> None: +def test_curl(_proxy_py_subprocess: int) -> None: """An acceptance test with using ``curl`` through proxy.py.""" this_test_module = Path(__file__) shell_script_test = this_test_module.with_suffix('.sh') - - check_output([str(shell_script_test), str(PROXY_PY_PORT)]) + check_output([str(shell_script_test), str(_proxy_py_subprocess)]) From bb0ae7c0507ec43bfe14c6cfd406a133869ebee1 Mon Sep 17 00:00:00 2001 From: Abhinav Singh Date: Tue, 23 Nov 2021 14:53:39 +0530 Subject: [PATCH 54/54] Lint fixes --- tests/integration/test_integration.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py index f7d4ec1d9a..9df24fa8d1 100644 --- a/tests/integration/test_integration.py +++ b/tests/integration/test_integration.py @@ -26,7 +26,7 @@ # * https://github.com/pytest-dev/pytest/issues/7469#issuecomment-918345196 # * https://github.com/pytest-dev/pytest/issues/3342 @pytest.fixture # type: ignore[misc] -def _proxy_py_subprocess(request: Any) -> Generator[int, None, None]: +def 
proxy_py_subprocess(request: Any) -> Generator[int, None, None]: """Instantiate proxy.py in a subprocess for testing. NOTE: Doesn't waits for the proxy to startup. @@ -56,21 +56,21 @@ def _proxy_py_subprocess(request: Any) -> Generator[int, None, None]: # * https://github.com/pytest-dev/pytest/issues/3342 @pytest.mark.smoke # type: ignore[misc] @pytest.mark.parametrize( - '_proxy_py_subprocess', + 'proxy_py_subprocess', ( ('--threadless'), ('--threadless --local-executor'), ('--threaded'), ), indirect=True, -) +) # type: ignore[misc] @pytest.mark.xfail( IS_WINDOWS, reason='OSError: [WinError 193] %1 is not a valid Win32 application', raises=OSError, ) # type: ignore[misc] -def test_curl(_proxy_py_subprocess: int) -> None: +def test_curl(proxy_py_subprocess: int) -> None: """An acceptance test with using ``curl`` through proxy.py.""" this_test_module = Path(__file__) shell_script_test = this_test_module.with_suffix('.sh') - check_output([str(shell_script_test), str(_proxy_py_subprocess)]) + check_output([str(shell_script_test), str(proxy_py_subprocess)])
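The integration tests above rely on pytest's indirect parametrization: with `indirect=True`, each entry of the parametrize tuple is handed to the `proxy_py_subprocess` fixture as `request.param`, so one proxy.py subprocess is started and torn down per executor mode. A self-contained sketch of the same pattern, with invented fixture and test names:

```
from typing import Any, Generator

import pytest


@pytest.fixture
def server_flags(request: Any) -> Generator[str, None, None]:
    # With indirect=True below, each parametrize entry arrives
    # here as request.param before the test body runs.
    yield '--hostname 127.0.0.1 ' + request.param


@pytest.mark.parametrize(
    'server_flags',
    (
        '--threadless',
        '--threadless --local-executor',
        '--threaded',
    ),
    indirect=True,
)
def test_flags_reach_fixture(server_flags: str) -> None:
    assert server_flags.startswith('--hostname 127.0.0.1')
```

Moving the port allocation from a module-level constant into the fixture, as done in patch 53, is what lets the three parametrized runs pick a fresh port each time instead of sharing one.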