Skip to content

Commit

Permalink
feat: Add ScrapyProcessProtocol.__repr__. Remove redundant tests in t…
Browse files Browse the repository at this point in the history
…est_endpoints.py (renamed test_server.py).
  • Loading branch information
jpmckinney committed Jul 23, 2024
1 parent 8b947b1 commit a62293c
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 80 deletions.
6 changes: 6 additions & 0 deletions scrapyd/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ def __init__(self, project, spider, job, env, args):
self.args = args
self.deferred = defer.Deferred()

def __repr__(self):
    """Return a debugging representation of this process protocol.

    The result lists the pid, project, spider, job, start time, end time,
    environment and arguments as space-separated ``name=value`` pairs.
    """
    # Use the runtime class name, so that a subclass identifies itself
    # correctly instead of reporting the base class's name.
    return (
        f"{type(self).__name__}(pid={self.pid} project={self.project} spider={self.spider} job={self.job} "
        f"start_time={self.start_time} end_time={self.end_time} env={self.env} args={self.args})"
    )

def outReceived(self, data):
log.info(data.rstrip(), log_system=f"Launcher,{self.pid}/stdout")

Expand Down
2 changes: 1 addition & 1 deletion tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def get_egg_data(basename):
return pkgutil.get_data("tests", f"fixtures/{basename}.egg")


def has_settings(root):
def has_settings():
    """Report whether a ``scrapy.cfg`` file exists, relative to the current working directory."""
    settings_file = "scrapy.cfg"
    return os.path.exists(settings_file)


Expand Down
7 changes: 6 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,14 @@ def chdir(monkeypatch, tmp_path):
params=[
None,
(Config.SECTION, "items_dir", "items"),
]
"scrapy.cfg",
],
ids=["default", "items_dir", "settings"],
)
def app(request, chdir):
if request.param == "scrapy.cfg":
shutil.copytree(os.path.join(BASEDIR, "fixtures", "filesystem"), chdir, dirs_exist_ok=True)

config = Config()
if isinstance(request.param, tuple):
config.cp.set(*request.param)
Expand Down
41 changes: 40 additions & 1 deletion tests/test_launcher.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import datetime
import re

import pytest
from twisted.internet import error
from twisted.internet import defer, error
from twisted.logger import LogLevel, capturedLogs, eventAsText
from twisted.python import failure

from scrapyd import __version__
from scrapyd.config import Config
from scrapyd.interfaces import IEnvironment
from scrapyd.launcher import Launcher, get_crawl_args
from tests import has_settings


def message(captured):
Expand Down Expand Up @@ -73,6 +75,39 @@ def test_start_service_max_proc(app):
)


@pytest.mark.parametrize(
    ("message", "expected"),
    [
        ({}, {}),
        ({"_version": "v1"}, {"SCRAPYD_EGG_VERSION": "v1"}),
    ],
)
def test_spawn_process(launcher, message, expected):
    """Call ``_spawn_process`` and check the entry it stores in ``launcher.processes``.

    ``message`` supplies extra keys merged into the spawn message, and
    ``expected`` maps environment variable names to the values they must have.
    """
    process_key = 1
    spawn_message = {"_project": "localproject", "_spider": "s1", "_job": "j1"}
    spawn_message.update(message)
    launcher._spawn_process(spawn_message, process_key)  # noqa: SLF001

    spawned = launcher.processes[process_key]

    assert isinstance(spawned.pid, int)
    assert spawned.project == "localproject"
    assert spawned.spider == "s1"
    assert spawned.job == "j1"
    assert isinstance(spawned.start_time, datetime.datetime)
    assert spawned.end_time is None
    assert isinstance(spawned.args, list)  # see tests below
    assert isinstance(spawned.deferred, defer.Deferred)

    # scrapyd.environ.Environ.get_environment
    env = spawned.env
    assert env["SCRAPY_PROJECT"] == "localproject"
    for name in expected:
        assert env[name] == expected[name]
    if "SCRAPYD_EGG_VERSION" not in expected:
        assert "SCRAPYD_EGG_VERSION" not in env
    # The settings module is only expected when a scrapy.cfg file is present.
    if not has_settings():
        assert "SCRAPY_SETTINGS_MODULE" not in env
    else:
        assert env["SCRAPY_SETTINGS_MODULE"] == "localproject.settings"


def test_out_received(process):
with capturedLogs() as captured:
process.outReceived(b"out\n")
Expand Down Expand Up @@ -155,3 +190,7 @@ def test_process_ended_terminated(environ, process):
"args=\\['\\S+', '-m', 'scrapyd\\.runner', 'crawl', 's1', '-s', 'LOG_FILE=\\S+', '-a', '_job=j1'\\]",
message(captured),
)


def test_repr(process):
    """The repr of the process starts with its class name, pid, project, spider and job."""
    expected_prefix = f"ScrapyProcessProtocol(pid={process.pid} project=p1 spider=s1 job=j1 start_time="
    assert repr(process).startswith(expected_prefix)
77 changes: 9 additions & 68 deletions tests/test_endpoints.py → tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import pytest
import requests
from requests.models import Response

from tests import get_egg_data
from tests.mockserver import MockScrapydServer
Expand All @@ -16,35 +15,10 @@ def mock_scrapyd(chdir):
yield server


@pytest.fixture()
def quotesbot_egg():
    """Provide the "quotesbot" egg data wrapped in an in-memory binary stream."""
    egg_bytes = get_egg_data("quotesbot")
    return io.BytesIO(egg_bytes)


@pytest.fixture()
def quotesbot_egg_asyncio():
    """Provide the "quotesbot_asyncio" egg data wrapped in an in-memory binary stream."""
    # This egg file contains settings with TWISTED_REACTOR set to the asyncio reactor.
    egg_bytes = get_egg_data("quotesbot_asyncio")
    return io.BytesIO(egg_bytes)


def _deploy(mock_scrapyd, quotesbot_egg) -> Response:
    """POST the given egg to addversion.json as version 0.01 of the "quotesbot" project.

    Returns the response object from ``requests.post``.
    """
    return requests.post(
        mock_scrapyd.urljoin("addversion.json"),
        data={b"project": b"quotesbot", b"version": b"0.01"},
        files={b"egg": quotesbot_egg},
    )


def test_urljoin(mock_scrapyd):
    """``urljoin("foo")`` equals the server's ``url`` followed by ``foo``."""
    expected = f"{mock_scrapyd.url}foo"
    assert mock_scrapyd.urljoin("foo") == expected


def test_root(mock_scrapyd):
    """A GET of the server's root URL returns 200 and the text pointing users to the API."""
    home = requests.get(mock_scrapyd.url)
    body = home.text

    assert home.status_code == 200
    assert re.search("To schedule a spider you need to use the API", body)


def test_auth():
with MockScrapydServer(username="bob", password="hunter2") as server:
assert requests.get(server.url).status_code == 401
Expand Down Expand Up @@ -113,48 +87,15 @@ def test_options(mock_scrapyd, webservice, method):
assert response.headers["Allow"] == f"OPTIONS, HEAD, {method}"


def test_launch_spider_get(mock_scrapyd):
    """A GET of schedule.json returns 200 with an "error" status in the JSON body."""
    schedule_url = mock_scrapyd.urljoin("schedule.json")
    reply = requests.get(schedule_url)

    assert reply.status_code == 200
    assert reply.json()["status"] == "error"


def test_spider_list_no_project(mock_scrapyd):
    """A GET of listspiders.json without a project returns the "parameter is required" error."""
    listspiders_url = mock_scrapyd.urljoin("listspiders.json")
    reply = requests.get(listspiders_url)
    payload = reply.json()

    assert reply.status_code == 200
    assert payload["status"] == "error"
    assert payload["message"] == "'project' parameter is required"


def test_spider_list_project_no_egg(mock_scrapyd):
    """A GET of listprojects.json returns 200 with an "ok" status in the JSON body."""
    listprojects_url = mock_scrapyd.urljoin("listprojects.json")
    reply = requests.get(listprojects_url)
    payload = reply.json()

    assert reply.status_code == 200
    assert payload["status"] == "ok"


def test_addversion_and_delversion(mock_scrapyd, quotesbot_egg):
    """Deploy the egg with ``_deploy``, then delete the same version through delversion.json."""
    added = _deploy(mock_scrapyd, quotesbot_egg)
    added_json = added.json()

    assert added.status_code == 200
    assert added_json["spiders"] == 2
    assert added_json["status"] == "ok"
    assert added_json["project"] == "quotesbot"

    deleted = requests.post(
        mock_scrapyd.urljoin("delversion.json"),
        data={"project": "quotesbot", "version": "0.01"},
    )

    assert deleted.status_code == 200
    assert deleted.json()["status"] == "ok"


def test_failed_settings(mock_scrapyd, quotesbot_egg_asyncio):
response = _deploy(mock_scrapyd, quotesbot_egg_asyncio)
# https://github.com/scrapy/scrapyd/issues/377
def test_other_reactors(mock_scrapyd):
    """Post the "quotesbot_asyncio" egg to addversion.json and expect a 200 with an "ok" status."""
    # The egg's quotesbot/settings.py file sets TWISTED_REACTOR to
    # "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
    egg = io.BytesIO(get_egg_data("quotesbot_asyncio"))
    addversion_url = mock_scrapyd.urljoin("addversion.json")
    form = {b"project": b"quotesbot", b"version": b"0.01"}

    response = requests.post(addversion_url, data=form, files={b"egg": egg})

    assert response.status_code == 200
    assert response.json()["status"] == "ok"
12 changes: 6 additions & 6 deletions tests/test_webservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def scrapy_process():


def get_local_projects(root):
return ["localproject"] if has_settings(root) else []
return ["localproject"] if has_settings() else []


def add_test_version(app, project, version, basename):
Expand Down Expand Up @@ -185,7 +185,7 @@ def test_daemonstatus(txrequest, root_with_egg, scrapy_process):
],
)
def test_list_spiders(txrequest, root, args, spiders, run_only_if_has_settings):
if run_only_if_has_settings and not has_settings(root):
if run_only_if_has_settings and not has_settings():
pytest.skip("[settings] section is not set")

root_add_version(root, "myproject", "r1", "mybot")
Expand All @@ -205,7 +205,7 @@ def test_list_spiders(txrequest, root, args, spiders, run_only_if_has_settings):
],
)
def test_list_spiders_nonexistent(txrequest, root, args, param, run_only_if_has_settings):
if run_only_if_has_settings and not has_settings(root):
if run_only_if_has_settings and not has_settings():
pytest.skip("[settings] section is not set")

root_add_version(root, "myproject", "r1", "mybot")
Expand Down Expand Up @@ -437,7 +437,7 @@ def test_add_version(txrequest, root):


def test_add_version_settings(txrequest, root):
if not has_settings(root):
if not has_settings():
pytest.skip("[settings] section is not set")

args = {b"project": [b"localproject"], b"version": [b"0.1"], b"egg": [get_egg_data("quotesbot")]}
Expand All @@ -461,7 +461,7 @@ def test_add_version_invalid(txrequest, root):
],
)
def test_schedule(txrequest, root, args, run_only_if_has_settings):
if run_only_if_has_settings and not has_settings(root):
if run_only_if_has_settings and not has_settings():
pytest.skip("[settings] section is not set")

project = args[b"project"][0].decode()
Expand Down Expand Up @@ -529,7 +529,7 @@ def test_schedule_parameters(txrequest, root_with_egg):
],
)
def test_schedule_nonexistent(txrequest, root, args, param, run_only_if_has_settings):
if run_only_if_has_settings and not has_settings(root):
if run_only_if_has_settings and not has_settings():
pytest.skip("[settings] section is not set")

root_add_version(root, "myproject", "r1", "mybot")
Expand Down
6 changes: 3 additions & 3 deletions tests/test_website.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ def test_render_logs_dir(txrequest, root):
# https://github.com/twisted/twisted/blob/trunk/src/twisted/web/test/test_static.py
def test_render_logs_file(txrequest, root):
os.makedirs(os.path.join("logs", "quotesbot"))
with open(os.path.join("logs", "foo.bar"), "wb") as f:
with open(os.path.join("logs", "foo.txt"), "wb") as f:
f.write(b"baz")

file = root.children[b"logs"]
request = DummyRequest([b"foo.bar"])
request = DummyRequest([b"foo.txt"])
child = resource.getChildForRequest(file, request)

d = _render(child, request)
Expand Down Expand Up @@ -78,7 +78,7 @@ def test_render_home(txrequest, root_with_egg):
content = root_with_egg.children[b""].render_GET(txrequest)
expect_headers = {
b"Content-Type": [b"text/html; charset=utf-8"],
b"Content-Length": [b"736" if has_settings(root_with_egg) else b"714"],
b"Content-Length": [b"736" if has_settings() else b"714"],
}
if root_with_egg.local_items:
expect_headers[b"Content-Length"] = [b"751"]
Expand Down

0 comments on commit a62293c

Please sign in to comment.