Skip to content

Commit

Permalink
fix: Partially revert "Clarify error messages if project or version o…
Browse files Browse the repository at this point in the history
…f egg not found" #517 by making similar changes to Schedule as LisSpiders in 94e87da
  • Loading branch information
jpmckinney committed Jul 22, 2024
1 parent fd5e653 commit 750e07d
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 59 deletions.
22 changes: 1 addition & 21 deletions integration_tests/test_webservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_options(webservice, method):
assert response.headers["Allow"] == f"OPTIONS, HEAD, {method}"


# Cancel, Status, ListJobs and ListSpiders will error with "project '%b' not found" on directory traversal attempts.
# ListSpiders, Schedule, Cancel, Status and ListJobs return "project '%b' not found" on directory traversal attempts.
# The egg storage (in get_project_list, called by get_spider_queues, called by QueuePoller, used by these webservices)
# would need to find a project like "../project" (which is impossible with the default eggstorage) to not error.
@pytest.mark.parametrize(
Expand All @@ -72,26 +72,6 @@ def test_project_directory_traversal(webservice, method, params):
assert data == {"status": "error", "message": "DirectoryTraversalError: ../p"}


@pytest.mark.parametrize(
("webservice", "method", "params"),
[
("schedule", "post", {"spider": "s"}),
],
)
def test_project_directory_traversal_runner(webservice, method, params):
response = getattr(requests, method)(
f"http://127.0.0.1:6800/{webservice}.json",
auth=("hello12345", "67890world"),
**{"params" if method == "get" else "data": {"project": "../p", **params}},
)

data = response.json()
data.pop("node_name")

assert response.status_code == 200, f"200 != {response.status_code}"
assert data == {"status": "error", "message": "DirectoryTraversalError: ../p"}


def test_daemonstatus():
assert_webservice("get", "/daemonstatus.json", {"status": "ok", "running": 0, "pending": 0, "finished": 0})

Expand Down
7 changes: 4 additions & 3 deletions scrapyd/webservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,11 +174,12 @@ class Schedule(WsResource):
@param("priority", required=False, default=0, type=float)
@param("setting", required=False, default=list, multiple=True)
def render_POST(self, txrequest, project, spider, version, jobid, priority, setting):
if self.root.eggstorage.get(project, version) == (None, None):
if version:
raise error.Error(code=http.OK, message=b"version '%b' not found" % version.encode())
if project not in self.root.scheduler.queues:
raise error.Error(code=http.OK, message=b"project '%b' not found" % project.encode())

if version and self.root.eggstorage.get(project, version) == (None, None):
raise error.Error(code=http.OK, message=b"version '%b' not found" % version.encode())

spiders = spider_list.get(project, version, runner=self.root.runner)
if spider not in spiders:
raise error.Error(code=http.OK, message=b"spider '%b' not found" % spider.encode())
Expand Down
84 changes: 49 additions & 35 deletions tests/test_webservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,33 +457,62 @@ def test_add_version_invalid(txrequest, root):
assert_error(txrequest, root, "POST", "addversion", args, message)


def test_schedule(txrequest, root_with_egg):
assert root_with_egg.scheduler.queues["quotesbot"].list() == []
# Like test_list_spiders.
@pytest.mark.parametrize(
("args", "run_only_if_has_settings"),
[
({b"project": [b"myproject"], b"spider": [b"spider3"]}, False),
({b"project": [b"myproject"], b"_version": [b"r1"], b"spider": [b"spider1"]}, False),
({b"project": [b"localproject"], b"spider": [b"example"]}, True),
],
)
def test_schedule(txrequest, root, args, run_only_if_has_settings):
if run_only_if_has_settings and not has_settings(root):
pytest.skip("[settings] section is not set")

txrequest.args = {b"project": [b"quotesbot"], b"spider": [b"toscrape-css"]}
content = root_with_egg.children[b"schedule.json"].render_POST(txrequest)
jobs = root_with_egg.scheduler.queues["quotesbot"].list()
jobs[0].pop("_job")
project = args[b"project"][0].decode()
spider = args[b"spider"][0].decode()
version = args[b"_version"][0].decode() if b"_version" in args else None

assert len(jobs) == 1
assert jobs[0] == {"name": "toscrape-css", "settings": {}, "version": None}
assert content["status"] == "ok"
assert "jobid" in content
root_add_version(root, "myproject", "r1", "mybot")
root_add_version(root, "myproject", "r2", "mybot2")
root.update_projects()

assert root.scheduler.queues[project].list() == []

def test_schedule_nonexistent_project(txrequest, root):
args = {b"project": [b"nonexistent"], b"spider": [b"toscrape-css"]}
assert_error(txrequest, root, "POST", "schedule", args, b"project 'nonexistent' not found")
txrequest.args = args.copy()
content = root.children[b"schedule.json"].render_POST(txrequest)
jobid = content.pop("jobid")

assert content.pop("node_name")
assert content == {"status": "ok"}
assert re.search(r"^[a-z0-9]{32}$", jobid)

def test_schedule_nonexistent_version(txrequest, root_with_egg):
args = {b"project": [b"quotesbot"], b"_version": [b"nonexistent"], b"spider": [b"toscrape-css"]}
assert_error(txrequest, root_with_egg, "POST", "schedule", args, b"version 'nonexistent' not found")
jobs = root.scheduler.queues[project].list()

assert len(jobs) == 1
assert jobs[0] == {"name": spider, "settings": {}, "version": version, "_job": jobid}

def test_schedule_nonexistent_spider(txrequest, root_with_egg):
args = {b"project": [b"quotesbot"], b"spider": [b"nonexistent"]}
assert_error(txrequest, root_with_egg, "POST", "schedule", args, b"spider 'nonexistent' not found")

# Like test_list_spiders_nonexistent.
@pytest.mark.parametrize(
("args", "param", "run_only_if_has_settings"),
[
({b"project": [b"nonexistent"], b"spider": [b"spider1"]}, "project", False),
({b"project": [b"myproject"], b"_version": [b"nonexistent"], b"spider": [b"spider1"]}, "version", False),
({b"project": [b"myproject"], b"spider": [b"nonexistent"]}, "spider", False),
({b"project": [b"localproject"], b"_version": [b"nonexistent"], b"spider": [b"example"]}, "version", True),
],
)
def test_schedule_nonexistent(txrequest, root, args, param, run_only_if_has_settings):
if run_only_if_has_settings and not has_settings(root):
pytest.skip("[settings] section is not set")

root_add_version(root, "myproject", "r1", "mybot")
root_add_version(root, "myproject", "r2", "mybot2")
root.update_projects()

assert_error(txrequest, root, "POST", "schedule", args, b"%b 'nonexistent' not found" % param.encode())


@pytest.mark.parametrize("args", [{}, {b"signal": [b"TERM"]}])
Expand Down Expand Up @@ -523,7 +552,7 @@ def test_cancel_nonexistent(txrequest, root):
assert_error(txrequest, root, "POST", "cancel", args, b"project 'nonexistent' not found")


# Cancel, Status, ListJobs and ListSpiders error with "project '%b' not found" on directory traversal attempts.
# ListSpiders, Schedule, Cancel, Status and ListJobs return "project '%b' not found" on directory traversal attempts.
# The egg storage (in get_project_list, called by get_spider_queues, called by QueuePoller, used by these webservices)
# would need to find a project like "../project" (which is impossible with the default eggstorage) to not error.
@pytest.mark.parametrize(
Expand Down Expand Up @@ -561,18 +590,3 @@ def test_project_directory_traversal(txrequest, root, endpoint, attach_egg, meth

eggstorage = root.app.getComponent(IEggStorage)
assert eggstorage.get("quotesbot") == (None, None)


@pytest.mark.parametrize(
("endpoint", "method"),
[
(b"schedule.json", "render_POST"),
],
)
def test_project_directory_traversal_runner(txrequest, root, endpoint, method):
txrequest.args = {b"project": [b"../p"], b"spider": [b"s"]}

with pytest.raises(DirectoryTraversalError) as exc:
getattr(root.children[endpoint], method)(txrequest)

assert str(exc.value) == "../p"

0 comments on commit 750e07d

Please sign in to comment.