Skip to content

Commit

Permalink
webservice: extend allowed character list
Browse files Browse the repository at this point in the history
  • Loading branch information
pawelmhm committed Dec 3, 2021
1 parent 767fac5 commit efb6af1
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 11 deletions.
16 changes: 15 additions & 1 deletion scrapyd/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from scrapy.utils.test import get_pythonpath
from scrapyd.interfaces import IEggStorage
from scrapyd.utils import get_crawl_args, get_spider_list, UtilsCache
from scrapyd.utils import get_crawl_args, get_spider_list, UtilsCache, check_disallowed_characters
from scrapyd import get_application

def get_pythonpath_scrapyd():
Expand Down Expand Up @@ -128,3 +128,17 @@ def popen_wrapper(*args, **kwargs):
r'Exception: This should break the `scrapy list` command$'
)
self.assertRegexpMatches(tb, tb_regex)


@pytest.mark.parametrize('project_name, allowed', [
    ('/hello/world', False),
    ('@hello/world', False),
    ('hello world', True),
    ('hello_world', True),
    ('hello-world', True),
    ('C:\\hello\\world', False),
    ("chrząścz", False),
    ('"hello world"', True),
])
def test_disallowed_chars(project_name, allowed):
    """Check the verdict of check_disallowed_characters for each sample name.

    ``allowed`` is the expected return value: True when the name contains
    only permitted characters, False when it contains a banned one (path
    separators, ``@``, ``:``, non-ASCII letters).
    """
    verdict = check_disallowed_characters(project_name)
    assert verdict == allowed
7 changes: 7 additions & 0 deletions scrapyd/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys
import os
import re
from .sqlite import JsonSqliteDict
from subprocess import Popen, PIPE
import six
Expand Down Expand Up @@ -154,3 +155,9 @@ def _to_native_str(text, encoding='utf-8', errors='strict'):
return text.encode(encoding, errors)
else:
return text.decode(encoding, errors)


def check_disallowed_characters(text):
    """Return True when *text* contains no disallowed characters.

    Despite the name, a True result means the text is acceptable and a
    False result means at least one banned character was found.

    Allowed characters are ASCII letters and digits, the dot, underscore,
    hyphen, double quote, and whitespace. Everything else (path separators,
    ``@``, ``:``, non-ASCII letters, ...) is banned.

    An empty string contains no banned character and therefore yields True;
    callers that require a non-empty name must check that separately.
    """
    # Negated character class: matches any single character that is NOT in
    # the allowed set "A-Za-z0-9.\s_- (note that the double quote is allowed).
    # One match is enough to reject, so no quantifier is needed.
    disallowed_characters_regex = r'[^"A-Za-z0-9.\s_-]'
    return re.search(disallowed_characters_regex, text) is None
18 changes: 8 additions & 10 deletions scrapyd/webservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,21 @@
from twisted.web.http import Request

from .utils import get_spider_list, JsonResource, UtilsCache, \
native_stringify_dict
native_stringify_dict, check_disallowed_characters


def with_safe_project_name(func):
    """Decorate a webservice handler so it receives a validated project name.

    The returned wrapper pops the ``project`` argument from
    ``txrequest.args``, decodes it to ``str``, validates it, and then calls
    ``func(resource, txrequest, project_name)``.

    The wrapper raises ``Error`` with code 400 when the project argument is
    missing, empty, or rejected by ``check_disallowed_characters``.
    """
    @functools.wraps(func)
    def wrapper(resource, txrequest):
        raw_name = txrequest.args.pop(b'project', [None])[0]
        # Decode only when a value was supplied: calling .decode() on the
        # None placeholder would raise AttributeError instead of the 400
        # below. errors='replace' turns invalid UTF-8 into U+FFFD, which the
        # character check rejects, so malformed bytes also end in a 400
        # rather than an unhandled UnicodeDecodeError.
        if raw_name is None:
            project_name = None
        else:
            project_name = raw_name.decode(errors='replace')

        msg = "Project name is required and must be a valid string. "
        msg += f"Project name '{project_name}' is not a valid project name."
        msg = msg.encode()

        # Missing (None) and empty ('') names are both rejected here.
        if not project_name:
            raise Error(code=400, message=msg)

        if not check_disallowed_characters(project_name):
            raise Error(code=400, message=msg)
        return func(resource, txrequest, project_name)

    return wrapper
Expand Down

0 comments on commit efb6af1

Please sign in to comment.