Skip to content

Commit

Permalink
windows: Move get_spider_list back to utils
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Jul 19, 2024
1 parent b2dcb2b commit d2ef9ef
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 78 deletions.
77 changes: 77 additions & 0 deletions scrapyd/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,85 @@
import os
import sys
from subprocess import PIPE, Popen
from typing import ClassVar
from urllib.parse import urlsplit

from scrapy.utils.misc import load_object

from scrapyd.config import Config
from scrapyd.exceptions import RunnerError
from scrapyd.sqlite import JsonSqliteDict


class UtilsCache:
    """Dict-like cache of spider lists with deferred, per-project invalidation.

    Entries are stored in a ``JsonSqliteDict`` using the ``utils_cache_manager`` table.
    Invalidation requests are only recorded by :meth:`invalid_cache`; the matching
    entries are actually removed the next time an item is read.
    """

    # Names of the projects whose cached entries must be dropped before the next read.
    # This list is shared by every instance of the class.
    invalid_cached_projects: ClassVar = []

    def __init__(self):
        self.cache_manager = JsonSqliteDict(table="utils_cache_manager")

    @staticmethod
    def invalid_cache(project):
        """Flag the cached spider list of ``project`` (a project name) as stale."""
        UtilsCache.invalid_cached_projects.append(project)

    def __getitem__(self, key):
        """Drop every entry flagged as stale, then return the entry stored under ``key``."""
        pending = UtilsCache.invalid_cached_projects
        store = self.cache_manager
        for name in pending:
            if name in store:
                del store[name]
        # Empty the shared list in place, so that other references to it stay valid.
        pending[:] = []
        return store[key]

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``."""
        self.cache_manager[key] = value

    def __repr__(self):
        return f"UtilsCache(cache_manager={self.cache_manager!r})"


def get_spider_list(project, runner=None, pythonpath=None, version=None):
    """Return the spider list from the given project, using the given runner.

    Results are cached per project and version in ``get_spider_list.cache``. On a cache
    miss, ``<sys.executable> -m <runner> list -s LOG_STDOUT=0`` is run in a subprocess,
    and each line of its standard output is taken as one spider name.

    :param project: the project name, exported to the subprocess as ``SCRAPY_PROJECT``
    :param runner: the module to run with ``-m``; if ``None``, the ``runner`` value of
        the configuration is used
    :param pythonpath: if truthy, exported to the subprocess as ``PYTHONPATH``
    :param version: if truthy, exported to the subprocess as ``SCRAPYD_EGG_VERSION``
    :return: the list of lines printed by the runner
    :raises RunnerError: if the subprocess exits with a non-zero return code
    """

    # UtilsCache uses JsonSqliteDict, which encodes the project's value as JSON, but JSON allows only string keys,
    # so the stored dict will have a "null" key, instead of a None key.
    if version is None:
        version = ""

    # The cache is created lazily and kept as an attribute of this function.
    if "cache" not in get_spider_list.__dict__:
        get_spider_list.cache = UtilsCache()
    try:
        return get_spider_list.cache[project][version]
    except KeyError:
        pass

    if runner is None:
        runner = Config().get("runner")

    env = os.environ.copy()
    env["PYTHONIOENCODING"] = "UTF-8"
    env["SCRAPY_PROJECT"] = project
    if pythonpath:
        env["PYTHONPATH"] = pythonpath
    if version:
        env["SCRAPYD_EGG_VERSION"] = version
    pargs = [sys.executable, "-m", runner, "list", "-s", "LOG_STDOUT=0"]
    proc = Popen(pargs, stdout=PIPE, stderr=PIPE, env=env)
    out, err = proc.communicate()
    if proc.returncode:
        # Both streams are bytes. The fallback must be bytes as well, otherwise a runner that
        # fails without any output raises AttributeError (str has no decode) instead of RunnerError.
        msg = (err or out or b"").decode("utf8")
        raise RunnerError(msg)

    spiders = out.decode("utf-8").splitlines()
    # Read the project's entry, update it and write it back, so that the cache stores the new value.
    try:
        project_cache = get_spider_list.cache[project]
    except KeyError:
        project_cache = {}
    project_cache[version] = spiders
    get_spider_list.cache[project] = project_cache

    return spiders


def get_spider_queues(config):
"""Return a dict of Spider Queues keyed by project name"""
Expand Down
79 changes: 2 additions & 77 deletions scrapyd/webservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,19 @@

import functools
import json
import os
import sys
import traceback
import uuid
import zipfile
from copy import copy
from io import BytesIO
from subprocess import PIPE, Popen
from typing import ClassVar

from twisted.python import log
from twisted.web import error, http, resource

from scrapyd.config import Config
from scrapyd.exceptions import EggNotFoundError, ProjectNotFoundError, RunnerError
from scrapyd.exceptions import EggNotFoundError, ProjectNotFoundError
from scrapyd.jobstorage import job_items_url, job_log_url
from scrapyd.sqlite import JsonSqliteDict
from scrapyd.utils import native_stringify_dict
from scrapyd.utils import UtilsCache, get_spider_list, native_stringify_dict


def param(
Expand Down Expand Up @@ -58,76 +53,6 @@ def wrapper(self, txrequest, *args, **kwargs):
return decorator


def get_spider_list(project, runner=None, pythonpath=None, version=None):
    """Return the spider list from the given project, using the given runner.

    Results are cached per project and version in ``get_spider_list.cache``. On a cache
    miss, ``<sys.executable> -m <runner> list -s LOG_STDOUT=0`` is run in a subprocess,
    and each line of its standard output is taken as one spider name.

    :param project: the project name, exported to the subprocess as ``SCRAPY_PROJECT``
    :param runner: the module to run with ``-m``; if ``None``, the ``runner`` value of
        the configuration is used
    :param pythonpath: if truthy, exported to the subprocess as ``PYTHONPATH``
    :param version: if truthy, exported to the subprocess as ``SCRAPYD_EGG_VERSION``
    :return: the list of lines printed by the runner
    :raises RunnerError: if the subprocess exits with a non-zero return code
    """

    # UtilsCache uses JsonSqliteDict, which encodes the project's value as JSON, but JSON allows only string keys,
    # so the stored dict will have a "null" key, instead of a None key.
    if version is None:
        version = ""

    # The cache is created lazily and kept as an attribute of this function.
    if "cache" not in get_spider_list.__dict__:
        get_spider_list.cache = UtilsCache()
    try:
        return get_spider_list.cache[project][version]
    except KeyError:
        pass

    if runner is None:
        runner = Config().get("runner")

    env = os.environ.copy()
    env["PYTHONIOENCODING"] = "UTF-8"
    env["SCRAPY_PROJECT"] = project
    if pythonpath:
        env["PYTHONPATH"] = pythonpath
    if version:
        env["SCRAPYD_EGG_VERSION"] = version
    pargs = [sys.executable, "-m", runner, "list", "-s", "LOG_STDOUT=0"]
    proc = Popen(pargs, stdout=PIPE, stderr=PIPE, env=env)
    out, err = proc.communicate()
    if proc.returncode:
        # Both streams are bytes. The fallback must be bytes as well, otherwise a runner that
        # fails without any output raises AttributeError (str has no decode) instead of RunnerError.
        msg = (err or out or b"").decode("utf8")
        raise RunnerError(msg)

    spiders = out.decode("utf-8").splitlines()
    # Read the project's entry, update it and write it back, so that the cache stores the new value.
    try:
        project_cache = get_spider_list.cache[project]
    except KeyError:
        project_cache = {}
    project_cache[version] = spiders
    get_spider_list.cache[project] = project_cache

    return spiders


class UtilsCache:
    """Dict-like cache of spider lists with deferred, per-project invalidation.

    Entries are stored in a ``JsonSqliteDict`` using the ``utils_cache_manager`` table.
    Invalidation requests are only recorded by :meth:`invalid_cache`; the matching
    entries are actually removed the next time an item is read.
    """

    # Names of the projects whose cached entries must be dropped before the next read.
    # This list is shared by every instance of the class.
    invalid_cached_projects: ClassVar = []

    def __init__(self):
        self.cache_manager = JsonSqliteDict(table="utils_cache_manager")

    @staticmethod
    def invalid_cache(project):
        """Flag the cached spider list of ``project`` (a project name) as stale."""
        UtilsCache.invalid_cached_projects.append(project)

    def __getitem__(self, key):
        """Drop every entry flagged as stale, then return the entry stored under ``key``."""
        pending = UtilsCache.invalid_cached_projects
        store = self.cache_manager
        for name in pending:
            if name in store:
                del store[name]
        # Empty the shared list in place, so that other references to it stay valid.
        pending[:] = []
        return store[key]

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``."""
        self.cache_manager[key] = value

    def __repr__(self):
        return f"UtilsCache(cache_manager={self.cache_manager!r})"


class JsonResource(resource.Resource):
json_encoder = json.JSONEncoder()

Expand Down
2 changes: 1 addition & 1 deletion tests/test_webservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def popen_wrapper(*args, **kwargs):
cmd = [cmd[0], "-W", "ignore"] + cmd[1:]
return Popen(cmd, *args, **kwargs)

with mock.patch("scrapyd.webservice.Popen", wraps=popen_wrapper):
with mock.patch("scrapyd.utils.Popen", wraps=popen_wrapper):
exc = self.assertRaises(RunnerError, get_spider_list, "mybot3", pythonpath=pypath)
self.assertRegex(str(exc).rstrip(), r"Exception: This should break the `scrapy list` command$")

Expand Down

0 comments on commit d2ef9ef

Please sign in to comment.