Skip to content

Commit

Permalink
code complete for editref endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
mojomonger committed Jul 25, 2024
1 parent 2769dc3 commit f6d6168
Show file tree
Hide file tree
Showing 7 changed files with 158 additions and 35 deletions.
4 changes: 2 additions & 2 deletions src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@
from src.views.statistics.references import References
from src.views.statistics.xhtml import Xhtml

# new stuff apr 2024
from src.views.v2.article_cache_view_v2 import ArticleCacheV2
# new stuff jan 2024
from src.views.v2.article_view_v2 import ArticleV2
from src.views.version import Version
# new stuff apr 2024
from src.views.v2.article_cache_view_v2 import ArticleCacheV2
# new stuff jun 2024
from src.views.v2.editref_v2 import EditRefV2

Expand Down
63 changes: 49 additions & 14 deletions src/models/v2/job/editref_job_v2.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,41 @@
# import re
# from urllib.parse import quote, unquote
import re
from urllib.parse import quote, unquote
#
# import requests
#
# import config
# from src.models.exceptions import MissingInformationError, WikipediaApiFetchError
# from src.models.wikimedia.enums import WikimediaDomain
from src.models.wikimedia.enums import WikimediaDomain
from src import MissingInformationError
from src.models.v2.job import JobV2


class EditRefJobV2(JobV2):
"""job that supports EditRefV2 endpoint"""

target: str = ""
replace: str = ""
old_ref: str = ""
new_ref: str = ""

source: str = ""
wiki_page_url: str = ""

wiki_lang: str = ""
wiki_domain: WikimediaDomain = WikimediaDomain.wikipedia
wiki_page_title: str = ""
wiki_revision: str = ""

regex_wiki_url = r"https?://(\w+)\.(\w+\.\w+)/wiki/(.+)"
# NB this should be elevated to a globalish constant

@property
def quoted_title(self):
    """
    Percent-encoded form of wiki_page_title, usable as a URL path segment.

    No characters are treated as safe, so "/" is encoded as well.

    Raises:
        MissingInformationError: wiki_page_title is empty.
    """
    title = self.wiki_page_title
    if title:
        return quote(title, safe="")
    raise MissingInformationError("self.wiki_page_title is empty")

# these following (commented) functions might be useful when we
# have a wikipage id rather than a string to describe source

# @property
# def quoted_title(self):
# if not self.title:
# raise MissingInformationError("self.title was empty")
# return quote(self.title, safe="")

# def get_mediawiki_ids(self) -> None:
# from src import app
#
Expand Down Expand Up @@ -88,11 +99,35 @@ class EditRefJobV2(JobV2):
# if not matches:
# app.logger.error("Not a supported Wikimedia URL")


def validate_fields(self):
    """
    Check the request parameters and derive the wiki parts of the page URL.

    At least one of "source" or "wiki_page_url" must be supplied. When
    "source" is non-empty it is used as-is and no URL parsing is done.
    Otherwise "wiki_page_url" is unquoted and matched against
    regex_wiki_url to fill in wiki_lang, wiki_domain and wiki_page_title.

    Raises:
        MissingInformationError: neither parameter was supplied, or
            "wiki_page_url" is not a supported Wikimedia page URL.
    """
    from src import app

    if self.source:
        # literal source text was supplied, so there is nothing to derive from a URL
        return

    if not self.wiki_page_url:
        app.logger.error(
            'Parameters must contain a valid "source" or valid "wiki_page_url" value. '
            f"source: {self.source}, wiki_page_url: {self.wiki_page_url}"
        )
        raise MissingInformationError(
            'Parameters must contain a valid "source" or valid "wiki_page_url" value.'
        )

    # expected shape: https://<lang>.<domain>/wiki/<title>
    parsed = None
    matches = re.match(self.regex_wiki_url, unquote(self.wiki_page_url))
    if matches:
        lang, domain, title = matches.groups()
        try:
            # NOTE(review): assumes WikimediaDomain is an Enum, so an unknown
            # domain raises ValueError here - confirm against src.models.wikimedia.enums
            parsed = (lang, WikimediaDomain(domain), title)
        except ValueError:
            # URL shape matched, but the domain is not one we support
            parsed = None

    if parsed is None:
        app.logger.error(f"{self.wiki_page_url} is not a supported Wikimedia URL")
        raise MissingInformationError(
            f"wiki_page_url parameter value ({self.wiki_page_url}) is invalid."
        )

    self.wiki_lang, self.wiki_domain, self.wiki_page_title = parsed

7 changes: 4 additions & 3 deletions src/models/v2/schema/editref_schema_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ class EditRefSchemaV2(BaseSchemaV2):
# Defines expected parameters for EditRefV2 endpoint
# - default parameters are defined in BaseSchemaV2

target = fields.Str(required=True)
replace = fields.Str(required=True)
source = fields.Str(required=True)
old_ref = fields.Str(required=True)
new_ref = fields.Str(required=True)
source = fields.Str(required=False)
wiki_page_url = fields.Str(required=False)

# noinspection PyUnusedLocal
@post_load
Expand Down
1 change: 1 addition & 0 deletions src/models/wikimedia/wikipedia/reference/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,7 @@ def extract_and_check(self) -> None:

self.__extract_templates_and_parameters__()
self.__extract_reference_urls__()

self.__extract_unique_first_level_domains__()
self.__generate_reference_id__()

Expand Down
6 changes: 3 additions & 3 deletions src/views/statistics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def __validate_and_get_job__(self):
def __validate__(self):
from src import app

app.logger.debug("StatisticsView::__validate__")
app.logger.debug("==> StatisticsView::__validate__")

errors = self.schema.validate(request.args)
if errors:
Expand All @@ -52,7 +52,7 @@ def __validate__(self):
def __parse_into_job__(self):
from src import app

app.logger.debug("__parse_into_job__: running")
app.logger.debug("==> StatisticsView::__parse_into_job__")
# app.logger.debug(request.args)
if not self.schema:
raise MissingInformationError()
Expand All @@ -61,6 +61,6 @@ def __parse_into_job__(self):
if not self.job:
# this seems to be the case when there are no arguments, as in the
# /version endpoint. Seems to be harmless not having a valid job property
console.print("self.job is null")
app.logger.info("StatisticsView: self.job is null")

console.print(self.job)
109 changes: 97 additions & 12 deletions src/views/v2/editref_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
from typing import Any, Optional, Tuple
import traceback

from dateutil.parser import isoparse

import config
import requests

from src.models.exceptions import MissingInformationError, WikipediaApiFetchError

from src.models.v2.schema.editref_schema_v2 import EditRefSchemaV2
Expand All @@ -26,9 +31,10 @@ class EditRefV2(StatisticsViewV2):
replaces search string with replace string in source string, and returns results
"""

schema = EditRefSchemaV2() # overrides StatisticsViewV2's schema property
job: EditRefJobV2 # overrides StatisticsViewV2's job property
schema = EditRefSchemaV2() # Defines expected parameters; Overrides StatisticsViewV2's "schema" property
job: EditRefJobV2 # Holds usable variables, seeded from schema. Overrides StatisticsViewV2's "job"

source_text = ""
replaced_data = ""

def get(self):
Expand All @@ -39,7 +45,7 @@ def get(self):
from src import app
app.logger.debug("==> EditRefV2::get")

return self.__process_data__()
return self.__process_data__(method="get")

def post(self):
"""
Expand All @@ -57,18 +63,39 @@ def __process_data__(self, method="get"):
try:
self.__validate_and_get_job__(method) # inherited from StatisticsViewV2
#
# validates via schema (a marshmallow feature) and sets job values wia schema's values
# validates schema params (a marshmallow feature), and sets job properties based on schema's values
"""
url = (
f"https://{self.job.lang}.{self.job.domain.value}/"
f"w/rest.php/v1/page/{self.job.quoted_title}"
)
headers = {"User-Agent": config.user_agent}
response = requests.get(url, headers=headers)
# console.print(response.json())
if response.status_code == 200:
data = response.json()
self.job.revision = int(data["latest"]["id"])
self.revision_isodate = isoparse(data["latest"]["timestamp"])
self.revision_timestamp = round(self.revision_isodate.timestamp())
self.page_id = int(data["id"])
# logger.debug(f"Got pageid: {self.page_id}")
self.wikitext = data["source"]
"""
# set up source_text
self.__setup_source_text__() # setup source_text to be

# set up results
self.__replace_data__() # sets self.replaced_data
self.__replace_data__() # self.replaced_data holds newly edited source

# and return results
return {
"target": self.job.target,
"replace": self.job.replace,
"source": self.job.source,
"result": self.replaced_data
}
# return {
# "old_ref": self.job.old_ref,
# "new_ref": self.job.new_ref,
# # "source": self.job.source,
# "result": self.replaced_data
# }
return self.replaced_data


except MissingInformationError as e:
app.logger.debug("after EditRefV2::self.__validate_and_get_job__ MissingInformationError exception")
Expand All @@ -80,10 +107,68 @@ def __process_data__(self, method="get"):
traceback.print_exc()
return {"error": f"General Error: {str(e)}"}, 500

def __setup_source_text__(self):
    """
    Populate self.source_text with the text that replacements run against.

    Uses job.source when it is non-empty. Otherwise the page identified by
    the job's wiki_lang, wiki_domain and quoted_title is requested from the
    wiki's REST endpoint (w/rest.php/v1/page/<title>) and the "source" field
    of the JSON response becomes source_text. The latest revision id, its
    timestamp and the page id from the same response are recorded as well.

    Raises:
        WikipediaApiFetchError: the page request did not return HTTP 200.
    """
    from src import app
    app.logger.debug("==> EditRefV2::__setup_source_text__")

    if self.job.source:
        self.source_text = self.job.source
        return

    # no literal source given: grab wikitext from wiki_page_url
    url = (
        f"https://{self.job.wiki_lang}.{self.job.wiki_domain.value}/"
        f"w/rest.php/v1/page/{self.job.quoted_title}"
    )
    app.logger.debug(f"==> EditRefV2::__setup_source_text__: url to grab is: {url}")

    headers = {"User-Agent": config.user_agent}
    # NOTE(review): no timeout is passed, so a stalled wiki request can block this call
    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        # fail loudly; continuing would run the replacement on an empty source_text
        app.logger.error(f"==> EditRefV2::__setup_source_text__: wikitext fetch was unsuccessful "
                         f"({self.job.wiki_page_url})")
        # NOTE(review): assumes WikipediaApiFetchError accepts a message argument - confirm
        raise WikipediaApiFetchError(
            f"wikitext fetch was unsuccessful ({self.job.wiki_page_url}), "
            f"status code {response.status_code}"
        )

    data = response.json()

    # NOTE(review): job.wiki_revision is declared as str on the job but receives an int here
    self.job.wiki_revision = int(data["latest"]["id"])
    self.revision_isodate = isoparse(data["latest"]["timestamp"])
    self.revision_timestamp = round(self.revision_isodate.timestamp())
    self.page_id = int(data["id"])

    self.source_text = data["source"]

def __replace_data__(self):
    """
    Apply the reference replacement to self.source_text.

    Every occurrence of job.old_ref in source_text is replaced with
    job.new_ref and the result is stored in self.replaced_data. The text
    before and after the replacement is written to the debug log.
    """
    from src import app
    app.logger.debug("==> EditRefV2::__replace_data__")

    # visual separator plus banner, then the untouched source text
    for line in ("==>", "==>", "==>", "==>", "==> SOURCE", "==>"):
        app.logger.debug(line)
    app.logger.debug(self.source_text)

    self.replaced_data = self.source_text.replace(self.job.old_ref, self.job.new_ref)

    for line in ("==>", "==> REPLACED", "==>"):
        app.logger.debug(line)
    app.logger.debug(self.replaced_data)


3 changes: 2 additions & 1 deletion src/views/v2/statistics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,12 @@ def __read_from_cache__(self):
def __validate_and_get_job__(self, method="get"):
"""
Validates request params, whether from GET or POST, and,
if successful, pulls those param values into job object
if successful, pulls param values into job's properties
"""
from src import app
app.logger.debug(f"==> StatisticsViewV2::__validate_and_get_job__({method})")

# use args if GET, form if POST
request_args = request.args if (method == "get") else request.form

self.__validate__(request_args)
Expand Down

0 comments on commit f6d6168

Please sign in to comment.