-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
7722e6b
commit 2769dc3
Showing
17 changed files
with
269 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[tool.poetry] | ||
name = "Internet Archive Reference Inventory (IARI)" | ||
version = "4.4.0" | ||
version = "4.4.1" | ||
description = "API capable of fetching, extracting, transforming and storing reference information from Wikipedia articles, websites and PDFs as structured data." | ||
authors = [ | ||
"Chris Lombardi <[email protected]>", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
# import re | ||
# from urllib.parse import quote, unquote | ||
# | ||
# import requests | ||
# | ||
# import config | ||
# from src.models.exceptions import MissingInformationError, WikipediaApiFetchError | ||
# from src.models.wikimedia.enums import WikimediaDomain | ||
from src.models.v2.job import JobV2 | ||
|
||
|
||
class EditRefJobV2(JobV2):
    """Job object carrying the request parameters of the EditRefV2 endpoint.

    All three fields default to the empty string.
    """

    # NOTE(review): presumably `target` is the text searched for, `replace`
    # its replacement and `source` the text operated on -- confirm against
    # the view that consumes this job.
    target: str = ""
    replace: str = ""
    source: str = ""

    # The commented-out helpers below are kept for reference: they might be
    # useful once the source is identified by a wikipage id rather than
    # being passed in as a plain string.

    # @property
    # def quoted_title(self):
    #     if not self.title:
    #         raise MissingInformationError("self.title was empty")
    #     return quote(self.title, safe="")

    # def get_mediawiki_ids(self) -> None:
    #     from src import app
    #
    #     app.logger.debug(
    #         f"ArticleJobV2::get_mediawiki_ids: self.page_id={self.page_id}"
    #     )
    #
    #     if not self.page_id:
    #         app.logger.debug(
    #             f"ArticleJobV2::get_mediawiki_ids: lang={self.lang}, title={self.title}, domain={self.domain}"
    #         )
    #         if not self.lang or not self.title or not self.domain:
    #             raise MissingInformationError("url lang, title or domain not found")
    #
    #         # https://stackoverflow.com/questions/31683508/wikipedia-mediawiki-api-get-pageid-from-url
    #         wiki_fetch_url = (
    #             f"https://{self.lang}.{self.domain.value}/"
    #             f"w/rest.php/v1/page/{self.quoted_title}"
    #         )
    #         headers = {"User-Agent": config.user_agent}
    #         response = requests.get(wiki_fetch_url, headers=headers)
    #         # console.print(response.json())
    #         if response.status_code == 200:
    #             data = response.json()
    #             # We only set this if the patron did not specify a revision they want
    #             if not self.revision:
    #                 self.revision = int(data["latest"]["id"])
    #             self.page_id = int(data["id"])
    #
    #         elif response.status_code == 404:
    #             app.logger.error(
    #                 f"Could not fetch page data from {self.domain} because of 404. See {wiki_fetch_url}"
    #             )
    #         else:
    #             raise WikipediaApiFetchError(
    #                 f"Could not fetch page data. Got {response.status_code} from {wiki_fetch_url}"
    #             )

    # def __urldecode_url__(self):
    #     """We decode the title to have a human readable string to pass around"""
    #     self.url = unquote(self.url)
    #
    # def __extract_url__(self):
    #     """This was generated with help of chatgpt using this prompt:
    #     I want a python re regex that extracts "en" "wikipedia.org"
    #     and "Test" from http://en.wikipedia.org/wiki/Test
    #     """
    #     from src import app
    #
    #     app.logger.debug("extract_url: running")
    #     if self.url:
    #         self.__urldecode_url__()
    #         wiki_url_pattern = r"https?://(\w+)\.(\w+\.\w+)/wiki/(.+)"
    #
    #         matches = re.match(wiki_url_pattern, self.url)
    #         if matches:
    #             groups = matches.groups()
    #             self.lang = groups[0]
    #             self.domain = WikimediaDomain(groups[1])
    #             self.title = groups[2]
    #         if not matches:
    #             app.logger.error("Not a supported Wikimedia URL")

    def validate_fields(self) -> None:
        """Hook for checking the job's parameters; currently a no-op.

        Nothing is validated yet. A future version may parse a wikipage id
        here (see the commented-out ``__extract_url__`` helper above).
        """
        # self.__extract_url__()  # may want to do something to parse wikipage id in the future
        return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
from marshmallow import fields, post_load | ||
|
||
from src.models.v2.job.editref_job_v2 import EditRefJobV2 | ||
from src.models.v2.schema import BaseSchemaV2 | ||
|
||
|
||
class EditRefSchemaV2(BaseSchemaV2):
    """Marshmallow schema declaring the parameters expected by the EditRefV2 endpoint.

    The default parameters are defined in BaseSchemaV2; this schema adds the
    three required string parameters ``target``, ``replace`` and ``source``.
    """

    target = fields.Str(required=True)
    replace = fields.Str(required=True)
    source = fields.Str(required=True)

    # noinspection PyUnusedLocal
    @post_load
    def return_job_object(self, data, **kwargs) -> EditRefJobV2:  # type: ignore # dead: disable
        """Build the job object from the loaded request arguments.

        ``post_load`` is a marshmallow directive: this method runs after the
        request args have been loaded. ``**kwargs`` is needed in the
        signature despite what the validator claims.

        :param data: the loaded request arguments, expanded as keyword
            arguments into ``EditRefJobV2``.
        :return: the job, after ``validate_fields()`` has been called on it.
        """
        from src import app

        app.logger.debug("==> EditRefSchemaV2::@post_load:return_job_object")

        edit_job = EditRefJobV2(**data)
        edit_job.validate_fields()

        # NB: job field values can still be adjusted here before returning
        return edit_job
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
# from flask_restful import Resource, abort # type: ignore | ||
# from marshmallow import Schema | ||
from datetime import datetime | ||
from typing import Any, Optional, Tuple | ||
import traceback | ||
|
||
from src.models.exceptions import MissingInformationError, WikipediaApiFetchError | ||
|
||
from src.models.v2.schema.editref_schema_v2 import EditRefSchemaV2 | ||
from src.models.v2.job.editref_job_v2 import EditRefJobV2 | ||
|
||
# from src.models.v2.file_io.article_file_io_v2 import ArticleFileIoV2 | ||
# from src.models.v2.wikimedia.wikipedia.analyzer_v2 import WikipediaAnalyzerV2 | ||
# from src.models.wikimedia.enums import AnalyzerReturnValues, WikimediaDomain | ||
from src.views.v2.statistics import StatisticsViewV2 | ||
|
||
|
||
from src.helpers.get_version import get_poetry_version | ||
|
||
|
||
class EditRefV2(StatisticsViewV2):
    """
    replaces search string with replace string in source string, and returns results

    Both GET and POST are supported and share one implementation. Every
    code path returns a ``(payload, response_code)`` tuple.
    """

    # TODO Since no setup_io is needed for this endpoint, we could maybe
    #   base this on an "Execution" view? or a generic "Action" view?

    schema = EditRefSchemaV2()  # overrides StatisticsViewV2's schema property
    job: EditRefJobV2  # overrides StatisticsViewV2's job property

    # result of the most recent replacement; set by __replace_data__
    replaced_data = ""

    def get(self) -> Tuple[Any, int]:
        """
        flask GET entrypoint for returning editref results
        must return a tuple: (Any,response_code)
        """
        from src import app

        app.logger.debug("==> EditRefV2::get")

        return self.__process_data__()

    def post(self) -> Tuple[Any, int]:
        """
        flask POST entrypoint for returning editref results
        must return a tuple: (Any,response_code)
        """
        from src import app

        app.logger.debug("==> EditRefV2::post")

        return self.__process_data__(method="post")

    def __process_data__(self, method="get") -> Tuple[Any, int]:
        """Validate the request, perform the replacement and build the response.

        :param method: HTTP method name ("get" or "post"), passed on to the
            inherited ``__validate_and_get_job__``.
        :return: ``(payload, 200)`` on success, where the payload echoes
            ``target``, ``replace`` and ``source`` and carries the outcome
            under ``result``; ``({"error": ...}, 500)`` on any exception.
        """
        from src import app

        try:
            # inherited from StatisticsViewV2: validates via schema (a
            # marshmallow feature) and sets job values via schema's values
            self.__validate_and_get_job__(method)

            # set up results
            self.__replace_data__()  # sets self.replaced_data

            # and return results, with an explicit status code so that the
            # success path honours the same (data, code) contract as the
            # error paths below
            return {
                "target": self.job.target,
                "replace": self.job.replace,
                "source": self.job.source,
                "result": self.replaced_data,
            }, 200

        except MissingInformationError as e:
            # logger.exception records the traceback through the app's
            # logging configuration instead of printing it to stderr
            app.logger.exception(
                "after EditRefV2::self.__validate_and_get_job__ MissingInformationError exception"
            )
            return {"error": f"Missing Information Error: {e}"}, 500

        except Exception as e:
            # top-level boundary of the endpoint: report rather than crash
            app.logger.exception(
                "after EditRefV2::self.__validate_and_get_job__ exception"
            )
            return {"error": f"General Error: {e}"}, 500

    def __replace_data__(self) -> None:
        """Store in ``self.replaced_data`` the job's source with every
        occurrence of the job's target replaced by the job's replace string.
        """
        from src import app

        app.logger.debug("==> EditRefV2::__replace_data__")

        # NOTE(review): with an empty target, str.replace inserts the
        # replacement before every character and at the end of the source;
        # confirm whether an empty target should be rejected instead.
        self.replaced_data = self.job.source.replace(
            self.job.target, self.job.replace
        )
|
||
|
Oops, something went wrong.