Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BugFix] Intrinio News #6336

Merged
merged 7 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions openbb_platform/extensions/news/integration/test_news_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,15 @@ def headers():
"limit": 20,
"start_date": None,
"end_date": None,
"source": "yahoo",
"topic": None,
"is_spam": False,
"sentiment": None,
"language": None,
"word_count_greater_than": None,
"word_count_less_than": None,
"business_relevance_greater_than": None,
"business_relevance_less_than": None,
}
),
(
Expand Down Expand Up @@ -164,8 +173,17 @@ def test_news_world(params, headers):
"provider": "intrinio",
"symbol": "AAPL",
"limit": 20,
"start_date": None,
"end_date": None,
"start_date": "2024-01-02",
"end_date": "2024-01-03",
"source": "yahoo",
"topic": None,
"is_spam": False,
"sentiment": None,
"language": None,
"word_count_greater_than": None,
"word_count_less_than": None,
"business_relevance_greater_than": None,
"business_relevance_less_than": None,
}
),
(
Expand Down
26 changes: 22 additions & 4 deletions openbb_platform/extensions/news/integration/test_news_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,17 @@ def obb(pytestconfig): # pylint: disable=inconsistent-return-statements
{
"provider": "intrinio",
"limit": 20,
"start_date": None,
"end_date": None,
"start_date": "2024-01-02",
"end_date": "2024-01-03",
"source": "yahoo",
"topic": None,
"is_spam": False,
"sentiment": None,
"language": None,
"word_count_greater_than": None,
"word_count_less_than": None,
"business_relevance_greater_than": None,
"business_relevance_less_than": None,
}
),
(
Expand Down Expand Up @@ -146,8 +155,17 @@ def test_news_world(params, obb):
"provider": "intrinio",
"symbol": "AAPL",
"limit": 20,
"start_date": None,
"end_date": None,
"start_date": "2024-01-02",
"end_date": "2024-01-03",
"source": "yahoo",
"topic": None,
"is_spam": False,
"sentiment": None,
"language": None,
"word_count_greater_than": None,
"word_count_less_than": None,
"business_relevance_greater_than": None,
"business_relevance_less_than": None,
}
),
(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
"""Intrinio Company News Model."""

import asyncio
from datetime import datetime
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Literal, Optional, Union

from openbb_core.provider.abstract.fetcher import Fetcher
from openbb_core.provider.standard_models.company_news import (
CompanyNewsData,
CompanyNewsQueryParams,
)
from openbb_core.provider.utils.errors import EmptyDataError
from openbb_core.provider.utils.helpers import (
ClientResponse,
amake_requests,
filter_by_dates,
amake_request,
get_querystring,
)
from openbb_intrinio.utils.helpers import get_data
from openbb_intrinio.utils.references import IntrinioSecurity
from pydantic import Field, field_validator


Expand All @@ -23,26 +25,133 @@ class IntrinioCompanyNewsQueryParams(CompanyNewsQueryParams):
Source: https://docs.intrinio.com/documentation/web_api/get_company_news_v2
"""

__alias_dict__ = {"symbol": "symbols", "limit": "page_size"}
__alias_dict__ = {
"limit": "page_size",
"source": "specific_source",
}
__json_schema_extra__ = {"symbol": ["multiple_items_allowed"]}

# Provider-specific query filters. Every field defaults to None.
# Restrict results to a single news source.
source: Optional[
    Literal["yahoo", "moody", "moody_us_news", "moody_us_press_releases"]
] = Field(
    default=None,
    description="The source of the news article.",
)
# Restrict results to one sentiment label. The description previously repeated
# the wording of the `source` filter; it now describes this field.
sentiment: Optional[Literal["positive", "neutral", "negative"]] = Field(
    default=None,
    description="Return news only with this sentiment.",
)
language: Optional[str] = Field(
    default=None,
    description="Filter by language. Unsupported for yahoo source.",
)
topic: Optional[str] = Field(
    default=None,
    description="Filter by topic. Unsupported for yahoo source.",
)
# Word-count bounds.
word_count_greater_than: Optional[int] = Field(
    default=None,
    description="News stories will have a word count greater than this value."
    + " Unsupported for yahoo source.",
)
word_count_less_than: Optional[int] = Field(
    default=None,
    description="News stories will have a word count less than this value."
    + " Unsupported for yahoo source.",
)
is_spam: Optional[bool] = Field(
    default=None,
    description="Filter whether it is marked as spam or not."
    + " Unsupported for yahoo source.",
)
# Business-relevance score bounds.
business_relevance_greater_than: Optional[float] = Field(
    default=None,
    description="News stories will have a business relevance score more than this value."
    + " Unsupported for yahoo source.",
)
business_relevance_less_than: Optional[float] = Field(
    default=None,
    description="News stories will have a business relevance score less than this value."
    + " Unsupported for yahoo source.",
)


class IntrinioCompanyNewsData(CompanyNewsData):
    """Intrinio Company News Data.

    Extends the standard company news model with the extra article fields
    declared below and normalizes a few raw values through "before"
    validators.
    """

    # Each key is listed exactly once; the previous literal repeated "symbols".
    __alias_dict__ = {
        "symbols": "symbol",
        "date": "publication_date",
        "text": "summary",
        "sentiment": "article_sentiment",
        "sentiment_confidence": "article_sentiment_confidence",
    }

    source: Optional[str] = Field(
        default=None,
        description="The source of the news article.",
    )
    summary: Optional[str] = Field(
        default=None,
        description="The summary of the news article.",
    )
    topics: Optional[str] = Field(
        default=None,
        description="The topics related to the news article.",
    )
    word_count: Optional[int] = Field(
        default=None,
        description="The word count of the news article.",
    )
    business_relevance: Optional[float] = Field(
        default=None,
        description=" How strongly correlated the news article is to the business",
    )
    sentiment: Optional[str] = Field(
        default=None,
        description="The sentiment of the news article - i.e, negative, positive.",
    )
    sentiment_confidence: Optional[float] = Field(
        default=None,
        description="The confidence score of the sentiment rating.",
    )
    language: Optional[str] = Field(
        default=None,
        description="The language of the news article.",
    )
    spam: Optional[bool] = Field(
        default=None,
        description="Whether the news article is spam.",
    )
    copyright: Optional[str] = Field(
        default=None,
        description="The copyright notice of the news article.",
    )
    id: str = Field(description="Article ID.")
    security: Optional[IntrinioSecurity] = Field(
        default=None,
        description="The Intrinio Security object. Contains the security details related to the news article.",
    )

    @field_validator("publication_date", mode="before", check_fields=False)
    @classmethod
    def date_validate(cls, v):
        """Return the date as a datetime object.

        Strings are parsed as ``YYYY-MM-DDTHH:MM:SS.<fraction>Z`` into a naive
        datetime. Values that are not strings (for example an existing
        datetime) are returned unchanged.
        """
        if not isinstance(v, str):
            return v
        # %f accepts 1-6 fractional digits, so the ".000Z" form still parses
        # to the same value while other fractions no longer raise.
        return datetime.strptime(v, "%Y-%m-%dT%H:%M:%S.%fZ")

    @field_validator("topics", mode="before", check_fields=False)
    @classmethod
    def topics_validate(cls, v):
        """Parse the topics into a comma-separated string.

        Assumes the raw value is a list of dicts carrying a "name" key, as the
        original parsing implied. Entries without a usable name are skipped
        rather than breaking the join. Returns None when nothing usable
        remains.
        """
        if not v:
            return None
        if isinstance(v, str):
            # Already flattened; nothing to parse.
            return v
        names = [
            t["name"]
            for t in v
            if isinstance(t, dict)
            and isinstance(t.get("name"), str)
            and t["name"].strip()
        ]
        return ", ".join(names) or None

    @field_validator("copyright", mode="before", check_fields=False)
    @classmethod
    def copyright_validate(cls, v):
        """Clean empty strings: blank or whitespace-only notices become None."""
        if isinstance(v, str) and not v.strip():
            return None
        return v


class IntrinioCompanyNewsFetcher(
Fetcher[
Expand All @@ -67,34 +176,75 @@ async def aextract_data(
api_key = credentials.get("intrinio_api_key") if credentials else ""

base_url = "https://api-v2.intrinio.com/companies"
query_str = get_querystring(
query.model_dump(by_alias=True), ["symbols", "page"]
ignore = (
["symbol", "page_size", "is_spam"]
if not query.source or query.source == "yahoo"
else ["symbol", "page_size"]
)

async def callback(response: ClientResponse, _: Any) -> List[Dict]:
"""Return the response."""
if response.status != 200:
return []

query_str = get_querystring(query.model_dump(by_alias=True), ignore)
symbols = query.symbol.split(",")
news: List = []

async def callback(response, session):
"""Response callback."""
result = await response.json()
if "error" in result:
raise RuntimeError(f"Intrinio Error Message -> {result['error']}")
symbol = response.url.parts[-2]
data = await response.json()

if isinstance(data, dict):
return [{**d, "symbol": symbol} for d in data.get("news", [])]
return []

urls = [
f"{base_url}/{symbol}/news?{query_str}&api_key={api_key}"
for symbol in [s.strip() for s in getattr(query, "symbol", "").split(",")]
]

return await amake_requests(urls, callback, **kwargs)
_data = result.get("news", [])
data = []
data.extend([{"symbol": symbol, **d} for d in _data])
articles = len(data)
next_page = result.get("next_page")
while next_page and query.limit > articles:
url = f"{base_url}/{symbol}/news?{query_str}&api_key={api_key}&next_page={next_page}"
result = await get_data(url, session=session, **kwargs)
_data = result.get("news", [])
if _data:
data.extend([{"symbol": symbol, **d} for d in _data])
articles = len(data)
next_page = result.get("next_page")
# Remove duplicates based on URL
return data

seen = set()

async def get_one(symbol):
"""Get the data for one symbol."""
# TODO: Change page_size to a more appropriate value when Intrinio fixes the bug in this param.
url = f"{base_url}/{symbol}/news?{query_str}&page_size=99&api_key={api_key}"
data = await amake_request(url, response_callback=callback, **kwargs)
if data:
data = [x for x in data if not (x["url"] in seen or seen.add(x["url"]))] # type: ignore
news.extend(
sorted(data, key=lambda x: x["publication_date"], reverse=True)[
: query.limit
]
)

tasks = [get_one(symbol) for symbol in symbols]

await asyncio.gather(*tasks)

if not news:
raise EmptyDataError("Error: The request was returned as empty.")

return news

# pylint: disable=unused-argument
@staticmethod
def transform_data(
    query: IntrinioCompanyNewsQueryParams, data: List[Dict], **kwargs: Any
) -> List[IntrinioCompanyNewsData]:
    """Return the transformed data.

    Each raw article dict is normalized before validation:

    * With a non-empty ``body`` object, the ``body`` key is removed and its
      ``publication_date`` and ``body`` values become the article's
      ``publication_date`` and ``text``.
    * Otherwise ``summary`` (if any) is moved to ``text``.

    Parameters
    ----------
    query : IntrinioCompanyNewsQueryParams
        The query parameters (unused here).
    data : List[Dict]
        Raw article dicts; they are not modified.

    Returns
    -------
    List[IntrinioCompanyNewsData]
        One validated model per input article, in input order.
    """
    results: List[IntrinioCompanyNewsData] = []
    for item in data:
        # Work on a shallow copy so the caller's dicts are left untouched.
        record = dict(item)
        body = record.get("body")
        if body:
            del record["body"]
            record["publication_date"] = body.get("publication_date", None)
            record["text"] = body.get("body", None)
        else:
            # A default avoids a KeyError when the article has no summary.
            record["text"] = record.pop("summary", None)
        results.append(IntrinioCompanyNewsData.model_validate(record))
    return results
Loading
Loading