-
Notifications
You must be signed in to change notification settings - Fork 2.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Migrate backtest logic from NT #1263
Changes from 7 commits
e68ffc8
32c2494
d0cdffb
28f88cb
9b5db21
6e09470
0d84962
523571b
c2d4c4d
c9534ff
944ea30
2f7de4c
289ca9e
0716f85
6a35bee
47fe0c0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,14 +22,15 @@ | |
from abc import abstractmethod | ||
from functools import lru_cache | ||
from pathlib import Path | ||
from typing import List, Sequence, cast | ||
from typing import List, Optional, Sequence, cast | ||
|
||
import cachetools | ||
import numpy as np | ||
import pandas as pd | ||
from cachetools.keys import hashkey | ||
|
||
from qlib.backtest.decision import Order, OrderDir | ||
from qlib.rl.data.integration import fetch_features | ||
from qlib.typehint import Literal | ||
|
||
DealPriceType = Literal["bid_or_ask", "bid_or_ask_fill", "close"] | ||
|
@@ -178,7 +179,7 @@ def get_time_index(self) -> pd.DatetimeIndex: | |
return cast(pd.DatetimeIndex, self.data.index) | ||
|
||
|
||
class IntradayProcessedData: | ||
class BaseIntradayProcessedData: | ||
"""Processed market data after data cleanup and feature engineering. | ||
|
||
It contains both processed data for "today" and "yesterday", as some algorithms | ||
|
@@ -193,6 +194,10 @@ class IntradayProcessedData: | |
"""Processed data for "yesterday". | ||
Number of records must be ``time_length``, and columns must be ``feature_dim``.""" | ||
|
||
|
||
class IntradayProcessedData(BaseIntradayProcessedData): | ||
"""Subclass of IntradayProcessedData. Used to handle Dataset Handler style data.""" | ||
|
||
def __init__( | ||
self, | ||
data_dir: Path, | ||
|
@@ -233,6 +238,25 @@ def __repr__(self) -> str: | |
return f"{self.__class__.__name__}({self.today}, {self.yesterday})" | ||
|
||
|
||
class NTIntradayProcessedData(BaseIntradayProcessedData): | ||
"""Subclass of IntradayProcessedData. Used to handle NT style data.""" | ||
|
||
def __init__( | ||
self, | ||
stock_id: str, | ||
date: pd.Timestamp, | ||
) -> None: | ||
def _drop_stock_id(df: pd.DataFrame) -> pd.DataFrame: | ||
return df.reset_index().drop(columns=["instrument"]).set_index(["datetime"]) | ||
|
||
self.today = _drop_stock_id(fetch_features(stock_id, date)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it would be better to add more docs about the reason that […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
self.yesterday = _drop_stock_id(fetch_features(stock_id, date, yesterday=True)) | ||
|
||
def __repr__(self) -> str: | ||
with pd.option_context("memory_usage", False, "display.max_info_columns", 1, "display.large_repr", "info"): | ||
return f"{self.__class__.__name__}({self.today}, {self.yesterday})" | ||
|
||
|
||
@lru_cache(maxsize=100) # 100 * 50K = 5MB | ||
def load_simple_intraday_backtest_data( | ||
data_dir: Path, | ||
|
@@ -249,13 +273,19 @@ def load_simple_intraday_backtest_data( | |
key=lambda data_dir, stock_id, date, _, __: hashkey(data_dir, stock_id, date), | ||
) | ||
def load_intraday_processed_data( | ||
data_dir: Path, | ||
data_dir: Optional[Path], | ||
stock_id: str, | ||
date: pd.Timestamp, | ||
feature_dim: int, | ||
time_index: pd.Index, | ||
) -> IntradayProcessedData: | ||
return IntradayProcessedData(data_dir, stock_id, date, feature_dim, time_index) | ||
) -> BaseIntradayProcessedData: | ||
from qlib.rl.data.integration import dataset # pylint: disable=C0415 | ||
|
||
if dataset is None: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here. This should be separated. |
||
assert data_dir is not None | ||
return IntradayProcessedData(data_dir, stock_id, date, feature_dim, time_index) | ||
else: | ||
return NTIntradayProcessedData(stock_id, date) | ||
|
||
|
||
def load_orders( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this should be put into another file, because it's not `pickle_styled`. See the headers of this file.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not sure how we should reorganize the related structures. If we need to move `NTIntradayProcessedData` to a separate file, where should we put `NTIntradayProcessedData` and `load_intraday_processed_data()`?