Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate backtest logic from NT #1263

Merged
merged 16 commits into from
Sep 19, 2022
8 changes: 5 additions & 3 deletions qlib/backtest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def get_exchange(
def create_account_instance(
start_time: Union[pd.Timestamp, str],
end_time: Union[pd.Timestamp, str],
benchmark: str,
benchmark: Optional[str],
account: Union[float, int, dict],
pos_type: str = "Position",
) -> Account:
Expand Down Expand Up @@ -163,7 +163,9 @@ def create_account_instance(
init_cash=init_cash,
position_dict=position_dict,
pos_type=pos_type,
benchmark_config={
benchmark_config={}
if benchmark is None
else {
"benchmark": benchmark,
"start_time": start_time,
"end_time": end_time,
Expand All @@ -176,7 +178,7 @@ def get_strategy_executor(
end_time: Union[pd.Timestamp, str],
strategy: Union[str, dict, object, Path],
executor: Union[str, dict, object, Path],
benchmark: str = "SH000300",
benchmark: Optional[str] = "SH000300",
account: Union[float, int, dict] = 1e9,
exchange_kwargs: dict = {},
pos_type: str = "Position",
Expand Down
4 changes: 2 additions & 2 deletions qlib/rl/data/exchange_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@

import cachetools
import pandas as pd

from qlib.backtest import Exchange, Order
from qlib.backtest.decision import TradeRange, TradeRangeByTime
from qlib.constant import ONE_DAY, EPS_T
from qlib.constant import EPS_T, ONE_DAY
from qlib.rl.order_execution.utils import get_ticks_slice
from qlib.utils.index_data import IndexData

from .pickle_styled import BaseIntradayBacktestData


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def __init__(

@cachetools.cached( # type: ignore
cache=cachetools.LRUCache(100),
key=lambda stock_id, date, backtest: (stock_id, date.replace(hour=0, minute=0, second=0), backtest),
key=lambda _, stock_id, date, backtest: (stock_id, date.replace(hour=0, minute=0, second=0), backtest),
)
def get(self, stock_id: str, date: pd.Timestamp, backtest: bool = False) -> pd.DataFrame:
start_time, end_time = date.replace(hour=0, minute=0, second=0), date.replace(hour=23, minute=59, second=59)
Expand Down
40 changes: 35 additions & 5 deletions qlib/rl/data/pickle_styled.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,15 @@
from abc import abstractmethod
from functools import lru_cache
from pathlib import Path
from typing import List, Sequence, cast
from typing import List, Optional, Sequence, cast

import cachetools
import numpy as np
import pandas as pd
from cachetools.keys import hashkey

from qlib.backtest.decision import Order, OrderDir
from qlib.rl.data.integration import fetch_features
from qlib.typehint import Literal

DealPriceType = Literal["bid_or_ask", "bid_or_ask_fill", "close"]
Expand Down Expand Up @@ -178,7 +179,7 @@ def get_time_index(self) -> pd.DatetimeIndex:
return cast(pd.DatetimeIndex, self.data.index)


class IntradayProcessedData:
class BaseIntradayProcessedData:
"""Processed market data after data cleanup and feature engineering.

It contains both processed data for "today" and "yesterday", as some algorithms
Expand All @@ -193,6 +194,10 @@ class IntradayProcessedData:
"""Processed data for "yesterday".
Number of records must be ``time_length``, and columns must be ``feature_dim``."""


class IntradayProcessedData(BaseIntradayProcessedData):
"""Subclass of IntradayProcessedData. Used to handle Dataset Handler style data."""

def __init__(
self,
data_dir: Path,
Expand Down Expand Up @@ -233,6 +238,25 @@ def __repr__(self) -> str:
return f"{self.__class__.__name__}({self.today}, {self.yesterday})"


class NTIntradayProcessedData(BaseIntradayProcessedData):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be put into another file, because it's not pickle_styled. See the headers of this file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure how we should reorganize the related structures. If we need to move NTIntradayProcessedData to a separate file, where should we put NTIntradayProcessedData and load_intraday_processed_data()?

"""Subclass of IntradayProcessedData. Used to handle NT style data."""

def __init__(
self,
stock_id: str,
date: pd.Timestamp,
) -> None:
def _drop_stock_id(df: pd.DataFrame) -> pd.DataFrame:
return df.reset_index().drop(columns=["instrument"]).set_index(["datetime"])

self.today = _drop_stock_id(fetch_features(stock_id, date))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be better to add more docs explaining why today and yesterday differ from those of its base, IntradayProcessedData.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NTIntradayProcessedData's base is BaseIntradayProcessedData, not IntradayProcessedData. Both NTIntradayProcessedData and IntradayProcessedData are implementations of BaseIntradayProcessedData, which is why they have different data formats.

self.yesterday = _drop_stock_id(fetch_features(stock_id, date, yesterday=True))

def __repr__(self) -> str:
with pd.option_context("memory_usage", False, "display.max_info_columns", 1, "display.large_repr", "info"):
return f"{self.__class__.__name__}({self.today}, {self.yesterday})"


@lru_cache(maxsize=100) # 100 * 50K = 5MB
def load_simple_intraday_backtest_data(
data_dir: Path,
Expand All @@ -249,13 +273,19 @@ def load_simple_intraday_backtest_data(
key=lambda data_dir, stock_id, date, _, __: hashkey(data_dir, stock_id, date),
)
def load_intraday_processed_data(
data_dir: Path,
data_dir: Optional[Path],
stock_id: str,
date: pd.Timestamp,
feature_dim: int,
time_index: pd.Index,
) -> IntradayProcessedData:
return IntradayProcessedData(data_dir, stock_id, date, feature_dim, time_index)
) -> BaseIntradayProcessedData:
from qlib.rl.data.integration import dataset # pylint: disable=C0415

if dataset is None:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here. This should be separated.

assert data_dir is not None
return IntradayProcessedData(data_dir, stock_id, date, feature_dim, time_index)
else:
return NTIntradayProcessedData(stock_id, date)


def load_orders(
Expand Down
8 changes: 4 additions & 4 deletions qlib/rl/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Any, Generic, Optional, TypeVar
from typing import TYPE_CHECKING, Any, Generic, TypeVar, Union

import numpy as np

Expand All @@ -12,7 +12,7 @@
from .simulator import ActType, StateType

if TYPE_CHECKING:
from .utils.env_wrapper import EnvWrapper
from .utils.env_wrapper import CollectDataEnvWrapper, EnvWrapper

import gym
from gym import spaces
Expand Down Expand Up @@ -40,7 +40,7 @@ class Interpreter:
class StateInterpreter(Generic[StateType, ObsType], Interpreter):
"""State Interpreter that interpret execution result of qlib executor into rl env state"""

env: Optional[EnvWrapper] = None
env: Union[EnvWrapper, CollectDataEnvWrapper, None] = None

@property
def observation_space(self) -> gym.Space:
Expand Down Expand Up @@ -74,7 +74,7 @@ def interpret(self, simulator_state: StateType) -> ObsType:
class ActionInterpreter(Generic[StateType, PolicyActType, ActType], Interpreter):
"""Action Interpreter that interpret rl agent action into qlib orders"""

env: Optional[EnvWrapper] = None
env: Union[EnvWrapper, CollectDataEnvWrapper, None] = None

@property
def action_space(self) -> gym.Space:
Expand Down
Loading