Commit b5c8076 (0 parents)
Showing 62 changed files with 136,963 additions and 0 deletions.
application.py
@@ -0,0 +1,111 @@
import pandas
from normalize import normalize
import sys

# Default parameters
type_ = "min-max"
q_lower = None
q_upper = None
ref = None
A = None
b = None
input_path = None
output_path = None

# User parameters
for i in range(len(sys.argv)):
    if sys.argv[i] == "-type":
        type_ = sys.argv[i+1]
    if sys.argv[i] == "-q_lower":
        q_lower = float(sys.argv[i+1])
    if sys.argv[i] == "-q_upper":
        q_upper = float(sys.argv[i+1])
    if sys.argv[i] == "-ref":
        ref = float(sys.argv[i+1])
    if sys.argv[i] == "-input":
        input_path = sys.argv[i+1]
    if sys.argv[i] == "-output":
        output_path = sys.argv[i+1]
    if sys.argv[i] == "-A":
        A = float(sys.argv[i+1])
    if sys.argv[i] == "-b":
        b = float(sys.argv[i+1])

# test
# input_path = r"E:\Machine_learning\task4\AUDNZDpro240.csv"
# output_path = r"E:\Machine_learning\task4\test2.csv"

# Sanity check: both paths are required
if input_path is None or output_path is None:
    sys.exit("Error: -input and -output must both be given")

# Import csv file as data frame
df = pandas.read_csv(input_path)
colnames = list(df.columns.values)

# Convert data frame to a list of seven column lists
list_ = df[colnames].values.tolist()
ls = []
for i in range(7):
    ls.append([])
for i in range(7):
    for j in range(len(list_)):
        ls[i].append(list_[j][i])

# Convert columns to numeric values
# (with a "Time" column the first two columns are Date/Time, otherwise only the first is Date)
if "Time" in colnames:
    for i in range(2, 7):
        ls[i] = list(map(float, ls[i]))
else:
    for i in range(1, 7):
        ls[i] = list(map(float, ls[i]))

# Create output list; the volume column is normalized relative to the previous value
output = []
if "Time" not in colnames:
    for i in range(5):
        output.append(ls[i])
    normed_volume = []  # normalized volume column in list form
    normed_volume.append(0)
    for i in range(1, len(ls[5])):
        normed_volume.append((ls[5][i] - ls[5][i-1]) / ls[5][i-1])
    output.append(normed_volume)
    output.append(ls[6])
else:
    for i in range(6):
        output.append(ls[i])
    normed_volume = []  # normalized volume column in list form
    normed_volume.append(0)
    for i in range(1, len(ls[6])):
        normed_volume.append((ls[6][i] - ls[6][i-1]) / ls[6][i-1])
    output.append(normed_volume)

# Convert back to data frame
output_df = pandas.DataFrame()
for i in range(len(colnames)):
    output_df[colnames[i]] = output[i]

# If A and b were not given, save x1 and x2 to calculate them later
if A is None and b is None:
    x1 = output_df["Open"][0]
    x2 = output_df["Open"][1]

# Normalize output
if "Time" in colnames:
    normcols = ["Open", "High", "Low", "Close"]
else:
    normcols = ["Open", "High", "Low", "Close", "Adj Close"]

output_df[normcols] = normalize(output_df[normcols], type_=type_, q_lower=q_lower,
                                q_upper=q_upper, ref=ref, A=A, b=b)

# Calculate A and b if not given
if A is None and b is None:
    y1 = output_df["Open"][0]
    y2 = output_df["Open"][1]
    A = (y1 - y2) / (x1 - x2)
    b = y1 - A * x1
    print("A is {A}, and b is {b}".format(A=A, b=b))

# Export data frame as csv file
output_df.to_csv(output_path, index=False)
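Side note (not part of the committed file): the relative-volume loop above is simply the percentage change against the previous row. Assuming the volume column is literally named "Volume", pandas can produce the same values directly:

# Percent change vs. the previous row, with the first NaN replaced by 0,
# matching the manual loop in application.py.
normed_volume = df["Volume"].pct_change().fillna(0).tolist()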
normalize.py
@@ -0,0 +1,37 @@
import numpy as np
import pandas


def normalize(data, type_=None, q_lower=None, q_upper=None, ref=None, A=None, b=None):
    """
    Normalization function.
    Can be of type:
        - min-max: requires no extra arguments
        - quantile: requires a lower (q_lower) and upper (q_upper) quantile
        - ref: requires a reference value (ref)
        - Ab: requires a slope (A) and an intercept (b)
    If no recognized type is given, an empty data frame is returned.
    """

    normed_df = pandas.DataFrame()

    if type_ == "min-max":
        x_min = data.values.min()
        x_max = data.values.max()
        normed_df = (data - x_min) / (x_max - x_min)

    elif type_ == "quantile":
        x_lower = np.percentile(data, q_lower * 100)
        x_upper = np.percentile(data, q_upper * 100)
        normed_df = (data - x_lower) / (x_upper - x_lower)

    elif type_ == "ref":
        normed_df = (data - ref) / ref

    elif type_ == "Ab":
        normed_df = A * data + b

    return normed_df
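For orientation, a minimal usage sketch of normalize on a toy data frame (the column names and numbers are made up purely for illustration):

import pandas

from normalize import normalize

# Tiny illustrative price-like table
df = pandas.DataFrame({"Open": [1.0, 2.0, 3.0, 4.0],
                       "Close": [2.0, 3.0, 4.0, 5.0]})

print(normalize(df, type_="min-max"))                               # scaled into [0, 1] by the global min/max
print(normalize(df, type_="quantile", q_lower=0.25, q_upper=0.75))  # scaled by the 25%/75% percentiles
print(normalize(df, type_="ref", ref=3.0))                          # (x - 3) / 3
print(normalize(df, type_="Ab", A=0.5, b=-1.0))                     # 0.5 * x - 1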
@@ -0,0 +1,19 @@
How to use:
- Required arguments:
    -input : input csv path
    -output : output csv path
- Optional arguments:
    -type : "min-max", "quantile", "ref", "Ab"
        if no -type argument is given, "min-max" normalization is used by default
    -q_lower : lower quantile for type "quantile" normalization, between 0 and 1
    -q_upper : upper quantile for type "quantile" normalization, between 0 and 1
    -ref : reference value for type "ref" normalization
    -A : A value (slope) for type "Ab" normalization
    -b : b value (intercept) for type "Ab" normalization

Example command line arguments:
application.py -input "E:\Machine_learning\task2\table.csv" -output "E:\Machine_learning\task2\test.csv"
application.py -input "E:\Machine_learning\task2\table.csv" -output "E:\Machine_learning\task2\test.csv" -type min-max            # does the same as the line before
application.py -input "E:\Machine_learning\task2\table.csv" -output "E:\Machine_learning\task2\test.csv" -type quantile -q_lower 0.25 -q_upper 0.75
application.py -input "E:\Machine_learning\task2\table.csv" -output "E:\Machine_learning\task2\test.csv" -type ref -ref 80
application.py -input "E:\Machine_learning\task2\table.csv" -output "E:\Machine_learning\task2\test.csv" -type Ab -A 0.017314638441174052 -b -0.8138815577266735
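Note on -A and -b (my reading of application.py, not part of the original usage text): every normalization applied to the price columns is an affine map y = A*x + b. When -A and -b are not supplied, application.py recovers that map from the first two "Open" values before and after normalization and prints it, so the identical transform can be replayed later with -type Ab. A toy calculation with made-up numbers:

# Two "Open" values before (x1, x2) and after (y1, y2) normalization -- illustrative only
x1, x2 = 80.0, 82.0
y1, y2 = 0.10, 0.35

A = (y1 - y2) / (x1 - x2)
b = y1 - A * x1
print(A, b)   # replay the same transform later with: -type Ab -A <A> -b <b>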
@@ -0,0 +1,4 @@
cd "C:\Users\edward\Documents\Pair Trading\FX\normalize"
python application.py -input AUDJPYpro240.csv -output nA.csv -type quantile -q_lower 0.13 -q_upper 0.87
python application.py -input AUDNZDpro240.csv -output nB.csv -type quantile -q_lower 0.13 -q_upper 0.87
@pause
@@ -0,0 +1,113 @@
import math
import sys

import numpy as np
import pandas as pd

# variables for test purposes
# input1 = r"E:\Machine_learning\task4\test.csv"
# input2 = r"E:\Machine_learning\task4\test2.csv"
# output = r"E:\Machine_learning\task4\test_result.csv"
# minDX = 0.01
# minDT = 5

# User parameters
for i in range(len(sys.argv)):
    if sys.argv[i] == "-input1":
        input1 = sys.argv[i+1]
    if sys.argv[i] == "-input2":
        input2 = sys.argv[i+1]
    if sys.argv[i] == "-output":
        output = sys.argv[i+1]
    if sys.argv[i] == "-minDX":
        minDX = float(sys.argv[i+1])
    if sys.argv[i] == "-minDT":
        minDT = int(sys.argv[i+1])

# Read input csv files and concatenate them
df1 = pd.read_csv(input1)
df2 = pd.read_csv(input2)
df_merged = pd.concat([df1, df2], axis=1)

# Rename the columns and drop the second date column
if "Adj Close" in list(df_merged.columns.values):
    df_merged.columns = ['Date', 'Open1', 'High1', 'Low1', 'Close1', 'Volume1', 'Adj Close1',
                         'remove', 'Open2', 'High2', 'Low2', 'Close2', 'Volume2', 'Adj Close2']
else:
    df_merged.columns = ['Date', 'Time1', 'Open1', 'High1', 'Low1', 'Close1', 'Volume1',
                         'remove', 'Time2', 'Open2', 'High2', 'Low2', 'Close2', 'Volume2']
df_merged = df_merged.drop('remove', axis=1)

# Compute the difference between Close1 and Close2
diff = df_merged["Close1"] - df_merged["Close2"]
length = len(diff)

# Initialize lists for signal and intersection points
signal = [None] * length
inter_points = []

# Find intersection points and set their signal to 0
for i in range(length):
    if abs(diff[i]) < minDX:
        signal[i] = 0
        inter_points.append(i)

# Set signals within the minDT buffer around intersection points to 0,
# handling intersections near the ends of the series
for point in inter_points:
    if minDT >= point and point < length - minDT:
        for i in range(point + minDT + 1):
            signal[i] = 0
    if minDT >= point >= length - minDT:
        for i in range(length):
            signal[i] = 0
    if minDT < point < length - minDT:
        for i in range(point - minDT, point + minDT + 1):
            signal[i] = 0
    if minDT < point and point >= length - minDT:
        for i in range(point - minDT, length):
            signal[i] = 0

# Check for cases where the diff function jumps over 0 between two consecutive rows
for i in range(length - 1):
    if (diff[i] < -minDX and diff[i+1] > minDX) or (diff[i] > minDX and diff[i+1] < -minDX):
        inter_points.append(i + 0.5)
inter_points.sort()

# Calculate interval minimums and maximums
interval_min_diff = []
interval_max_diff = []
for i in range(len(inter_points) - 1):
    interval_min_diff.append(np.min(diff[math.ceil(inter_points[i]): math.floor(inter_points[i+1])]))
    interval_max_diff.append(np.max(diff[math.ceil(inter_points[i]): math.floor(inter_points[i+1])]))

# Check whether diff function on intervals with non-zero values is positive or negative
# for i in range(len(inter_points)-1):
#     if math.ceil(inter_points[i+1]) - math.floor(inter_points[i]) > 2 * minDT + 2:
#         if (diff[math.floor(inter_points[i])] + diff[math.ceil(inter_points[i+1])]) // 2 > 0:
#             interval_min_diff[i] = 0
#         elif (diff[math.floor(inter_points[i])] + diff[math.ceil(inter_points[i+1])]) // 2 < 0:
#             interval_max_diff[i] = 0

# Calculate signals outside the buffer range: scale diff by the interval extreme
for i in range(length):
    for j in range(len(inter_points) - 1):
        if signal[i] is None and math.ceil(inter_points[j]) < i < math.floor(inter_points[j + 1]):
            if interval_max_diff[j] > minDX:
                signal[i] = diff[i] / interval_max_diff[j]
            elif interval_min_diff[j] < -minDX:
                signal[i] = -diff[i] / interval_min_diff[j]
            else:
                signal[i] = 0

# Set signals before the first intersection to 5 and the ones after the last to -5
for i in range(math.ceil(inter_points[0])):
    signal[i] = 5
for i in range(math.ceil(inter_points[len(inter_points) - 1] + 1), length):
    signal[i] = -5

# Concatenate signal column to the data frame
signal_df = pd.Series(signal)
df_merged['Signal'] = signal_df.values

# Export data frame as csv file
df_merged.to_csv(output, index=False)
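A hedged usage example of this script (the filename signal.py is a placeholder, and the parameter values simply mirror the commented-out test block above and the batch file earlier in the commit):

python signal.py -input1 nA.csv -input2 nB.csv -output signal_result.csv -minDX 0.01 -minDT 5

The resulting Signal column is 0 at and around (within minDT rows of) points where Close1 and Close2 intersect, is the Close1 - Close2 difference scaled by its interval extreme elsewhere, and takes the sentinel values 5 before the first intersection and -5 after the last one.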