Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
algomaschine committed Jan 20, 2019
0 parents commit b5c8076
Show file tree
Hide file tree
Showing 62 changed files with 136,963 additions and 0 deletions.
Binary file added 0608.zip
Binary file not shown.
3,870 changes: 3,870 additions & 0 deletions 1 normalize/AUDJPYpro240.csv

Large diffs are not rendered by default.

3,870 changes: 3,870 additions & 0 deletions 1 normalize/AUDNZDpro240.csv

Large diffs are not rendered by default.

Binary file added 1 normalize/__pycache__/normalize.cpython-36.pyc
Binary file not shown.
111 changes: 111 additions & 0 deletions 1 normalize/application.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import pandas
from normalize import normalize
import sys

# ---------------------------------------------------------------------------
# Command-line front end for normalize(): reads an OHLCV csv file, converts
# the Volume column to its percent change, normalizes the price columns, and
# writes the result back out as csv.
# ---------------------------------------------------------------------------

# Default parameters
type_ = "min-max"
q_lower = None
q_upper = None
ref = None
A = None
b = None
# BUG FIX: input_path/output_path were previously left undefined when the
# flags were missing, so the sanity check below raised NameError instead of
# exiting cleanly.
input_path = None
output_path = None

# User parameters: every "-flag" consumes the next argv entry as its value.
for i in range(len(sys.argv)):
    if sys.argv[i] == "-type":
        type_ = sys.argv[i+1]
    if sys.argv[i] == "-q_lower":
        q_lower = float(sys.argv[i+1])
    if sys.argv[i] == "-q_upper":
        q_upper = float(sys.argv[i+1])
    if sys.argv[i] == "-ref":
        ref = float(sys.argv[i+1])
    if sys.argv[i] == "-input":
        input_path = sys.argv[i+1]
    if sys.argv[i] == "-output":
        output_path = sys.argv[i+1]
    if sys.argv[i] == "-A":
        A = float(sys.argv[i+1])
    if sys.argv[i] == "-b":
        b = float(sys.argv[i+1])

# Sanity check: both paths are mandatory.
if input_path is None or output_path is None:
    sys.exit("Required arguments: -input <csv path> -output <csv path>")

# import csv file as data frame
df = pandas.read_csv(input_path)
colnames = list(df.columns.values)

# Transpose the frame into 7 column lists.  The input is assumed to have
# exactly 7 columns: Date,[Time,]Open,High,Low,Close,Volume[,Adj Close].
list_ = df[colnames].values.tolist()
ls = []
for i in range(7):
    ls.append([])
for i in range(7):
    for j in range(len(list_)):
        ls[i].append(list_[j][i])

# Convert lists to numeric values.
# With a "Time" column the first two columns (Date, Time) are non-numeric;
# otherwise only the first (Date) is.
if "Time" in colnames:
    for i in range(2, 7):
        ls[i] = list(map(float, ls[i]))
else:
    for i in range(1, 7):
        ls[i] = list(map(float, ls[i]))

# Build the output columns, replacing the raw Volume column by its percent
# change (first entry forced to 0).
# NOTE(review): a zero volume value would raise ZeroDivisionError here —
# presumably the data never contains one; confirm against the inputs.
output = []
if "Time" not in colnames:
    # Layout: Date, Open, High, Low, Close, Volume, Adj Close
    for i in range(5):
        output.append(ls[i])
    normed_volume = [0]
    for i in range(1, len(ls[5])):
        normed_volume.append((ls[5][i] - ls[5][i-1]) / ls[5][i-1])
    output.append(normed_volume)
    output.append(ls[6])
else:
    # Layout: Date, Time, Open, High, Low, Close, Volume
    for i in range(6):
        output.append(ls[i])
    normed_volume = [0]
    for i in range(1, len(ls[6])):
        normed_volume.append((ls[6][i] - ls[6][i-1]) / ls[6][i-1])
    output.append(normed_volume)

# convert back to data frame
output_df = pandas.DataFrame()
for i in range(len(colnames)):
    output_df[colnames[i]] = output[i]

# If A and b were not given, remember two pre-normalization Open values so
# the affine mapping (y = A*x + b) implied by the normalization can be
# recovered afterwards.
if A is None and b is None:
    x1 = output_df["Open"][0]
    x2 = output_df["Open"][1]

# normalize the price columns (Volume was already converted above)
if "Time" in colnames:
    normcols = ["Open", "High", "Low", "Close"]
else:
    normcols = ['Open', 'High', 'Low', 'Close', 'Adj Close']

output_df[normcols] = normalize(output_df[normcols], type_=type_, q_lower=q_lower, q_upper=q_upper, ref=ref, A=A, b=b)

# Recover and report the A and b of the applied mapping if they were not given
if A is None and b is None:
    y1 = output_df["Open"][0]
    y2 = output_df["Open"][1]
    A = (y1-y2) / (x1 - x2)
    b = y1 - A*x1
    print("A is {A}, and b is {b}".format(A=A, b=b))


# export data frame as csv file
output_df.to_csv(output_path, index=False)

3,870 changes: 3,870 additions & 0 deletions 1 normalize/nA.csv

Large diffs are not rendered by default.

3,870 changes: 3,870 additions & 0 deletions 1 normalize/nB.csv

Large diffs are not rendered by default.

37 changes: 37 additions & 0 deletions 1 normalize/normalize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import numpy as np
import pandas

def normalize(data, type_ = None, q_lower = None, q_upper = None, ref = None, A = None, b = None):
    """Normalize a DataFrame of numeric columns.

    Supported ``type_`` values:
      - "min-max": scale by the global min/max over ALL columns (no extra args)
      - "quantile": scale by the global q_lower/q_upper percentiles
        (both given as fractions in [0, 1])
      - "ref": (data - ref) / ref for a reference value ``ref``
      - "Ab": affine transform ``A * data + b``

    Returns a DataFrame of the same shape as ``data``.

    NOTE: for any other ``type_`` (including the default None) an EMPTY
    DataFrame is returned.  The previous docstring claimed a fallback
    "normalized to the previous value", but that was never implemented;
    the docstring now matches the actual behavior.
    """

    normed_df = pandas.DataFrame()

    if type_ == "min-max":
        # Global (not per-column) min/max so relative column levels survive.
        x_min = data.values.min()
        x_max = data.values.max()
        normed_df = (data - x_min) / (x_max - x_min)

    elif type_ == "quantile":
        # np.percentile flattens the frame, so these are global percentiles
        # over all columns as well.
        x_lower = np.percentile(data, q_lower * 100)
        x_upper = np.percentile(data, q_upper * 100)
        normed_df = (data - x_lower) / (x_upper- x_lower)

    elif type_ == "ref":
        normed_df = (data - ref) / ref

    elif type_ == "Ab":
        normed_df = A * data + b


    return normed_df
19 changes: 19 additions & 0 deletions 1 normalize/readme.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
How to use:
- Required arguments:
-input : input csv path
-output : output csv path
- Optional arguments:
-type: "min-max", "quantile", "ref", "Ab"
        if no -type argument is given, "min-max" normalization is used by default
-q_lower: lower quantile for type "quantile" normalization between 0 and 1
-q_upper: upper quantile for type "quantile" normalization between 0 and 1
-ref: reference value for type "ref" normalization
-A : A value for type "Ab" normalization
-b : b value for type "Ab" normalization

Example command line arguments:
application.py -input "E:\Machine_learning\task2\table.csv" -output "E:\Machine_learning\task2\test.csv"
application.py -input "E:\Machine_learning\task2\table.csv" -output "E:\Machine_learning\task2\test.csv" -type min-max # does the same as the line before
application.py -input "E:\Machine_learning\task2\table.csv" -output "E:\Machine_learning\task2\test.csv" -type quantile -q_lower 0.25 -q_upper 0.75
application.py -input "E:\Machine_learning\task2\table.csv" -output "E:\Machine_learning\task2\test.csv" -type ref -ref 80
application.py -input "E:\Machine_learning\task2\table.csv" -output "E:\Machine_learning\task2\test.csv" -type Ab -A 0.017314638441174052 -b -0.8138815577266735
4 changes: 4 additions & 0 deletions 1 normalize/run-norm.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
REM Normalize both FX pair csv files with quantile normalization
REM (13th / 87th percentiles), producing nA.csv and nB.csv for the
REM downstream signal generator.
cd "C:\Users\edward\Documents\Pair Trading\FX\normalize"
python application.py -input AUDJPYpro240.csv -output nA.csv -type quantile -q_lower 0.13 -q_upper 0.87
python application.py -input AUDNZDpro240.csv -output nB.csv -type quantile -q_lower 0.13 -q_upper 0.87
@pause
113 changes: 113 additions & 0 deletions 2 signals (3)/application.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import math
import sys

import numpy as np
import pandas as pd

# ---------------------------------------------------------------------------
# Pair-trading signal generator: merges two normalized OHLCV csv files,
# computes the spread diff = Close1 - Close2, and derives a Signal column:
# 0 inside a +-minDT-bar buffer around zero crossings of the spread,
# the spread scaled to [-1, 1] elsewhere, 5 before the first crossing and
# -5 after the last one.
# ---------------------------------------------------------------------------

# Required parameters, initialized so missing flags fail cleanly below.
# BUG FIX: previously these names were simply undefined when a flag was
# missing, producing a NameError later instead of a clear exit.
input1 = None
input2 = None
output = None
minDX = None
minDT = None

# User parameters: every "-flag" consumes the next argv entry as its value.
for i in range(len(sys.argv)):
    if sys.argv[i] == "-input1":
        input1 = sys.argv[i+1]
    if sys.argv[i] == "-input2":
        input2 = sys.argv[i+1]
    if sys.argv[i] == "-output":
        output = sys.argv[i+1]
    if sys.argv[i] == "-minDX":
        minDX = float(sys.argv[i+1])
    if sys.argv[i] == "-minDT":
        minDT = int(sys.argv[i+1])

if input1 is None or input2 is None or output is None or minDX is None or minDT is None:
    sys.exit("Required arguments: -input1 -input2 -output -minDX -minDT")

# Read input csv files and concatenate them side by side
df1 = pd.read_csv(input1)
df2 = pd.read_csv(input2)
df_merged = pd.concat([df1, df2], axis=1)

# Rename the columns, then drop the second date column
if "Adj Close" in list(df_merged.columns.values):
    df_merged.columns = ['Date', 'Open1', 'High1', 'Low1', 'Close1', 'Volume1', 'Adj Close1',
                         'remove', 'Open2', 'High2', 'Low2', 'Close2', 'Volume2', 'Adj Close2']
else:
    df_merged.columns = ['Date', 'Time1', 'Open1', 'High1', 'Low1', 'Close1', 'Volume1',
                         'remove', 'Time2', 'Open2', 'High2', 'Low2', 'Close2', 'Volume2']
df_merged = df_merged.drop('remove', axis=1)

# Spread between the two close series
diff = df_merged["Close1"] - df_merged["Close2"]
length = len(diff)

# signal[i] stays None until assigned; inter_points collects crossing indices
signal = [None] * length
inter_points = []

# Bars where the spread lies inside the +-minDX dead band count as crossings
for i in range(length):
    if abs(diff[i]) < minDX:
        signal[i] = 0
        inter_points.append(i)

# Zero out the signal within a minDT-bar buffer around every crossing,
# clipping the buffer at either end of the series.
for point in inter_points:
    if minDT >= point and point < length - minDT:
        for i in range(point + minDT + 1):
            signal[i] = 0
    if minDT >= point >= length - minDT:
        for i in range(length):
            signal[i] = 0
    if minDT < point < length - minDT:
        for i in range(point - minDT, point + minDT + 1):
            signal[i] = 0
    if minDT < point and point >= length - minDT:
        for i in range(point - minDT, length):
            signal[i] = 0

# Also treat a single-bar jump straight across the dead band as a crossing,
# recorded at the virtual half index between the two bars.
# BUG FIX: the downward-jump test used to be
#   diff[i] > -minDX and diff[i+1] < minDX
# which fires for almost any pair of small values; the correct mirror image
# of the upward test is diff[i] > minDX and diff[i+1] < -minDX.
for i in range(length - 1):
    if (diff[i] < -minDX and diff[i+1] > minDX) or (diff[i] > minDX and diff[i+1] < -minDX):
        inter_points.append(i + 0.5)
inter_points.sort()

# Per-interval extrema of the spread between consecutive crossings.
# NOTE(review): two half-index crossings one bar apart would make this slice
# empty and np.min raise — looks impossible with the fixed jump test, but
# worth confirming on real data.
interval_min_diff = []
interval_max_diff = []
for i in range(len(inter_points) - 1):
    interval_min_diff.append(np.min(diff[math.ceil(inter_points[i]): math.floor(inter_points[i+1]):]))
    interval_max_diff.append(np.max(diff[math.ceil(inter_points[i]): math.floor(inter_points[i+1]):]))

# Outside the buffers, scale the spread by its interval extremum so the
# signal lies in [-1, 1] (positive intervals by the max, negative by the min)
for i in range(length):
    for j in range(len(inter_points) - 1):
        if signal[i] is None and math.ceil(inter_points[j]) < i < math.floor(inter_points[j + 1]):
            if interval_max_diff[j] > minDX:
                signal[i] = diff[i] / interval_max_diff[j]
            elif interval_min_diff[j] < -minDX:
                signal[i] = - diff[i] / interval_min_diff[j]
            else:
                signal[i] = 0

# Bars before the first crossing get 5, bars after the last get -5.
# BUG FIX: guard against a series with no crossings at all, which used to
# raise IndexError on inter_points[0].
if inter_points:
    for i in range(math.ceil(inter_points[0])):
        signal[i] = 5
    for i in range(math.ceil(inter_points[len(inter_points) - 1] + 1), length):
        signal[i] = -5

# Concatenate the signal column to the data frame
signal_df = pd.Series(signal)
df_merged['Signal'] = signal_df.values

# export data frame as csv file
df_merged.to_csv(output, index=False)
Loading

0 comments on commit b5c8076

Please sign in to comment.