class Predict:
    """Prepare one normalised price window per stock file and print predictions.

    On construction, every CSV found in ``<googlepath>/stock_data`` is read,
    normalised, and reduced to a single ``num_historical_days``-row window.
    ``gan_predict`` then runs each window through the restored GAN feature
    extractor and the saved XGBoost classifier and prints one line per file.
    """

    # Row offset (after normalisation and NaN removal) at which the window
    # handed to the model starts.
    _WINDOW_START = 200

    def __init__(self, num_historical_days=20, days=10, pct_change=0, gan_model=f'{googlepath}/deployed_models/gan', cnn_modle=f'{googlepath}/deployed_models/cnn', xgb_model=f'{googlepath}/deployed_models/xgb'):
        """Read and normalise every CSV in the stock data directory.

        Args:
            num_historical_days: Number of rows in each window, and the size
                of the rolling window used for normalisation.
            days: Unused in this class; kept for interface compatibility.
            pct_change: Unused in this class; kept for interface compatibility.
            gan_model: Checkpoint path restored by ``gan_predict``.
            cnn_modle: Stored on the instance but not used by this class
                (the parameter name is kept as-is for existing callers).
            xgb_model: Path of the joblib-serialised classifier loaded by
                ``gan_predict``.

        Each entry appended to ``self.data`` is a tuple
        ``(file_name, first_timestamp, window_values)``.
        """
        self.data = []
        self.num_historical_days = num_historical_days
        self.gan_model = gan_model
        self.cnn_modle = cnn_modle
        self.xgb_model = xgb_model

        # os.path.join copes with googlepath both with and without a trailing
        # separator, matching how the model path defaults are built.
        stock_dir = os.path.join(googlepath, 'stock_data')
        for name in os.listdir(stock_dir):
            file = os.path.join(stock_dir, name)
            print(file)
            df = pd.read_csv(file, index_col='timestamp', parse_dates=True)
            df = df[['open', 'high', 'low', 'close', 'volume']]

            # Normalise every row against the num_historical_days rows that
            # follow it in file order: subtract their mean and divide by their
            # (max - min) range. shift(-n) aligns the rolling statistic that
            # ends n rows later with the current row.
            rolling = df.rolling(num_historical_days)
            following_mean = rolling.mean().shift(-num_historical_days)
            following_range = (rolling.max().shift(-num_historical_days)
                               - rolling.min().shift(-num_historical_days))
            df = ((df - following_mean) / following_range).dropna()

            start = self._WINDOW_START
            window = df[start:start + num_historical_days].values
            if len(window) != num_historical_days:
                # Too little history: an incomplete window cannot be fed to
                # the model, so skip the file instead of failing later.
                print(f'skipping {name}: fewer than {start + num_historical_days} usable rows')
                continue

            # Store the first index label (the parsed timestamp), not the
            # first row: gan_predict formats this value as the date.
            self.data.append((name, df.index[0], window))

    def gan_predict(self):
        """Print ``<date> <file_name> <bool>`` for every prepared window.

        Rebuilds the GAN graph in inference mode, restores its weights from
        ``self.gan_model``, loads the classifier from ``self.xgb_model``, and
        for each entry in ``self.data`` classifies the GAN features of the
        window. Returns nothing; results are only printed.
        """
        tf.reset_default_graph()
        gan = GAN(num_features=5, num_historical_days=self.num_historical_days, generator_input_size=200, is_train=False)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, self.gan_model)
            clf = joblib.load(self.xgb_model)
            for sym, date, data in self.data:
                # The window is wrapped in a list to form a batch of one.
                features = sess.run(gan.features, feed_dict={gan.X: [data]})
                features = xgb.DMatrix(features)
                # Second score of the first (only) row, thresholded at 0.5.
                # NOTE(review): presumably the probability of the positive
                # class -- confirm against the training code.
                is_positive = clf.predict(features)[0][1] > 0.5
                # str(Timestamp) is "YYYY-MM-DD HH:MM:SS"; keep the date part.
                print('{} {} {}'.format(str(date).split(' ')[0], sym, is_positive))
# Disabled legacy snippet, kept for reference. NOTE(review): `gan_estimator`
# is not defined in the visible code, and gan_predict() has no return
# statement, so this would need rework before being re-enabled.
#predictions = np.array([x for x in gan_estimator.predict(p.gan_predict())])
#print(predictions)

# Script entry: build the predictor (reads and normalises every stock CSV),
# then print one prediction line per prepared file.
p = Predict()
p.gan_predict()