Jan-18-2021, 10:53 AM
This version runs and produces output, but I have not checked its accuracy. You forgot to specify the delimiter, so pandas assumed a comma; the file is actually tab-separated.
I printed out the data frame, you can remove that statement.
My Python install is missing a module (lzma), which is why the warning at the top of the output appears.
I don't think this will show up on your app:
I printed out the data frame, you can remove that statement.
My Python install is missing a module (lzma), which is why the warning at the top of the output appears.
I don't think this will show up on your app:
"""Walk-forward XGBoost forecast of the next ``Close`` value.

Run as a script, this reads the tab-separated ``LTCUSDHour.csv`` that sits
next to the file, builds a one-step-ahead target from the ``Close`` column,
and prints the RMSE of a walk-forward validation over the last 20% of rows.
"""
# from IPython.core.debugger import set_trace
import os
import time  # not used below; kept because the original script imports it

import numpy as np
import pandas as pd

CSV_NAME = "LTCUSDHour.csv"
TEST_FRACTION = 0.2
N_ESTIMATORS = 1000


def make_supervised(df):
    """Return a two-column frame: ``Close`` and ``Target`` (the next Close).

    Rows containing NaN are dropped; this always removes the last row,
    because it has no following value to use as its target.
    """
    out = df[["Close"]].copy()
    out["Target"] = out.Close.shift(-1)
    return out.dropna()


def train_test_split(data, perc):
    """Split *data* chronologically, without shuffling.

    Args:
        data: DataFrame whose ``.values`` array is split row-wise.
        perc: Fraction of rows (0..1) reserved for the test part.

    Returns:
        ``(train, test)`` NumPy arrays; ``test`` holds the trailing rows.
    """
    values = data.values
    n = int(len(values) * (1 - perc))
    return values[:n], values[n:]


def xgb_predict(train, val):
    """Fit a fresh XGBRegressor on *train* and predict a single sample.

    Args:
        train: 2-D array-like (or list of rows); the last column is the
            target and every other column is a feature.
        val: Feature value(s) of the one sample to predict; a scalar or a
            1-D sequence, reshaped to a single row.

    Returns:
        The scalar prediction for *val*.
    """
    # Deferred import: keeps the pure-numpy helpers importable without xgboost.
    from xgboost import XGBRegressor

    train = np.asarray(train)
    X, Y = train[:, :-1], train[:, -1]
    model = XGBRegressor(objective="reg:squarederror", n_estimators=N_ESTIMATORS)
    model.fit(X, Y)
    sample = np.asarray(val).reshape(1, -1)
    return model.predict(sample)[0]


def validate(data, perc):
    """Walk-forward validation: refit before every test row.

    For each test row the model is trained on all rows seen so far; after
    the prediction the row itself is appended to the training history.

    Args:
        data: DataFrame whose last column is the target.
        perc: Fraction of rows used as the test part.

    Returns:
        ``(rmse, actual_targets, predictions)``.
    """
    from sklearn.metrics import mean_squared_error

    train, test = train_test_split(data, perc)
    history = list(train)
    predictions = []
    for row in test:
        # Pass the whole feature slice so more than one feature column works;
        # for the single-feature case this equals the original ``test_X[0]``.
        predictions.append(xgb_predict(history, row[:-1]))
        history.append(row)

    actual = test[:, -1]
    # sqrt(MSE) instead of ``squared=False``: that keyword is deprecated in
    # recent scikit-learn releases, while this form works on old and new.
    error = np.sqrt(mean_squared_error(actual, predictions))
    return error, actual, predictions


def main():
    """Load the CSV, print the frame and the walk-forward RMSE."""
    import matplotlib.pyplot as plt

    # Assert starting directory is script directory
    os.chdir(os.path.abspath(os.path.dirname(__file__)))
    # NOTE(review): newer matplotlib may expose this style under a different
    # name (e.g. "seaborn-v0_8") - confirm against the installed version.
    plt.style.use(style="seaborn")

    df = pd.read_csv(CSV_NAME, sep="\t")
    print(df)  # debug output; safe to remove

    rmse, Y, pred = validate(make_supervised(df), TEST_FRACTION)
    print(rmse)


if __name__ == "__main__":
    main()
Output: UserWarning: Could not import the lzma module. Your installed Python is incomplete. Attempting to use lzma compression will result in a RuntimeError.
Local time Open High Low Close Volume
0 04.01.2021,00:00:00.000 GMT+0700 151 155 149 153 1.4649 NaN
1 04.01.2021 01:00:00.000 GMT+0700 153 154 150 152.0000 1.272900e+00
2 04.01.2021 02:00:00.000 GMT+0700 152 153 150 152.0000 1.008200e+00
3 04.01.2021 03:00:00.000 GMT+0700 152 157 152 155.0000 1.346800e+00
4 04.01.2021 04:00:00.000 GMT+0700 155 158 155 157.0000 9.463000e-01
.. ... ... ... ... ... ...
305 16.01.2021 17:00:00.000 GMT+0700 147 149 146 148.0000 1.383900e-06
306 16.01.2021 18:00:00.000 GMT+0700 148 151 147 150.0000 6.740000e-07
307 16.01.2021 19:00:00.000 GMT+0700 150 153 147 150.0000 1.301100e-06
308 16.01.2021 20:00:00.000 GMT+0700 150 152 148 149.0000 1.032900e-06
309 16.01.2021 21:00:00.000 GMT+0700 149 149 147 149.0000 8.830000e-07
[310 rows x 6 columns]
/media/captainkirk/Data-3TB/projects/T-Z/T/TryStuff/venv/lib/python3.8/site-packages/xgboost/data.py:104: UserWarning: Use subset (sliced data) of np.ndarray is not recommended because it will generate extra copies and increase memory consumption
warnings.warn(
2.7550179373488026