// This source code is subject to the terms of the Mozilla Public License 2.0 at https://mozilla.org/MPL/2.0/
// © x11joe
// Credit given to @midtownsk8rguy for original source code. I simply modified to add Pearson's R
//@version=4
study("Linear Regression Trend Channel With Pearson's R", "LRTCWPR", true, format.inherit)
period = input( 20, "Period" , input.integer, minval=3)
deviations = input( 2.0, "Deviation(s)" , input.float , minval=0.1, step=0.1)
extendType = input("Right", "Extend Method", input.string , options=["Right","None"])=="Right" ? extend.right : extend.none
periodMinusOne = period-1
Ex = 0.0, Ey = 0.0, Ex2 = 0.0, Ey2 = 0.0, Exy = 0.0, for i=0 to periodMinusOne
    closeI = nz(close[i]), Ex := Ex + i, Ey := Ey + closeI, Ex2 := Ex2 + (i * i), Ey2 := Ey2 + (closeI * closeI), Exy := Exy + (closeI * i)
ExT2 = pow(Ex,2.0) //Sum of X THEN Squared
EyT2 = pow(Ey,2.0) //Sum of Y THEN Squared
PearsonsR = (Exy - ((Ex*Ey)/period))/(sqrt(Ex2-(ExT2/period))*sqrt(Ey2-(EyT2/period)))
ExEx = Ex * Ex, slope = Ex2==ExEx ? 0.0 : (period * Exy - Ex * Ey) / (period * Ex2 - ExEx)
linearRegression = (Ey - slope * Ex) / period
intercept = linearRegression + bar_index * slope
deviation = 0.0, for i=0 to periodMinusOne
    deviation := deviation + pow(nz(close[i]) - (intercept - slope * (bar_index[i])), 2.0)
deviation := deviations * sqrt(deviation / periodMinusOne)
startingPointY = linearRegression + slope * periodMinusOne
var label pearsonsRLabel = na
label.delete(pearsonsRLabel[1])
pearsonsRLabel := label.new(bar_index,startingPointY - deviation*2,text=tostring(PearsonsR), color=color.black,style=label.style_labeldown,textcolor=color.white,size=size.large)
var line upperChannelLine = na , var line medianChannelLine = na , var line lowerChannelLine = na
line.delete(upperChannelLine[1]), line.delete(medianChannelLine[1]), line.delete(lowerChannelLine[1])
upperChannelLine := line.new(bar_index - period + 1, startingPointY + deviation, bar_index, linearRegression + deviation, xloc.bar_index, extendType, color.new(#FF0000, 0), line.style_solid , 2)
medianChannelLine := line.new(bar_index - period + 1, startingPointY , bar_index, linearRegression , xloc.bar_index, extendType, color.new(#C0C000, 0), line.style_solid , 1)
lowerChannelLine := line.new(bar_index - period + 1, startingPointY - deviation, bar_index, linearRegression - deviation, xloc.bar_index, extendType, color.new(#00FF00, 0), line.style_solid , 2)
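To sanity-check the channel math above, here is a minimal Python sketch (assuming numpy; regression_channel and its arguments are illustrative names, not part of the script) that reproduces the slope, intercept, Pearson's R and channel deviation for the most recent period closes:

import numpy as np

def regression_channel(closes, period=20, deviations=2.0):
    # closes[0] is the most recent close, mirroring Pine's close[i] indexing
    y = np.asarray(closes[:period], dtype=float)
    x = np.arange(period, dtype=float)
    n = float(period)
    Ex, Ey = x.sum(), y.sum()
    Ex2, Ey2, Exy = (x * x).sum(), (y * y).sum(), (x * y).sum()
    pearsons_r = (Exy - Ex * Ey / n) / (np.sqrt(Ex2 - Ex**2 / n) * np.sqrt(Ey2 - Ey**2 / n))
    slope = (n * Exy - Ex * Ey) / (n * Ex2 - Ex * Ex)
    base = (Ey - slope * Ex) / n          # regression value at the newest bar (x = 0)
    fitted = base + slope * x             # regression value at each bar in the window
    dev = deviations * np.sqrt(np.sum((y - fitted) ** 2) / (period - 1))
    return slope, base, pearsons_r, dev
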
- Linear regression is a method that lets us understand the relationship between dependent and independent variables.
- It predicts a continuous value, e.g. y = mx + c
    - y is the target, x is the independent feature, m is the slope, c is the intercept
- By training on many data points, the model learns the values of m and c. Given any new value of x, it can then estimate y.
- Simple linear regression = uses 1 independent variable to predict 1 dependent variable
- Multiple linear regression = uses multiple independent variables to predict 1 dependent variable
- Noise:
    - Error in prediction.
    - The error is assumed to be Gaussian, so the residuals show normal-distribution properties.
    - The larger the error, the more spread out the normal distribution (a larger sigma, i.e. standard deviation).
    - The best-fit line has the least noise.
- We define an error function over m and c and choose the line that minimizes it, giving the optimized values of m and c.
    - Error function = loss function = cost function
- Residual = distance between a datapoint and the fitted line
- Our loss function can be RSS (Residual Sum of Squares), the sum of the squared residuals; our goal is to minimize this value (see the first sketch after this list).
- Metrics
    - R-squared: the percentage of the variance in the target values explained by the features; ranges from 0 to 1
    - RMSE: Root Mean Squared Error (the average error in prediction)
    - Cross-validation: run the train-test process over multiple folds and average the scores for a more reliable R-squared.
- Regularization: penalizes large coefficients to reduce overfitting. Regressions that use regularization (see the second sketch after this list):
    - Lasso Regression
    - Ridge Regression
- Hyperparameters: variables set before training that control how the model parameters are optimized (e.g. the learning rate or the regularization strength)
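
A minimal sketch of the fitting and metric ideas above (closed-form least squares, residuals, RSS, R-squared, RMSE), assuming numpy; the data is made up for illustration:

import numpy as np

# Hypothetical data: y depends linearly on x plus Gaussian noise
rng = np.random.default_rng(0)
x = np.linspace(0, 10, 50)
y = 3.0 * x + 2.0 + rng.normal(scale=1.5, size=x.size)

# Closed-form least squares for y = m*x + c
m = np.sum((x - x.mean()) * (y - y.mean())) / np.sum((x - x.mean()) ** 2)
c = y.mean() - m * x.mean()

residuals = y - (m * x + c)              # distance between each datapoint and the fitted line
rss = np.sum(residuals ** 2)             # Residual Sum of Squares (the quantity we minimized)
tss = np.sum((y - y.mean()) ** 2)
r_squared = 1 - rss / tss                # share of the variance in y explained by x
rmse = np.sqrt(np.mean(residuals ** 2))  # average prediction error, in units of y
print(f"m={m:.2f}, c={c:.2f}, R^2={r_squared:.3f}, RMSE={rmse:.3f}")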
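
A second sketch for regularization and cross-validation, assuming scikit-learn is installed; the alpha values are arbitrary examples of a regularization hyperparameter:

import numpy as np
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import cross_val_score

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 5))                      # 5 independent features
y = X @ np.array([3.0, 0.0, -2.0, 0.0, 1.0]) + rng.normal(scale=0.5, size=100)

for name, model in [("ols", LinearRegression()),
                    ("ridge", Ridge(alpha=1.0)),   # alpha = regularization strength (a hyperparameter)
                    ("lasso", Lasso(alpha=0.1))]:
    scores = cross_val_score(model, X, y, cv=5, scoring="r2")  # 5-fold cross-validation
    print(name, "mean R^2:", scores.mean())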
# Implementation of gradient descent in linear regression
import numpy as np
import matplotlib.pyplot as plt
class Linear_Regression:
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        self.b = [0, 0]  # b[0] = intercept, b[1] = slope

    def update_coeffs(self, learning_rate):
        # One gradient-descent step on the mean-squared-error cost
        Y_pred = self.predict()
        Y = self.Y
        m = len(Y)
        self.b[0] = self.b[0] - (learning_rate * ((1/m) *
                                 np.sum(Y_pred - Y)))
        self.b[1] = self.b[1] - (learning_rate * ((1/m) *
                                 np.sum((Y_pred - Y) * self.X)))

    def predict(self, X=None):
        Y_pred = np.array([])
        if X is None:  # default to the training inputs
            X = self.X
        b = self.b
        for x in X:
            Y_pred = np.append(Y_pred, b[0] + (b[1] * x))
        return Y_pred

    def get_current_accuracy(self, Y_pred):
        # 1 minus the mean relative error, skipping points where the true value is 0
        p, e = Y_pred, self.Y
        n = len(Y_pred)
        return 1 - sum(
            [abs(p[i] - e[i]) / e[i]
             for i in range(n)
             if e[i] != 0]
        ) / n

    def compute_cost(self, Y_pred):
        # Mean squared error cost: J = (1 / (2m)) * sum((Y_pred - Y)^2)
        m = len(self.Y)
        J = (1 / (2 * m)) * np.sum((Y_pred - self.Y) ** 2)
        return J

    def plot_best_fit(self, Y_pred, fig):
        f = plt.figure(fig)
        plt.scatter(self.X, self.Y, color='b')
        plt.plot(self.X, Y_pred, color='g')
        f.show()
def main():
    X = np.array([i for i in range(11)])
    Y = np.array([2*i for i in range(11)])

    regressor = Linear_Regression(X, Y)

    iterations = 0
    steps = 100
    learning_rate = 0.01
    costs = []

    # Original best-fit line (before any training)
    Y_pred = regressor.predict()
    regressor.plot_best_fit(Y_pred, 'Initial Best Fit Line')

    while True:
        Y_pred = regressor.predict()
        cost = regressor.compute_cost(Y_pred)
        costs.append(cost)
        regressor.update_coeffs(learning_rate)

        iterations += 1
        if iterations % steps == 0:
            print(iterations, "epochs elapsed")
            print("Current accuracy is:",
                  regressor.get_current_accuracy(Y_pred))

            stop = input("Do you want to stop (y/*)?? ")
            if stop == "y":
                break

    # Final best-fit line after training
    regressor.plot_best_fit(Y_pred, 'Final Best Fit Line')

    # Plot to verify that the cost function decreases
    h = plt.figure('Verification')
    plt.plot(range(iterations), costs, color='b')
    h.show()

    # If the user wants to predict using the trained regressor:
    print(regressor.predict([i for i in range(10)]))


if __name__ == '__main__':
    main()