How to calculate a root mean square using python ?

Published: July 31, 2019

DMCA.com Protection Status

An example of how to calculate the mean squared error (the square of the root mean square error) using python in the case of a linear regression model:

\begin{equation}
y = \theta_1 x + \theta_0
\end{equation}

Plot the data

Let's generate a set of 1000 noisy data points scattered around the line:

\begin{equation}
y = 3x + 2
\end{equation}

import matplotlib.pyplot as plt
import numpy as np

#----- Generate noisy samples around y = 3x + 2

# 1000 abscissas drawn uniformly from [0, 4), stored as a column vector.
X = np.random.rand(1000, 1) * 4

# Design matrix: a column of ones (for the intercept) followed by X.
X_b = np.hstack((np.ones((1000, 1)), X))

# Targets: the line y = 3x + 2 perturbed by standard normal noise.
Y = 3 * X + 2 + np.random.randn(1000, 1)

#----- Scatter plot of the samples

plt.plot(X, Y, '.')

plt.xlim(left=0, right=4)
plt.ylim(bottom=0, top=15)

for set_label, text in ((plt.xlabel, r'x'), (plt.ylabel, r'y')):
    set_label(text, fontsize=8)

plt.title('How to caclulate the mean squared error in  python ?', fontsize=8)

# The figure is not closed after saving, so later plt calls draw on the
# same axes.
plt.savefig("mean_squared_error_01.png", bbox_inches='tight')

How to calculate a root mean square using python ?
How to calculate a root mean square using python ?

Linear model

Let's now consider the following linear model:

\begin{equation}
y = \theta_1 x + \theta_0
\end{equation}

with $\theta_0=-1.4$ and $\theta_1=5.0$

#----- Let's take one random linear model

# Parameters as a column vector [theta_0, theta_1]: intercept -1.4, slope 5.0.
theta = np.array([[-1.4], [5.0]])

# Two abscissas (x = 0 and x = 4) are enough to draw a straight line.
X_new = np.array([[0], [4]])
# Prepend the column of ones so the intercept is applied by the product.
X_new_b = np.hstack((np.ones((2, 1)), X_new))

# Model values at the two abscissas, joined by a solid line.
line_y = X_new_b @ theta
plt.plot(X_new, line_y, '-')

plt.xlim(left=0, right=4)
plt.ylim(bottom=0, top=15)

for set_label, text in ((plt.xlabel, r'x'), (plt.ylabel, r'y')):
    set_label(text, fontsize=8)

plt.title('How to caclulate the mean squared error in  python ?', fontsize=8)

plt.savefig("mean_squared_error_02.png", bbox_inches='tight')

# Release the current figure once it has been written to disk.
plt.close()

How to calculate a root mean square using python ?
How to calculate a root mean square using python ?

Calculate the root mean square

The mean squared error (MSE), whose square root is the root mean square error, can then be calculated in python:

\begin{equation}
mse = \frac{1}{m} \sum_{i=1}^{m}(\theta^T \cdot \mathbf{x}^{(i)}-y^{(i)})^2
\end{equation}

#----- Mean squared error of the model, computed with numpy

# Predictions of the linear model: one row per row of X_b.
Y_predict = X_b.dot( theta )

print(Y_predict.shape, X_b.shape, theta.shape)

# Average the squared residuals over however many samples there are,
# instead of dividing by a hard-coded 1000.0 that silently becomes wrong
# when the sample count changes.
mse = np.mean( (Y_predict-Y)**2 )

print('mse: ', mse)

Another solution is to use the python module sklearn:

#----- Same metric computed by scikit-learn

from sklearn.metrics import mean_squared_error

# sklearn takes the true values first, then the predictions.
mse_sklearn = mean_squared_error(Y, Y_predict)
print('mse (sklearn): ', mse_sklearn)

returns for example

mse:  6.75308540424
mse (sklearn):  6.75308540424

Calculate the root mean square for an ensemble of linear models

An example of how to calculate the mean squared error for an ensemble of linear models (grid search over $\theta_0$ and $\theta_1$):

#----- Calculate the mse using a grid search

# Candidate parameters on a regular grid: theta_0 varies along the columns
# and theta_1 along the rows of the two meshgrid arrays.
theta_0, theta_1 = np.meshgrid(np.arange(0, 10, 0.1), np.arange(0, 10, 0.1))

# One column per candidate model: row 0 holds theta_0, row 1 holds theta_1.
theta = np.vstack((theta_0.ravel(), theta_1.ravel()))

# Column j holds the predictions of candidate j for every sample.
Y_predict = X_b @ theta

# Y (a single column) is broadcast against every candidate column; averaging
# along axis 0 gives one mse per candidate without hard-coding the number
# of samples.
mse = np.mean( (Y_predict-Y)**2, axis=0 )

# Back to the grid layout; taking the shape from the meshgrid keeps this
# correct if the grid resolution is changed.
mse = mse.reshape(theta_0.shape)

from matplotlib.colors import LogNorm
from pylab import figure, cm

# Rows are theta_1 and columns are theta_0, so with origin='lower' the
# horizontal axis is theta_0 and the vertical axis is theta_1.
plt.imshow(mse, origin='lower', norm=LogNorm(), extent=[0,10,0,10], cmap=cm.jet)

plt.title('How to caclulate the mean squared error in  python ?',fontsize=8)

plt.xlabel(r'$\theta_0$',fontsize=8)
plt.ylabel(r'$\theta_1$',fontsize=8)

plt.savefig("mean_squared_error_03.png", bbox_inches='tight')

#plt.show()

plt.close()

One can see that the linear model that minimizes the mean squared error is around $\theta_0=2$ and $\theta_1=3$.

How to calculate a root mean square using python ?
How to calculate a root mean square using python ?

We can also plot the variation of the mse versus $\theta_1$ for a given $\theta_0$ (here $\theta_0=2$, i.e. column 20 of the grid):

#----- mse as a function of theta_1 for one fixed theta_0

# Column 20 of the mse grid is plotted against its row index; with the 0.1
# grid step used for the search, that column is theta_0 = 2.0.
plt.plot(mse[:, 20])

plt.title('How to caclulate the mean squared error in  python ?', fontsize=8)

for set_label, text in ((plt.xlabel, r'$\theta_1$'), (plt.ylabel, r'mean square error')):
    set_label(text, fontsize=8)

# Put a tick on every 10th row index and label it 0..9, i.e. with the
# theta_1 value for a 0.1 grid step.
positions = list(range(0, 100, 10))
labels = list(range(10))

plt.xticks(positions, labels)

plt.grid(linestyle='--')

plt.savefig("mean_squared_error_04.png", bbox_inches='tight')

#plt.show()

How to calculate a root mean square using python ?
How to calculate a root mean square using python ?

Source code

import matplotlib.pyplot as plt
import numpy as np

#----- Generate noisy samples around y = 3x + 2

# 1000 abscissas drawn uniformly from [0, 4), stored as a column vector.
X = np.random.rand(1000, 1) * 4

# Design matrix: a column of ones (for the intercept) followed by X.
X_b = np.hstack((np.ones((1000, 1)), X))

# Targets: the line y = 3x + 2 perturbed by standard normal noise.
Y = 3 * X + 2 + np.random.randn(1000, 1)

#----- Scatter plot of the samples

plt.plot(X, Y, '.')

plt.xlim(left=0, right=4)
plt.ylim(bottom=0, top=15)

for set_label, text in ((plt.xlabel, r'x'), (plt.ylabel, r'y')):
    set_label(text, fontsize=8)

plt.title('How to caclulate the mean squared error in  python ?', fontsize=8)

# The figure is not closed after saving, so later plt calls draw on the
# same axes.
plt.savefig("mean_squared_error_01.png", bbox_inches='tight')

#----- Let's take one random linear model

# Parameters as a column vector [theta_0, theta_1]: intercept -1.4, slope 5.0.
theta = np.array([[-1.4], [5.0]])

# Two abscissas (x = 0 and x = 4) are enough to draw a straight line.
X_new = np.array([[0], [4]])
# Prepend the column of ones so the intercept is applied by the product.
X_new_b = np.hstack((np.ones((2, 1)), X_new))

# Model values at the two abscissas, joined by a solid line.
line_y = X_new_b @ theta
plt.plot(X_new, line_y, '-')

plt.xlim(left=0, right=4)
plt.ylim(bottom=0, top=15)

for set_label, text in ((plt.xlabel, r'x'), (plt.ylabel, r'y')):
    set_label(text, fontsize=8)

plt.title('How to caclulate the mean squared error in  python ?', fontsize=8)

plt.savefig("mean_squared_error_02.png", bbox_inches='tight')

# Release the current figure once it has been written to disk.
plt.close()

#----- using python

# Predictions of the linear model: one row per row of X_b.
Y_predict = X_b.dot( theta )

print(Y_predict.shape, X_b.shape, theta.shape)

# Average the squared residuals over however many samples there are,
# instead of dividing by a hard-coded 1000.0 that silently becomes wrong
# when the sample count changes.
mse = np.mean( (Y_predict-Y)**2 )

print('mse: ', mse)

#----- using sklearn

from sklearn.metrics import mean_squared_error

# sklearn takes the true values first, then the predictions.
mse_sklearn = mean_squared_error(Y, Y_predict)
print('mse (sklearn): ', mse_sklearn)

#----- Calculate the mse using a grid search

# Candidate parameters on a regular grid: theta_0 varies along the columns
# and theta_1 along the rows of the two meshgrid arrays.
theta_0, theta_1 = np.meshgrid(np.arange(0, 10, 0.1), np.arange(0, 10, 0.1))

# One column per candidate model: row 0 holds theta_0, row 1 holds theta_1.
theta = np.vstack((theta_0.ravel(), theta_1.ravel()))

# Column j holds the predictions of candidate j for every sample.
Y_predict = X_b @ theta

# Y (a single column) is broadcast against every candidate column; averaging
# along axis 0 gives one mse per candidate without hard-coding the number
# of samples.
mse = np.mean( (Y_predict-Y)**2, axis=0 )

# Back to the grid layout; taking the shape from the meshgrid keeps this
# correct if the grid resolution is changed.
mse = mse.reshape(theta_0.shape)

from matplotlib.colors import LogNorm
from pylab import figure, cm

# Rows are theta_1 and columns are theta_0, so with origin='lower' the
# horizontal axis is theta_0 and the vertical axis is theta_1.
plt.imshow(mse, origin='lower', norm=LogNorm(), extent=[0,10,0,10], cmap=cm.jet)

plt.title('How to caclulate the mean squared error in  python ?',fontsize=8)

plt.xlabel(r'$\theta_0$',fontsize=8)
plt.ylabel(r'$\theta_1$',fontsize=8)

plt.savefig("mean_squared_error_03.png", bbox_inches='tight')

#plt.show()

plt.close()

#----- plot theta_1 for a given theta_0

# Column 20 of the mse grid is plotted against its row index; with the 0.1
# grid step used for the search, that column is theta_0 = 2.0.
plt.plot(mse[:, 20])

plt.title('How to caclulate the mean squared error in  python ?', fontsize=8)

for set_label, text in ((plt.xlabel, r'$\theta_1$'), (plt.ylabel, r'mean square error')):
    set_label(text, fontsize=8)

# Put a tick on every 10th row index and label it 0..9, i.e. with the
# theta_1 value for a 0.1 grid step.
positions = list(range(0, 100, 10))
labels = list(range(10))

plt.xticks(positions, labels)

plt.grid(linestyle='--')

plt.savefig("mean_squared_error_04.png", bbox_inches='tight')

#plt.show()

References

Image

of