# How to calculate the mean squared error using Python?

Published: July 31, 2019

An example of how to calculate the mean squared error (MSE) using Python in the case of a linear regression model:

$$y = \theta_1 x + \theta_0$$

### Plot the data

Let's generate a set of noisy data points scattered around the line:

$$y = 3x + 2$$

import matplotlib.pyplot as plt
import numpy as np

# Number of synthetic observations, named once so every array below stays in sync.
n_samples = 1000

# Abscissae drawn uniformly from [0, 4), stored as a column vector.
X = 4 * np.random.rand(n_samples, 1)
# Design matrix: a leading column of ones (intercept term) next to X.
X_b = np.c_[np.ones((n_samples, 1)), X]

# Targets follow y = 2 + 3x plus standard normal noise.
Y = 2 + 3 * X + np.random.randn(n_samples, 1)

# Scatter plot of the samples: one dot per (x, y) pair.
plt.plot(X, Y, '.')

# Fixed viewing window for the figure.
plt.xlim((0, 4))
plt.ylim((0, 15))

# Axis labels and title, all in a small font.
plt.xlabel(r'x', fontsize=8)
plt.ylabel(r'y', fontsize=8)
plt.title('How to caclulate the mean squared error in  python ?', fontsize=8)

# Save to disk. The figure is not closed here, so later pyplot calls keep
# drawing on the same axes.
plt.savefig("mean_squared_error_01.png", bbox_inches='tight')


### Linear model

Let's now consider the following linear model:

$$y = \theta_1 x + \theta_0$$

with $\theta_0=-1.4$ and $\theta_1=5.0$

#----- Pick one arbitrary linear model: theta is the column [theta_0, theta_1]

theta = np.array([-1.4, 5.0]).reshape(2, 1)

# Two abscissae (x = 0 and x = 4) are enough to draw a straight line.
X_new = np.array([0, 4]).reshape(2, 1)
# Prepend a column of ones so that X_new_b.dot(theta) includes the intercept.
X_new_b = np.hstack((np.ones((2, 1)), X_new))

# Draw the model as a solid line through the predictions at the abscissae in
# X_new (assuming X_new_b is X_new with a leading column of ones, as built
# above, the product is theta_0 + theta_1 * x).
plt.plot(X_new, X_new_b @ theta, '-')

# Fixed viewing window for the figure.
plt.xlim((0, 4))
plt.ylim((0, 15))

# Axis labels and title, all in a small font.
plt.xlabel(r'x', fontsize=8)
plt.ylabel(r'y', fontsize=8)
plt.title('How to caclulate the mean squared error in  python ?', fontsize=8)

plt.savefig("mean_squared_error_02.png", bbox_inches='tight')

# Close the current figure so the next plot starts from an empty one.
plt.close()


### Calculate the mean squared error

The mean squared error can then be calculated in Python:

$$\mathrm{MSE} = \frac{1}{m} \sum_{i=1}^{m}\left(\theta^T \mathbf{x}^{(i)} - y^{(i)}\right)^2$$

# Model predictions for every sample (each row of X_b times theta).
Y_predict = X_b.dot(theta)

# Sanity check on the array shapes involved in the product.
print(Y_predict.shape, X_b.shape, theta.shape)

# Mean of the squared residuals. np.mean divides by the actual number of
# samples, so the result stays correct if the data set size changes.
mse = np.mean((Y_predict - Y) ** 2)

print('mse: ', mse)


Another solution is to use the `mean_squared_error` function from the Python module scikit-learn (`sklearn`):

from sklearn.metrics import mean_squared_error

# Cross-check the hand-written value against scikit-learn's implementation.
print('mse (sklearn): ', mean_squared_error(y_true=Y, y_pred=Y_predict))


which returns, for example:

mse:  6.75308540424
mse (sklearn):  6.75308540424


### Calculate the mean squared error for an ensemble of linear models

An example of how to calculate the mean squared error for a whole family of linear models at once (grid search over $\theta_0$ and $\theta_1$):

#----- Calculate the mse using a grid search

# Candidate parameters 0.0, 0.1, ..., 9.9 for both theta_0 and theta_1.
# With meshgrid's default indexing, theta_0 varies along the columns and
# theta_1 along the rows of the resulting 2-D grids.
theta_0, theta_1 = np.meshgrid(np.arange(0, 10, 0.1), np.arange(0, 10, 0.1))

# One column per candidate model: first row = theta_0, second row = theta_1.
theta = np.vstack((theta_0.ravel(), theta_1.ravel()))

# Column j holds the predictions of candidate model j for every sample.
Y_predict = X_b @ theta

# Per-model mean of the squared residuals (Y broadcasts across the columns).
# Averaging over axis 0 avoids hard-coding the number of samples.
mse = np.mean((Y_predict - Y) ** 2, axis=0)

# Back to the grid layout: mse[i, j] belongs to (theta_0[i, j], theta_1[i, j]).
# Taking the shape from the grid avoids hard-coding its size.
mse = mse.reshape(theta_0.shape)

from matplotlib.colors import LogNorm
from pylab import figure, cm

# Heat map of the error surface on a logarithmic colour scale. With
# origin='lower', row 0 is drawn at the bottom, so the x axis is theta_0
# and the y axis is theta_1.
plt.imshow(mse, origin='lower', norm=LogNorm(), extent=[0,10,0,10], cmap=cm.jet)

plt.title('How to caclulate the mean squared error in  python ?',fontsize=8)

plt.xlabel(r'$\theta_0$',fontsize=8)
plt.ylabel(r'$\theta_1$',fontsize=8)

plt.savefig("mean_squared_error_03.png", bbox_inches='tight')

#plt.show()

# Close the current figure so the next plot starts from an empty one.
plt.close()


One can see that the linear model that minimizes the mean squared error is around $\theta_0=2$ and $\theta_1=3$.

We can also plot the variation of the MSE versus $\theta_1$ for a given $\theta_0$ (here $\theta_0 = 2$, which is column 20 of the grid since the step is 0.1):

# Plot column 20 of the error grid against its row index (with the 0.1 grid
# step used in the grid search, that column is theta_0 = 2.0 and the rows
# run over theta_1).
plt.plot(mse[:, 20])

plt.title('How to caclulate the mean squared error in  python ?', fontsize=8)

plt.xlabel(r'$\theta_1$', fontsize=8)
plt.ylabel(r'mean square error', fontsize=8)

# Put a tick every 10 grid rows and label it 0..9, so the axis reads as a
# theta_1 value rather than as a row index.
positions = list(range(0, 100, 10))
labels = list(range(10))
plt.xticks(positions, labels)

plt.grid(linestyle='--')

plt.savefig("mean_squared_error_04.png", bbox_inches='tight')

#plt.show()


### Source code

import matplotlib.pyplot as plt
import numpy as np

# Number of synthetic observations, named once so every array below stays in sync.
n_samples = 1000

# Abscissae drawn uniformly from [0, 4), stored as a column vector.
X = 4 * np.random.rand(n_samples, 1)
# Design matrix: a leading column of ones (intercept term) next to X.
X_b = np.c_[np.ones((n_samples, 1)), X]

# Targets follow y = 2 + 3x plus standard normal noise.
Y = 2 + 3 * X + np.random.randn(n_samples, 1)

# Scatter plot of the samples: one dot per (x, y) pair.
plt.plot(X, Y, '.')

# Fixed viewing window for the figure.
plt.xlim((0, 4))
plt.ylim((0, 15))

# Axis labels and title, all in a small font.
plt.xlabel(r'x', fontsize=8)
plt.ylabel(r'y', fontsize=8)
plt.title('How to caclulate the mean squared error in  python ?', fontsize=8)

# Save to disk. The figure is not closed here, so later pyplot calls keep
# drawing on the same axes.
plt.savefig("mean_squared_error_01.png", bbox_inches='tight')

#----- Pick one arbitrary linear model: theta is the column [theta_0, theta_1]

theta = np.array([-1.4, 5.0]).reshape(2, 1)

# Two abscissae (x = 0 and x = 4) are enough to draw a straight line.
X_new = np.array([0, 4]).reshape(2, 1)
# Prepend a column of ones so that X_new_b.dot(theta) includes the intercept.
X_new_b = np.hstack((np.ones((2, 1)), X_new))

# Draw the model as a solid line through the predictions at the abscissae in
# X_new (assuming X_new_b is X_new with a leading column of ones, as built
# above, the product is theta_0 + theta_1 * x).
plt.plot(X_new, X_new_b @ theta, '-')

# Fixed viewing window for the figure.
plt.xlim((0, 4))
plt.ylim((0, 15))

# Axis labels and title, all in a small font.
plt.xlabel(r'x', fontsize=8)
plt.ylabel(r'y', fontsize=8)
plt.title('How to caclulate the mean squared error in  python ?', fontsize=8)

plt.savefig("mean_squared_error_02.png", bbox_inches='tight')

# Close the current figure so the next plot starts from an empty one.
plt.close()

#----- using python

# Model predictions for every sample (each row of X_b times theta).
Y_predict = X_b.dot(theta)

# Sanity check on the array shapes involved in the product.
print(Y_predict.shape, X_b.shape, theta.shape)

# Mean of the squared residuals. np.mean divides by the actual number of
# samples, so the result stays correct if the data set size changes.
mse = np.mean((Y_predict - Y) ** 2)

print('mse: ', mse)

#----- using sklearn

from sklearn.metrics import mean_squared_error

# Cross-check the hand-written value against scikit-learn's implementation.
print('mse (sklearn): ', mean_squared_error(y_true=Y, y_pred=Y_predict))

#----- Calculate the mse using a grid search

# Candidate parameters 0.0, 0.1, ..., 9.9 for both theta_0 and theta_1.
# With meshgrid's default indexing, theta_0 varies along the columns and
# theta_1 along the rows of the resulting 2-D grids.
theta_0, theta_1 = np.meshgrid(np.arange(0, 10, 0.1), np.arange(0, 10, 0.1))

# One column per candidate model: first row = theta_0, second row = theta_1.
theta = np.vstack((theta_0.ravel(), theta_1.ravel()))

# Column j holds the predictions of candidate model j for every sample.
Y_predict = X_b @ theta

# Per-model mean of the squared residuals (Y broadcasts across the columns).
# Averaging over axis 0 avoids hard-coding the number of samples.
mse = np.mean((Y_predict - Y) ** 2, axis=0)

# Back to the grid layout: mse[i, j] belongs to (theta_0[i, j], theta_1[i, j]).
# Taking the shape from the grid avoids hard-coding its size.
mse = mse.reshape(theta_0.shape)

from matplotlib.colors import LogNorm
from pylab import figure, cm

# Heat map of the error surface on a logarithmic colour scale. With
# origin='lower', row 0 is drawn at the bottom, so the x axis is theta_0
# and the y axis is theta_1.
plt.imshow(mse, origin='lower', norm=LogNorm(), extent=[0,10,0,10], cmap=cm.jet)

plt.title('How to caclulate the mean squared error in  python ?',fontsize=8)

plt.xlabel(r'$\theta_0$',fontsize=8)
plt.ylabel(r'$\theta_1$',fontsize=8)

plt.savefig("mean_squared_error_03.png", bbox_inches='tight')

#plt.show()

# Close the current figure so the next plot starts from an empty one.
plt.close()

#----- plot the mse versus theta_1 for a given theta_0

# Plot column 20 of the error grid against its row index (with the 0.1 grid
# step used in the grid search, that column is theta_0 = 2.0 and the rows
# run over theta_1).
plt.plot(mse[:, 20])

plt.title('How to caclulate the mean squared error in  python ?', fontsize=8)

plt.xlabel(r'$\theta_1$', fontsize=8)
plt.ylabel(r'mean square error', fontsize=8)

# Put a tick every 10 grid rows and label it 0..9, so the axis reads as a
# theta_1 value rather than as a row index.
positions = list(range(0, 100, 10))
labels = list(range(10))
plt.xticks(positions, labels)

plt.grid(linestyle='--')

plt.savefig("mean_squared_error_04.png", bbox_inches='tight')

#plt.show()


Image

of