Example of how to homogenize with the latitude a MODIS-2B-CLDCLASS-lidar random sample
Table of contents
Example for a month of data
input files can be downloaded here:
\begin{equation}
P(obs, bin) = \frac{\textrm{n_obs_bin}}{\textrm{n_obs_tot}}
\end{equation}
\begin{equation}
P(cloudy, bin) = \frac{\textrm{n_cloudy_bin}}{\textrm{n_cloudy_tot}}
\end{equation}
\begin{equation}
P(cloudy | bin) = \frac{\textrm{n_cloudy_bin}}{\textrm{n_obs_bin}}
\end{equation}
\begin{equation}
\textrm{n_cloudy_bin} = P(cloudy | bin) * P(obs, bin) * \textrm{n_obs_tot}
\end{equation}
\begin{equation}
P(cloudy, bin) = \frac{ P(cloudy | bin) * P(obs, bin) * \textrm{n_obs_tot} }{ \textrm{n_cloudy_tot} }
\end{equation}
\begin{equation}
\textrm{n_cloudy_bin} = P(cloudy, bin) * \textrm{n_cloudy_tot}
\end{equation}
#!/usr/bin/env python
from scipy.stats.kde import gaussian_kde
from random import uniform
from random import randint
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as mpatches
from statistics import mean
fig_count = 0
fig_ylim_max = 2000
path_to_media = "/Users/bmarcha1/Desktop/media/files/"
#----------------------------------------------------------------------------------------#
# Sample 1
input_filename = path_to_media + 'monthly_modis_cldclass_lidar/2008_01_sample_00_modis_caliop_cldclass_lidar.txt'
data = np.loadtxt(input_filename)
data = data[~np.all(data == 0, axis=1)]
nb_tot_obs = len(data[:,0])
#----------------------------------------------------------------------------------------#
# Sample 1: plot zonal observations
bins = np.linspace(-90, 90, 180)
hist_counts_list, hist_x_bins_list, patches = plt.hist(data[:,0], bins=bins, color="blue", alpha=0.4)
plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()
plt.title('zonal observations',fontsize=10)
plt.xlabel('Latitudes',fontsize=8)
plt.ylabel('Counts',fontsize=8)
fig_count = fig_count + 1
filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
#plt.show()
plt.close()
#----------------------------------------------------------------------------------------#
# Sample 1: homogeneization (step 1)
data_obs = data[~np.all(data == 0, axis=1)]
data_cloudy = data_obs[data_obs[:,3] < 2]
nb_tot_obs = len(data_obs[:,0])
nb_tot_cloudy = len(data_cloudy[:,0])
print('nb_tot_obs: ',nb_tot_obs)
print('nb_tot_cloudy: ',nb_tot_cloudy)
bins = np.linspace(-90, 90, 180)
n_obs, bins_obs, patches_obs = plt.hist(data[:,0], bins=bins, color="#89bedc", alpha=1.0)
n_cloudy, bins_cloudy, patches_cloudy = plt.hist(data_cloudy[:,0], bins=bins, color="#0b559f", alpha=1.0)
plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()
plt.title('Jamuary 2008 colocated MODIS-2B-CLDCLASS-lidar sample \n (a) before sample homogenization',fontsize=10)
plt.xlabel('Latitudes', fontsize=8)
plt.ylabel('Counts', fontsize=8)
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
pop_a = mpatches.Patch(color='#89bedc', label='Nb. of Observations')
pop_b = mpatches.Patch(color='#0b559f', label='Nb. of cloudy pixels')
plt.legend(handles=[pop_a,pop_b])
fig_count = fig_count + 1
filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
#plt.show()
plt.close()
prob_obs_bin = []
prob_cloudy_given_bin = []
for i in range(len(n_obs)):
if n_obs[i] > 0.0:
prob_obs_bin.append(1.0*n_obs[i]/nb_tot_obs)
prob_cloudy_given_bin.append(n_cloudy[i]/n_obs[i])
else:
prob_obs_bin.append(0)
prob_cloudy_given_bin.append(0)
prob_cloudy_bin = [prob_cloudy_given_bin[i]*prob_obs_bin[i]*nb_tot_obs/nb_tot_cloudy for i in range(len(prob_obs_bin))]
#----------------------------------------------------------------------------------------#
# Sample 1: homogeneization (step 2)
plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin], color="royalblue")
plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()
fig_count = fig_count + 1
filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()
homogenization_factor = 30.0 # ]0,100[
l = sorted([i for i in prob_obs_bin if i > 0.0])
homogenization_threshold_idx= int( len(l) * homogenization_factor / 100.0 )
homogenization_threshold = l[homogenization_threshold_idx]
for idx,i in enumerate(prob_obs_bin):
if prob_obs_bin[idx] > homogenization_threshold:
prob_obs_bin[idx] = homogenization_threshold
plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin], color="royalblue")
plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()
fig_count = fig_count + 1
filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()
plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_cloudy for i in prob_cloudy_bin], color="lightcoral")
plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()
fig_count = fig_count + 1
filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()
prob_cloudy_bin = [prob_cloudy_given_bin[i]*prob_obs_bin[i]*nb_tot_obs/nb_tot_cloudy for i in range(len(prob_obs_bin))]
plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin], width=1.01, color="#89bedc")
plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_cloudy for i in prob_cloudy_bin], width=1.01, color="#0b559f")
plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()
plt.title('Jamuary 2008 colocated MODIS-2B-CLDCLASS-lidar sample \n (b) after sample homogenization',fontsize=10)
plt.xlabel('Latitudes', fontsize=8)
plt.ylabel('Counts', fontsize=8)
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
pop_a = mpatches.Patch(color='#89bedc', label='Nb. of Observations')
pop_b = mpatches.Patch(color='#0b559f', label='Nb. of cloudy pixels')
plt.legend(handles=[pop_a,pop_b], loc='upper center')
fig_count = fig_count + 1
filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()
Example for a year of data
#!/usr/bin/env python
from scipy.stats.kde import gaussian_kde
from random import uniform
from random import randint
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as mpatches
from statistics import mean
fig_count = 0
fig_ylim_max = 20000
path_to_media = "/Users/bmarcha1/Desktop/media/files/"
#----------------------------------------------------------------------------------------#
# Sample 1: concatenate monthly sample
data_1 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_01_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_2 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_02_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_3 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_03_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_4 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_04_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_5 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_05_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_6 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_06_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_7 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_07_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_8 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_08_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_9 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_09_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_10 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_10_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_11 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_11_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_12 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_12_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data = np.concatenate((data_1,data_2,data_3,data_4,data_5,data_6,
data_7,data_8,data_9,data_10,data_11,data_12), axis=0)
data = data[~np.all(data == 0, axis=1)]
#----------------------------------------------------------------------------------------#
# Sample 1: plot zonal observations
nb_tot_obs = len(data[:,0])
bins = np.linspace(-90, 90, 180)
hist_counts_list, hist_x_bins_list, patches = plt.hist(data[:,0], bins=bins, color="blue", alpha=0.4)
plt.title('')
plt.xlabel('Latitudes')
plt.ylabel('Counts')
plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()
fig_count = fig_count + 1
filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
#plt.show()
plt.close()
#----------------------------------------------------------------------------------------#
# Sample 1: homogeneization (step 1)
data_obs = data[~np.all(data == 0, axis=1)]
data_cloudy = data_obs[data_obs[:,3] < 2]
nb_tot_obs = len(data_obs[:,0])
nb_tot_cloudy = len(data_cloudy[:,0])
print('nb_tot_obs: ',nb_tot_obs)
print('nb_tot_cloudy: ',nb_tot_cloudy)
bins = np.linspace(-90, 90, 180)
n_obs, bins_obs, patches_obs = plt.hist(data[:,0], bins=bins, color="#89bedc", alpha=1.0)
n_cloudy, bins_cloudy, patches_cloudy = plt.hist(data_cloudy[:,0], bins=bins, color="#0b559f", alpha=1.0)
plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()
plt.title('(a) before sample homogenization',fontsize=10)
plt.xlabel('Latitudes', fontsize=8)
plt.ylabel('Counts', fontsize=8)
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
pop_a = mpatches.Patch(color='#89bedc', label='Nb. of Observations')
pop_b = mpatches.Patch(color='#0b559f', label='Nb. of cloudy pixels')
plt.legend(handles=[pop_a,pop_b])
fig_count = fig_count + 1
filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
#plt.show()
plt.close()
prob_obs_bin = []
prob_cloudy_given_bin = []
for i in range(len(n_obs)):
if n_obs[i] > 0.0:
prob_obs_bin.append(1.0*n_obs[i]/nb_tot_obs)
prob_cloudy_given_bin.append(n_cloudy[i]/n_obs[i])
else:
prob_obs_bin.append(0)
prob_cloudy_given_bin.append(0)
prob_cloudy_bin = [prob_cloudy_given_bin[i]*prob_obs_bin[i]*nb_tot_obs/nb_tot_cloudy for i in range(len(prob_obs_bin))]
#----------------------------------------------------------------------------------------#
# Sample 1: homogeneization (step 2)
plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin])
plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()
plt.title('Random Sample Day Obs.')
plt.xlabel('Latitude')
plt.ylabel('Counts')
fig_count = fig_count + 1
filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()
homogenization_factor = 10.0 # ]0,100[
l = sorted([i for i in prob_obs_bin if i > 0.0])
homogenization_threshold_idx= int( len(l) * homogenization_factor / 100.0 )
homogenization_threshold = l[homogenization_threshold_idx]
for idx,i in enumerate(prob_obs_bin):
if prob_obs_bin[idx] > homogenization_threshold:
prob_obs_bin[idx] = homogenization_threshold
plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin])
plt.title('Random Sample Homogeneization')
plt.xlabel('Latitude')
plt.ylabel('Counts')
plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()
fig_count = fig_count + 1
filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()
plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_cloudy for i in prob_cloudy_bin])
plt.title('Random Sample Day Cloudy Pixels')
plt.xlabel('Latitude')
plt.ylabel('Counts')
plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()
fig_count = fig_count + 1
filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()
prob_cloudy_bin = [prob_cloudy_given_bin[i]*prob_obs_bin[i]*nb_tot_obs/nb_tot_cloudy for i in range(len(prob_obs_bin))]
plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin], width=1.01, color="#89bedc")
plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_cloudy for i in prob_cloudy_bin], width=1.01, color="#0b559f")
plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()
plt.title('(b) after sample homogenization',fontsize=10)
plt.xlabel('Latitudes', fontsize=8)
plt.ylabel('Counts', fontsize=8)
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
pop_a = mpatches.Patch(color='#89bedc', label='Nb. of Observations')
pop_b = mpatches.Patch(color='#0b559f', label='Nb. of cloudy pixels')
plt.legend(handles=[pop_a,pop_b], loc='upper center')
fig_count = fig_count + 1
filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()