Example of how to homogenize with the latitude a MODIS-2B-CLDCLASS-lidar random sample
Table of contents
Example for a month of data
input files can be downloaded here:
\begin{equation}
P(obs, bin) = \frac{\textrm{n_obs_bin}}{\textrm{n_obs_tot}}
\end{equation}
\begin{equation}
P(cloudy, bin) = \frac{\textrm{n_cloudy_bin}}{\textrm{n_cloudy_tot}}
\end{equation}
\begin{equation}
P(cloudy | bin) = \frac{\textrm{n_cloudy_bin}}{\textrm{n_obs_bin}}
\end{equation}
\begin{equation}
\textrm{n_cloudy_bin} = P(cloudy | bin) * P(obs, bin) * \textrm{n_obs_tot}
\end{equation}
\begin{equation}
P(cloudy, bin) = \frac{ P(cloudy | bin) * P(obs, bin) * \textrm{n_obs_tot} }{ \textrm{n_cloudy_tot} }
\end{equation}
\begin{equation}
\textrm{n_cloudy_bin} = P(cloudy, bin) * \textrm{n_cloudy_tot}
\end{equation}
#!/usr/bin/env pythonfrom scipy.stats.kde import gaussian_kdefrom random import uniformfrom random import randintimport matplotlib.pyplot as pltimport numpy as npimport matplotlib.patches as mpatchesfrom statistics import meanfig_count = 0fig_ylim_max = 2000path_to_media = "/Users/bmarcha1/Desktop/media/files/"#----------------------------------------------------------------------------------------## Sample 1input_filename = path_to_media + 'monthly_modis_cldclass_lidar/2008_01_sample_00_modis_caliop_cldclass_lidar.txt'data = np.loadtxt(input_filename)data = data[~np.all(data == 0, axis=1)]nb_tot_obs = len(data[:,0])#----------------------------------------------------------------------------------------## Sample 1: plot zonal observationsbins = np.linspace(-90, 90, 180)hist_counts_list, hist_x_bins_list, patches = plt.hist(data[:,0], bins=bins, color="blue", alpha=0.4)plt.xlim(-90.0,90.0)plt.ylim(0,fig_ylim_max)plt.grid()plt.title('zonal observations',fontsize=10)plt.xlabel('Latitudes',fontsize=8)plt.ylabel('Counts',fontsize=8)fig_count = fig_count + 1filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'plt.savefig(filename, dpi=100, bbox_inches='tight' )#plt.show()plt.close()#----------------------------------------------------------------------------------------## Sample 1: homogeneization (step 1)data_obs = data[~np.all(data == 0, axis=1)]data_cloudy = data_obs[data_obs[:,3] < 2]nb_tot_obs = len(data_obs[:,0])nb_tot_cloudy = len(data_cloudy[:,0])print('nb_tot_obs: ',nb_tot_obs)print('nb_tot_cloudy: ',nb_tot_cloudy)bins = np.linspace(-90, 90, 180)n_obs, bins_obs, patches_obs = plt.hist(data[:,0], bins=bins, color="#89bedc", alpha=1.0)n_cloudy, bins_cloudy, patches_cloudy = plt.hist(data_cloudy[:,0], bins=bins, color="#0b559f", alpha=1.0)plt.xlim(-90.0,90.0)plt.ylim(0,fig_ylim_max)plt.grid()plt.title('Jamuary 2008 colocated MODIS-2B-CLDCLASS-lidar sample \n (a) before sample homogenization',fontsize=10)plt.xlabel('Latitudes', fontsize=8)plt.ylabel('Counts', fontsize=8)plt.xticks(fontsize=8)plt.yticks(fontsize=8)pop_a = mpatches.Patch(color='#89bedc', label='Nb. of Observations')pop_b = mpatches.Patch(color='#0b559f', label='Nb. of cloudy pixels')plt.legend(handles=[pop_a,pop_b])fig_count = fig_count + 1filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'plt.savefig(filename, dpi=100, bbox_inches='tight' )#plt.show()plt.close()prob_obs_bin = []prob_cloudy_given_bin = []for i in range(len(n_obs)):if n_obs[i] > 0.0:prob_obs_bin.append(1.0*n_obs[i]/nb_tot_obs)prob_cloudy_given_bin.append(n_cloudy[i]/n_obs[i])else:prob_obs_bin.append(0)prob_cloudy_given_bin.append(0)prob_cloudy_bin = [prob_cloudy_given_bin[i]*prob_obs_bin[i]*nb_tot_obs/nb_tot_cloudy for i in range(len(prob_obs_bin))]#----------------------------------------------------------------------------------------## Sample 1: homogeneization (step 2)plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin], color="royalblue")plt.xlim(-90.0,90.0)plt.ylim(0,fig_ylim_max)plt.grid()fig_count = fig_count + 1filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'plt.savefig(filename, dpi=100, bbox_inches='tight' )plt.close()homogenization_factor = 30.0 # ]0,100[l = sorted([i for i in prob_obs_bin if i > 0.0])homogenization_threshold_idx= int( len(l) * homogenization_factor / 100.0 )homogenization_threshold = l[homogenization_threshold_idx]for idx,i in enumerate(prob_obs_bin):if prob_obs_bin[idx] > homogenization_threshold:prob_obs_bin[idx] = homogenization_thresholdplt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin], color="royalblue")plt.xlim(-90.0,90.0)plt.ylim(0,fig_ylim_max)plt.grid()fig_count = fig_count + 1filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'plt.savefig(filename, dpi=100, bbox_inches='tight' )plt.close()plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_cloudy for i in prob_cloudy_bin], color="lightcoral")plt.xlim(-90.0,90.0)plt.ylim(0,fig_ylim_max)plt.grid()fig_count = fig_count + 1filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'plt.savefig(filename, dpi=100, bbox_inches='tight' )plt.close()prob_cloudy_bin = [prob_cloudy_given_bin[i]*prob_obs_bin[i]*nb_tot_obs/nb_tot_cloudy for i in range(len(prob_obs_bin))]plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin], width=1.01, color="#89bedc")plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_cloudy for i in prob_cloudy_bin], width=1.01, color="#0b559f")plt.xlim(-90.0,90.0)plt.ylim(0,fig_ylim_max)plt.grid()plt.title('Jamuary 2008 colocated MODIS-2B-CLDCLASS-lidar sample \n (b) after sample homogenization',fontsize=10)plt.xlabel('Latitudes', fontsize=8)plt.ylabel('Counts', fontsize=8)plt.xticks(fontsize=8)plt.yticks(fontsize=8)pop_a = mpatches.Patch(color='#89bedc', label='Nb. of Observations')pop_b = mpatches.Patch(color='#0b559f', label='Nb. of cloudy pixels')plt.legend(handles=[pop_a,pop_b], loc='upper center')fig_count = fig_count + 1filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'plt.savefig(filename, dpi=100, bbox_inches='tight' )plt.close()
Example for a year of data
#!/usr/bin/env pythonfrom scipy.stats.kde import gaussian_kdefrom random import uniformfrom random import randintimport matplotlib.pyplot as pltimport numpy as npimport matplotlib.patches as mpatchesfrom statistics import meanfig_count = 0fig_ylim_max = 20000path_to_media = "/Users/bmarcha1/Desktop/media/files/"#----------------------------------------------------------------------------------------## Sample 1: concatenate monthly sampledata_1 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_01_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)data_2 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_02_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)data_3 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_03_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)data_4 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_04_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)data_5 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_05_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)data_6 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_06_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)data_7 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_07_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)data_8 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_08_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)data_9 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_09_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)data_10 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_10_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)data_11 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_11_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)data_12 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_12_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)data = np.concatenate((data_1,data_2,data_3,data_4,data_5,data_6,data_7,data_8,data_9,data_10,data_11,data_12), axis=0)data = data[~np.all(data == 0, axis=1)]#----------------------------------------------------------------------------------------## Sample 1: plot zonal observationsnb_tot_obs = len(data[:,0])bins = np.linspace(-90, 90, 180)hist_counts_list, hist_x_bins_list, patches = plt.hist(data[:,0], bins=bins, color="blue", alpha=0.4)plt.title('')plt.xlabel('Latitudes')plt.ylabel('Counts')plt.xlim(-90.0,90.0)plt.ylim(0,fig_ylim_max)plt.grid()fig_count = fig_count + 1filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'plt.savefig(filename, dpi=100, bbox_inches='tight' )#plt.show()plt.close()#----------------------------------------------------------------------------------------## Sample 1: homogeneization (step 1)data_obs = data[~np.all(data == 0, axis=1)]data_cloudy = data_obs[data_obs[:,3] < 2]nb_tot_obs = len(data_obs[:,0])nb_tot_cloudy = len(data_cloudy[:,0])print('nb_tot_obs: ',nb_tot_obs)print('nb_tot_cloudy: ',nb_tot_cloudy)bins = np.linspace(-90, 90, 180)n_obs, bins_obs, patches_obs = plt.hist(data[:,0], bins=bins, color="#89bedc", alpha=1.0)n_cloudy, bins_cloudy, patches_cloudy = plt.hist(data_cloudy[:,0], bins=bins, color="#0b559f", alpha=1.0)plt.xlim(-90.0,90.0)plt.ylim(0,fig_ylim_max)plt.grid()plt.title('(a) before sample homogenization',fontsize=10)plt.xlabel('Latitudes', fontsize=8)plt.ylabel('Counts', fontsize=8)plt.xticks(fontsize=8)plt.yticks(fontsize=8)pop_a = mpatches.Patch(color='#89bedc', label='Nb. of Observations')pop_b = mpatches.Patch(color='#0b559f', label='Nb. of cloudy pixels')plt.legend(handles=[pop_a,pop_b])fig_count = fig_count + 1filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'plt.savefig(filename, dpi=100, bbox_inches='tight' )#plt.show()plt.close()prob_obs_bin = []prob_cloudy_given_bin = []for i in range(len(n_obs)):if n_obs[i] > 0.0:prob_obs_bin.append(1.0*n_obs[i]/nb_tot_obs)prob_cloudy_given_bin.append(n_cloudy[i]/n_obs[i])else:prob_obs_bin.append(0)prob_cloudy_given_bin.append(0)prob_cloudy_bin = [prob_cloudy_given_bin[i]*prob_obs_bin[i]*nb_tot_obs/nb_tot_cloudy for i in range(len(prob_obs_bin))]#----------------------------------------------------------------------------------------## Sample 1: homogeneization (step 2)plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin])plt.xlim(-90.0,90.0)plt.ylim(0,fig_ylim_max)plt.grid()plt.title('Random Sample Day Obs.')plt.xlabel('Latitude')plt.ylabel('Counts')fig_count = fig_count + 1filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'plt.savefig(filename, dpi=100, bbox_inches='tight' )plt.close()homogenization_factor = 10.0 # ]0,100[l = sorted([i for i in prob_obs_bin if i > 0.0])homogenization_threshold_idx= int( len(l) * homogenization_factor / 100.0 )homogenization_threshold = l[homogenization_threshold_idx]for idx,i in enumerate(prob_obs_bin):if prob_obs_bin[idx] > homogenization_threshold:prob_obs_bin[idx] = homogenization_thresholdplt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin])plt.title('Random Sample Homogeneization')plt.xlabel('Latitude')plt.ylabel('Counts')plt.xlim(-90.0,90.0)plt.ylim(0,fig_ylim_max)plt.grid()fig_count = fig_count + 1filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'plt.savefig(filename, dpi=100, bbox_inches='tight' )plt.close()plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_cloudy for i in prob_cloudy_bin])plt.title('Random Sample Day Cloudy Pixels')plt.xlabel('Latitude')plt.ylabel('Counts')plt.xlim(-90.0,90.0)plt.ylim(0,fig_ylim_max)plt.grid()fig_count = fig_count + 1filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'plt.savefig(filename, dpi=100, bbox_inches='tight' )plt.close()prob_cloudy_bin = [prob_cloudy_given_bin[i]*prob_obs_bin[i]*nb_tot_obs/nb_tot_cloudy for i in range(len(prob_obs_bin))]plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin], width=1.01, color="#89bedc")plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_cloudy for i in prob_cloudy_bin], width=1.01, color="#0b559f")plt.xlim(-90.0,90.0)plt.ylim(0,fig_ylim_max)plt.grid()plt.title('(b) after sample homogenization',fontsize=10)plt.xlabel('Latitudes', fontsize=8)plt.ylabel('Counts', fontsize=8)plt.xticks(fontsize=8)plt.yticks(fontsize=8)pop_a = mpatches.Patch(color='#89bedc', label='Nb. of Observations')pop_b = mpatches.Patch(color='#0b559f', label='Nb. of cloudy pixels')plt.legend(handles=[pop_a,pop_b], loc='upper center')fig_count = fig_count + 1filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'plt.savefig(filename, dpi=100, bbox_inches='tight' )plt.close()
