# How to homogenize with the latitude a MODIS-2B-CLDCLASS-lidar sample ?

Published: March 18, 2019

Example of how to homogenize with the latitude a MODIS-2B-CLDCLASS-lidar random sample

### Example for a month of data

P(obs, bin) = \frac{\textrm{n_obs_bin}}{\textrm{n_obs_tot}}

P(cloudy, bin) = \frac{\textrm{n_cloudy_bin}}{\textrm{n_cloudy_tot}}

P(cloudy | bin) = \frac{\textrm{n_cloudy_bin}}{\textrm{n_obs_bin}}

\textrm{n_cloudy_bin} = P(cloudy | bin) * P(obs, bin) * \textrm{n_obs_tot}

P(cloudy, bin) = \frac{ P(cloudy | bin) * P(obs, bin) * \textrm{n_obs_tot} }{ \textrm{n_cloudy_tot} }

\textrm{n_cloudy_bin} = P(cloudy, bin) * \textrm{n_cloudy_tot}

#!/usr/bin/env python

from scipy.stats.kde import gaussian_kde

from random import uniform
from random import randint

import matplotlib.pyplot as plt
import numpy as np

import matplotlib.patches as mpatches

from statistics import mean

fig_count = 0

fig_ylim_max = 2000

path_to_media = "/Users/bmarcha1/Desktop/media/files/"

#----------------------------------------------------------------------------------------#
# Sample 1

input_filename = path_to_media + 'monthly_modis_cldclass_lidar/2008_01_sample_00_modis_caliop_cldclass_lidar.txt'

data = np.loadtxt(input_filename)

data = data[~np.all(data == 0, axis=1)]

nb_tot_obs = len(data[:,0])

#----------------------------------------------------------------------------------------#
# Sample 1: plot zonal observations

bins = np.linspace(-90, 90, 180)

hist_counts_list, hist_x_bins_list, patches = plt.hist(data[:,0], bins=bins, color="blue", alpha=0.4)

plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()

plt.title('zonal observations',fontsize=10)

plt.xlabel('Latitudes',fontsize=8)
plt.ylabel('Counts',fontsize=8)

fig_count = fig_count + 1
filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
#plt.show()
plt.close()

#----------------------------------------------------------------------------------------#
# Sample 1: homogeneization (step 1)

data_obs = data[~np.all(data == 0, axis=1)]
data_cloudy = data_obs[data_obs[:,3] < 2]

nb_tot_obs = len(data_obs[:,0])
nb_tot_cloudy = len(data_cloudy[:,0])

print('nb_tot_obs: ',nb_tot_obs)
print('nb_tot_cloudy: ',nb_tot_cloudy)

bins = np.linspace(-90, 90, 180)

n_obs, bins_obs, patches_obs = plt.hist(data[:,0], bins=bins, color="#89bedc", alpha=1.0)
n_cloudy, bins_cloudy, patches_cloudy = plt.hist(data_cloudy[:,0], bins=bins, color="#0b559f", alpha=1.0)

plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()

plt.title('Jamuary 2008 colocated MODIS-2B-CLDCLASS-lidar sample \n (a) before sample homogenization',fontsize=10)

plt.xlabel('Latitudes', fontsize=8)
plt.ylabel('Counts', fontsize=8)

plt.xticks(fontsize=8)
plt.yticks(fontsize=8)

pop_a = mpatches.Patch(color='#89bedc', label='Nb. of Observations')
pop_b = mpatches.Patch(color='#0b559f', label='Nb. of cloudy pixels')

plt.legend(handles=[pop_a,pop_b])

fig_count = fig_count + 1
filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
#plt.show()
plt.close()

prob_obs_bin = []
prob_cloudy_given_bin = []

for i in range(len(n_obs)):
    if n_obs[i] > 0.0:
        prob_obs_bin.append(1.0*n_obs[i]/nb_tot_obs)
        prob_cloudy_given_bin.append(n_cloudy[i]/n_obs[i])
    else:
        prob_obs_bin.append(0)
        prob_cloudy_given_bin.append(0)

prob_cloudy_bin = [prob_cloudy_given_bin[i]*prob_obs_bin[i]*nb_tot_obs/nb_tot_cloudy for i in range(len(prob_obs_bin))]

#----------------------------------------------------------------------------------------#
# Sample 1: homogeneization (step 2)

plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin], color="royalblue")

plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()

fig_count = fig_count + 1
filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()

homogenization_factor = 30.0 # ]0,100[
l = sorted([i for i in prob_obs_bin if i > 0.0])

homogenization_threshold_idx= int( len(l) * homogenization_factor / 100.0 )
homogenization_threshold = l[homogenization_threshold_idx]

for idx,i in enumerate(prob_obs_bin):
    if prob_obs_bin[idx] > homogenization_threshold:
        prob_obs_bin[idx] = homogenization_threshold

plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin], color="royalblue")

plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()

fig_count = fig_count + 1
filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()

plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_cloudy for i in prob_cloudy_bin], color="lightcoral")

plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()

fig_count = fig_count + 1
filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()

prob_cloudy_bin = [prob_cloudy_given_bin[i]*prob_obs_bin[i]*nb_tot_obs/nb_tot_cloudy for i in range(len(prob_obs_bin))]

plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin], width=1.01, color="#89bedc")
plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_cloudy for i in prob_cloudy_bin], width=1.01, color="#0b559f")

plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()

plt.title('Jamuary 2008 colocated MODIS-2B-CLDCLASS-lidar sample \n (b) after sample homogenization',fontsize=10)

plt.xlabel('Latitudes', fontsize=8)
plt.ylabel('Counts', fontsize=8)

plt.xticks(fontsize=8)
plt.yticks(fontsize=8)

pop_a = mpatches.Patch(color='#89bedc', label='Nb. of Observations')
pop_b = mpatches.Patch(color='#0b559f', label='Nb. of cloudy pixels')

plt.legend(handles=[pop_a,pop_b], loc='upper center')

fig_count = fig_count + 1
filename ='2008_01_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()


### Example for a year of data

#!/usr/bin/env python

from scipy.stats.kde import gaussian_kde

from random import uniform
from random import randint

import matplotlib.pyplot as plt
import numpy as np

import matplotlib.patches as mpatches

from statistics import mean

fig_count = 0

fig_ylim_max = 20000

path_to_media = "/Users/bmarcha1/Desktop/media/files/"

#----------------------------------------------------------------------------------------#
# Sample 1: concatenate monthly sample

data_1 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_01_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_2 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_02_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_3 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_03_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_4 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_04_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_5 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_05_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_6 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_06_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_7 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_07_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_8 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_08_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_9 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_09_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_10 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_10_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_11 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_11_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)
data_12 = np.loadtxt(path_to_media+'monthly_modis_cldclass_lidar/2008_12_sample_00_modis_caliop_cldclass_lidar.txt', skiprows=1)

data = np.concatenate((data_1,data_2,data_3,data_4,data_5,data_6,
data_7,data_8,data_9,data_10,data_11,data_12), axis=0)

data = data[~np.all(data == 0, axis=1)]

#----------------------------------------------------------------------------------------#
# Sample 1: plot zonal observations

nb_tot_obs = len(data[:,0])

bins = np.linspace(-90, 90, 180)

hist_counts_list, hist_x_bins_list, patches = plt.hist(data[:,0], bins=bins, color="blue", alpha=0.4)

plt.title('')
plt.xlabel('Latitudes')
plt.ylabel('Counts')

plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()

fig_count = fig_count + 1
filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
#plt.show()
plt.close()

#----------------------------------------------------------------------------------------#
# Sample 1: homogeneization (step 1)

data_obs = data[~np.all(data == 0, axis=1)]
data_cloudy = data_obs[data_obs[:,3] < 2]

nb_tot_obs = len(data_obs[:,0])
nb_tot_cloudy = len(data_cloudy[:,0])

print('nb_tot_obs: ',nb_tot_obs)
print('nb_tot_cloudy: ',nb_tot_cloudy)

bins = np.linspace(-90, 90, 180)

n_obs, bins_obs, patches_obs = plt.hist(data[:,0], bins=bins, color="#89bedc", alpha=1.0)
n_cloudy, bins_cloudy, patches_cloudy = plt.hist(data_cloudy[:,0], bins=bins, color="#0b559f", alpha=1.0)

plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()

plt.title('(a) before sample homogenization',fontsize=10)

plt.xlabel('Latitudes', fontsize=8)
plt.ylabel('Counts', fontsize=8)

plt.xticks(fontsize=8)
plt.yticks(fontsize=8)

pop_a = mpatches.Patch(color='#89bedc', label='Nb. of Observations')
pop_b = mpatches.Patch(color='#0b559f', label='Nb. of cloudy pixels')

plt.legend(handles=[pop_a,pop_b])

fig_count = fig_count + 1
filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
#plt.show()
plt.close()

prob_obs_bin = []
prob_cloudy_given_bin = []

for i in range(len(n_obs)):
    if n_obs[i] > 0.0:
        prob_obs_bin.append(1.0*n_obs[i]/nb_tot_obs)
        prob_cloudy_given_bin.append(n_cloudy[i]/n_obs[i])
    else:
        prob_obs_bin.append(0)
        prob_cloudy_given_bin.append(0)

prob_cloudy_bin = [prob_cloudy_given_bin[i]*prob_obs_bin[i]*nb_tot_obs/nb_tot_cloudy for i in range(len(prob_obs_bin))]

#----------------------------------------------------------------------------------------#
# Sample 1: homogeneization (step 2)

plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin])

plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()

plt.title('Random Sample Day Obs.') 
plt.xlabel('Latitude') 
plt.ylabel('Counts')

fig_count = fig_count + 1
filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()

homogenization_factor = 10.0 # ]0,100[
l = sorted([i for i in prob_obs_bin if i > 0.0])

homogenization_threshold_idx= int( len(l) * homogenization_factor / 100.0 )
homogenization_threshold = l[homogenization_threshold_idx]

for idx,i in enumerate(prob_obs_bin):
    if prob_obs_bin[idx] > homogenization_threshold:
        prob_obs_bin[idx] = homogenization_threshold

plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin])

plt.title('Random Sample Homogeneization') 
plt.xlabel('Latitude') 
plt.ylabel('Counts')

plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()

fig_count = fig_count + 1
filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()

plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_cloudy for i in prob_cloudy_bin])

plt.title('Random Sample Day Cloudy Pixels') 
plt.xlabel('Latitude') 
plt.ylabel('Counts')

plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()

fig_count = fig_count + 1
filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()

prob_cloudy_bin = [prob_cloudy_given_bin[i]*prob_obs_bin[i]*nb_tot_obs/nb_tot_cloudy for i in range(len(prob_obs_bin))]

plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_obs for i in prob_obs_bin], width=1.01, color="#89bedc")
plt.bar([bins[i] for i in range(len(bins)-1)], [i*nb_tot_cloudy for i in prob_cloudy_bin], width=1.01, color="#0b559f")

plt.xlim(-90.0,90.0)
plt.ylim(0,fig_ylim_max)
plt.grid()

plt.title('(b) after sample homogenization',fontsize=10)

plt.xlabel('Latitudes', fontsize=8)
plt.ylabel('Counts', fontsize=8)

plt.xticks(fontsize=8)
plt.yticks(fontsize=8)

pop_a = mpatches.Patch(color='#89bedc', label='Nb. of Observations')
pop_b = mpatches.Patch(color='#0b559f', label='Nb. of cloudy pixels')

plt.legend(handles=[pop_a,pop_b], loc='upper center')

fig_count = fig_count + 1
filename ='2008_sample_homogenization_'+'{:02d}'.format(fig_count)+'.png'
plt.savefig(filename, dpi=100, bbox_inches='tight' )
plt.close()