How to read CloudSat 2B GEOPROF GRANULE HDF4 file using python and pyhdf ?

Published: December 28, 2017

DMCA.com Protection Status

Simple example about how to read a CloudSat 2B GEOPROF GRANULE HDF4 file using python 3 and the pyhdf library (Note: to install python and pyhdf see the following article).

For example, let's consider the following CloudSat HDF file :

"2008025010959_09272_CS_2B-GEOPROF_GRANULE_P_R04_E02.hdf":

Import python libraries

First import all python libraries needed to read the file

from pyhdf.SD import SD, SDC 
from pyhdf.HDF import *
from pyhdf.VS import *

import pprint

file_path = './inputs/cloudsat/'
file_name = '2008025010959_09272_CS_2B-GEOPROF_GRANULE_P_R04_E02.hdf'

Read vdata

Retrieve information about all data stored using HDF vdata model:

f = HDF(file_path+file_name, SDC.READ) 
vs = f.vstart()

data_info_list = vs.vdatainfo()

pprint.pprint( data_info_list )

vs.end() # terminate the vdata interface
f.close()

returns

[('Profile_time', '', 7, 37082, 1, 5, 4, 1962, 0),
 ('UTC_start', '', 8, 1, 1, 5, 4, 1962, 0),
 ('TAI_start', '', 9, 1, 1, 5, 8, 1962, 0),
 ('Latitude', '', 10, 37082, 1, 5, 4, 1962, 0),
 ('Longitude', '', 11, 37082, 1, 5, 4, 1962, 0),
 ('Range_to_intercept', '', 13, 37082, 1, 5, 4, 1962, 0),
 ('DEM_elevation', '', 14, 37082, 1, 7, 2, 1962, 0),
 ('Vertical_binsize', '', 15, 1, 1, 5, 4, 1962, 0),
 ('Pitch_offset', '', 16, 1, 1, 5, 4, 1962, 0),
 ('Roll_offset', '', 17, 1, 1, 5, 4, 1962, 0),
 ('Data_quality', '', 18, 37082, 1, 5, 1, 1962, 0),
 ('Data_status', '', 19, 37082, 1, 5, 1, 1962, 0),
 ('Data_targetID', '', 20, 37082, 1, 5, 1, 1962, 0),
 ('SurfaceHeightBin', '', 21, 37082, 1, 6, 1, 1962, 0),
 ('SurfaceHeightBin_fraction', '', 22, 37082, 1, 5, 4, 1962, 0),
 ('Sigma-Zero', '', 26, 37082, 1, 7, 2, 1962, 0),
 ('MODIS_cloud_flag', '', 27, 37082, 1, 7, 1, 1962, 0),
 ('MODIS_Cloud_Fraction', '', 28, 37082, 1, 6, 1, 1962, 0),
 ('MODIS_scene_char', '', 29, 37082, 1, 6, 1, 1962, 0),
 ('MODIS_scene_var', '', 30, 37082, 1, 6, 1, 1962, 0),
 ('CPR_Echo_Top', '', 31, 37082, 1, 6, 1, 1962, 0),
 ('sem_NoiseFloor', '', 32, 37082, 1, 5, 4, 1962, 0),
 ('sem_NoiseFloorVar', '', 33, 37082, 1, 5, 4, 1962, 0),
 ('sem_NoiseGate', '', 34, 37082, 1, 5, 1, 1962, 0),
 ('Navigation_land_sea_flag', '', 35, 37082, 1, 5, 1, 1962, 0),
 ('Clutter_reduction_flag', '', 36, 37082, 1, 3, 1, 1962, 0),
 ('nray:2B-GEOPROF', 'DimVal0.1', 393, 1, 1, 0, 4, 1962, 0),
 ('nbin:2B-GEOPROF', 'DimVal0.1', 395, 1, 1, 0, 4, 1962, 0)]

Read latitudes and longitudes

Now how to read an exemple of how to read the latitude and longitude:

f = HDF(file_path+file_name, SDC.READ) 
vs = f.vstart()

vdata_lat = vs.attach('Latitude')
vdata_long = vs.attach('Longitude')

lat = vdata_lat[:]
long = vdata_long[:]

print('Nb pixels: ', len(lat))
print('Lat min, Lat max: ',min(lat),max(lat))

for i in range(15): # sample
        print(lat[i])

vdata_lat.detach() # "close" the vdata
vdata_long.detach() # "close" the vdata
vs.end() # terminate the vdata interface
f.close()

returns:

Nb pixels:  37082
Lat min, Lat max:  [-81.82308197021484] [81.82254791259766]
[-0.002585692098364234]
[-0.012253849767148495]
[-0.021921850740909576]
[-0.031589850783348083]
[-0.04125785082578659]
[-0.0509258508682251]
[-0.060593850910663605]
[-0.07026185095310211]
[-0.07992985099554062]
[-0.08959785103797913]
[-0.09926585108041763]
[-0.10893385112285614]
[-0.11860185116529465]
[-0.12826985120773315]
[-0.13793784379959106]

Note: to get vdata latitude info, just add the following line:

print( vdata_lat.attrinfo() )

returns

{'factor': (5, 1, 1.0, 4),
 'long_name': (4, 19, 'Spacecraft Latitude', 19),
 'offset': (5, 1, 0.0, 4),
 'units': (4, 7, 'degrees', 7),
 'valid_range': (5, 2, [-90.0, 90.0], 8)}

Read SDS data

file = SD(file_path+file_name, SDC.READ)
pprint.pprint( file.info() )  # number of sds and metadata

returns

(4, 12)

meaning that the hdf file contains 4 sds. Get and print all sds names:

datasets_dic = file.datasets()

sds_dic = {}
for key, value in datasets_dic.items():
    sds_dic[value[3]] = key

pprint.pprint( sds_dic )

returns

{0: 'Height',
 1: 'CPR_Cloud_mask',
 2: 'Gaseous_Attenuation',
 3: 'Radar_Reflectivity'}

Read CPR Cloud Mask

Exemple how to get CPR_Cloud_mask data and attributes:

sds_obj = file.select('CPR_Cloud_mask') # select sds

data = sds_obj.get()

sds_info = sds_obj.info()

print( sds_info )
pprint.pprint( sds_obj.attributes() )

print( data.shape )

returns

('CPR_Cloud_mask', 2, [37082, 125], 20, 7)

{'_FillValue': -99,
 'factor': 1.0,
 'long_name': 'CPR Cloud Mask',
 'missing': -9,
'missop': '==',
 'offset': 0.0,
 'valid_range': [0, 40]}

(37082, 125)

Code

#!/usr/bin/env python

from pyhdf.SD import SD, SDC 
from pyhdf.HDF import *
from pyhdf.VS import *

import pprint

#----------------------------------------------------------------------------------------#

file_path = './inputs/cloudsat/'
file_name = '2008025010959_09272_CS_2B-GEOPROF_GRANULE_P_R04_E02.hdf'

#----------------------------------------------------------------------------------------#
# Get vdata info

f = HDF(file_path+file_name, SDC.READ) 
vs = f.vstart()

data_info_list = vs.vdatainfo()

pprint.pprint( data_info_list )

vs.end() # terminate the vdata interface
f.close()

#----------------------------------------------------------------------------------------#
# Get latitude & longitude vdata

f = HDF(file_path+file_name, SDC.READ) 
vs = f.vstart()

vdata_lat = vs.attach('Latitude')
vdata_long = vs.attach('Longitude')

lat = vdata_lat[:]
long = vdata_long[:]

print('Nb pixels: ', len(lat))
print('Lat min, Lat max: ',min(lat),max(lat))

pprint.pprint( vdata_lat.attrinfo() )

for i in range(15): # sample
    print(lat[i])

vdata_lat.detach() # "close" the vdata
vdata_long.detach() # "close" the vdata
vs.end() # terminate the vdata interface
f.close()

#----------------------------------------------------------------------------------------#
# Get SDS info

file = SD(file_path+file_name, SDC.READ)

pprint.pprint( file.info() )  # number of sds and metadata

#----------------------------------------------------------------------------------------#
# get all sds names

datasets_dic = file.datasets()

#for idx,sds in enumerate(datasets_dic.keys()):
#   print idx,sds

sds_dic = {}
for key, value in datasets_dic.items():
    #print key, value, value[3]
    sds_dic[value[3]] = key

pprint.pprint( sds_dic )

#----------------------------------------------------------------------------------------#
# get CPR_Cloud_mask

sds_obj = file.select('CPR_Cloud_mask') # select sds

data = sds_obj.get()

sds_info = sds_obj.info()

print( sds_info )

pprint.pprint( sds_obj.attributes() )

    print( data.shape )

References

Liens Site
VS (Vdata table) API (pyhdf.VS) github
pyhdf.VS sourceforge
CloudSat Overview cloudsat.atmos.colostate.edu
Data Products cloudsat.atmos.colostate.edu
2B-GEOPROF-LIDAR cloudsat.atmos.colostate.edu
MODIS-AUX cloudsat.atmos.colostate.edu