# # # Distribution Statement A. Approved for public release. Distribution unlimited.
# # #
# # # Author:
# # # Naval Research Laboratory, Marine Meteorology Division
# # #
# # # This program is free software: you can redistribute it and/or modify it under
# # # the terms of the NRLMMD License included with this program. This program is
# # # distributed WITHOUT ANY WARRANTY; without even the implied warranty of
# # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the included license
# # # for more details. If you did not receive the license, for more information see:
# # # https://github.com/U-S-NRL-Marine-Meteorology-Division/
"""Read NASA GPM GMI hdf5 data files.
Read a grannual NASA GPM GMI TBs in h5 format (each grannual file is about
5 minutes GPM GMI data)
Output variables in xarray object for geoips processing system
V0: August 4, 2020
Dataset information::
variables in original TBs structure format
tb_info = { 'S1': { 'tb10v': 0,
'tb10h': 1,
'tb19v': 2,
'tb19h': 3,
'tb23v': 4,
'tb37v': 5,
'tb37h': 6,
'tb89v': 7,
'tb89h': 8,},
'S2': { 'tb166v': 0,
'tb166h': 1,
'tb183_3v': 2,
'tb183_7v': 3}
}
"""
# Python Standard Libraries
from os.path import basename
import h5py
import numpy as np
import logging
LOG = logging.getLogger(__name__)
import matplotlib
matplotlib.use("agg")
import matplotlib.pyplot as plt
interface = "readers"
family = "standard"
name = "gmi_hdf5"
[docs]def read_gmi_file(fname, xarray_gmi):
"""Read a single GMI file fname."""
fileobj = h5py.File(fname, mode="r")
import pandas as pd
import xarray as xr
import numpy
# get the variables ( tbt/lon(nscan,npix), tb(nscan,npix,nChan),....., time(ns))
lon = fileobj["S1"]["Longitude"][()]
lat = fileobj["S1"]["Latitude"][()]
tb = fileobj["S1"]["Tb"][()]
tb_hi = fileobj["S2"]["Tb"][()] # for 166 and 183-7 GHz
# time info for each scan
yy = fileobj["S1"]["ScanTime"]["Year"][()]
mo = fileobj["S1"]["ScanTime"]["Month"][()]
dd = fileobj["S1"]["ScanTime"]["DayOfMonth"][()]
hh = fileobj["S1"]["ScanTime"]["Hour"][()]
mm = fileobj["S1"]["ScanTime"]["Minute"][()]
ss = fileobj["S1"]["ScanTime"]["Second"][()]
# setup time in datetime64 format required by geoips
nscan = lat.shape[0]
npix = lat.shape[1] # 221 pixels per scan
time_scan = np.zeros((nscan, npix))
for i in range(nscan):
time_scan[i:] = "%04d%02d%02d%02d%02d%02d" % (
yy[i],
mo[i],
dd[i],
hh[i],
mm[i],
ss[i],
)
# assignment of TB at each channel
V10 = tb[:, :, 0]
H10 = tb[:, :, 1]
V19 = tb[:, :, 2]
H19 = tb[:, :, 3]
V23 = tb[:, :, 4]
V37 = tb[:, :, 5]
H37 = tb[:, :, 6]
V89 = tb[:, :, 7]
H89 = tb[:, :, 8]
V166 = tb_hi[:, :, 0]
H166 = tb_hi[:, :, 1]
V183_3 = tb_hi[:, :, 2]
V183_7 = tb_hi[:, :, 3]
# close the h5 object
fileobj.close()
# ------ setup xarray variables ------
# namelist_gmi = ['latitude', 'longitude', 'V10', 'H10', 'V19','H19','V23', 'V37', 'H37', 'V89' ,'H89',
# 'V166', 'H166', 'V183-3','V183-7', 'time']
final_xarray = xr.Dataset()
if "latitude" not in xarray_gmi.variables.keys():
# setup GMI xarray
final_xarray["latitude"] = xr.DataArray(lat)
final_xarray["longitude"] = xr.DataArray(lon)
final_xarray["V10"] = xr.DataArray(V10)
final_xarray["H10"] = xr.DataArray(H10)
final_xarray["V19"] = xr.DataArray(V19)
final_xarray["H19"] = xr.DataArray(H19)
final_xarray["V23"] = xr.DataArray(V23)
final_xarray["V37"] = xr.DataArray(V37)
final_xarray["H37"] = xr.DataArray(H37)
final_xarray["V89"] = xr.DataArray(V89)
final_xarray["H89"] = xr.DataArray(H89)
final_xarray["V166"] = xr.DataArray(V166)
final_xarray["H166"] = xr.DataArray(H166)
final_xarray["V183-3"] = xr.DataArray(V183_3)
final_xarray["V183-7"] = xr.DataArray(V183_7)
final_xarray["time"] = xr.DataArray(
pd.DataFrame(time_scan)
.astype(int)
.apply(pd.to_datetime, format="%Y%m%d%H%M%S")
)
else:
final_xarray["latitude"] = xr.DataArray(
numpy.vstack([xarray_gmi["latitude"].to_masked_array(), lat])
)
final_xarray["longitude"] = xr.DataArray(
numpy.vstack([xarray_gmi["longitude"].to_masked_array(), lon])
)
final_xarray["V10"] = xr.DataArray(
numpy.vstack([xarray_gmi["V10"].to_masked_array(), V10])
)
final_xarray["H10"] = xr.DataArray(
numpy.vstack([xarray_gmi["H10"].to_masked_array(), H10])
)
final_xarray["V19"] = xr.DataArray(
numpy.vstack([xarray_gmi["V19"].to_masked_array(), V19])
)
final_xarray["H19"] = xr.DataArray(
numpy.vstack([xarray_gmi["H19"].to_masked_array(), H19])
)
final_xarray["V23"] = xr.DataArray(
numpy.vstack([xarray_gmi["V23"].to_masked_array(), V23])
)
final_xarray["V37"] = xr.DataArray(
numpy.vstack([xarray_gmi["V37"].to_masked_array(), V37])
)
final_xarray["H37"] = xr.DataArray(
numpy.vstack([xarray_gmi["H37"].to_masked_array(), H37])
)
final_xarray["V89"] = xr.DataArray(
numpy.vstack([xarray_gmi["V89"].to_masked_array(), V89])
)
final_xarray["H89"] = xr.DataArray(
numpy.vstack([xarray_gmi["H89"].to_masked_array(), H89])
)
final_xarray["V166"] = xr.DataArray(
numpy.vstack([xarray_gmi["V166"].to_masked_array(), V166])
)
final_xarray["H166"] = xr.DataArray(
numpy.vstack([xarray_gmi["H166"].to_masked_array(), H166])
)
final_xarray["V183-3"] = xr.DataArray(
numpy.vstack([xarray_gmi["V183-3"].to_masked_array(), V183_3])
)
final_xarray["V183-7"] = xr.DataArray(
numpy.vstack([xarray_gmi["V183-7"].to_masked_array(), V183_7])
)
new_time = xr.DataArray(
pd.DataFrame(time_scan)
.astype(int)
.apply(pd.to_datetime, format="%Y%m%d%H%M%S")
)
final_xarray["time"] = xr.DataArray(
numpy.vstack(
[
xarray_gmi["time"].to_masked_array(),
new_time.to_masked_array(),
]
)
)
return final_xarray
[docs]def call(fnames, metadata_only=False, chans=None, area_def=None, self_register=False):
"""Read GMI hdf5 data products.
Parameters
----------
fnames : list
* List of strings, full paths to files
metadata_only : bool, default=False
* NOT YET IMPLEMENTED
* Return before actually reading data if True
chans : list of str, default=None
* NOT YET IMPLEMENTED
* List of desired channels (skip unneeded variables as needed).
* Include all channels if None.
area_def : pyresample.AreaDefinition, default=None
* NOT YET IMPLEMENTED
* Specify region to read
* Read all data if None.
self_register : str or bool, default=False
* NOT YET IMPLEMENTED
* register all data to the specified dataset id (as specified in the
return dictionary keys).
* Read multiple resolutions of data if False.
Returns
-------
dict of xarray.Datasets
* dictionary of xarray.Dataset objects with required Variables and
Attributes.
* Dictionary keys can be any descriptive dataset ids.
See Also
--------
:ref:`xarray_standards`
Additional information regarding required attributes and variables
for GeoIPS-formatted xarray Datasets.
"""
import os
from datetime import datetime
import numpy as np
import xarray as xr
# from IPython import embed as shell
# fname='data_gmi/20200518.203639.gpm.gmi.gpm_pps.x.gmi.TB2016.x.TB2016_1b_v05a.h5'
LOG.info("Reading files %s", fnames)
xarray_gmi = xr.Dataset()
source_file_names = []
for fname in fnames:
source_file_names += [basename(fname)]
xarray_gmi = read_gmi_file(fname, xarray_gmi)
# setup attributors
from geoips.xarray_utils.time import get_datetime_from_datetime64
from geoips.xarray_utils.time import (
get_max_from_xarray_time,
get_min_from_xarray_time,
)
xarray_gmi.attrs["source_file_names"] = sorted(source_file_names)
xarray_gmi.attrs["start_datetime"] = get_min_from_xarray_time(xarray_gmi, "time")
xarray_gmi.attrs["end_datetime"] = get_max_from_xarray_time(xarray_gmi, "time")
xarray_gmi.attrs["source_name"] = "gmi"
xarray_gmi.attrs["platform_name"] = "GPM"
xarray_gmi.attrs["data_provider"] = "NASA"
xarray_gmi.attrs["granule_minutes"] = 5
# MTIFs need to be "prettier" for PMW products, so 2km resolution for final image
xarray_gmi.attrs["sample_distance_km"] = 2
xarray_gmi.attrs["interpolation_radius_of_influence"] = 12500
return {"GMI": xarray_gmi, "METADATA": xarray_gmi[[]]}