Source code for smdc_perftests.datasets.ascat

# Copyright (c) 2015,Vienna University of Technology,
# Department of Geodesy and Geoinformation
# All rights reserved.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

'''
Module contains the Classes for reading ASCAT data
Created on Fri Mar 27 15:12:18 2015

@author: Christoph.Paulik@geo.tuwien.ac.at
'''

import netCDF4 as nc
import numpy as np
import pandas as pd
import os
import pygeogrids.grids as grids
from datetime import timedelta


class ASCAT_grid(grids.CellGrid):

    """
    ASCAT grid class

    Attributes
    ----------
    land_ind: numpy.ndarray
        indices of the land points
    """

    def __init__(self, lsmaskfile=None):
        if lsmaskfile is None:
            lsmaskfile = os.path.join(os.path.dirname(__file__), "..", "bin",
                                      "ascat",
                                      "TUW_WARP5_grid_info_2_1.nc")
        with nc.Dataset(lsmaskfile) as ls:
            land = ls.variables['land_flag'][:]
            valid_points = np.where(land == 1)[0]

            # read whole grid information because this is faster than reading
            # only the valid points
            lon = ls.variables['lon'][:]
            lat = ls.variables['lat'][:]
            gpis = ls.variables['gpi'][:]
            cells = ls.variables['cell'][:]
            self.land_ind = gpis[valid_points]

        super(ASCAT_grid, self).__init__(lon[valid_points], lat[valid_points],
                                         cells[valid_points],
                                         gpis=gpis[valid_points])


[docs]class ASCAT_netcdf(object): """ Class for reading ASCAT data from netCDF files Caches the following: - time variable - keeps the dataset open as long as the instance exists """ def __init__(self, fname, variables=None, avg_var=None, time_var='time', gpi_var='gpis_correct', cell_var='cells_correct', get_exact_time=False): """ Parameters ---------- self: type description fname: string filename variables: list, optional if given only these variables will be read avg_var: list, optional list of variables for which to calculate the average if not given it is calculated for all variables time_var: string, optional name of the time variable in the netCDF file gpi_var: string, optional name of the gpi variable in the netCDF file cell_var: string, optional name of the cell variable in the netCDF file get_exact_time: boolean, optional for time series deliver the exact time and not the one rounded to the next hour. """ self.fname = fname self.ds = nc.Dataset(fname) self.gpi_var = gpi_var self.cell_var = cell_var self.time_var = time_var self.avg_var = avg_var self.get_exact_time = get_exact_time if variables is None: self.variables = self.ds.variables.keys() # exclude time, lat and lon from variable list self.variables.remove(self.time_var) self.variables.remove(self.gpi_var) self.variables.remove(self.cell_var) # remove old variables that were added by mistake self.variables.remove('cells') self.variables.remove('orig_gpis') self.variables.remove('row_size') self.variables.remove('exact_time') self.variables.remove('gpis') else: self.variables = variables # make sure ssf is always there for masking if self.avg_var is not None: if 'ssf' not in self.variables: self.variables.append('ssf') self.gpis = self.ds.variables[self.gpi_var][:] self.orig_gpis = self.ds.variables['orig_gpis'][:] self.row_size = self.ds.variables['row_size'][:] self.exact_time = self.ds.variables['exact_time'] self.cells = self.ds.variables[self.cell_var][:] self.times = nc.num2date(self.ds.variables[self.time_var][:], units=self.ds.variables[self.time_var].units) self._init_grid() def _init_grid(self): """ initialize the grid of the dataset """ self.grid = ASCAT_grid()
[docs] def get_timeseries(self, locationid, date_start=None, date_end=None): """ Parameters ---------- locationid: int location id as lat_index * row_length + lon_index date_start: datetime, optional start date of the time series date_end: datetime, optional end date of the time series Returns ------- ts : dict """ start_index, end_index = None, None if date_start is not None: start_index = nc.netcdftime.date2index(date_start, self.ds.variables[self.time_var]) if date_end is not None: end_index = nc.netcdftime.date2index(date_end, self.ds.variables[self.time_var]) date_slice = slice(start_index, end_index, None) # get position in netCDF from location id pos = np.where(self.gpis == locationid)[0][0] ts = {} for v in self.variables: ts[v] = self.ds.variables[v][date_slice, pos] ds = pd.DataFrame(ts, index=self.times) if self.get_exact_time: # read exact time values for gpi and ds = ds.dropna(how='all') pos = np.where(self.orig_gpis == locationid)[0][0] start_etime = np.sum(self.row_size[:pos]) end_etime = np.sum(self.row_size[:pos + 1]) exact_time = nc.num2date(self.exact_time[start_etime:end_etime], self.exact_time.units) et = pd.DataFrame({'date': exact_time}, index=exact_time) et = et.resample("H", how='first') ds = ds.join(et).dropna().set_index('date') return ds
[docs] def get_avg_image(self, date_start, date_end=None, cellID=None): """ Reads image from dataset, takes the average if more than one value is in the result array. Parameters ---------- date_start: datetime start date of the image to get. If only one date is given then the whole day of this date is read date_end: datetime, optional end date of the averaged image to get cellID: int, optional cell id to which the image should be limited, for ESA CCI this is not defined at the moment. """ if date_end is None: date_end = date_start + timedelta(days=1) img = self.get_data(date_start, date_end, cellID=cellID) # calculate average for v in img: if self.avg_var is not None: if v in self.avg_var: img[v][img['ssf'] != 1] = np.nan img[v] = np.nanmean(img[v], axis=0) return img
[docs] def get_data(self, date_start, date_end, cellID=None): """ Reads date cube from dataset Parameters ---------- date_start: datetime start date of the image to get. If only one date is given then the whole day of this date is read date_end: datetime end date of the averaged image to get cellID: int cell id to which the image should be limited, for ESA CCI this is not defined at the moment. """ start_index = nc.netcdftime.date2index(date_start, self.ds.variables[self.time_var]) end_index = nc.netcdftime.date2index(date_end, self.ds.variables[self.time_var]) date_slice = slice(start_index, end_index + 1, None) gpi_slice = slice(None, None, None) if cellID is not None: cell_pos = np.where(self.cells == cellID)[0] gpi_slice = slice(cell_pos[0], cell_pos[-1] + 1, None) img = {} for v in self.variables: if v in ['ssm', 'ssm_noise']: img[v] = self.ds.variables[v][ date_slice, gpi_slice].astype(np.float) else: img[v] = self.ds.variables[v][date_slice, gpi_slice] return img