# Source code for smdc_perftests.performance_tests.test_scripts

# Copyright (c) 2015,Vienna University of Technology,
# Department of Geodesy and Geoinformation
# All rights reserved.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

'''
Module implements the test cases specified in the performance test protocol
Created on Wed Apr  1 10:59:05 2015

@author: christoph.paulik@geo.tuwien.ac.at
'''

import os
import glob
from datetime import datetime

from smdc_perftests.performance_tests import test_cases
from smdc_perftests.datasets import esa_cci
from smdc_perftests.datasets import ascat
from smdc_perftests import helper


def run_performance_tests(name, dataset, save_dir,
                          gpi_list=None,
                          date_range_list=None,
                          cell_list=None,
                          cell_date_list=None,
                          gpi_read_perc=1.0,
                          date_read_perc=1.0,
                          cell_read_perc=1.0,
                          max_runtime_per_test=None,
                          repeats=1):
    """
    Run the complete suite of read tests on a dataset and write the
    results as netCDF files into the given directory.

    Every test that is run produces two files in save_dir:
    <test name>.nc holding the result of the measured test function and
    <test name>_detailed.nc holding the measurements recorded by the
    self timing wrapper around the dataset.

    Parameters
    ----------
    name: string
        name of the test run, used as prefix of the result file names
    dataset: dataset instance
        instance implementing the get_timeseries, get_avg_image
        and get_data methods.
    save_dir: string
        directory to store the test results in
    gpi_list: list, optional
        list of possible grid point indices, if given the
        time series reading test is run
    date_range_list: list, optional
        list of possible date ranges, if given the daily image and the
        averaged image reading tests are run.
        The format is a list of [start, end] pairs e.g.
        [[datetime(2007,1,1), datetime(2007,1,1)], # reads one day
         [datetime(2007,1,1), datetime(2007,12,31)]] # reads one year
        Only the start of each pair is used for the daily image test.
    cell_list: list, optional
        list of possible cells to read from, the cell reading test
        is only run if cell_date_list is given as well
    cell_date_list: list, optional
        list of time intervals to read per cell.
        Should be as long as the cell list or longer.
    gpi_read_perc: float, optional
        percentage of random selection from gpi_list read for each try
    date_read_perc: float, optional
        percentage of random selection from date_range_list read
        for each try
    cell_read_perc: float, optional
        percentage of random selection of cells read for each try
    max_runtime_per_test: float, optional
        maximum runtime per test in seconds, if given the tests will be
        aborted after taking more than this time
    repeats: int, optional
        number of repeats for each measurement
    """
    # timing wrapper shared by the time series, daily image and cell tests
    timer = test_cases.SelfTimingDataset(dataset)
    # The averaged image test gets its own wrapper. Both image tests read
    # their detailed measurements from the 'get_avg_image' key, so this
    # presumably keeps the two sets of measurements apart.
    avg_img_timer = test_cases.SelfTimingDataset(dataset)

    def store(summary, used_timer, method, test_name):
        # write the summary first, then the per call measurements that the
        # timing wrapper collected for the given dataset method
        summary.to_nc(os.path.join(save_dir, test_name + ".nc"))
        detailed_name = test_name + "_detailed"
        detailed = test_cases.TestResults(used_timer.measurements[method],
                                          name=detailed_name)
        detailed.to_nc(os.path.join(save_dir, detailed_name + ".nc"))

    if gpi_list is not None:
        # reading of time series by grid point/location id
        test_name = '{}_test-rand-gpi'.format(name)

        @test_cases.measure(test_name, runs=repeats)
        def test_rand_gpi():
            test_cases.read_rand_ts_by_gpi_list(
                timer, gpi_list,
                read_perc=gpi_read_perc,
                max_runtime=max_runtime_per_test)

        store(test_rand_gpi(), timer, 'get_timeseries', test_name)

    if date_range_list is not None:
        # reading of daily images, only the start date of each range is used
        test_name = '{}_test-rand-daily-img'.format(name)
        date_list = [start for start, _ in date_range_list]

        @test_cases.measure(test_name, runs=repeats)
        def test_rand_img():
            test_cases.read_rand_img_by_date_list(
                timer, date_list,
                read_perc=date_read_perc,
                max_runtime=max_runtime_per_test)

        store(test_rand_img(), timer, 'get_avg_image', test_name)

        # reading of images averaged over the complete date ranges
        test_name = '{}_test-rand-avg-img'.format(name)

        @test_cases.measure(test_name, runs=repeats)
        def test_avg_img():
            test_cases.read_rand_img_by_date_range(
                avg_img_timer, date_range_list,
                read_perc=date_read_perc,
                max_runtime=max_runtime_per_test)

        store(test_avg_img(), avg_img_timer, 'get_avg_image', test_name)

    if not (cell_list is None or cell_date_list is None):
        # reading of complete cells
        test_name = '{}_test-rand-cells-data'.format(name)

        @test_cases.measure(test_name, runs=repeats)
        def test_read_cell_data():
            test_cases.read_rand_cells_by_cell_list(
                timer, cell_date_list, cell_list,
                read_perc=cell_read_perc,
                max_runtime=max_runtime_per_test)

        store(test_read_cell_data(), timer, 'get_data', test_name)
def run_esa_cci_netcdf_tests(test_dir, results_dir, variables=None):
    """
    Run the ESA CCI netCDF performance tests for all .nc files
    found directly in test_dir.

    For each file a dataset is opened, 10000 date ranges between
    1980-01-01 and 2013-12-31 are generated and the time series and
    image tests of run_performance_tests are run. The file name
    without extension is used as name of the test run.

    Parameters
    ----------
    test_dir: string
        path to the test files
    results_dir: string
        path in which the results should be stored
    variables: list, optional
        list of variables to read for the tests,
        if not given ['sm'] is used
    """
    # None instead of a list literal as default so that no list object is
    # shared between calls
    if variables is None:
        variables = ['sm']

    for filen in glob.glob(os.path.join(test_dir, "*.nc")):
        # single argument call form prints the same text on Python 2 and 3
        print("testing file %s" % filen)
        dataset = esa_cci.ESACCI_netcdf(filen, variables=variables)
        # file name without directory and extension names the test run
        name = os.path.splitext(os.path.basename(filen))[0]
        # a new date list is generated for every file
        date_range_list = helper.generate_date_list(
            datetime(1980, 1, 1), datetime(2013, 12, 31), n=10000)
        run_performance_tests(name, dataset, results_dir,
                              gpi_list=dataset.grid.land_ind,
                              date_range_list=date_range_list,
                              gpi_read_perc=0.1,
                              repeats=1)
def run_esa_cci_tests(dataset, testname, results_dir,
                      n_dates=10000,
                      date_read_perc=0.1,
                      gpi_read_perc=0.1,
                      repeats=3,
                      cell_read_perc=10.0,
                      max_runtime_per_test=None):
    """
    Runs the ESA CCI tests given a dataset instance.

    Date ranges are generated between 1980-01-01 and 2013-12-31 and the
    land points of the ESA CCI grid are used as grid point list.

    Parameters
    ----------
    dataset: Dataset instance
        Instance of a Dataset class
    testname: string
        Name of the test, used for storing the results
    results_dir: string
        path where to store the test results
    n_dates: int, optional
        number of date ranges to generate
    date_read_perc: float, optional
        percentage of random selection from date_range_list read
        for each try
    gpi_read_perc: float, optional
        percentage of random selection from gpi_list read for each try
    repeats: int, optional
        number of repeats of the tests
    cell_read_perc: float, optional
        passed on to run_performance_tests as cell_read_perc
    max_runtime_per_test: float, optional
        maximum runtime per test in seconds, if given the tests will be
        aborted after taking more than this time
    """
    first_day = datetime(1980, 1, 1)
    last_day = datetime(2013, 12, 31)
    image_dates = helper.generate_date_list(first_day, last_day, n=n_dates)

    # 500 "cells" (cell number 0 repeated), one date interval is generated
    # for each entry
    n_cells = 500
    cells = [0] * n_cells
    cell_dates = helper.generate_date_list(first_day, last_day, n=n_cells)

    cci_grid = esa_cci.ESACCI_grid()

    run_performance_tests(testname, dataset, results_dir,
                          gpi_list=cci_grid.land_ind,
                          date_range_list=image_dates,
                          cell_list=cells,
                          cell_date_list=cell_dates,
                          gpi_read_perc=gpi_read_perc,
                          date_read_perc=date_read_perc,
                          cell_read_perc=cell_read_perc,
                          max_runtime_per_test=max_runtime_per_test,
                          repeats=repeats)
def run_ascat_tests(dataset, testname, results_dir,
                    n_dates=10000,
                    date_read_perc=0.1,
                    gpi_read_perc=0.1,
                    repeats=3,
                    cell_read_perc=10.0,
                    max_runtime_per_test=None):
    """
    Runs the ASCAT tests given a dataset instance.

    Date ranges are generated between 2007-01-01 and 2013-12-31. The
    land points and the cells of the ASCAT grid are used as grid point
    list and cell list.

    Parameters
    ----------
    dataset: Dataset instance
        Instance of a Dataset class
    testname: string
        Name of the test, used for storing the results
    results_dir: string
        path where to store the test results
    n_dates: int, optional
        number of date ranges to generate
    date_read_perc: float, optional
        percentage of random selection from date_range_list read
        for each try
    gpi_read_perc: float, optional
        percentage of random selection from gpi_list read for each try
    repeats: int, optional
        number of repeats of the tests
    cell_read_perc: float, optional
        passed on to run_performance_tests as cell_read_perc
    max_runtime_per_test: float, optional
        maximum runtime per test in seconds, if given the tests will be
        aborted after taking more than this time
    """
    first_day = datetime(2007, 1, 1)
    last_day = datetime(2013, 12, 31)
    image_dates = helper.generate_date_list(first_day, last_day, n=n_dates)

    ascat_grid = ascat.ASCAT_grid()
    cells = ascat_grid.get_cells()
    # one date interval is generated for each cell of the grid
    cell_dates = helper.generate_date_list(first_day, last_day,
                                           n=len(cells))

    test_options = dict(gpi_list=ascat_grid.land_ind,
                        date_range_list=image_dates,
                        date_read_perc=date_read_perc,
                        gpi_read_perc=gpi_read_perc,
                        cell_read_perc=cell_read_perc,
                        repeats=repeats,
                        cell_list=cells,
                        cell_date_list=cell_dates,
                        max_runtime_per_test=max_runtime_per_test)
    run_performance_tests(testname, dataset, results_dir, **test_options)
def run_equi7_tests(dataset, testname, results_dir,
                    n_dates=10000,
                    date_read_perc=0.1,
                    gpi_read_perc=0.1,
                    repeats=3,
                    cell_read_perc=100.0,
                    max_runtime_per_test=None):
    """
    Runs the ASAR/Sentinel 1 Equi7 tests given a dataset instance.

    Date ranges are generated between 2015-01-08 and 2015-02-18 with
    min_spread and max_spread both set to 5. The grid point list is
    0..2879999 and the cell list is [0, 1] repeated 50 times.

    Parameters
    ----------
    dataset: Dataset instance
        Instance of a Dataset class
    testname: string
        Name of the test, used for storing the results
    results_dir: string
        path where to store the test results
    n_dates: int, optional
        number of date ranges to generate
    date_read_perc: float, optional
        percentage of random selection from date_range_list read
        for each try
    gpi_read_perc: float, optional
        percentage of random selection from gpi_list read for each try
    repeats: int, optional
        number of repeats of the tests
    cell_read_perc: float, optional
        passed on to run_performance_tests as cell_read_perc
    max_runtime_per_test: float, optional
        maximum runtime per test in seconds, if given the tests will be
        aborted after taking more than this time
    """
    date_start = datetime(2015, 1, 8)
    date_end = datetime(2015, 2, 18)
    date_range_list = helper.generate_date_list(date_start, date_end,
                                                n=n_dates,
                                                max_spread=5, min_spread=5)

    # list(...) keeps both values real lists on Python 3 as well, where
    # range() is a lazy sequence that can not be repeated with *
    gpi_list = list(range(2880000))
    cell_list = list(range(2)) * 50
    cell_date_list = helper.generate_date_list(date_start, date_end,
                                               n=len(cell_list),
                                               max_spread=5, min_spread=5)

    run_performance_tests(testname, dataset, results_dir,
                          gpi_list=gpi_list,
                          date_range_list=date_range_list,
                          date_read_perc=date_read_perc,
                          gpi_read_perc=gpi_read_perc,
                          cell_read_perc=cell_read_perc,
                          repeats=repeats,
                          cell_list=cell_list,
                          cell_date_list=cell_date_list,
                          max_runtime_per_test=max_runtime_per_test)
if __name__ == '__main__':
    # run the ESA CCI netCDF tests on the "compr-4" test files and store
    # the results in the "results" folder next to them
    path = os.path.join("/media", "sf_D", "SMDC",
                        "performance_tests", "CCI_testdata")
    test_dir = os.path.join(path, "compr-4")
    results_dir = os.path.join(path, "results")
    run_esa_cci_netcdf_tests(test_dir, results_dir)