# Source code for smdc_perftests.performance_tests.test_scripts
# Copyright (c) 2015,Vienna University of Technology,
# Department of Geodesy and Geoinformation
# All rights reserved.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
Module implements the test cases specified in the performance test protocol
Created on Wed Apr 1 10:59:05 2015
@author: christoph.paulik@geo.tuwien.ac.at
'''
import os
import glob
from datetime import datetime
from smdc_perftests.performance_tests import test_cases
from smdc_perftests.datasets import esa_cci
from smdc_perftests.datasets import ascat
from smdc_perftests import helper
def run_performance_tests(name, dataset, save_dir,
                          gpi_list=None,
                          date_range_list=None,
                          cell_list=None,
                          cell_date_list=None,
                          gpi_read_perc=1.0,
                          date_read_perc=1.0,
                          cell_read_perc=1.0,
                          max_runtime_per_test=None,
                          repeats=1):
    """
    Run the performance test suite on a dataset and write the results
    as netCDF files into ``save_dir``.

    Every test that is run writes two files: ``<test name>.nc`` with the
    results returned by the measured test function and
    ``<test name>_detailed.nc`` with the measurements recorded by the
    self timing dataset wrapper for the dataset method that was tested.

    Parameters
    ----------
    name: string
        name of the test run, used as prefix of the result file names
    dataset: dataset instance
        instance implementing the get_timeseries,
        get_avg_image and get_data methods.
    save_dir: string
        directory to store the test results in
    gpi_list: list, optional
        list of possible grid point indices, if given the
        time series reading test (``<name>_test-rand-gpi``) is run
    date_range_list: list, optional
        list of possible date ranges, if given the daily image test
        (``<name>_test-rand-daily-img``, uses only the start date of
        each range) and the averaged image test
        (``<name>_test-rand-avg-img``) are run.
        The format is a list of lists e.g.
        [[datetime(2007,1,1), datetime(2007,1,1)], #reads one day
         [datetime(2007,1,1), datetime(2007,12,31)]] # reads one year
    cell_list: list, optional
        list of possible cells to read from. The cell reading test
        (``<name>_test-rand-cells-data``) is only run if both cell_list
        and cell_date_list are given
    cell_date_list: list, optional
        list of time intervals to read per cell. Should be as long as the
        cell list or longer.
    gpi_read_perc: float, optional
        percentage of random selection from gpi_list read for each try
    date_read_perc: float, optional
        percentage of random selection from date_range_list read for each try
    cell_read_perc: float, optional
        percentage of random selection from cell_list read for each try
    max_runtime_per_test: float, optional
        maximum runtime per test in seconds, if given the tests will be aborted
        after taking more than this time
    repeats: int, optional
        number of repeats for each measurement
    """

    def store(summary, timed, method_name, label):
        # the summary file is written first, only afterwards are the
        # per call measurements of the wrapper looked up and stored
        summary.to_nc(os.path.join(save_dir, label + ".nc"))
        per_call = test_cases.TestResults(timed.measurements[method_name],
                                          name=label + "_detailed")
        per_call.to_nc(os.path.join(save_dir, label + "_detailed.nc"))

    timed_dataset = test_cases.SelfTimingDataset(dataset)
    # The averaged image test gets a wrapper of its own. It reads the same
    # 'get_avg_image' measurements key as the daily image test, so this
    # presumably keeps the two sets of measurements apart.
    timed_avg_img_dataset = test_cases.SelfTimingDataset(dataset)

    if gpi_list is not None:
        # time series reading by grid point/location id
        label = '{}_test-rand-gpi'.format(name)

        @test_cases.measure(label, runs=repeats)
        def test_rand_gpi():
            test_cases.read_rand_ts_by_gpi_list(
                timed_dataset, gpi_list,
                read_perc=gpi_read_perc,
                max_runtime=max_runtime_per_test)

        store(test_rand_gpi(), timed_dataset, 'get_timeseries', label)

    if date_range_list is not None:
        # daily images, only the start date of each range is used
        label = '{}_test-rand-daily-img'.format(name)
        date_list = [start for start, _end in date_range_list]

        @test_cases.measure(label, runs=repeats)
        def test_rand_img():
            test_cases.read_rand_img_by_date_list(
                timed_dataset, date_list,
                read_perc=date_read_perc,
                max_runtime=max_runtime_per_test)

        store(test_rand_img(), timed_dataset, 'get_avg_image', label)

        # averaged images over the complete date ranges
        label = '{}_test-rand-avg-img'.format(name)

        @test_cases.measure(label, runs=repeats)
        def test_avg_img():
            test_cases.read_rand_img_by_date_range(
                timed_avg_img_dataset, date_range_list,
                read_perc=date_read_perc,
                max_runtime=max_runtime_per_test)

        store(test_avg_img(), timed_avg_img_dataset, 'get_avg_image', label)

    if not (cell_list is None or cell_date_list is None):
        # reading of complete cells
        label = '{}_test-rand-cells-data'.format(name)

        @test_cases.measure(label, runs=repeats)
        def test_read_cell_data():
            test_cases.read_rand_cells_by_cell_list(
                timed_dataset, cell_date_list, cell_list,
                read_perc=cell_read_perc,
                max_runtime=max_runtime_per_test)

        store(test_read_cell_data(), timed_dataset, 'get_data', label)
def run_esa_cci_netcdf_tests(test_dir, results_dir, variables=None):
    """
    Run the ESA CCI netCDF performance tests for every .nc file
    found directly in test_dir.

    For each file a dataset is opened, a new list of 10000 date ranges
    between 1980-01-01 and 2013-12-31 is generated and the time series
    and image tests of run_performance_tests are run with
    gpi_read_perc=0.1 and repeats=1. The file name without extension is
    used as name of the test run. Nothing is done if no .nc file is
    found.

    Parameters
    ----------
    test_dir: string
        path to the test files
    results_dir: string
        path in which the results should be stored
    variables: list, optional
        list of variables to read for the tests.
        If not given or None, ['sm'] is used.
    """
    if variables is None:
        # built per call so that no list object is shared between calls
        variables = ['sm']

    for filen in glob.glob(os.path.join(test_dir, "*.nc")):
        # single argument form prints the same text on Python 2 and 3
        print("testing file %s" % filen)
        dataset = esa_cci.ESACCI_netcdf(filen, variables=variables)
        # file name without directory and extension names the test run
        name = os.path.splitext(os.path.basename(filen))[0]
        # the date list is generated anew for every file
        date_range_list = helper.generate_date_list(
            datetime(1980, 1, 1), datetime(2013, 12, 31), n=10000)
        run_performance_tests(name, dataset, results_dir,
                              gpi_list=dataset.grid.land_ind,
                              date_range_list=date_range_list,
                              gpi_read_perc=0.1, repeats=1)
def run_esa_cci_tests(dataset, testname, results_dir, n_dates=10000,
                      date_read_perc=0.1, gpi_read_perc=0.1,
                      repeats=3, cell_read_perc=10.0,
                      max_runtime_per_test=None):
    """
    Run the complete ESA CCI test suite on a dataset instance.

    Date ranges are generated between 1980-01-01 and 2013-12-31, the
    grid points are the land_ind of a new esa_cci.ESACCI_grid and the
    cell test uses 500 entries of cell 0. Everything is handed to
    run_performance_tests which also stores the results.

    Parameters
    ----------
    dataset: Dataset instance
        Instance of a Dataset class
    testname: string
        Name of the test, used for storing the results
    results_dir: string
        path where to store the test results
    n_dates: int, optional
        number of date ranges requested from helper.generate_date_list
    date_read_perc: float, optional
        percentage of random selection from date_range_list read for each try
    gpi_read_perc: float, optional
        percentage of random selection from gpi_list read for each try
    repeats: int, optional
        number of repeats of the tests
    cell_read_perc: float, optional
        percentage of random selection from the cell list read for each try
    max_runtime_per_test: float, optional
        maximum runtime per test in seconds, if given the tests will be aborted
        after taking more than this time
    """
    period_start = datetime(1980, 1, 1)
    period_end = datetime(2013, 12, 31)
    image_date_ranges = helper.generate_date_list(
        period_start, period_end, n=n_dates)

    # test 500 "cells" with 500 months: the cell list is 500 times
    # cell 0 and one date range is generated per entry
    cells = [0] * 500
    cell_date_ranges = helper.generate_date_list(
        period_start, period_end, n=len(cells))

    cci_grid = esa_cci.ESACCI_grid()
    land_points = cci_grid.land_ind

    run_performance_tests(testname, dataset, results_dir,
                          gpi_list=land_points,
                          date_range_list=image_date_ranges,
                          cell_list=cells,
                          cell_date_list=cell_date_ranges,
                          gpi_read_perc=gpi_read_perc,
                          date_read_perc=date_read_perc,
                          cell_read_perc=cell_read_perc,
                          max_runtime_per_test=max_runtime_per_test,
                          repeats=repeats)
def run_ascat_tests(dataset, testname, results_dir, n_dates=10000,
                    date_read_perc=0.1, gpi_read_perc=0.1, repeats=3,
                    cell_read_perc=10.0,
                    max_runtime_per_test=None):
    """
    Run the complete ASCAT test suite on a dataset instance.

    Date ranges are generated between 2007-01-01 and 2013-12-31. Grid
    points (land_ind) and cells (get_cells()) are taken from a new
    ascat.ASCAT_grid and one date range is generated per cell.
    Everything is handed to run_performance_tests which also stores
    the results.

    Parameters
    ----------
    dataset: Dataset instance
        Instance of a Dataset class
    testname: string
        Name of the test, used for storing the results
    results_dir: string
        path where to store the test results
    n_dates: int, optional
        number of date ranges requested from helper.generate_date_list
    date_read_perc: float, optional
        percentage of random selection from date_range_list read for each try
    gpi_read_perc: float, optional
        percentage of random selection from gpi_list read for each try
    repeats: int, optional
        number of repeats of the tests
    cell_read_perc: float, optional
        percentage of random selection from the cell list read for each try
    max_runtime_per_test: float, optional
        maximum runtime per test in seconds, if given the tests will be aborted
        after taking more than this time
    """
    period_start = datetime(2007, 1, 1)
    period_end = datetime(2013, 12, 31)
    image_date_ranges = helper.generate_date_list(
        period_start, period_end, n=n_dates)

    ascat_grid = ascat.ASCAT_grid()
    cells = ascat_grid.get_cells()
    # one date range per cell of the grid
    cell_date_ranges = helper.generate_date_list(
        period_start, period_end, n=len(cells))
    land_points = ascat_grid.land_ind

    run_performance_tests(name=testname, dataset=dataset,
                          save_dir=results_dir,
                          gpi_list=land_points,
                          date_range_list=image_date_ranges,
                          cell_list=cells,
                          cell_date_list=cell_date_ranges,
                          gpi_read_perc=gpi_read_perc,
                          date_read_perc=date_read_perc,
                          cell_read_perc=cell_read_perc,
                          max_runtime_per_test=max_runtime_per_test,
                          repeats=repeats)
def run_equi7_tests(dataset, testname, results_dir, n_dates=10000,
                    date_read_perc=0.1, gpi_read_perc=0.1, repeats=3,
                    cell_read_perc=100.0,
                    max_runtime_per_test=None):
    """
    Runs the ASAR/Sentinel 1 Equi7 tests given a dataset instance.

    Date ranges are generated between 2015-01-08 and 2015-02-18 with
    min_spread=5 and max_spread=5. The grid points are the integers
    0 to 2879999 and the cell list is the sequence 0, 1 repeated
    50 times (100 entries), with one date range generated per entry.
    Everything is handed to run_performance_tests which also stores
    the results.

    Parameters
    ----------
    dataset: Dataset instance
        Instance of a Dataset class
    testname: string
        Name of the test, used for storing the results
    results_dir: string
        path where to store the test results
    n_dates: int, optional
        number of date ranges requested from helper.generate_date_list
    date_read_perc: float, optional
        percentage of random selection from date_range_list read for each try
    gpi_read_perc: float, optional
        percentage of random selection from gpi_list read for each try
    repeats: int, optional
        number of repeats of the tests
    cell_read_perc: float, optional
        percentage of random selection from the cell list read for each try
    max_runtime_per_test: float, optional
        maximum runtime per test in seconds, if given the tests will be aborted
        after taking more than this time
    """
    date_start = datetime(2015, 1, 8)
    date_end = datetime(2015, 2, 18)
    date_range_list = helper.generate_date_list(date_start, date_end,
                                                n=n_dates,
                                                max_spread=5, min_spread=5)
    # list() keeps both sequences real lists on Python 3 as well, where
    # range() is lazy and does not support repetition with "*"
    gpi_list = list(range(2880000))
    cell_list = list(range(2)) * 50
    cell_date_list = helper.generate_date_list(date_start, date_end,
                                               n=len(cell_list),
                                               max_spread=5, min_spread=5)
    run_performance_tests(testname, dataset, results_dir,
                          gpi_list=gpi_list,
                          date_range_list=date_range_list,
                          date_read_perc=date_read_perc,
                          gpi_read_perc=gpi_read_perc,
                          cell_read_perc=cell_read_perc,
                          repeats=repeats,
                          cell_list=cell_list,
                          cell_date_list=cell_date_list,
                          max_runtime_per_test=max_runtime_per_test)
if __name__ == '__main__':
    # Script entry point: run the netCDF tests on the files of the
    # "compr-4" folder and store the results in the "results" folder
    # next to it.
    testdata_root = os.path.join(
        "/media", "sf_D", "SMDC", "performance_tests", "CCI_testdata")
    netcdf_dir = os.path.join(testdata_root, "compr-4")
    output_dir = os.path.join(testdata_root, "results")
    run_esa_cci_netcdf_tests(test_dir=netcdf_dir, results_dir=output_dir)