Examples¶
Basic Example¶
import smdc_perftests.performance_tests.test_cases as test_cases
import time
import numpy as np
# use measure decorator to run function multiple times
# and measure execution time of each run
# the returned result gets the name given in
# the decorator but can be changed later if necessary
@test_cases.measure('experiment', runs=50)
def experiment(sleeptime=0.01):
    """
    Sleep for a random time between sleeptime and 2 * sleeptime.

    Parameters
    ----------
    sleeptime : float, optional
        base sleep time in seconds, a random fraction of the
        same amount is added on top
    """
    # rand() without a shape argument gives a plain float instead of
    # the one-element array that rand(1) would hand to time.sleep
    time.sleep(sleeptime + np.random.rand() * sleeptime)
result1 = experiment()
result2 = experiment(0.05)
result2.name = "sleep 0.05"
result3 = experiment(0.011)
result3.name = "sleep 0.011"
# the results can be printed
print result1
print result3
Results experiment
50 runs
median 0.0158 mean 0.0157 stdev 0.0029
sum 0.7859
95%% confidence interval of the mean
upper 0.0165
|
mean 0.0157
|
lower 0.0149
Results sleep 0.011
50 runs
median 0.0158 mean 0.0163 stdev 0.0034
sum 0.8168
95%% confidence interval of the mean
upper 0.0173
|
mean 0.0163
|
lower 0.0154
# the results can also be compared based on the 95% confidence intervals.
print result1 < result2
print result2 < result1
print result1 < result3
True
False
False
# or plotted as boxplots
import smdc_perftests.visual as vis
import matplotlib.pyplot as plt
%matplotlib inline
fig, axis = vis.plot_boxplots(result1, result3)
plt.show()
Example with Dataset¶
import smdc_perftests.performance_tests.test_runner as test_runner
import time
import datetime as dt
import numpy as np
# define a fake Dataset class that implements the methods
# get_timeseries, get_avg_image and get_data
class FakeDataset(object):
    """
    Fake Dataset that provides routines for reading
    time series and images
    that do nothing

    Each read method only sleeps for a short random time
    and counts how often it was called.

    Attributes
    ----------
    ts_read : int
        number of get_timeseries calls
    img_read : int
        number of get_avg_image calls
    cells_read : int
        number of get_data calls
    """

    def __init__(self):
        self.ts_read = 0
        self.img_read = 0
        self.cells_read = 0

    @staticmethod
    def _simulate_read():
        """
        Sleep for a random time below 0.01 seconds.
        """
        # rand() without a shape argument gives a plain float instead of
        # the one-element array that rand(1) would hand to time.sleep
        time.sleep(0.01 * np.random.rand())

    def get_timeseries(self, gpi, date_start=None, date_end=None):
        """
        Pretend to read a time series, the arguments are not used.

        Returns
        -------
        None
        """
        self._simulate_read()
        self.ts_read += 1
        return None

    def get_avg_image(self, date_start, date_end=None, cell_id=None):
        """
        Pretend to read an averaged image.

        Image readers generally return more than one
        variable. This should not matter for these tests.

        Parameters
        ----------
        date_start : datetime.datetime
            checked to be a datetime
        date_end : optional
            not used
        cell_id : optional
            not used

        Returns
        -------
        tuple
            five times None
        """
        # isinstance also accepts subclasses of datetime
        assert isinstance(date_start, dt.datetime)
        self.img_read += 1
        self._simulate_read()
        return None, None, None, None, None

    def get_data(self, date_start, date_end, cell_id):
        """
        Pretend to read the data of one cell.

        Image readers generally return more than one
        variable. This should not matter for these tests.

        Parameters
        ----------
        date_start : datetime.datetime
            checked to be a datetime
        date_end : datetime.datetime
            checked to be a datetime
        cell_id :
            not used

        Returns
        -------
        tuple
            five times None
        """
        assert isinstance(date_start, dt.datetime)
        assert isinstance(date_end, dt.datetime)
        self.cells_read += 1
        self._simulate_read()
        return None, None, None, None, None
fd = FakeDataset()
# setup grid point index list, must come from grid object or
# sciDB
# this test dataset has 10000 gpis of which 1 percent will be read
gpi_list = range(10000)
@test_runner.measure('test_rand_gpi', runs=100)
def test_ts():
    """
    Call test_runner.read_rand_ts_by_gpi_list with the
    fake dataset fd and the gpi_list defined above.
    """
    test_runner.read_rand_ts_by_gpi_list(fd, gpi_list)
result_ts = test_ts()
print result_ts
Results test_rand_gpi
100 runs
median 0.5642 mean 0.5591 stdev 0.0334
sum 55.9069
95%% confidence interval of the mean
upper 0.5657
|
mean 0.5591
|
lower 0.5524
# setup datetime list
# this test dataset has 10000 days of dates of which 1 percent will be read
# day offsets 0..9999 added to the first date, 2007-01-01
date_list = [dt.datetime(2007, 1, 1) + dt.timedelta(days=days)
             for days in range(10000)]
@test_runner.measure('test_rand_date', runs=100)
def test_img():
    """
    Call test_runner.read_rand_img_by_date_list with the
    fake dataset fd and the date_list defined above.
    """
    test_runner.read_rand_img_by_date_list(fd, date_list)
result_img = test_img()
print result_img
Results test_rand_date
100 runs
median 0.5530 mean 0.5548 stdev 0.0343
sum 55.4800
95%% confidence interval of the mean
upper 0.5616
|
mean 0.5548
|
lower 0.5480
"""
Read data by cell list using fixed start and end date
1 percent of the cells are read with a minimum of 1 cell.
"""
fd = FakeDataset()
cell_list = range(10000)
@test_runner.measure('test_rand_cells', runs=100)
def test():
    """
    Call test_runner.read_rand_cells_by_cell_list with the
    fake dataset fd, the fixed start date 2007-01-01, the fixed
    end date 2008-01-01 and the cell_list defined above.
    """
    test_runner.read_rand_cells_by_cell_list(
        fd, dt.datetime(2007, 1, 1), dt.datetime(2008, 1, 1), cell_list)
results_cells = test()
print results_cells
Results test_rand_cells
100 runs
median 0.5510 mean 0.5476 stdev 0.0368
sum 54.7624
95%% confidence interval of the mean
upper 0.5549
|
mean 0.5476
|
lower 0.5403
import smdc_perftests.visual as vis
import matplotlib.pyplot as plt
%matplotlib inline
fig, axis = vis.plot_boxplots(result_ts, result_img, results_cells)
plt.show()
Example of running the test suite and analyzing the results¶
import os
from datetime import datetime
from smdc_perftests.performance_tests import test_scripts
# the test_scripts module contains the function
# run_performance_tests which runs all the performance tests on a dataset
# in this example we will use the esa cci dataset class
from smdc_perftests.datasets.esa_cci import ESACCI_netcdf
from smdc_perftests import helper
#init the esa cci dataset
fname = os.path.join("/media", "sf_H", "Development", "python",
"workspace",
"SMDC", "SMDC_perftests", "tests", "test_data",
"ESACCI-2Images.nc")
# only read the sm variable for this testrun
ds = ESACCI_netcdf(fname, variables=['sm'])
# get the testname from the filename
testname = os.path.splitext(os.path.split(fname)[1])[0]
# generate a date range list using the helper function
# in this example this does not make a lot of sense
date_range_list = helper.generate_date_list(datetime(2013, 11, 30),
datetime(2013, 12, 1),
n=50)
# set a directory into which to save the results
# in this case the tests folder in the home directory
res_dir = "/home/pydev/tests/"
# run the performance tests using the grid point indices from
# the dataset grid, the generated date_range_list, a gpi read percentage
# of 0.1 percent and only one repeat
test_scripts.run_performance_tests(testname, ds, res_dir,
gpi_list=ds.grid.land_ind,
date_range_list=date_range_list,
gpi_read_perc=0.1,
repeats=1)
reading 245 out of 244243 time series
reading 1 out of 50 dates
reading 1 out of 50 dates
This creates the following files, named using the name given to the test and the name of the test function that was run.
!ls /home/pydev/tests
ESACCI-2Images_test-rand-avg-img.nc ESACCI-2Images_test-rand-gpi.nc
ESACCI-2Images_test-rand-daily-img.nc
Visualization of the results¶
%matplotlib inline
import glob
import smdc_perftests.performance_tests.analyze as analyze
# get all the files in the results folder
fs = glob.glob(os.path.join(res_dir, "*.nc"))
df = analyze.prep_results(fs)
# this returns the mean times at the moment
print df
# and makes a very simple bar plot
ax = analyze.bar_plot(df)
means
ESACCI-2Images_test-rand-avg-img 0.085946
ESACCI-2Images_test-rand-gpi 0.098265
ESACCI-2Images_test-rand-daily-img 0.059122