Source code for smdc_perftests.performance_tests.analyze

# Copyright (c) 2015,Vienna University of Technology,
# Department of Geodesy and Geoinformation
# All rights reserved.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

'''
Module for analyzing and the test results
Created on Thu Apr  2 14:30:51 2015

@author: christoph.paulik@geo.tuwien.ac.at
'''
import pandas as pd
import matplotlib.pyplot as plt

import smdc_perftests.performance_tests.test_cases as test_cases

try:
    import seaborn as sns
    seaborn_installed = True
except ImportError:
    seaborn_installed = False
    pass


[docs]def prep_results(results_files, name_fm=None, grouping_f=None):
    """
    Takes a list of results file names and bundles the results into a
    pandas DataFrame

    Parameters
    ----------
    results_files: list
        list of filenames to load
    name_fm: function, optional
        if set a function that gets the name of the results and
        returns a more meaningful name. This is useful if the names of
        the results are very long or verbose.
    grouping_f: function ,optional
        can be used to assign groups according to the name of the
        results. Gets the name and returns a string.

    Returns
    -------
    df : pandas.DataFrame
        Results named and possibly grouped
    """
    if name_fm is None:
        name_fm = lambda x: x
    if grouping_f is None:
        grouping_f = lambda x: 'means'

    d = {}
    names = []

    for fname in results_files:
        res = test_cases.TestResults(fname)
        name = name_fm(res.name)
        if name not in names:
            names.append(name)
        group = grouping_f(res.name)
        if group not in d:
            d[group] = []
        d[group].append(res.mean)

    df = pd.DataFrame(d, index=names)
    return df


[docs]def bar_plot(df, show=True):
    """
    Make a bar plot from the gathered results

    Parameters
    ----------
    df: pandas.DataFrame
        Measured data
    show: boolean
        if set then the plot is shown

    Returns
    -------
    ax: matplotlib.axes
       axes of the plot
    """
    ax = df.plot(kind='bar')
    ax.set_yscale('log')
    if show:
        plt.show()
    return ax


[docs]def esa_cci_name_formatter(n):
    parts = n.split('_')
    chunking = parts[2]
    return chunking.split(',')[0]


[docs]def esa_cci_grouping(n):
    parts = n.split('_')
    rtype = parts[3]
    return '-'.join(rtype.split('-')[-2:])

if __name__ == '__main__':
    import glob
    import os

    path = os.path.join(
        "/media", "sf_D", "SMDC", "performance_tests", "CCI_testdata", "results")
    fs = glob.glob(os.path.join(path, "*.nc"))
    df = prep_results(fs, name_fm=esa_cci_name_formatter,
                      grouping_f=esa_cci_grouping)
    bar_plot(df)