# Source code for dlsmicro.backend.io

""" Module for parsing data exported from Zetasizer software"""
import pandas as pd
import numpy as np

# Column names, in the order in which they appear in a .csv file exported
# with the dlsmicro_export.edf template.
default_column_order = [
    'Record',
    'Sample Name',
    'Measurement Position',
    'Correlation Data',
    'Correlation Delay Times',
    'Distribution Fit Data',
    'Distribution Fit Delay Times',
    'Cumulants Fit Data',
    'Cumulants Fit Delay Times',
    'Derived Count Rate',
    'Measured Intercept',
    'Measured Baseline',
]


def _parse_float_list(cell):
    """Convert a string of comma separated values, e.g. "0.50, 1.0, 1.5",
    into a 1d float array."""
    return np.array([float(value) for value in cell.split(',')])


def read_zetasizer_csv_to_dict(file_path, row,
                               intensities_rows=None,
                               column_order=None,
                               use_zetasizer_g1=True):
    """ Read csv file exported from the Zetasizer software to a
    dictionary containing data relevant to DLS microrheology analysis

    Parameters
    ----------
    file_path : str
                Path to the .csv file to be read. It is passed unchanged to
                ``pandas.read_csv``.
    row : int
          Row number (0-indexed) for the measurement record containing the
          correlation data
    intensities_rows : list of int, `optional`
                       List of rows (0-indexed) corresponding to the
                       scattering intensity measurements for the
                       broken-ergodicity correction. If ``None``, it is
                       assumed that the intensity measurements begin on the
                       row after the correlation data (``row + 1``) and end
                       at the last row of the .csv file.
    column_order : list of str, `optional`
                   Ordered list of names for the columns in the .csv file
                   (depends on your export template). If ``None`` (the
                   default), the module-level ``default_column_order`` is
                   used, which matches the `dlsmicro_export.edf` template.
    use_zetasizer_g1 : boolean, `optional`
                       If `True`, the estimated intermediate scattering
                       function `g1` ('Distribution Fit Data') and the
                       measured baseline `B` exported by the Zetasizer
                       software are used to recompute the correlation
                       function as ``B + g1**2``. Each recomputed value
                       overwrites the correlation value whose time-lag is
                       closest to the corresponding 'Distribution Fit Delay
                       Times' entry. This is useful because `g1` is exported
                       at higher numerical precision than the correlation
                       function. The `g1` exported from the Zetasizer is
                       `not` the true `g1` for non-ergodic samples, but this
                       option inverts the formula used by the Zetasizer.
                       If `False`, the fit and baseline columns are not
                       read at all.

    Returns
    -------
    data_dict : dictionary
                Python dictionary containing the keys below
    'time_lag' : 1d-array
                 Vector of time-lags (in microseconds) at which the
                 correlation function is measured.
    'correlation' : 1d-array
                    Vector of values of the correlation coefficient at the
                    time-lags ``data_dict['time_lag']``
    'point_intensity' : float
                        Scattering intensity ('Derived Count Rate') at the
                        measurement position where
                        ``data_dict['correlation']`` is collected
    'ensemble_intensities' : pandas Series
                             Scattering intensities ('Derived Count Rate')
                             of the rows in ``intensities_rows``
    'point_position' : float
                       Measurement position in the cuvette (in mm) where the
                       data for the correlation function
                       ``data_dict['correlation']`` is collected
    'ensemble_positions' : pandas Series
                           Measurement positions in the cuvette
                           corresponding to the scattering intensities in
                           ``data_dict['ensemble_intensities']``

    Notes
    -----
    This function will correctly parse the .csv file generated using the
    dlsmicro_export.edf Zetasizer software template. If you would like to use
    this function for parsing .csv files from a different user-generated
    Zetasizer template, the following parameters `must` be exported. In
    addition, a list of the parameters in the order in which they occur in
    the .csv columns must be supplied as ``column_order``. Note that if
    generating your own Zetasizer template, you should not use the "include
    headers" option (the file is read with ``header=None``). This does not
    work well for exporting the correlation data.

    'Correlation Data' : Zetasizer exports as a string of comma separated
                         values, e.g. "1.000, 0.987, 0.921, ..."
    'Correlation Delay Times' : Zetasizer exports as a string of comma
                                separated values, e.g. "0.50, 1.0, 1.5, ..."
    'Derived Count Rate'
    'Measurement Position'

    If ``use_zetasizer_g1==True``, then the export template ``must`` also
    include the following:
    'Distribution Fit Data': Zetasizer exports as a string of comma separated
                             values, e.g. "0.50, 1.0, 1.5, ..."
    'Distribution Fit Delay Times': Zetasizer exports as a string of comma
                                    separated values,
                                    e.g. "0.50, 1.0, 1.5, ..."
    'Measured Baseline'

    'Measured Intercept' is part of the default template but is not read by
    this function.
    """
    # Resolve the default at call time instead of binding the mutable
    # module-level list into the signature.
    if column_order is None:
        column_order = default_column_order

    # Read csv to pandas dataframe; the export has no header row
    df = pd.read_csv(file_path, header=None, names=column_order)

    # By default, assume that the scattering intensity measurements for the
    # broken ergodicity correction occupy every row after `row`
    if intensities_rows is None:
        intensities_rows = list(range(row + 1, len(df)))

    # Record holding the correlation function of interest
    record = df.iloc[row]
    g = _parse_float_list(record['Correlation Data'])
    t = _parse_float_list(record['Correlation Delay Times'])

    # Replace g with the data obtained from the g1 correlation function
    # (more precise than the exported g) where the data exists. The fit
    # columns are only touched here so that templates without them still
    # work when use_zetasizer_g1 is False.
    if use_zetasizer_g1:
        tfit = _parse_float_list(record['Distribution Fit Delay Times'])
        g1fit = _parse_float_list(record['Distribution Fit Data'])
        B = record['Measured Baseline']
        gadj = B + g1fit**2.
        # For every fitted delay time, index of the closest measured time-lag
        tinds = [np.argmin(np.abs(t - ti)) for ti in tfit]
        g[tinds] = gadj

    # Scattering intensity and position for the row of interest and for
    # the ensemble rows
    ensemble = df.iloc[intensities_rows]
    Ip = record['Derived Count Rate']
    Ie = ensemble['Derived Count Rate']
    point_pos = record['Measurement Position']
    epos = ensemble['Measurement Position']

    data_dict = {'time_lag': t, 'correlation': g, 'point_intensity': Ip,
                 'ensemble_intensities': Ie, 'point_position': point_pos,
                 'ensemble_positions': epos}
    return data_dict