""" Module for parsing data exported from Zetasizer software"""
import pandas as pd
import numpy as np
# Column names, in export order, produced by the ``dlsmicro_export.edf``
# Zetasizer template. Used as the header when reading the header-less .csv.
default_column_order = [
    # Record identification
    'Record',
    'Sample Name',
    'Measurement Position',
    # Raw correlation function
    'Correlation Data',
    'Correlation Delay Times',
    # Distribution fit
    'Distribution Fit Data',
    'Distribution Fit Delay Times',
    # Cumulants fit
    'Cumulants Fit Data',
    'Cumulants Fit Delay Times',
    # Scalar measurement values
    'Derived Count Rate',
    'Measured Intercept',
    'Measured Baseline',
]
def _parse_float_list(cell):
    """Convert one comma-separated Zetasizer value string to a float array."""
    # Blank tokens (e.g. produced by a trailing comma) are skipped rather
    # than being passed to ``float``, which would raise ``ValueError``.
    return np.array(
        [float(token) for token in cell.split(',') if token.strip()])


def read_zetasizer_csv_to_dict(file_path, row,
                               intensities_rows=None,
                               column_order=None,
                               use_zetasizer_g1=True):
    """Read a .csv file exported from the Zetasizer software into a
    dictionary containing the data relevant to DLS microrheology analysis.

    Parameters
    ----------
    file_path : str
        Path to the .csv file to be read.
    row : int
        Row number (0-indexed) of the measurement record containing the
        correlation data.
    intensities_rows : list of int, `optional`
        List of rows (0-indexed) corresponding to the scattering intensity
        measurements for the broken-ergodicity correction. If ``None``,
        it is assumed that the intensity measurements begin on the row
        after ``row`` and end at the last row of the .csv file.
    column_order : list of str, `optional`
        Ordered list of names for the columns in the .csv file (depends on
        your export template). If ``None`` (the default), the module-level
        ``default_column_order`` is used, which matches the
        `dlsmicro_export.edf` template.
    use_zetasizer_g1 : boolean, `optional`
        If `True`, the estimated intermediate scattering function `g1`
        ('Distribution Fit Data') and the measured baseline exported by
        the Zetasizer software are used to recalculate the correlation
        function as ``baseline + g1**2`` at the delay times where the fit
        exists. This is useful because `g1` is exported at higher
        numerical precision than the correlation function. The `g1`
        exported from the Zetasizer is `not` the true `g1` for
        non-ergodic samples, but this option correctly inverts the
        formula used by the Zetasizer. If `False`, the fit and baseline
        columns are not read at all.

    Returns
    -------
    data_dict : dictionary
        Python dictionary containing the keys below

        'time_lag' : 1d-array
            Vector of time-lags (in microseconds) at which the
            correlation function is measured.
        'correlation' : 1d-array
            Vector of values of the correlation coefficient at the
            time-lags ``data_dict['time_lag']``.
        'point_intensity' : float
            Scattering intensity ('Derived Count Rate') at the
            measurement position where ``data_dict['correlation']``
            is collected.
        'ensemble_intensities' : pandas Series
            Scattering intensities ('Derived Count Rate') of the rows
            selected by ``intensities_rows``.
        'point_position' : float
            Measurement position in the cuvette (in mm) where the data
            for the correlation function ``data_dict['correlation']``
            is collected.
        'ensemble_positions' : pandas Series
            Measurement positions in the cuvette corresponding to the
            scattering intensities in
            ``data_dict['ensemble_intensities']``.

    Notes
    -----
    This function will correctly parse the .csv file generated using the
    dlsmicro_export.edf Zetasizer software template. If you would like to
    use this function for parsing .csv files from a different
    user-generated Zetasizer template, the following parameters `must` be
    exported. In addition, a list of the parameters in the order in which
    they occur in the .csv columns must be supplied through
    ``column_order``. Note that if generating your own Zetasizer template,
    you should not use the "include headers" option. This does not work
    well for exporting the correlation data.

    'Correlation Data' : Zetasizer exports as a string of comma
        separated values, e.g. "1.000, 0.987, 0.921, ..."
    'Correlation Delay Times' : Zetasizer exports as a string of comma
        separated values, e.g. "0.50, 1.0, 1.5, ..."
    'Derived Count Rate'
    'Measurement Position'

    If ``use_zetasizer_g1==True``, then the export template ``must`` also
    include the following:

    'Distribution Fit Data' : Zetasizer exports as a string of comma
        separated values, e.g. "0.50, 1.0, 1.5, ..."
    'Distribution Fit Delay Times' : Zetasizer exports as a string of
        comma separated values, e.g. "0.50, 1.0, 1.5, ..."
    'Measured Baseline'

    'Measured Intercept' is part of the default template but is not read
    by this function.
    """
    # Resolve the default at call time so the module-level list is not
    # bound as a shared default argument.
    if column_order is None:
        column_order = default_column_order

    # The export has no header line; the column names come from the
    # template order.
    df = pd.read_csv(file_path, header=None, names=column_order)

    # By default, assume that the scattering intensity measurements for the
    # broken-ergodicity correction run from the row after ``row`` to the
    # end of the file.
    if intensities_rows is None:
        intensities_rows = list(range(row + 1, len(df)))

    record = df.iloc[row]
    t = _parse_float_list(record['Correlation Delay Times'])
    g = _parse_float_list(record['Correlation Data'])

    # Scattering intensity and position for the row of interest and for
    # the ensemble rows.
    Ip = record['Derived Count Rate']
    point_pos = record['Measurement Position']
    ensemble = df.iloc[intensities_rows]
    Ie = ensemble['Derived Count Rate']
    epos = ensemble['Measurement Position']

    # Replace g with the values rebuilt from the exported g1 (which is more
    # precise than the exported correlation data) wherever the fit exists.
    # The fit columns are only touched here so that templates without them
    # still work when ``use_zetasizer_g1`` is False.
    if use_zetasizer_g1:
        tfit = _parse_float_list(record['Distribution Fit Delay Times'])
        g1fit = _parse_float_list(record['Distribution Fit Data'])
        B = record['Measured Baseline']
        gadj = B + g1fit**2.
        # For each fit delay time, index of the closest measured time-lag.
        tinds = np.abs(t[np.newaxis, :] - tfit[:, np.newaxis]).argmin(axis=1)
        g[tinds] = gadj

    data_dict = {'time_lag': t, 'correlation': g, 'point_intensity': Ip,
                 'ensemble_intensities': Ie, 'point_position': point_pos,
                 'ensemble_positions': epos}
    return data_dict