# -*- coding: utf-8 -*-
# Copyright (c) 2025 University Medical Center Göttingen, Germany.
# All rights reserved.
#
# Patent Pending: DE 10 2024 112 939.5
# SPDX-License-Identifier: LicenseRef-Proprietary-See-LICENSE
#
# This software is licensed under a custom license. See the LICENSE file
# in the root directory for full details.
#
# **Commercial use is prohibited without a separate license.**
# Contact MBM ScienceBridge GmbH (https://sciencebridge.de/en/) for licensing.
import os.path
from typing import Union
import types
import numpy as np
import pandas as pd
from tqdm import tqdm as tqdm
from sarcasm.structure import Structure
from sarcasm.motion import Motion
class MultiStructureAnalysis:
    """
    Class for multi-tif-file comparison of structure.

    Parameters
    ----------
    list_files : list
        List of tif files.
    folder : str
        Path to a folder to store data and results. A trailing path
        separator is optional.
    experiment : str, optional
        Name of the experiment (default is None).
    load_data : bool, optional
        Whether to load the dataframe from previous analysis from the data folder (default is False).
    **conditions : dict
        Keyword arguments forwarded to ``Export.get_structure_dict`` for every
        file. Plain functions are called with the file path and their return
        value is stored; any other value is stored unchanged.

    Attributes
    ----------
    folder : str
        Path to the folder with data and results.
    experiment : str
        Name of the experiment.
    files : list
        List of tif files.
    conditions : dict
        Keyword arguments passed on to ``Export.get_structure_dict``.
    data : pandas.DataFrame
        DataFrame to store the structure data (None until loaded or computed).
    """

    # Name of the pickled DataFrame stored inside ``folder``.
    _DATA_FILENAME = 'data_structure.pd'

    def __init__(self, list_files: list, folder: str, experiment: str = None, load_data: bool = False, **conditions):
        self.folder = folder
        self.experiment = experiment
        self.files = list_files
        self.conditions = conditions
        self.data = None
        if load_data:
            self.load_data()

    def _data_path(self) -> str:
        """Return the path of the pickled DataFrame inside ``self.folder``."""
        # os.path.join inserts the separator only when ``folder`` lacks one, so
        # both 'results' and 'results/' resolve to a file *inside* the folder.
        return os.path.join(self.folder, self._DATA_FILENAME)

    def get_data(self, structure_keys=None, meta_keys=None):
        """
        Iterate files and get structure data.

        A ``Structure`` object is created for every file and reduced to a flat
        dictionary with ``Export.get_structure_dict``. Files that raise are
        reported on stdout and skipped, so one broken file does not abort the
        batch. The collected records are stored in ``self.data`` and saved
        with ``save_data``.

        Parameters
        ----------
        structure_keys : list, optional
            List of keys to extract structure data (default is None).
        meta_keys : list, optional
            List of keys to extract metadata (default is None).

        Returns
        -------
        None
        """
        records = []
        for tif_file in tqdm(self.files):
            try:
                sarc_obj = Structure(filepath=tif_file)
                dict_i = Export.get_structure_dict(sarc_obj, meta_keys, structure_keys,
                                                   experiment=self.experiment,
                                                   **self.conditions)
                records.append(dict_i)
            except Exception as e:
                # Deliberate best effort: report the failing file and continue.
                print(f'{tif_file} failed!')
                print(repr(e))
        self.data = pd.DataFrame.from_records(records)
        self.save_data()

    def save_data(self):
        """
        Save the DataFrame to the data folder.

        Returns
        -------
        None
        """
        self.data.to_pickle(self._data_path())

    def load_data(self):
        """
        Load the DataFrame from the data folder.

        Returns
        -------
        None

        Raises
        ------
        FileExistsError
            If the data file does not exist in the specified folder.
        """
        # Older versions built the path as ``folder + filename``; when the
        # folder had no trailing separator the file was written next to the
        # folder. That location is still checked so such files stay loadable.
        candidates = (self._data_path(), self.folder + self._DATA_FILENAME)
        for path in candidates:
            if os.path.exists(path):
                # NOTE: unpickling executes code embedded in the file; only
                # load data folders that come from a trusted source.
                self.data = pd.read_pickle(path)
                return
        # FileExistsError is kept (instead of FileNotFoundError) so existing
        # callers that catch it keep working.
        raise FileExistsError('Data from previous analysis does not exist and cannot be loaded. '
                              'Set load_data=False.')

    def export_data(self, filepath, format='.xlsx'):
        """
        Export the DataFrame to .xlsx or .csv format.

        Parameters
        ----------
        filepath : str
            Path to the output file.
        format : str, optional
            Format of the output file ('.xlsx' or '.csv') (default is '.xlsx').

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If ``format`` is neither '.xlsx' nor '.csv'.
        """
        if format == '.xlsx':
            self.data.to_excel(filepath, index=False)
        elif format == '.csv':
            self.data.to_csv(filepath, index=False)
        else:
            raise ValueError('Unsupported file format')
class MultiLOIAnalysis:
    """
    Class for multi-LOI comparison.

    Parameters
    ----------
    list_lois : list
        List of tuples containing tif file paths and LOI names.
    folder : str
        Path to a folder to store data and results. A trailing path
        separator is optional.
    load_data : bool, optional
        Whether to load the dataframe from previous analysis from the folder (default is False).
    **conditions : dict
        Keyword arguments forwarded to ``Export.get_motion_dict`` for every
        LOI. Plain functions are called with the file path and their return
        value is stored; any other value is stored unchanged.

    Attributes
    ----------
    folder : str
        Path to the folder with data and results.
    lois : list
        List of tuples containing tif file paths and LOI names.
    conditions : dict
        Keyword arguments passed on to ``Export.get_motion_dict``.
    data : pandas.DataFrame
        DataFrame to store the motion data (None until loaded or computed).
    """

    # Name of the pickled DataFrame stored inside ``folder``.
    _DATA_FILENAME = 'data_motion.pd'

    def __init__(self, list_lois, folder, load_data=False, **conditions):
        self.folder = folder
        self.lois = list_lois
        self.conditions = conditions
        self.data = None
        if load_data:
            self.load_data()

    def _data_path(self) -> str:
        """Return the path of the pickled DataFrame inside ``self.folder``."""
        # os.path.join inserts the separator only when ``folder`` lacks one, so
        # both 'results' and 'results/' resolve to a file *inside* the folder.
        return os.path.join(self.folder, self._DATA_FILENAME)

    def get_data(self, loi_keys=None, meta_keys=None):
        """
        Iterate files and get motion data.

        A ``Motion`` object is created for every (tif file, LOI name) pair and
        reduced to a flat dictionary with ``Export.get_motion_dict``. Pairs
        that raise are reported on stdout and skipped, so one broken LOI does
        not abort the batch. The collected records are stored in ``self.data``
        and saved with ``save_data``.

        Parameters
        ----------
        loi_keys : list, optional
            List of keys to extract motion data (default is None).
        meta_keys : list, optional
            List of keys to extract metadata (default is None).

        Returns
        -------
        None
        """
        records = []
        for tif_file, loi_name in tqdm(self.lois):
            try:
                motion_obj = Motion(tif_file, loi_name)
                dict_i = Export.get_motion_dict(motion_obj, meta_keys, loi_keys, **self.conditions)
                records.append(dict_i)
            except Exception as e:
                # Deliberate best effort: report the failing LOI and continue.
                print(f'{tif_file}, {loi_name} failed!')
                print(repr(e))
        self.data = pd.DataFrame.from_records(records)
        self.save_data()

    def save_data(self):
        """
        Save the DataFrame to the data folder as a pandas DataFrame.

        Returns
        -------
        None
        """
        self.data.to_pickle(self._data_path())

    def load_data(self):
        """
        Load the DataFrame from the data folder.

        Returns
        -------
        None

        Raises
        ------
        FileExistsError
            If the data file does not exist in the specified folder.
        """
        # Older versions built the path as ``folder + filename``; when the
        # folder had no trailing separator the file was written next to the
        # folder. That location is still checked so such files stay loadable.
        candidates = (self._data_path(), self.folder + self._DATA_FILENAME)
        for path in candidates:
            if os.path.exists(path):
                # NOTE: unpickling executes code embedded in the file; only
                # load data folders that come from a trusted source.
                self.data = pd.read_pickle(path)
                return
        # FileExistsError is kept (instead of FileNotFoundError) so existing
        # callers that catch it keep working.
        raise FileExistsError('Data from previous analysis does not exist and cannot be loaded. '
                              'Set load_data=False.')

    def export_data(self, filepath, format='.xlsx'):
        """
        Export the DataFrame to .xlsx or .csv format.

        Parameters
        ----------
        filepath : str
            Path to the output file.
        format : str, optional
            Format of the output file ('.xlsx' or '.csv') (default is '.xlsx').

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If ``format`` is neither '.xlsx' nor '.csv'.
        """
        if format == '.xlsx':
            self.data.to_excel(filepath, index=False)
        elif format == '.csv':
            self.data.to_csv(filepath, index=False)
        else:
            raise ValueError('Unsupported file format')
class Export:
    """
    A class used to export structure and motion data from SarcAsM and Motion objects.

    All methods are static; the class only groups the export helpers and the
    default key lists.

    Attributes
    ----------
    meta_keys_default : list
        Default metadata keys.
    structure_keys_default : list
        Default structure keys.
    motion_keys_default : list
        Default motion keys.
    """
    meta_keys_default = ['file_name', 'file_path', 'frames', 'size', 'pixelsize', 'timestamps',
                         'time', 'frametime']
    structure_keys_default = ['cell_mask_area', 'cell_mask_area_ratio', 'cell_mask_intensity',
                              'domain_area_mean', 'domain_area_std', 'domain_oop_mean',
                              'domain_oop_std', 'domain_slen_mean', 'n_domains',
                              'myof_length_max', 'myof_length_mean', 'myof_length_std',
                              'myof_bending_mean', 'myof_bending_std',
                              'myof_straightness_mean', 'myof_straightness_std',
                              'sarcomere_area', 'sarcomere_area_ratio', 'sarcomere_length_mean',
                              'sarcomere_length_std', 'sarcomere_oop', 'n_zbands', 'n_mbands', 'n_vectors',
                              'z_intensity_mean', 'z_intensity_std', 'z_lat_alignment_mean',
                              'z_lat_alignment_std', 'z_lat_dist_mean', 'z_lat_dist_std', 'z_lat_length_groups_mean',
                              'z_lat_neighbors_mean', 'z_lat_neighbors_std', 'z_length_max',
                              'z_length_mean', 'z_length_std', 'z_oop', 'z_mask_area', 'z_mask_area_ratio',
                              'z_mask_intensity', 'z_straightness_mean', 'z_straightness_std']
    motion_keys_default = ['beating_rate', 'beating_rate_variability', 'contr_max', 'contr_max_avg', 'elong_max',
                           'elong_max_avg', 'equ', 'time', 'vel_contr_max', 'vel_contr_max_avg', 'vel_elong_max',
                           'vel_elong_max_avg', 'n_sarcomeres', 'n_contr', 'ratio_nans',
                           'popping_rate_contr', 'popping_rate_sarcomeres', 'popping_rate',
                           'popping_events', 'popping_dist', 'popping_tau',
                           'popping_ks_dist_pvalue', 'popping_ks_dist_statistic', 'popping_p_dist', 'popping_p_tau',
                           'popping_ks_tau_pvalue', 'popping_ks_tau_statistic', 'time_to_peak', 'time_to_peak_avg',
                           'time_contr', 'time_quiet',
                           'corr_delta_slen', 'corr_vel',
                           'corr_delta_slen_serial', 'corr_delta_slen_mutual', 'corr_vel_serial', 'corr_vel_mutual',
                           'ratio_delta_slen_mutual_serial', 'ratio_vel_mutual_serial']

    @staticmethod
    def _apply_conditions(dict_, conditions, filepath):
        """Add each condition to ``dict_`` in place, calling plain functions with ``filepath``."""
        for condition, value in conditions.items():
            if isinstance(value, types.FunctionType):
                dict_[condition] = value(filepath)
            else:
                dict_[condition] = value

    @staticmethod
    def get_structure_dict(sarc_obj, meta_keys=None, structure_keys=None, **conditions):
        """
        Create a dictionary of structure and metadata features from a SarcAsM object.

        Parameters
        ----------
        sarc_obj : SarcAsM
            Object of SarcAsM class or Motion class. It must provide the
            mappings ``metadata`` and ``data`` and the attribute ``filepath``.
        meta_keys : list, optional
            List of metadata keys (default is None, which selects
            ``Export.meta_keys_default``).
        structure_keys : list, optional
            List of structure keys (default is None, which selects
            ``Export.structure_keys_default``).
        conditions : kwargs
            Keyword arguments to add information to the dictionary (e.g., "cell_line"= "wt", "info_xyz"=42).
            Plain functions are called with ``sarc_obj.filepath`` and their
            return value is stored; any other value is stored unchanged.

        Returns
        -------
        dict
            Dictionary containing selected metadata and structure features.
            Requested keys that are missing are reported on stdout and filled
            with NaN.
        """
        if structure_keys is None:
            structure_keys = Export.structure_keys_default
        if meta_keys is None:
            meta_keys = Export.meta_keys_default

        missing_meta_keys = [key for key in meta_keys if key not in sarc_obj.metadata]
        if missing_meta_keys:
            print('Missing metadata keys: ', missing_meta_keys)
        dict_metadata_select = {key: sarc_obj.metadata.get(key, np.nan) for key in meta_keys}

        missing_structure_keys = [key for key in structure_keys if key not in sarc_obj.data]
        if missing_structure_keys:
            print('Missing structure keys: ', missing_structure_keys)
        dict_structure_select = {key: sarc_obj.data.get(key, np.nan) for key in structure_keys}

        # Later entries win on key clashes: structure over metadata, conditions over both.
        dict_ = {**dict_metadata_select, **dict_structure_select}
        Export._apply_conditions(dict_, conditions, sarc_obj.filepath)
        return dict_

    @staticmethod
    def export_structure_data(filepath, sarc_obj: 'Union[Structure, Motion]', meta_keys=None, structure_keys=None,
                              remove_arrays=True, fileformat='.xlsx'):
        """
        Export structure data to a file.

        Parameters
        ----------
        filepath : str
            Path to the output file.
        sarc_obj : SarcAsM
            Object of SarcAsM class.
        meta_keys : list, optional
            List of metadata keys (default is None).
        structure_keys : list, optional
            List of structure keys (default is None).
        remove_arrays : bool, optional
            If True, removes columns with array data (default is True).
        fileformat : str, optional
            Format of the output file, one of '.xlsx', '.csv' or '.xml'
            (default is '.xlsx').

        Raises
        ------
        ValueError
            If ``fileformat`` is not supported.
        """
        structure_dict = Export.get_structure_dict(sarc_obj, meta_keys=meta_keys,
                                                   structure_keys=structure_keys)
        structure_df = pd.DataFrame(structure_dict)
        if remove_arrays:
            structure_df = Export.remove_arrays_dataframe(structure_df)
        if fileformat == '.xlsx':
            structure_df.to_excel(filepath)
        elif fileformat == '.csv':
            structure_df.to_csv(filepath)
        elif fileformat == '.xml':
            structure_df.to_xml(filepath)
        else:
            # Fail loudly like export_motion_data instead of writing nothing.
            raise ValueError('Unsupported file format')

    @staticmethod
    def remove_arrays_dataframe(df):
        """
        Remove columns with array data from a DataFrame.

        A column is removed when any of its cells holds a ``numpy.ndarray``.
        The check does not depend on the index labels, so empty frames and
        frames without a row labelled 0 are handled.

        Parameters
        ----------
        df : pandas.DataFrame
            Input DataFrame. It is not modified.

        Returns
        -------
        pandas.DataFrame
            DataFrame with array columns removed.
        """
        array_columns = [key for key in df.columns
                         if any(isinstance(value, np.ndarray) for value in df[key])]
        # drop() returns a new frame, so the caller's DataFrame stays untouched.
        return df.drop(columns=array_columns)

    @staticmethod
    def get_motion_dict(motion_obj, meta_keys=None, loi_keys=None, concat=False, **conditions):
        """
        Create a dictionary of motion features and metadata from a Motion object.

        Parameters
        ----------
        motion_obj : Motion
            Object of Motion class for LOI analysis. It must provide the
            mappings ``metadata`` and ``loi_data`` and the attributes
            ``loi_name`` and ``filepath``.
        meta_keys : list, optional
            List of metadata keys (default is None, which selects
            ``Export.meta_keys_default``).
        loi_keys : list, optional
            List of LOI keys (default is None, which selects
            ``Export.motion_keys_default``).
        concat : bool, optional
            If True, all 2D arrays will be concatenated to 1D arrays (default is False).
        conditions : kwargs
            Keyword arguments to add to the dictionary, can be any information, e.g., drug='ABC'.
            Plain functions are called with ``motion_obj.filepath`` and their
            return value is stored; any other value is stored unchanged.

        Returns
        -------
        dict
            Dictionary containing selected metadata and motion features plus
            the entries 'loi_name' and 'tif_name'. Requested keys that are
            missing are reported on stdout and filled with NaN.
        """
        if loi_keys is None:
            loi_keys = Export.motion_keys_default
        if meta_keys is None:
            meta_keys = Export.meta_keys_default

        missing_meta_keys = [key for key in meta_keys if key not in motion_obj.metadata]
        if missing_meta_keys:
            print('Missing metadata keys: ', missing_meta_keys)
        dict_metadata_select = {key: motion_obj.metadata.get(key, np.nan) for key in meta_keys}

        missing_loi_keys = [key for key in loi_keys if key not in motion_obj.loi_data]
        if missing_loi_keys:
            print('Missing loi keys: ', missing_loi_keys)
        dict_loi_select = {key: motion_obj.loi_data[key] if key in motion_obj.loi_data else np.nan
                           for key in loi_keys}

        dict_ = {**dict_metadata_select, **dict_loi_select, 'loi_name': motion_obj.loi_name}
        Export._apply_conditions(dict_, conditions, motion_obj.filepath)

        if concat:
            # Iterate over a snapshot because values are reassigned in the loop.
            for key, value in list(dict_.items()):
                if isinstance(value, np.ndarray) and value.ndim == 2:
                    # Joins the rows of the 2D array end to end into one 1D array.
                    dict_[key] = np.concatenate(value)
        # Set last, so it overrides a condition that is also named 'tif_name'.
        dict_['tif_name'] = motion_obj.filepath
        return dict_

    @staticmethod
    def export_motion_data(mot_obj: 'Motion', filepath, meta_keys=None, motion_keys=None, remove_arrays=True,
                           fileformat='.xlsx'):
        """
        Export motion data to a file.

        Parameters
        ----------
        mot_obj : Motion
            Object of Motion class.
        filepath : str
            Path to the output file.
        meta_keys : list, optional
            List of metadata keys (default is None).
        motion_keys : list, optional
            List of motion keys (default is None).
        remove_arrays : bool, optional
            If True, removes columns with array data (default is True).
        fileformat : str, optional
            Format of the output file, '.xlsx' or '.csv' (default is '.xlsx').

        Raises
        ------
        ValueError
            If ``fileformat`` is not supported.
        """
        motion_dict = Export.get_motion_dict(mot_obj, meta_keys=meta_keys, loi_keys=motion_keys)
        motion_df = pd.DataFrame(motion_dict)
        if remove_arrays:
            motion_df = Export.remove_arrays_dataframe(motion_df)
        if fileformat == '.xlsx':
            motion_df.to_excel(filepath)
        elif fileformat == '.csv':
            motion_df.to_csv(filepath)
        else:
            raise ValueError('Unsupported file format')