Source code for sarcasm.export

# -*- coding: utf-8 -*-
# Copyright (c) 2025 University Medical Center Göttingen, Germany.
# All rights reserved.
#
# Patent Pending: DE 10 2024 112 939.5
# SPDX-License-Identifier: LicenseRef-Proprietary-See-LICENSE
#
# This software is licensed under a custom license. See the LICENSE file
# in the root directory for full details.
#
# **Commercial use is prohibited without a separate license.**
# Contact MBM ScienceBridge GmbH (https://sciencebridge.de/en/) for licensing.


import os.path
from typing import Union

import types
import numpy as np

import pandas as pd
from tqdm import tqdm as tqdm

from sarcasm.structure import Structure
from sarcasm.motion import Motion


class MultiStructureAnalysis:
    """
    Class for multi-tif-file comparison of structure.

    Parameters
    ----------
    list_files : list
        List of tif files.
    folder : str
        Path to a folder to store data and results. A trailing path
        separator is optional.
    experiment : str, optional
        Name of the experiment (default is None).
    load_data : bool, optional
        Whether to load the dataframe from previous analysis from the data
        folder (default is False).
    **conditions : dict
        Keyword arguments with regex functions to extract information from
        the filename.

    Attributes
    ----------
    folder : str
        Path to the folder with data and results.
    experiment : str
        Name of the experiment.
    files : list
        List of tif files.
    conditions : dict
        Keyword arguments with regex functions to extract information from
        the filename.
    data : pandas.DataFrame
        DataFrame to store the structure data.
    """

    # File name of the pickled DataFrame inside ``folder``.
    _DATA_FILENAME = 'data_structure.pd'

    def __init__(self, list_files: list, folder: str, experiment: str = None, load_data: bool = False,
                 **conditions):
        self.folder = folder
        self.experiment = experiment
        self.files = list_files
        self.conditions = conditions
        self.data = None

        if load_data:
            self.load_data()

    def _data_path(self) -> str:
        """Return the path of the pickled DataFrame inside ``self.folder``."""
        # os.path.join yields the same path with or without a trailing
        # separator on ``folder``; plain concatenation did not.
        return os.path.join(self.folder, self._DATA_FILENAME)

    def get_data(self, structure_keys=None, meta_keys=None):
        """
        Iterate files and get structure data.

        Files that raise during processing are reported on stdout and
        skipped, so one broken file does not abort the whole batch. The
        collected records are stored in ``self.data`` and saved to disk.

        Parameters
        ----------
        structure_keys : list, optional
            List of keys to extract structure data (default is None).
        meta_keys : list, optional
            List of keys to extract metadata (default is None).

        Returns
        -------
        None
        """
        records = []
        for tif_file in tqdm(self.files):
            try:
                sarc_obj = Structure(filepath=tif_file)
                dict_i = Export.get_structure_dict(sarc_obj, meta_keys, structure_keys,
                                                   experiment=self.experiment, **self.conditions)
                records.append(dict_i)
            except Exception as e:
                # Deliberate best-effort: report and continue with the next file.
                print(f'{tif_file} failed!')
                print(repr(e))
        self.data = pd.DataFrame.from_records(records)
        self.save_data()

    def save_data(self):
        """
        Save the DataFrame to the data folder.

        Returns
        -------
        None
        """
        self.data.to_pickle(self._data_path())

    def load_data(self):
        """
        Load the DataFrame from the data folder.

        Returns
        -------
        None

        Raises
        ------
        FileExistsError
            If the data file does not exist in the specified folder.
        """
        # Also accept the location produced by plain string concatenation
        # (folder + file name), so data saved earlier with a folder path
        # lacking a trailing separator can still be loaded.
        legacy_path = self.folder + self._DATA_FILENAME
        for candidate in (self._data_path(), legacy_path):
            if os.path.exists(candidate):
                # NOTE: unpickling executes arbitrary code; only load data
                # folders from a trusted source.
                self.data = pd.read_pickle(candidate)
                return
        # Exception type kept for backward compatibility with existing callers.
        raise FileExistsError('Data from previous analysis does not exist and cannot be loaded. '
                              'Set load_data=False.')

    def export_data(self, filepath, format='.xlsx'):
        """
        Export the DataFrame to .xlsx or .csv format.

        Parameters
        ----------
        filepath : str
            Path to the output file.
        format : str, optional
            Format of the output file ('.xlsx' or '.csv') (default is '.xlsx').

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If ``format`` is neither '.xlsx' nor '.csv'.
        """
        if format == '.xlsx':
            self.data.to_excel(filepath, index=False)
        elif format == '.csv':
            self.data.to_csv(filepath, index=False)
        else:
            raise ValueError('Unsupported file format')
class MultiLOIAnalysis:
    """
    Class for multi-LOI comparison.

    Parameters
    ----------
    list_lois : list
        List of tuples containing tif file paths and LOI names.
    folder : str
        Path to a folder to store data and results. A trailing path
        separator is optional.
    load_data : bool, optional
        Whether to load the dataframe from previous analysis from the folder
        (default is False).
    **conditions : dict
        Keyword arguments with regex functions to extract information from
        the filename.

    Attributes
    ----------
    folder : str
        Path to the folder with data and results.
    lois : list
        List of tuples containing tif file paths and LOI names.
    conditions : dict
        Keyword arguments with regex functions to extract information from
        the filename.
    data : pandas.DataFrame
        DataFrame to store the motion data.
    """

    # File name of the pickled DataFrame inside ``folder``.
    _DATA_FILENAME = 'data_motion.pd'

    def __init__(self, list_lois, folder, load_data=False, **conditions):
        self.folder = folder
        self.lois = list_lois
        self.conditions = conditions
        self.data = None

        if load_data:
            self.load_data()

    def _data_path(self):
        """Return the path of the pickled DataFrame inside ``self.folder``."""
        # os.path.join yields the same path with or without a trailing
        # separator on ``folder``; plain concatenation did not.
        return os.path.join(self.folder, self._DATA_FILENAME)

    def get_data(self, loi_keys=None, meta_keys=None):
        """
        Iterate files and get motion data.

        LOIs that raise during processing are reported on stdout and
        skipped, so one broken LOI does not abort the whole batch. The
        collected records are stored in ``self.data`` and saved to disk.

        Parameters
        ----------
        loi_keys : list, optional
            List of keys to extract motion data (default is None).
        meta_keys : list, optional
            List of keys to extract metadata (default is None).

        Returns
        -------
        None
        """
        records = []
        for tif_file, loi_name in tqdm(self.lois):
            try:
                motion_obj = Motion(tif_file, loi_name)
                dict_i = Export.get_motion_dict(motion_obj, meta_keys, loi_keys, **self.conditions)
                records.append(dict_i)
            except Exception as e:
                # Deliberate best-effort: report and continue with the next LOI.
                print(f'{tif_file}, {loi_name} failed!')
                print(repr(e))
        self.data = pd.DataFrame.from_records(records)
        self.save_data()

    def save_data(self):
        """
        Save the DataFrame to the data folder as a pandas DataFrame.

        Returns
        -------
        None
        """
        self.data.to_pickle(self._data_path())

    def load_data(self):
        """
        Load the DataFrame from the data folder.

        Returns
        -------
        None

        Raises
        ------
        FileExistsError
            If the data file does not exist in the specified folder.
        """
        # Also accept the location produced by plain string concatenation
        # (folder + file name), so data saved earlier with a folder path
        # lacking a trailing separator can still be loaded.
        legacy_path = self.folder + self._DATA_FILENAME
        for candidate in (self._data_path(), legacy_path):
            if os.path.exists(candidate):
                # NOTE: unpickling executes arbitrary code; only load data
                # folders from a trusted source.
                self.data = pd.read_pickle(candidate)
                return
        # Exception type kept for backward compatibility with existing callers.
        raise FileExistsError('Data from previous analysis does not exist and cannot be loaded. '
                              'Set load_data=False.')

    def export_data(self, filepath, format='.xlsx'):
        """
        Export the DataFrame to .xlsx or .csv format.

        Parameters
        ----------
        filepath : str
            Path to the output file.
        format : str, optional
            Format of the output file ('.xlsx' or '.csv') (default is '.xlsx').

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If ``format`` is neither '.xlsx' nor '.csv'.
        """
        if format == '.xlsx':
            self.data.to_excel(filepath, index=False)
        elif format == '.csv':
            self.data.to_csv(filepath, index=False)
        else:
            raise ValueError('Unsupported file format')
class Export:
    """
    A class used to export structure and motion data from SarcAsM and Motion objects.

    Attributes
    ----------
    meta_keys_default : list
        Default metadata keys.
    structure_keys_default : list
        Default structure keys.
    motion_keys_default : list
        Default motion keys.
    """

    meta_keys_default = ['file_name', 'file_path', 'frames', 'size', 'pixelsize', 'timestamps', 'time',
                         'frametime']

    structure_keys_default = ['cell_mask_area', 'cell_mask_area_ratio', 'cell_mask_intensity',
                              'domain_area_mean', 'domain_area_std', 'domain_oop_mean', 'domain_oop_std',
                              'domain_slen_mean', 'n_domains',
                              'myof_length_max', 'myof_length_mean', 'myof_length_std',
                              'myof_bending_mean', 'myof_bending_std',
                              'myof_straightness_mean', 'myof_straightness_std',
                              'sarcomere_area', 'sarcomere_area_ratio',
                              'sarcomere_length_mean', 'sarcomere_length_std', 'sarcomere_oop',
                              'n_zbands', 'n_mbands', 'n_vectors',
                              'z_intensity_mean', 'z_intensity_std',
                              'z_lat_alignment_mean', 'z_lat_alignment_std',
                              'z_lat_dist_mean', 'z_lat_dist_std', 'z_lat_length_groups_mean',
                              'z_lat_neighbors_mean', 'z_lat_neighbors_std',
                              'z_length_max', 'z_length_mean', 'z_length_std', 'z_oop',
                              'z_mask_area', 'z_mask_area_ratio', 'z_mask_intensity',
                              'z_straightness_mean', 'z_straightness_std']

    motion_keys_default = ['beating_rate', 'beating_rate_variability',
                           'contr_max', 'contr_max_avg', 'elong_max', 'elong_max_avg', 'equ', 'time',
                           'vel_contr_max', 'vel_contr_max_avg', 'vel_elong_max', 'vel_elong_max_avg',
                           'n_sarcomeres', 'n_contr', 'ratio_nans',
                           'popping_rate_contr', 'popping_rate_sarcomeres', 'popping_rate',
                           'popping_events', 'popping_dist', 'popping_tau',
                           'popping_ks_dist_pvalue', 'popping_ks_dist_statistic',
                           'popping_p_dist', 'popping_p_tau',
                           'popping_ks_tau_pvalue', 'popping_ks_tau_statistic',
                           'time_to_peak', 'time_to_peak_avg', 'time_contr', 'time_quiet',
                           'corr_delta_slen', 'corr_vel',
                           'corr_delta_slen_serial', 'corr_delta_slen_mutual',
                           'corr_vel_serial', 'corr_vel_mutual',
                           'ratio_delta_slen_mutual_serial', 'ratio_vel_mutual_serial']

    @staticmethod
    def _select_keys(source, keys, label):
        """Return ``{key: source[key]}`` for ``keys``; absent keys are reported and set to NaN."""
        missing = [key for key in keys if key not in source]
        if missing:
            print(f'Missing {label} keys: ', missing)
        return {key: source[key] if key in source else np.nan for key in keys}

    @staticmethod
    def _add_conditions(target, filepath, conditions):
        """Add ``conditions`` to ``target`` in place; callable values are evaluated on ``filepath``."""
        for name, value in conditions.items():
            # callable() also covers functools.partial, bound methods and
            # callable objects, which a types.FunctionType check would have
            # stored uncalled.
            target[name] = value(filepath) if callable(value) else value

    @staticmethod
    def _dict_to_dataframe(feature_dict):
        """Build a DataFrame from a feature dict, falling back to a single row."""
        try:
            return pd.DataFrame(feature_dict)
        except ValueError:
            # pandas rejects dicts holding only scalars, or arrays of
            # differing length / dimensionality. Store such a dict as one
            # row with one object per cell instead of failing the export.
            return pd.DataFrame([feature_dict])

    @staticmethod
    def _write_dataframe(df, filepath, fileformat):
        """Write ``df`` to ``filepath`` as '.xlsx', '.csv' or '.xml'; raise ValueError otherwise."""
        if fileformat == '.xlsx':
            df.to_excel(filepath)
        elif fileformat == '.csv':
            df.to_csv(filepath)
        elif fileformat == '.xml':
            df.to_xml(filepath)
        else:
            raise ValueError('Unsupported file format')

    @staticmethod
    def get_structure_dict(sarc_obj, meta_keys=None, structure_keys=None, **conditions):
        """
        Create a dictionary of structure and metadata features from a SarcAsM object.

        Parameters
        ----------
        sarc_obj : SarcAsM
            Object of SarcAsM class or Motion class. Its ``metadata``,
            ``data`` and ``filepath`` attributes are read.
        meta_keys : list, optional
            List of metadata keys (default is None, i.e. ``meta_keys_default``).
        structure_keys : list, optional
            List of structure keys (default is None, i.e. ``structure_keys_default``).
        conditions : kwargs
            Keyword arguments to add information to the dictionary
            (e.g., "cell_line"= "wt", "info_xyz"=42). Callable values are
            called with the file path and their return value is stored.

        Returns
        -------
        dict
            Dictionary containing selected metadata and structure features.
            Requested keys that are not available are set to NaN.
        """
        if structure_keys is None:
            structure_keys = Export.structure_keys_default
        if meta_keys is None:
            meta_keys = Export.meta_keys_default

        dict_metadata_select = Export._select_keys(sarc_obj.metadata, meta_keys, 'metadata')
        dict_structure_select = Export._select_keys(sarc_obj.data, structure_keys, 'structure')

        # Structure entries take precedence over metadata entries of the same name.
        dict_ = {**dict_metadata_select, **dict_structure_select}
        Export._add_conditions(dict_, sarc_obj.filepath, conditions)
        return dict_

    @staticmethod
    def export_structure_data(filepath, sarc_obj: 'Union[Structure, Motion]', meta_keys=None,
                              structure_keys=None, remove_arrays=True, fileformat='.xlsx'):
        """
        Export structure data to a file.

        Parameters
        ----------
        filepath : str
            Path to the output file.
        sarc_obj : SarcAsM
            Object of SarcAsM class.
        meta_keys : list, optional
            List of metadata keys (default is None).
        structure_keys : list, optional
            List of structure keys (default is None).
        remove_arrays : bool, optional
            If True, removes columns with array data (default is True).
        fileformat : str, optional
            Format of the output file, one of '.xlsx', '.csv' or '.xml'
            (default is '.xlsx').

        Raises
        ------
        ValueError
            If ``fileformat`` is not supported.
        """
        structure_dict = Export.get_structure_dict(sarc_obj, meta_keys=meta_keys,
                                                   structure_keys=structure_keys)
        structure_df = Export._dict_to_dataframe(structure_dict)
        if remove_arrays:
            structure_df = Export.remove_arrays_dataframe(structure_df)
        Export._write_dataframe(structure_df, filepath, fileformat)

    @staticmethod
    def remove_arrays_dataframe(df):
        """
        Remove columns with array data from a DataFrame.

        A column is dropped when the value in its first row is a
        ``numpy.ndarray``. The input DataFrame is not modified.

        Parameters
        ----------
        df : pandas.DataFrame
            Input DataFrame.

        Returns
        -------
        pandas.DataFrame
            DataFrame with array columns removed.
        """
        if df.empty:
            # No first row to inspect, hence nothing to remove.
            return df.copy()
        # Positional access: the first row need not carry the index label 0.
        array_columns = [key for key in df.columns if isinstance(df[key].iloc[0], np.ndarray)]
        return df.drop(columns=array_columns)

    @staticmethod
    def get_motion_dict(motion_obj, meta_keys=None, loi_keys=None, concat=False, **conditions):
        """
        Create a dictionary of motion features and metadata from a Motion object.

        Parameters
        ----------
        motion_obj : Motion
            Object of Motion class for LOI analysis. Its ``metadata``,
            ``loi_data``, ``loi_name`` and ``filepath`` attributes are read.
        meta_keys : list, optional
            List of metadata keys (default is None, i.e. ``meta_keys_default``).
        loi_keys : list, optional
            List of LOI keys (default is None, i.e. ``motion_keys_default``).
        concat : bool, optional
            If True, all 2D arrays will be concatenated to 1D arrays (default is False).
        conditions : kwargs
            Keyword arguments to add to the dictionary, can be any
            information, e.g., drug='ABC'. Callable values are called with
            the file path and their return value is stored.

        Returns
        -------
        dict
            Dictionary containing selected metadata and motion features,
            plus the entries 'loi_name' and 'tif_name'. Requested keys that
            are not available are set to NaN.
        """
        if loi_keys is None:
            loi_keys = Export.motion_keys_default
        if meta_keys is None:
            meta_keys = Export.meta_keys_default

        dict_metadata_select = Export._select_keys(motion_obj.metadata, meta_keys, 'metadata')
        dict_loi_select = Export._select_keys(motion_obj.loi_data, loi_keys, 'loi')

        # LOI entries take precedence over metadata entries of the same name (e.g. 'time').
        dict_ = {**dict_metadata_select, **dict_loi_select, 'loi_name': motion_obj.loi_name}
        Export._add_conditions(dict_, motion_obj.filepath, conditions)

        if concat:
            # Only existing keys are reassigned, so the dict keeps its size
            # while being iterated.
            for key, value in dict_.items():
                if isinstance(value, np.ndarray) and value.ndim == 2:
                    dict_[key] = np.concatenate(value)

        dict_['tif_name'] = motion_obj.filepath
        return dict_

    @staticmethod
    def export_motion_data(mot_obj: 'Motion', filepath, meta_keys=None, motion_keys=None,
                           remove_arrays=True, fileformat='.xlsx'):
        """
        Export motion data to a file.

        Parameters
        ----------
        mot_obj : Motion
            Object of Motion class.
        filepath : str
            Path to the output file.
        meta_keys : list, optional
            List of metadata keys (default is None).
        motion_keys : list, optional
            List of motion keys (default is None).
        remove_arrays : bool, optional
            If True, removes columns with array data (default is True).
        fileformat : str, optional
            Format of the output file, one of '.xlsx', '.csv' or '.xml'
            (default is '.xlsx').

        Raises
        ------
        ValueError
            If ``fileformat`` is not supported.
        """
        motion_dict = Export.get_motion_dict(mot_obj, meta_keys=meta_keys, loi_keys=motion_keys)
        motion_df = Export._dict_to_dataframe(motion_dict)
        if remove_arrays:
            motion_df = Export.remove_arrays_dataframe(motion_df)
        Export._write_dataframe(motion_df, filepath, fileformat)