Source code for simplicity.settings_manager

# This file is part of SIMPLICITY
# Copyright (C) 2025 Pietro Gerletti
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 27 19:38:43 2024

@author: pietro
"""

import os
import json
import simplicity.dir_manager as dm
import pandas as pd
import itertools
import copy

# Root data directory, resolved once at import time; the experiment-specific
# paths built in this module are joined onto it.
_data_dir = dm.get_data_dir()

def get_standard_parameters_values_file_path():
    """Return the path of ``standard_values.json``.

    The file name is joined onto the directory returned by
    ``dm.get_reference_parameters_dir()``. Nothing is created or checked for
    existence here.
    """
    return os.path.join(dm.get_reference_parameters_dir(), "standard_values.json")
def get_parameter_specs_file_path():
    """Return the path of ``parameter_specs.json``.

    The file name is joined onto the directory returned by
    ``dm.get_reference_parameters_dir()``. Nothing is created or checked for
    existence here.
    """
    return os.path.join(dm.get_reference_parameters_dir(), "parameter_specs.json")
def write_standard_parameters_values():
    """Write the built-in default parameter values to the standard values file.

    The destination is ``get_standard_parameters_values_file_path()``; an
    existing file at that path is overwritten. A confirmation message is
    printed afterwards.
    """
    # Insertion order is the key order of the resulting JSON document.
    defaults = dict(
        population_size=1000,
        long_shedders_ratio=0,
        infected_individuals_at_start=10,
        tau_1=2.86,
        tau_2=3.91,
        tau_3=7.5,
        tau_3_long=133.5,
        tau_4=8,
        R=1.1,
        R_long=1,
        # in percentage, will be converted to kds in model
        diagnosis_rate_standard=0.1,
        # in percentage, will be converted to kdl in model
        diagnosis_rate_long=0.1,
        # k_v in theoretical model equations
        IH_virus_emergence_rate=0,
        # e in theoretical model equations
        nucleotide_substitution_rate=0.00008759,
        # ratio to calculate long shedders NSR from base NSR
        M_nsr_long=1,
        final_time=365,
        max_runtime=86000,
        # or 'linear'
        phenotype_model='immune_waning',
        sequencing_rate=0.05,
        sequence_long_shedders=False,
        seed=None,
    )

    target = get_standard_parameters_values_file_path()
    with open(target, "w") as handle:
        json.dump(defaults, handle, indent=4)
    print(f"Standard values written to {target}")
def write_parameter_specs():
    """Write the built-in parameter specifications to the parameter specs file.

    Each parameter maps to a dictionary holding its ``type`` name and, where
    given, ``min`` / ``max`` bounds. The destination is
    ``get_parameter_specs_file_path()``; an existing file is overwritten and a
    confirmation message is printed afterwards.
    """

    def spec(kind, **bounds):
        # "type" always comes first; the bounds follow in the order given.
        return {"type": kind, **bounds}

    specs = {
        "population_size": spec("int", min=0, max=10000),
        "long_shedders_ratio": spec("float", min=0, max=1),
        "tau_1": spec("float", min=0, max=10),
        "tau_2": spec("float", min=0, max=100),
        "tau_3": spec("float", min=0, max=300),
        "tau_3_long": spec("float", min=0, max=300),
        "tau_4": spec("float", min=0, max=30),
        "infected_individuals_at_start": spec("int", min=0),
        "R": spec("float", min=0, max=20),
        "R_long": spec("float", min=0, max=20),
        "M_nsr_long": spec("float"),
        "diagnosis_rate_standard": spec("float", min=0, max=1),
        "diagnosis_rate_long": spec("float", min=0, max=1),
        "IH_virus_emergence_rate": spec("float", min=0),
        "nucleotide_substitution_rate": spec("float", min=0, max=1),
        "final_time": spec("int", min=0),
        "max_runtime": spec("int", min=0),
        "phenotype_model": spec("str"),
        "sequencing_rate": spec("float", min=0, max=1),
        "sequence_long_shedders": spec("bool"),
    }

    target = get_parameter_specs_file_path()
    with open(target, "w") as handle:
        json.dump(specs, handle, indent=4)
    print(f"Parameter specifications written to {target}")
def read_standard_parameters_values():
    """Load and return the standard parameter values from their JSON file.

    If the file does not exist, a message is printed, the defaults are written
    with ``write_standard_parameters_values()`` and the read is retried.
    """
    path = get_standard_parameters_values_file_path()
    try:
        handle = open(path, "r")
    except FileNotFoundError:
        print(f"Error: {path} not found. Writing default standard values.")
        write_standard_parameters_values()
        return read_standard_parameters_values()
    with handle:
        return json.load(handle)
def read_parameter_specs():
    """Load and return the parameter specifications from their JSON file.

    If the file does not exist, a message is printed, the defaults are written
    with ``write_parameter_specs()`` and the read is retried.
    """
    path = get_parameter_specs_file_path()
    try:
        handle = open(path, "r")
    except FileNotFoundError:
        print(f"Error: {path} not found. Writing default parameter specifications.")
        write_parameter_specs()
        return read_parameter_specs()
    with handle:
        return json.load(handle)
def write_user_set_parameters_file(user_set_parameters, filename):
    """Save ``user_set_parameters`` as JSON in the reference parameters directory.

    Args:
        user_set_parameters: JSON-serialisable object to store.
        filename: Name of the file, joined onto
            ``dm.get_reference_parameters_dir()``.
    """
    destination = os.path.join(dm.get_reference_parameters_dir(), filename)
    with open(destination, "w") as handle:
        json.dump(user_set_parameters, handle, indent=4)
    print(f"user_set_parameters saved to {destination}")
def read_user_set_parameters_file(filename):
    """Load a user-set parameters JSON file from the reference parameters directory.

    Args:
        filename: Name of the file, joined onto
            ``dm.get_reference_parameters_dir()``.

    Returns:
        The decoded JSON content. If the file does not exist, the standard
        parameter values are returned instead.
    """
    source = os.path.join(dm.get_reference_parameters_dir(), filename)
    try:
        handle = open(source, "r")
    except FileNotFoundError:
        # NOTE(review): this fallback (re)writes the standard values file even
        # when it already exists, and hands back the standard values rather
        # than failing -- confirm that callers expect this.
        print(f"Error: {filename} not found. Writing default standard values.")
        write_standard_parameters_values()
        return read_standard_parameters_values()
    with handle:
        return json.load(handle)
def get_experiment_settings_file_path(experiment_name):
    """Return the path of the settings JSON file of ``experiment_name``.

    The path has the form
    ``<data dir>/<experiment>/01_Experiments_settings/<experiment>_settings.json``.
    """
    experiment_dir = os.path.join(_data_dir, f'{experiment_name}')
    settings_file_name = f'{experiment_name}_settings.json'
    return os.path.join(experiment_dir, '01_Experiments_settings', settings_file_name)
def get_n_seeds_file_path(experiment_name):
    """Return the path of the ``n_seeds`` JSON file of ``experiment_name``.

    The path has the form
    ``<data dir>/<experiment>/01_Experiments_settings/<experiment>_n_seeds.json``.
    """
    experiment_dir = os.path.join(_data_dir, f'{experiment_name}')
    n_seeds_file_name = f'{experiment_name}_n_seeds.json'
    return os.path.join(experiment_dir, '01_Experiments_settings', n_seeds_file_name)
def check_parameters_names(parameters_dic):
    """Check that every key of ``parameters_dic`` is a known parameter name.

    The known names are the keys of the standard parameter values.

    Raises:
        ValueError: On the first key that is not a standard parameter.
    """
    known_names = read_standard_parameters_values()
    for name in parameters_dic:
        if name in known_names:
            continue
        raise ValueError(f'Parameter {name} is not a valid parameter')
def read_experiment_settings(experiment_name):
    """Load and return the settings stored in the experiment's settings JSON file."""
    with open(get_experiment_settings_file_path(experiment_name), 'r') as handle:
        return json.load(handle)
def read_n_seeds_file(experiment_name):
    """Load and return the content of the experiment's ``n_seeds`` JSON file."""
    with open(get_n_seeds_file_path(experiment_name), 'r') as handle:
        return json.load(handle)
def generate_experiment_settings(varying_params: dict, fixed_params: dict = None):
    """
    Build every parameter combination from varying and fixed parameters.

    Args:
        varying_params (dict): Maps a parameter name to an iterable of values;
            the Cartesian product over all names is generated.
        fixed_params (dict): Parameters given the same value in every
            combination. On a name clash the fixed value replaces the
            varying one.

    Returns:
        List[dict]: One dictionary per combination. With no varying
        parameters the list holds a single dictionary of the fixed ones.
    """
    varying = varying_params or {}
    constants = fixed_params or {}
    names = list(varying)

    settings_list = []
    for combo in itertools.product(*varying.values()):
        # Deep-copy per combination so the settings never share mutable
        # objects with ``fixed_params`` or with each other.
        settings_list.append({**dict(zip(names, combo)), **copy.deepcopy(constants)})
    return settings_list
def write_experiment_settings(experiment_name: str, experiment_settings: list, n_seeds: int):
    """
    Store the experiment settings and the number of seeds as JSON files.

    Every parameter set is validated with ``check_parameters_names`` before
    anything is written.

    Args:
        experiment_name (str): Name of the experiment; determines both paths.
        experiment_settings (list): List of parameter dictionaries.
        n_seeds (int): Number of seeds, stored in its own file under the
            key ``n_seeds``.

    Raises:
        ValueError: If a parameter set contains an unknown parameter name.
    """
    # Validate everything first so an invalid name leaves no file behind.
    for parameter_set in experiment_settings:
        check_parameters_names(parameter_set)

    settings_path = get_experiment_settings_file_path(experiment_name)
    n_seeds_path = get_n_seeds_file_path(experiment_name)

    with open(settings_path, 'w') as handle:
        json.dump(experiment_settings, handle, indent=4)

    with open(n_seeds_path, 'w') as handle:
        json.dump({'n_seeds': n_seeds}, handle, indent=4)

    print(f"Experiment settings file written to {settings_path}")
def write_simulation_parameters(file_path,
                                population_size,
                                long_shedders_ratio,
                                tau_1,
                                tau_2,
                                tau_3,
                                tau_3_long,
                                tau_4,
                                infected_individuals_at_start,
                                R,
                                R_long,
                                M_nsr_long,
                                diagnosis_rate_standard,
                                diagnosis_rate_long,
                                IH_virus_emergence_rate,
                                nucleotide_substitution_rate,
                                final_time,
                                max_runtime,
                                phenotype_model,
                                sequencing_rate,
                                sequence_long_shedders,
                                seed):
    """Write one complete set of simulation parameters to ``file_path`` as JSON.

    Each argument is stored under a key equal to its own name. An extra key
    ``t_0`` with the constant value 0 is inserted just before ``final_time``.
    An existing file at ``file_path`` is overwritten.
    """
    # Keyword order is the key order of the JSON document.
    payload = dict(
        population_size=population_size,
        long_shedders_ratio=long_shedders_ratio,
        tau_1=tau_1,
        tau_2=tau_2,
        tau_3=tau_3,
        tau_3_long=tau_3_long,
        tau_4=tau_4,
        infected_individuals_at_start=infected_individuals_at_start,
        R=R,
        R_long=R_long,
        M_nsr_long=M_nsr_long,
        diagnosis_rate_standard=diagnosis_rate_standard,
        diagnosis_rate_long=diagnosis_rate_long,
        IH_virus_emergence_rate=IH_virus_emergence_rate,
        nucleotide_substitution_rate=nucleotide_substitution_rate,
        t_0=0,
        final_time=final_time,
        max_runtime=max_runtime,
        phenotype_model=phenotype_model,
        sequencing_rate=sequencing_rate,
        sequence_long_shedders=sequence_long_shedders,
        seed=seed,
    )

    with open(file_path, "w") as handle:
        json.dump(payload, handle, indent=4)
def generate_filename_from_params(params: dict):
    """Build a JSON file name that encodes the given parameters.

    Each parameter contributes ``<abbreviation>_<value>``; the parts are joined
    with underscores and ``.json`` is appended. Parameters without an
    abbreviation use their full name. ``max_runtime``, ``sequencing_rate`` and
    ``seed`` are left out of the name.

    Value formatting:
        * floats use two significant digits with ``.`` replaced by ``p``
          (``0.01`` becomes ``0p01``, ``2.86`` becomes ``2p9``);
        * strings have their spaces removed;
        * everything else is passed through ``str``.

    Because floats are rounded to two significant digits, different values can
    produce the same file name.
    """
    short_names = {
        "population_size": "N",
        "tau_3": "tau3",
        "infected_individuals_at_start": "init",
        "R": "R",
        "R_long": "Rl",
        "M_nsr_long": "M",
        "diagnosis_rate_standard": "kds",
        "diagnosis_rate_long": "kdl",
        "IH_virus_emergence_rate": "kv",
        "nucleotide_substitution_rate": "NSR",
        "final_time": "T",
        "phenotype_model": "pheno",
    }
    skipped = {"max_runtime", "sequencing_rate", "seed"}

    def render(value):
        if isinstance(value, float):
            return f"{value:.2g}".replace('.', 'p')
        if isinstance(value, str):
            return value.replace(' ', '')
        return str(value)

    pieces = [
        f"{short_names.get(name, name)}_{render(value)}"
        for name, value in params.items()
        if name not in skipped
    ]
    return "_".join(pieces) + ".json"
def read_settings_and_write_simulation_parameters(experiment_name):
    """
    Create one simulation parameters file per entry of the experiment settings.

    The experiment settings file (a JSON list of parameter dictionaries) is
    read, each entry is merged on top of the standard parameter values, and the
    result is written with ``write_simulation_parameters`` into the directory
    returned by ``dm.get_simulation_parameters_dir(experiment_name)``.

    File naming:
        Each file is named after the parameters whose value differs from the
        standard one (see ``generate_filename_from_params``). An entry that
        matches the standard values entirely is named
        ``standard_values.json``.

    Two entries can map to the same file name, because float values are
    abbreviated in the name. The later entry then overwrites the earlier file,
    and a warning is printed so the overwrite is not silent.

    Parameters:
    -----------
    experiment_name : str
        The name of the experiment. It locates the settings file and the
        simulation parameters directory.

    Raises:
    -------
    KeyError
        If a required parameter is missing from both the standard values and
        the experiment entry.
    """
    # Names forwarded to write_simulation_parameters. Passing them by keyword
    # keeps this call correct even if the positional order there changes.
    parameter_names = (
        "population_size",
        "long_shedders_ratio",
        "tau_1",
        "tau_2",
        "tau_3",
        "tau_3_long",
        "tau_4",
        "infected_individuals_at_start",
        "R",
        "R_long",
        "M_nsr_long",
        "diagnosis_rate_standard",
        "diagnosis_rate_long",
        "IH_virus_emergence_rate",
        "nucleotide_substitution_rate",
        "final_time",
        "max_runtime",
        "phenotype_model",
        "sequencing_rate",
        "sequence_long_shedders",
        "seed",
    )

    experiment_settings_file_path = get_experiment_settings_file_path(experiment_name)
    standard_values = read_standard_parameters_values()

    with open(experiment_settings_file_path, 'r') as settings_file:
        all_experiment_settings = json.load(settings_file)

    # Loop-invariant; assumes the helper returns the same path on every call.
    simulation_parameters_dir = dm.get_simulation_parameters_dir(experiment_name)

    written_file_names = set()
    for experiment_settings in all_experiment_settings:
        # Only parameters that differ from the standard values shape the name.
        modified_params = {
            key: value
            for key, value in experiment_settings.items()
            if key in standard_values and value != standard_values[key]
        }
        if modified_params:
            file_name = generate_filename_from_params(modified_params)
        else:
            file_name = 'standard_values.json'

        if file_name in written_file_names:
            print(f"Warning: {file_name} is generated by more than one "
                  "experiment setting; the earlier file is overwritten.")
        written_file_names.add(file_name)

        # Experiment-specific values take precedence over the standard ones.
        settings = {**standard_values, **experiment_settings}
        write_simulation_parameters(
            os.path.join(simulation_parameters_dir, file_name),
            **{name: settings[name] for name in parameter_names},
        )

    print(f"Simulation parameters written to directory: {simulation_parameters_dir}")
def write_seeded_simulation_parameters(experiment_name: str):
    """
    Write seeded copies of every simulation parameters file of an experiment.

    For each ``.json`` file in the simulation parameters directory, a
    sub-directory named after the file (without its ``.json`` suffix) is
    created in the seeded simulation parameters directory. It receives one
    copy of the parameters per seed, with the ``seed`` field set to
    ``0 .. n_seeds - 1``. The number of seeds is read from the experiment's
    ``n_seeds`` file. Entries of the simulation parameters directory that are
    not ``.json`` files are ignored.

    Parameters:
    -----------
    experiment_name : str
        The name of the experiment.

    Resulting layout (seed numbers are zero-padded to four digits):
    ---------------------------------------------------------------
    <seeded simulation parameters dir>/
        param_file_1/
            seed_0000.json
            seed_0001.json
            ...
        param_file_2/
            seed_0000.json
            ...
    """
    simulation_parameters_dir = dm.get_simulation_parameters_dir(experiment_name)
    seeded_simulation_parameters_dir = dm.get_seeded_simulation_parameters_dir(experiment_name)

    # Stray files or sub-directories would otherwise crash open()/json.load().
    parameter_files = sorted(
        name for name in os.listdir(simulation_parameters_dir)
        if name.endswith(".json")
        and os.path.isfile(os.path.join(simulation_parameters_dir, name))
    )
    if not parameter_files:
        return

    # Same for every parameter file, so read it once instead of per file.
    n_seeds = read_n_seeds_file(experiment_name)['n_seeds']

    for filename in parameter_files:
        with open(os.path.join(simulation_parameters_dir, filename), 'r') as file:
            simulation_parameters = json.load(file)

        # Strip only the trailing extension; names may contain ".json" elsewhere.
        subdir_name = os.path.splitext(filename)[0]
        subdir_path = os.path.join(seeded_simulation_parameters_dir, subdir_name)
        os.makedirs(subdir_path, exist_ok=True)

        for seed in range(n_seeds):
            seeded_parameters = {**simulation_parameters, 'seed': seed}
            seeded_file_path = os.path.join(subdir_path, f"seed_{seed:04}.json")
            with open(seeded_file_path, 'w') as seeded_file:
                json.dump(seeded_parameters, seeded_file, indent=4)
def get_seeded_simulation_parameters_paths(experiment_name):
    """
    Collect the paths of all seeded simulation parameter files of an experiment.

    The directory ``<data dir>/<experiment_name>/03_Seeded_simulation_parameters``
    is walked recursively and every file whose name ends in ``.json`` is
    included.

    Args:
        experiment_name (str): Name of the experiment to search.

    Returns:
        list of str: Full paths of the JSON files, in ``os.walk`` order. The
        list is empty when the directory does not exist or holds no JSON file.

    Example:
        For ``experiment_name='Experiment_1'`` the result looks like:
        [
            '/data_dir/Experiment_1/03_Seeded_simulation_parameters/subdir/file1.json',
            '/data_dir/Experiment_1/03_Seeded_simulation_parameters/subdir/file2.json'
        ]
    """
    search_root = os.path.join(_data_dir, experiment_name, "03_Seeded_simulation_parameters")
    return [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(search_root)
        for name in names
        if name.endswith(".json")
    ]
def read_seeded_simulation_parameters(experiment_name, seeded_simulation_parameters_path):
    """Load and return the JSON content of a seeded simulation parameters file.

    Args:
        experiment_name: Accepted but not used by this function.
        seeded_simulation_parameters_path: Path of the JSON file to read.
    """
    with open(seeded_simulation_parameters_path, 'r') as handle:
        return json.load(handle)
def get_simulation_parameters_filepath_of_simulation_output_dir(simulation_output_dir):
    """Return the simulation parameters file path matching a simulation output directory.

    The experiment name is taken from the third-from-last component of
    ``simulation_output_dir``. The file name is the last component with
    ``.json`` appended, located in that experiment's simulation parameters
    directory.
    """
    from pathlib import Path

    components = Path(simulation_output_dir).parts
    experiment_name, output_folder_name = components[-3], components[-1]
    return os.path.join(
        dm.get_simulation_parameters_dir(experiment_name),
        output_folder_name + '.json',
    )
def get_parameter_value_from_simulation_output_dir(simulation_output_dir, parameter):
    """Return the value of ``parameter`` used for a simulation output directory.

    The value is looked up in the simulation parameters file that belongs to
    ``simulation_output_dir``.

    Raises:
        KeyError: If ``parameter`` is not present in that file.
    """
    parameters_path = get_simulation_parameters_filepath_of_simulation_output_dir(
        simulation_output_dir)
    with open(parameters_path, 'r') as handle:
        stored_parameters = json.load(handle)
    return stored_parameters[parameter]
def read_OSR_NSR_regressor_parameters():
    """Return the ``Best Fit`` column of the OSR/NSR regressor parameters CSV.

    The CSV is read from the reference parameters directory with its first
    column as index. The values are converted to numbers, and entries that
    cannot be converted become NaN.
    """
    csv_path = os.path.join(
        dm.get_reference_parameters_dir(),
        'OSR_NSR_regressor_parameters_for_standard_parameter_values_exp.csv',
    )
    table = pd.read_csv(csv_path, index_col=0)
    return pd.to_numeric(table['Best Fit'], errors='coerce')
def get_n_seeds_from_experiment_settings(experiment_numbered_name):
    """Return ``n_seeds`` for an experiment as an int.

    The value is read from ``<experiment_numbered_name>_n_seeds.json`` inside
    the directory returned by ``dm.get_experiment_settings_dir``.

    Raises:
        FileNotFoundError: If the seeds file does not exist.
    """
    seeds_file = os.path.join(
        dm.get_experiment_settings_dir(experiment_numbered_name),
        f"{experiment_numbered_name}_n_seeds.json",
    )
    if os.path.exists(seeds_file):
        with open(seeds_file, 'r') as handle:
            content = json.load(handle)
        return int(content['n_seeds'])
    raise FileNotFoundError(f"Missing seeds setting file: {seeds_file}")