Source code for simplicity.settings_manager

# This file is part of SIMPLICITY
# Copyright (C) 2025 Pietro Gerletti
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 27 19:38:43 2024

@author: pietro
"""

import os
import json
import simplicity.dir_manager as dm
import pandas as pd
import itertools
import copy

_data_dir = dm.get_data_dir()


[docs]
def get_standard_parameters_values_file_path():
    standard_parameters_values_file_path = os.path.join(dm.get_reference_parameters_dir(), "standard_values.json")
    return standard_parameters_values_file_path



[docs]
def get_parameter_specs_file_path():
    parameter_specs_file_path = os.path.join(dm.get_reference_parameters_dir(), "parameter_specs.json")
    return parameter_specs_file_path



[docs]
def write_standard_parameters_values():
    filename= get_standard_parameters_values_file_path()
    standard_values = {
        "population_size": 1000,
        'long_shedders_ratio': 0,
        "infected_individuals_at_start": 10,
        "tau_1": 2.86,
        "tau_2": 3.91,
        "tau_3": 7.5,
        "tau_3_long": 133.5,
        "tau_4": 8,
        "R": 1.1,
        "R_long": 1,
        "diagnosis_rate_standard": 0.1, # in percentage, will be converted to kds in model 
        "diagnosis_rate_long"    : 0.1, # in percentage, will be converted to kdl in model 
        "IH_virus_emergence_rate": 0,      # k_v in theoretical model equations
        "nucleotide_substitution_rate":  0.00008759,  # e in theoretical model equations
        "M_nsr_long": 1, # ratio to calculate long shedders NSR from base NSR
        "final_time": 365,
        "max_runtime": 86000, 
        "phenotype_model": 'immune_waning',  # or 'linear'
        "sequencing_rate": 0.05,
        "sequence_long_shedders": False,
        "seed": None
    }
    with open(filename, "w") as file:
        json.dump(standard_values, file, indent=4)
    print(f"Standard values written to {filename}")



[docs]
def write_parameter_specs():
    filename= get_parameter_specs_file_path()
    parameter_specs = {
        "population_size":               {"type": "int", "min": 0, "max": 10000},
        'long_shedders_ratio':           {"type": "float", "min": 0, "max": 1},
        "tau_1":                         {"type": "float", "min": 0, "max": 10},
        "tau_2":                         {"type": "float", "min": 0, "max": 100},
        "tau_3":                         {"type": "float", "min": 0, "max": 300},
        "tau_3_long":                    {"type": "float", "min": 0, "max": 300},
        "tau_4":                         {"type": "float", "min": 0, "max": 30},
        "infected_individuals_at_start": {"type": "int", "min": 0},
        "R":                             {"type": "float", "min": 0, "max": 20},
        "R_long":                        {"type": "float", "min": 0, "max": 20},
        "M_nsr_long":                    {"type": "float"},
        "diagnosis_rate_standard":       {"type": "float", "min": 0, "max": 1},
        "diagnosis_rate_long":           {"type": "float", "min": 0, "max": 1},
        "IH_virus_emergence_rate":       {"type": "float", "min": 0},
        "nucleotide_substitution_rate":  {"type": "float", "min": 0, "max": 1},
        "final_time":                    {"type": "int", "min": 0},    
        "max_runtime":                   {"type": "int", "min": 0},
        "phenotype_model":               {"type": "str"},
        "sequencing_rate":               {"type": "float", "min": 0, "max": 1},
        "sequence_long_shedders":        {"type": "bool"}
        }

    with open(filename, "w") as file:
        json.dump(parameter_specs, file, indent=4)
    print(f"Parameter specifications written to {filename}")



[docs]
def read_standard_parameters_values():
    filename= get_standard_parameters_values_file_path()
    try:
        with open(filename, "r") as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"Error: {filename} not found. Writing default standard values.")
        write_standard_parameters_values()
        return read_standard_parameters_values()



[docs]
def read_parameter_specs():
    filename= get_parameter_specs_file_path()
    try:
        with open(filename, "r") as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"Error: {filename} not found. Writing default parameter specifications.")
        write_parameter_specs()
        return read_parameter_specs()

    

[docs]
def write_user_set_parameters_file(user_set_parameters, filename):
    file_path = os.path.join(dm.get_reference_parameters_dir(),filename)
    with open(file_path, "w") as file:
        json.dump(user_set_parameters, file, indent=4)
    print(f"user_set_parameters saved to {file_path}")



[docs]
def read_user_set_parameters_file(filename):
    file_path = os.path.join(dm.get_reference_parameters_dir(),filename)
    try:
        with open(file_path, "r") as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"Error: {filename} not found. Writing default standard values.")
        write_standard_parameters_values()
        return read_standard_parameters_values()

    

[docs]
def get_experiment_settings_file_path(experiment_name):
    return os.path.join(_data_dir,
                        f'{experiment_name}',
                        '01_Experiments_settings', 
                        f'{experiment_name}_settings.json')



[docs]
def get_n_seeds_file_path(experiment_name):
    return os.path.join(_data_dir,
                        f'{experiment_name}',
                        '01_Experiments_settings', 
                        f'{experiment_name}_n_seeds.json')



[docs]
def check_parameters_names(parameters_dic):
    STANDARD_VALUES = read_standard_parameters_values()
    for key in parameters_dic.keys():
        if key not in STANDARD_VALUES.keys():
            raise ValueError(f'Parameter {key} is not a valid parameter')



[docs]
def read_experiment_settings(experiment_name):
    experiment_settings_file_path = get_experiment_settings_file_path(experiment_name)
    with open(experiment_settings_file_path, 'r') as json_file:
        experiment_settings = json.load(json_file)
    return experiment_settings



[docs]
def read_n_seeds_file(experiment_name):
    n_seeds_file_path = get_n_seeds_file_path(experiment_name)
    with open(n_seeds_file_path, 'r') as json_file:
        n_seeds_dic = json.load(json_file)
    return n_seeds_dic




[docs]
def generate_experiment_settings(varying_params: dict, fixed_params: dict = None):
    """
    Generates a list of parameter combinations from varying and fixed parameters.
    
    Args:
        varying (dict): Parameters for which all combinations should be generated.
        fixed (dict): Parameters that should have the same value across all combinations.
    
    Returns:
        List[dict]: A list of dictionaries with combined parameter sets.
    """
    fixed_params = fixed_params or {}

    keys, values = zip(*varying_params.items()) if varying_params else ([], [])
    combinations = list(itertools.product(*values)) if values else [()]

    experiment_settings = []
    for combo in combinations:
        setting = dict(zip(keys, combo))
        setting.update(copy.deepcopy(fixed_params))  # Avoid mutation
        experiment_settings.append(setting)
    
    return experiment_settings



[docs]
def write_experiment_settings(experiment_name: str, experiment_settings: list, n_seeds: int):
    """
    Writes experiment settings (a list of parameter dictionaries) to a JSON file.

    Args:
        experiment_name (str): Name of the experiment (used for output folder).
        experiment_settings (list): List of parameter dictionaries.
        n_seeds (int): Number of random seeds to be stored separately.
    """
    # check parameter names validity
    for param_set in experiment_settings:
        check_parameters_names(param_set)

    # Write settings to JSON
    experiment_settings_file_path = get_experiment_settings_file_path(experiment_name)
    n_seeds_file_path = get_n_seeds_file_path(experiment_name)

    with open(experiment_settings_file_path, 'w') as settings_file:
        json.dump(experiment_settings, settings_file, indent=4)

    with open(n_seeds_file_path, 'w') as n_seeds_file:
        json.dump({'n_seeds': n_seeds}, n_seeds_file, indent=4)

    print(f"Experiment settings file written to {experiment_settings_file_path}")



[docs]
def write_simulation_parameters(file_path,
                                population_size,
                                long_shedders_ratio,
                                tau_1,
                                tau_2,
                                tau_3,
                                tau_3_long,
                                tau_4,
                                infected_individuals_at_start, 
                                R,
                                R_long,
                                M_nsr_long,
                                diagnosis_rate_standard,
                                diagnosis_rate_long,
                                IH_virus_emergence_rate,
                                nucleotide_substitution_rate,
                                final_time, 
                                max_runtime, 
                                phenotype_model,
                                sequencing_rate,
                                sequence_long_shedders,
                                seed
                                ):
    settings = {
        "population_size": population_size,
        "long_shedders_ratio": long_shedders_ratio,
        "tau_1": tau_1,
        "tau_2": tau_2,
        "tau_3": tau_3,
        "tau_3_long": tau_3_long,
        "tau_4": tau_4,
        "infected_individuals_at_start": infected_individuals_at_start,
        "R": R,
        "R_long": R_long,
        "M_nsr_long": M_nsr_long,
        "diagnosis_rate_standard": diagnosis_rate_standard, 
        "diagnosis_rate_long":diagnosis_rate_long,
        "IH_virus_emergence_rate" : IH_virus_emergence_rate,
        "nucleotide_substitution_rate": nucleotide_substitution_rate,
        "t_0": 0,
        "final_time": final_time,
        "max_runtime": max_runtime,
        "phenotype_model": phenotype_model,
        "sequencing_rate": sequencing_rate,
        "sequence_long_shedders":sequence_long_shedders,
        "seed": seed
    }
    
    # Serialize the dictionary to a JSON-formatted string and write it to a file
    with open(file_path, "w") as json_file:
        json.dump(settings, json_file, indent=4)

        

[docs]
def generate_filename_from_params(params: dict):
    
    abbreviations = {
    "population_size": "N",
    "tau_3": "tau3",
    "infected_individuals_at_start": "init",
    "R": "R",
    "R_long": "Rl",
    "M_nsr_long": "M",
    "diagnosis_rate_standard": "kds",
    "diagnosis_rate_long": "kdl",
    "IH_virus_emergence_rate": "kv",
    "nucleotide_substitution_rate": "NSR",
    "final_time": "T",
    "phenotype_model": "pheno"
    # excluded: max_runtime, sequencing_rate, seed, F
}
    exclude  = {"max_runtime", "sequencing_rate", "seed"}
    parts = []
    for key, value in params.items():
        if key in exclude:
            continue
        abbrev = abbreviations.get(key, key)
        if isinstance(value, float):
            value_str = f"{value:.2g}".replace('.', 'p')  # e.g., 0.01 → 1p0
        elif isinstance(value, int):
            value_str = str(value)
        elif isinstance(value, str):
            value_str = value.replace(' ', '')
        else:
            value_str = str(value)
        parts.append(f"{abbrev}_{value_str}")
    
    file_name = "_".join(parts) + ".json"
    return file_name




[docs]
def read_settings_and_write_simulation_parameters(experiment_name):
    """
    Reads an experiment settings file in JSON format and generates individual simulation parameter files
    based on the combinations of parameters in the settings. The function will create a separate JSON file
    for each parameter combination within a directory named after the experiment.

    Parameters:
    -----------
    experiment_name : str
        The name of the experiment. This is used to locate the settings file and to create the corresponding
        simulation parameters directory.

    """
    
    # Define the path to the experiment settings file
    experiment_settings_file_path = get_experiment_settings_file_path(experiment_name)
    STANDARD_VALUES = read_standard_parameters_values()
    # Read the experiment settings file
    with open(experiment_settings_file_path, 'r') as settings_file:
        all_experiment_settings = json.load(settings_file)
  
    # Loop over each set of experiment settings and create a simulation parameters file
    for i, experiment_settings in enumerate(all_experiment_settings):
        # Only include parameters that differ from standard
        modified_params = {
            key: value for key, value in experiment_settings.items()
            if key in STANDARD_VALUES and value != STANDARD_VALUES[key]
        }
        # Fallback: if all params match standard, name it by index
        if not modified_params:
            file_name = 'standard_values.json'
        else:
            file_name = generate_filename_from_params(modified_params)
            
        simulation_parameters_file_path = os.path.join(
                       dm.get_simulation_parameters_dir(experiment_name), 
                                                 file_name)
        
        # Merge the standard values with the current experiment settings
        settings = {**STANDARD_VALUES, **experiment_settings}
        
        # Write the simulation parameters to a JSON file
        write_simulation_parameters(simulation_parameters_file_path, 
                                    settings["population_size"],
                                    settings['long_shedders_ratio'],
                                    settings["tau_1"],
                                    settings["tau_2"],
                                    settings["tau_3"],
                                    settings["tau_3_long"],
                                    settings["tau_4"],
                                    settings["infected_individuals_at_start"],
                                    settings["R"],
                                    settings["R_long"],
                                    settings["M_nsr_long"],
                                    settings["diagnosis_rate_standard"],
                                    settings["diagnosis_rate_long"],
                                    settings["IH_virus_emergence_rate"],
                                    settings["nucleotide_substitution_rate"],
                                    settings["final_time"],
                                    settings["max_runtime"],
                                    settings["phenotype_model"],
                                    settings["sequencing_rate"],
                                    settings["sequence_long_shedders"],
                                    settings["seed"]
                                    )

    print(f"Simulation parameters written to directory: {simulation_parameters_file_path}")



[docs]
def write_seeded_simulation_parameters(experiment_name: str):
    """
    Generates multiple JSON files with different seeds for each simulation parameter file 
    within a specified experiment. The function reads the original simulation parameter 
    files, adds a 'seed' field, and writes the modified files to subdirectories named 
    after the original files.

    Parameters:
    -----------
    experiment_name : str
        The name of the experiment. 
    
    n_seeds : int
        The number of seeded JSON files to generate for each simulation parameter file. 

    Directory Structure:
    --------------------
    
    Data/
    └── experiment_name/
        ├── 02_Simulation_parameters/
        │   ├── param_file_1.json
        │   ├── param_file_2.json
        │   └── ...
        └── 03_Seeded_simulation_parameters/
            ├── param_file_1/
            │   ├── seed_0.json
            │   ├── seed_1.json
            │   └── ...
            ├── param_file_2/
            │   ├── seed_0.json
            │   ├── seed_1.json
            │   └── ...
            └── ...
    """
    simulation_parameters_dir = dm.get_simulation_parameters_dir(experiment_name)
    seeded_simulation_parameters_dir = dm.get_seeded_simulation_parameters_dir(experiment_name)

    # Iterate over all files in the simulation parameters directory
    for filename in os.listdir(simulation_parameters_dir):
        filepath = os.path.join(simulation_parameters_dir, filename)
        
        # Read the original JSON file
        with open(filepath, 'r') as file:
            simulation_parameters = json.load(file)
        
        # Create a subdirectory for the seeded files
        subdir_name = filename.replace(".json", "")
        subdir_path = os.path.join(seeded_simulation_parameters_dir, subdir_name)
        os.makedirs(subdir_path, exist_ok=True)
        
        # Generate multiple files with different seeds
        n_seeds = read_n_seeds_file(experiment_name)['n_seeds']
        for i in range(n_seeds):
            # seed = random.randint(0, 1000000)
            simulation_parameters['seed'] = i

            seeded_file_path = os.path.join(subdir_path, f"seed_{i:04}.json")
            
            # Write the new JSON file with the added seed
            with open(seeded_file_path, 'w') as seeded_file:
                json.dump(simulation_parameters, seeded_file, indent=4)




[docs]
def get_seeded_simulation_parameters_paths(experiment_name):
    """
    Retrieves all seeded simulation parameter file paths for a given experiment.

    This function searches through the directory structure of the provided
    experiment name, looking for all JSON seeded simulation parameters files. 
    It returns a list of full paths to these files.

    Args:
        experiment_name (str): The name of the experiment for which seeded simulation 
                               parameter paths are to be retrieved.

    Returns:
        list of str: A list of file paths, each pointing to a JSON file containing
                     seeded simulation parameters.

    Example:
        If `experiment_name` is 'Experiment_1', and the directory structure contains
        multiple JSON files under:
            '/data_dir/Experiment_1/03_Seeded_simulation_parameters',
        the function will return a list like:
            [
                '/data_dir/Experiment_1/03_Seeded_simulation_parameters/subdir/file1.json',
                '/data_dir/Experiment_1/03_Seeded_simulation_parameters/subdir/file2.json'
            ]
    """
    # Define the base path for the simulation parameters
    base_dir = os.path.join(_data_dir, experiment_name, "03_Seeded_simulation_parameters")
    
    # List to store all seed file paths
    seeded_simulation_parameters_paths = []
    
    # Walk through the directory structure to find all .json files
    for root, dirs, files in os.walk(base_dir):
        for file in files:
            if file.endswith(".json"):
                # Construct the full path to the file and add it to the list
                file_path = os.path.join(root, file)
                seeded_simulation_parameters_paths.append(file_path)
    
    return seeded_simulation_parameters_paths



[docs]
def read_seeded_simulation_parameters(experiment_name, seeded_simulation_parameters_path):
    with open(seeded_simulation_parameters_path, 'r') as seeded_file:
        seeded_simulation_parameters = json.load(seeded_file)
    return seeded_simulation_parameters



[docs]
def get_simulation_parameters_filepath_of_simulation_output_dir(simulation_output_dir):
    # get filepath of simulation parameters file from which the simulation_output_dir was generated
    from pathlib import Path
    simulation_output_dir_path = Path(simulation_output_dir)
    parts = simulation_output_dir_path.parts
    experiment_name = parts[-3]
    simulation_output_folder_name = parts[-1]
    # get simulation parameters dir for that experiment
    simulation_parameters_dir = dm.get_simulation_parameters_dir(experiment_name)
    simulation_parameters_file_path = os.path.join(simulation_parameters_dir,
                                              simulation_output_folder_name +'.json')

    return simulation_parameters_file_path



[docs]
def get_parameter_value_from_simulation_output_dir(simulation_output_dir, parameter):
    # read and return desired parameter value for specific simulation output directory
    simulation_parameters_file_path = get_simulation_parameters_filepath_of_simulation_output_dir(
                       simulation_output_dir)
    with open(simulation_parameters_file_path, 'r') as file:
        parameters_dict = json.load(file)
    
    return parameters_dict[parameter]



[docs]
def read_OSR_NSR_regressor_parameters():
    file_path = os.path.join(dm.get_reference_parameters_dir(),
              'OSR_NSR_regressor_parameters_for_standard_parameter_values_exp.csv')
    df = pd.read_csv(file_path,index_col=0)
    best_fit_df = pd.to_numeric(df['Best Fit'], errors='coerce')
    return best_fit_df

    

[docs]
def get_n_seeds_from_experiment_settings(experiment_numbered_name):
    """Reads n_seeds from the specific setting JSON file."""
    
    # Use the exact helper you specified
    settings_dir = dm.get_experiment_settings_dir(experiment_numbered_name)
    
    # Use the exact naming convention
    file_path = os.path.join(settings_dir, f"{experiment_numbered_name}_n_seeds.json")
    
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Missing seeds setting file: {file_path}")
        
    with open(file_path, 'r') as f:
        data = json.load(f)
        
    return int(data['n_seeds'])