// ****************************************************************************
//
//          Aevol - An in silico experimental evolution platform
//
// ****************************************************************************
//
// Copyright: See the AUTHORS file provided with the package or <www.aevol.fr>
// Web: http://www.aevol.fr/
// E-mail: See <http://www.aevol.fr/contact/>
// Original Authors : Guillaume Beslon, Carole Knibbe, David Parsons
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
// ****************************************************************************

#include "CheckpointHandler.h"

#include <algorithm>
#include <format>
#include <regex>
#include <sstream>

#include "aevol_flavor.h"
#include "aevol_version.h"
#include "io/fasta/fasta.h"
#include "Grid.h"
#include "Individual.h"

namespace aevol {

/**
 * \brief Build a handler working on the default checkpoints directory ("./checkpoints")
 *
 * Delegates to the directory-taking constructor.
 */
CheckpointHandler::CheckpointHandler()
    : CheckpointHandler(std::filesystem::path("./checkpoints")) {}

/**
 * \brief Build a handler working on the provided checkpoints directory
 *
 * The directory (and any missing parent) is created if it does not exist yet.
 *
 * \param chkpts_dir directory under which checkpoints are stored
 */
CheckpointHandler::CheckpointHandler(const std::filesystem::path& chkpts_dir)
    : chkpts_dir_(chkpts_dir) {
  // Use the stored member so that what is created is exactly what will be used later on
  std::filesystem::create_directories(chkpts_dir_);
}

/**
 * \brief Factory: make a handler working on the default checkpoints directory
 *
 * \return owning pointer to the newly created handler
 */
auto CheckpointHandler::make_default() -> std::unique_ptr<CheckpointHandler> {
  // NOTE(review): plain `new` is presumably used because the constructors are not public (which would
  // rule out std::make_unique) -- confirm against the class declaration
  using handler_ptr = std::unique_ptr<CheckpointHandler>;
  return handler_ptr(new CheckpointHandler());
}

/**
 * \brief Factory: make a handler working on the provided checkpoints directory
 *
 * \param chkpts_dir directory under which checkpoints are stored
 * \return owning pointer to the newly created handler
 */
auto CheckpointHandler::make_from_ckpts_dir(const std::filesystem::path& chkpts_dir)
    -> std::unique_ptr<CheckpointHandler> {
  using handler_ptr = std::unique_ptr<CheckpointHandler>;
  return handler_ptr(new CheckpointHandler(chkpts_dir));
}

/**
 * \brief Factory: make a handler whose checkpoints directory is inferred from a checkpoint file
 *
 * The checkpoints directory is taken to be the grand-parent of the (canonicalized) checkpoint file, i.e.
 * <checkpoints dir>/<one checkpoint dir>/<checkpoint file>.
 *
 * \param chkp_file_path path to an existing checkpoint file; a user error is reported through
 *                       exit_with_usr_msg() if it does not exist
 * \return owning pointer to the newly created handler
 */
auto CheckpointHandler::make_from_ckpt_file(const std::filesystem::path& chkp_file_path)
    -> std::unique_ptr<CheckpointHandler> {
  namespace fs = std::filesystem;

  // The provided file must exist
  const bool file_found = fs::exists(chkp_file_path);
  if (not file_found) {
    exit_with_usr_msg(std::format("cannot access '{}': No such file or directory", chkp_file_path.string()));
  }

  // Walk up twice from the resolved file: first to the directory of this very checkpoint, then to the
  // directory holding all checkpoints
  const auto resolved_file   = fs::canonical(chkp_file_path);
  const auto single_chkp_dir = resolved_file.parent_path();
  return std::unique_ptr<CheckpointHandler>(new CheckpointHandler(single_chkp_dir.parent_path()));
}

/**
 * \brief Read a checkpoint from the three files it is made of
 *
 * The header of each file is checked for flavor/version compatibility, then the following items are read:
 *  - setup file: time, checkpoint frequency, best-individual and whole-population stats output frequencies
 *  - checkpoint file: grid size, grid cells, tree output frequency
 *  - population file: population size and the population itself
 *  - setup + checkpoint files: experiment setup and phenotypic target
 *
 * Statement order matters: each file is consumed sequentially, in the order it was written.
 *
 * Failure to open the checkpoint or setup file, a population size that differs from width * height, or any
 * std::exception raised while reading is reported on std::cerr and terminates the program with exit(1).
 *
 * \param chkp_path  path to the checkpoint file
 * \param pop_path   path to the population (fasta) file
 * \param setup_path path to the setup file
 * \return the data read from the checkpoint
 */
auto CheckpointHandler::read_checkpoint(const std::filesystem::path& chkp_path,
                                        const std::filesystem::path& pop_path,
                                        const std::filesystem::path& setup_path) -> CheckpointData {
  auto checkpoint = mxifstream(chkp_path);
  if (not checkpoint) {
    std::cerr << "Error: failed to open file " << chkp_path << std::endl;
    exit(1);
  }
  auto setup_file = std::ifstream(setup_path);
  if (not setup_file) {
    std::cerr << "Error: failed to open file " << setup_path << std::endl;
    exit(1);
  }
  try {
    auto pop_file = FastaReader(pop_path);

    // Read header in all checkpoint-constitutive files
    std::string header;
    getline(checkpoint >> std::ws, header);
    check_compatibility(header, chkp_path);
    getline(pop_file >> std::ws, header);
    check_compatibility(header, pop_path);
    getline(setup_file >> std::ws, header);
    check_compatibility(header, setup_path);

    // Read current time from setup file
    auto time_read = aevol::time_type{};
    get_expected_or_throw(setup_file, "Time:", time_read);

    // Read grid size from checkpoint and pop size from population file; check consistency
    // (value-initialized so that they never hold an indeterminate value)
    Grid::xy_type width{};
    Grid::xy_type height{};
    get_expected_or_throw(checkpoint, "Grid_size:", width, height);
    indiv_id_type pop_size{};
    get_expected_or_throw(pop_file, ";");
    get_expected_or_throw(pop_file, "Pop_size:", pop_size);
    if (pop_size != width * height) {
      std::cerr << "Error: checkpoint pop size and grid size do not match" << std::endl;
      exit(1);
    }

    // Read experiment setup
    auto exp_setup_read = ExpSetup::make_from_checkpoint(setup_file, checkpoint, pop_size);

    // Read grid
    auto grid = Grid(width, height);
    for (size_t i = 0 ; i < pop_size; ++i) {
      get_expected_or_throw(checkpoint, "GridCell:", grid[i]);
    }

    // Read phenotypic target
    auto phenotypic_target = PhenotypicTarget::make_from_checkpoint(
        setup_file, checkpoint, exp_setup_read->fuzzy_factory().flavor(), exp_setup_read->fuzzy_factory().sampling());

    // Read checkpoint frequency
    auto checkpoint_frequency = time_type{};
    get_expected_or_throw(setup_file, "CheckpointFrequency:", checkpoint_frequency);

    // Read tree output frequency
    auto tree_output_frequency = std::optional<time_type>{};
    get_expected_or_throw(checkpoint, "TreeOutputFrequency:", tree_output_frequency);

    // Read stats output frequencies
    auto best_indiv_stats_output_frequency = std::optional<time_type>{};
    auto whole_pop_stats_output_frequency = std::optional<time_type>{};
    get_expected_or_throw(setup_file, "BestIndivStatsOutputFrequency:", best_indiv_stats_output_frequency);
    get_expected_or_throw(setup_file, "PopStatsOutputFrequency:", whole_pop_stats_output_frequency);

    // Read population
    auto fasta_population = read_population(pop_file, pop_size);

    // The population is handed over (moved) rather than copied: it is not used locally any more
    return CheckpointData{time_read,
                          std::move(fasta_population),
                          std::move(grid),
                          std::move(phenotypic_target),
                          std::move(exp_setup_read),
                          checkpoint_frequency,
                          tree_output_frequency,
                          best_indiv_stats_output_frequency,
                          whole_pop_stats_output_frequency};
  }
  catch (const std::exception& e) {
    std::cerr << "Error while reading checkpoint:\n  " << e.what() << std::endl;
    exit(1);
  }
}

/**
 * \brief Read a checkpoint given the path to its checkpoint (.ae) file
 *
 * The paths of the companion population and setup files are worked out by make_chkp_files_paths().
 *
 * \param chkp_path path to the checkpoint file
 * \return the data read from the checkpoint
 */
auto CheckpointHandler::read_checkpoint(const std::filesystem::path& chkp_path) -> CheckpointData {
  // Paths are returned in the order: checkpoint, population, setup
  const auto files_paths = make_chkp_files_paths(chkp_path);
  return read_checkpoint(files_paths[0], files_paths[1], files_paths[2]);
}

/**
 * \brief Read the checkpoint corresponding to the provided time from the checkpoints directory
 *
 * A user error is reported through exit_with_usr_msg() if the time stored in the checkpoint differs from
 * the requested one.
 *
 * \param time time of the checkpoint to read
 * \return the data read from the checkpoint
 */
auto CheckpointHandler::read_checkpoint(aevol::time_type time) const -> CheckpointData {
  // Paths are returned in the order: checkpoint, population, setup
  const auto files_paths = make_chkp_files_paths(time);
  const auto& setup_path = files_paths[2];

  auto chkpt_data = read_checkpoint(files_paths[0], files_paths[1], setup_path);

  // The time found in the files must be the one that was asked for
  const bool time_mismatch = (chkpt_data.time != time);
  if (time_mismatch) {
    const auto user_msg =
        std::format("discrepancy in simulation time:\n\trequested: {},\n\tfound in file: \"{}\" (file {})",
                    time,
                    chkpt_data.time,
                    setup_path.string());
    exit_with_usr_msg(user_msg);
  }

  return chkpt_data;
}

/**
 * \brief Write a checkpoint for the provided time
 *
 * A dedicated directory is created for the checkpoint, in which three files are written:
 *  - the checkpoint file: header, grid size, grid cells, tree output frequency
 *  - the population file: header, population size, population
 *  - the setup file: header, time, checkpoint frequency, stats output frequencies
 * The experiment setup and the phenotypic target each write to both the setup and the checkpoint file.
 *
 * Failure to open any of the three files is reported on std::cerr and terminates the program with exit(1).
 *
 * \param time                              time the checkpoint corresponds to
 * \param indivs_idxs_map                   population, as passed on to write_indivs_idxs_map()
 * \param grid                              grid to be saved
 * \param target                            phenotypic target to be saved
 * \param exp_setup                         experiment setup to be saved
 * \param checkpoint_frequency              checkpoint frequency to be saved
 * \param tree_output_frequency             tree output frequency to be saved
 * \param best_indiv_stats_output_frequency best individual stats output frequency to be saved
 * \param whole_pop_stats_output_frequency  whole population stats output frequency to be saved
 */
void CheckpointHandler::write_checkpoint(
    aevol::time_type time,
    const indivs_idxs_sorted_map_t& indivs_idxs_map,
    const Grid& grid,
    const PhenotypicTarget& target,
    const ExpSetup& exp_setup,
    time_type checkpoint_frequency,
    std::optional<time_type> tree_output_frequency,
    std::optional<time_type> best_indiv_stats_output_frequency,
    std::optional<time_type> whole_pop_stats_output_frequency) {
  // Common reaction to a file that could not be opened: report and quit
  const auto die_on_open_failure = [](const std::filesystem::path& unopened_file) {
    std::cerr << "Error: failed to open file " << unopened_file << std::endl;
    exit(1);
  };

  // The checkpoint gets its own directory
  std::filesystem::create_directories(make_chkp_dir_path(time));

  // Open the three files (paths are returned in the order: checkpoint, population, setup)
  const auto files_paths = make_chkp_files_paths(time);
  const auto& chkp_path  = files_paths[0];
  const auto& pop_path   = files_paths[1];
  const auto& setup_path = files_paths[2];

  auto checkpoint = mxofstream(chkp_path);
  if (not checkpoint) {
    die_on_open_failure(chkp_path);
  }
  auto pop_file = FastaWriter(pop_path);
  if (not pop_file) {
    die_on_open_failure(pop_path);
  }
  auto setup_file = std::ofstream(setup_path);
  if (not setup_file) {
    die_on_open_failure(setup_path);
  }

  // Each file starts with a header stating the flavor and version it was generated by
  checkpoint << std::format("Checkpoint file generated by {} version {}\n", aevol::flavor, aevol::version);
  pop_file << std::format("; Population file generated by {} version {}\n", aevol::flavor, aevol::version);
  setup_file << std::format("Setup file generated by {} version {}\n", aevol::flavor, aevol::version);

  // Current time goes to the setup file
  setup_file << "Time: " << time << "\n";

  // Grid size goes to the checkpoint file, population size (one individual per cell) to the population file
  const auto nb_cells = grid.width() * grid.height();
  checkpoint << "Grid_size: " << grid.width() << " " << grid.height() << "\n";
  pop_file << "; Pop_size: " << nb_cells << "\n";

  // Experiment setup
  exp_setup.write_to_checkpoint(setup_file, checkpoint);

  // Grid, one cell per line
  for (size_t cell_idx = 0; cell_idx < nb_cells; ++cell_idx) {
    checkpoint << "GridCell: " << grid[cell_idx] << "\n";
  }

  // Phenotypic target
  target.write_to_checkpoint(setup_file, checkpoint);

  // Output frequencies
  setup_file << "CheckpointFrequency: " << checkpoint_frequency << "\n";
  checkpoint << "TreeOutputFrequency: " << tree_output_frequency << "\n";
  setup_file << "BestIndivStatsOutputFrequency: " << best_indiv_stats_output_frequency << "\n";
  setup_file << "PopStatsOutputFrequency: " << whole_pop_stats_output_frequency << "\n";

  // Population
  write_indivs_idxs_map(pop_file, indivs_idxs_map);
}

/**
 * \brief Make the path of the directory holding the checkpoint for the provided time
 *
 * The directory is named "checkpoint_<t>", with t left-padded with zeros to a width of 9, and is located
 * in the checkpoints directory.
 *
 * \param t time of the checkpoint
 */
auto CheckpointHandler::make_chkp_dir_path(time_type t) const -> std::filesystem::path {
  const auto dir_name = std::format("checkpoint_{:0>9}", t);
  return chkpts_dir_ / dir_name;
}

/**
 * \brief Make paths of all files constitutive of a checkpoint
 *
 * All three files live in the directory returned by make_chkp_dir_path() and have the provided time,
 * left-padded with zeros to a width of 9, in their name.
 *
 * \param t time of the checkpoint
 * \return paths of the checkpoint (.ae), population (.fa) and setup (.txt) files, in that order
 */
auto CheckpointHandler::make_chkp_files_paths(time_type t) const -> std::array<std::filesystem::path, 3> {
  const auto parent_dir = make_chkp_dir_path(t);

  const auto chkp_filename  = std::format("checkpoint_{:0>9}.ae", t);
  const auto pop_filename   = std::format("population_{:0>9}.fa", t);
  const auto setup_filename = std::format("setup_{:0>9}.txt", t);

  return {parent_dir / chkp_filename, parent_dir / pop_filename, parent_dir / setup_filename};
}

/**
 * \brief Make paths of all files constitutive of a checkpoint, given the path to its checkpoint (.ae) file
 *
 * The provided path must exist and have extension ".ae". The companion files are looked for in the
 * directory of the canonical (symlink-resolved) checkpoint file, using two strategies in turn:
 *  1. if the canonical file name reads <pre>checkpoint<post>.ae and both <pre>population<post>.fa and
 *     <pre>setup<post>.txt exist, those are used;
 *  2. otherwise the directory must contain exactly one .ae file, one .fa/.fasta file and one .txt file.
 *
 * Any failure is reported through exit_with_usr_msg().
 *
 * \param chkpt_file path to the checkpoint file
 * \return paths of the checkpoint, population and setup files, in that order
 */
auto CheckpointHandler::make_chkp_files_paths(const std::filesystem::path& chkpt_file)
    -> std::array<std::filesystem::path, 3> {
  // Check that checkpoint file exists and has extension ".ae"
  if (not std::filesystem::exists(chkpt_file)) {
    exit_with_usr_msg(std::format("cannot access '{}': No such file or directory", chkpt_file.string()));
  }
  if (not (chkpt_file.extension() == ".ae")) {
    exit_with_usr_msg(std::format("file '{}' does not seem to be a valid aevol checkpoint", chkpt_file.string()));
  }

  // Extract the directory of this checkpoint from the canonical path
  auto canonical_chkpt_path = std::filesystem::canonical(chkpt_file);
  auto one_chkp_dir         = canonical_chkpt_path.parent_path();

  // First attempt: try extract pre- and post- fixes from canonical filename (<pre>checkpoint<post>.ae) and use these
  // to generate population and setup file names.
  // If all files exist, return their paths.
  // NB: the file name is taken from the canonical path (not from the path as provided) so that it is
  //     consistent with the directory being searched, even when the provided path is a symbolic link.
  const auto filename = canonical_chkpt_path.stem().string();
  std::smatch chkpt_filename_match;
  if (std::regex_match(filename, chkpt_filename_match, std::regex(R"((.*)checkpoint(.*))"))) {
    const auto prefix  = chkpt_filename_match[1].str();
    const auto postfix = chkpt_filename_match[2].str();

    const auto pop_candidate   = one_chkp_dir / std::format("{}population{}.fa", prefix, postfix);
    const auto setup_candidate = one_chkp_dir / std::format("{}setup{}.txt", prefix, postfix);

    if (std::filesystem::exists(pop_candidate) and std::filesystem::exists(setup_candidate)) {
      return {canonical_chkpt_path, pop_candidate, setup_candidate};
    }
  }

  // Second attempt: inspect directory content. Check that there is exactly one of each (.ae, fasta, .txt)
  // Return those if condition OK, fire error if not
  auto chkp_file_path = std::filesystem::path{};
  auto pop_file_path = std::filesystem::path{};
  auto setup_file_path = std::filesystem::path{};

  for (const auto& dir_entry : std::filesystem::directory_iterator{one_chkp_dir}) {
    const auto extension = dir_entry.path().extension();
    if (extension == ".ae") {
      if (not chkp_file_path.empty()) {
        exit_with_usr_msg(std::format("could not open checkpoint: found multiple .ae files in directory {}",
                                      one_chkp_dir.string()));
      }
      chkp_file_path = dir_entry.path();
    } else if (extension == ".fa" or extension == ".fasta") {
      if (not pop_file_path.empty()) {
        exit_with_usr_msg(std::format("could not open checkpoint: found multiple fasta files in directory {}",
                                      one_chkp_dir.string()));
      }
      pop_file_path = dir_entry.path();
    } else if (extension == ".txt") {
      if (not setup_file_path.empty()) {
        exit_with_usr_msg(std::format("could not open checkpoint: found multiple .txt files in directory {}",
                                      one_chkp_dir.string()));
      }
      setup_file_path = dir_entry.path();
    }
  }

  if (chkp_file_path.empty()) {
    exit_with_usr_msg(std::format("could not find checkpoint (.ae) file in directory {}",
                                  one_chkp_dir.string()));
  }
  if (pop_file_path.empty()) {
    exit_with_usr_msg(std::format("could not find population (.fa or .fasta) file in directory {}",
                                  one_chkp_dir.string()));
  }
  if (setup_file_path.empty()) {
    exit_with_usr_msg(std::format("could not find experiment setup (.txt) file in directory {}",
                                  one_chkp_dir.string()));
  }

  return {chkp_file_path, pop_file_path, setup_file_path};
}

/**
 * \brief Check that a checkpoint-constitutive file was generated by a compatible aevol
 *
 * The header must contain the running flavor and a "version <major>.<minor>" token whose major and minor
 * numbers equal those of the running program. Any discrepancy (including a header in which no version can
 * be found) is reported through exit_with_usr_msg().
 *
 * \param header header line read from the file
 * \param path   path of the file the header was read from (used in messages only)
 */
void CheckpointHandler::check_compatibility(std::string header, std::filesystem::path path) {
  if (header.find(aevol::flavor) == std::string::npos) {
    exit_with_usr_msg(std::format("discrepancy in aevol flavor:\n\trunning: {},\n\tfound in file: \"{}\" (file {})",
                                  aevol::flavor,
                                  header,
                                  path.string()));
  }

  // NB: the dot is escaped so that only a literal '.' is accepted between major and minor
  static const auto version_regex = std::regex(R"(version ([0-9]+)\.([0-9]+))");
  std::smatch version_match;
  if (not std::regex_search(header, version_match, version_regex)) {
    // Without this check, std::stoi would be handed an empty string and throw an obscure invalid_argument
    exit_with_usr_msg(std::format("could not find aevol version in file header:\n\tfound in file: \"{}\" (file {})",
                                  header,
                                  path.string()));
    return;
  }
  const auto file_major = std::stoi(version_match[1].str());
  const auto file_minor = std::stoi(version_match[2].str());

  if (file_major != aevol::version_major or file_minor != aevol::version_minor) {
    exit_with_usr_msg(std::format("incompatible aevol versions:\n\trunning: {},\n\tfound in file: \"{}\" (file {})",
                                  aevol::version,
                                  header,
                                  path.string()));
  }
}

/**
 * \brief Read a population from a fasta reader
 *
 * Entries are read until they account for population_size individuals. How many individuals an entry
 * stands for is worked out from its "indexes" modifier: either "all" (the entry stands for the whole
 * population and is expected to be the first one read) or a space-separated list, which counts for one
 * individual per listed item.
 *
 * \param fasta_reader    reader to retrieve the entries from
 * \param population_size number of individuals the population is made of
 * \return the entries read, each as (individual, seqid, modifiers)
 */
auto CheckpointHandler::read_population(FastaReader& fasta_reader, std::size_t population_size)
    -> fasta_structured_population_t {
  auto population = fasta_structured_population_t{};
  auto nb_retrieved_indivs = std::size_t(0);

  while (nb_retrieved_indivs < population_size) {
    auto [individual, seqid, modifiers] = fasta_reader.read_individual();

    // Update nb_retrieved_indivs w.r.t. content of "indexes" modifier
    // (scoped so that the reference into `modifiers` cannot be used once `modifiers` has been moved from)
    {
      const auto& idxs_str = modifiers.at("indexes");
      if (idxs_str == "all") {
        assert(nb_retrieved_indivs == 0);
        nb_retrieved_indivs = population_size;
      } else {
        // N space-separated items <=> N - 1 spaces
        nb_retrieved_indivs += std::ranges::count(idxs_str, ' ') + 1;
      }
    }

    // Hand the freshly read data over to the population instead of copying it
    population.emplace_back(std::move(individual), std::move(seqid), std::move(modifiers));
  }

  return population;
}

/**
 * \brief Write every entry of the provided map through the provided fasta writer
 *
 * \param fasta_writer    writer to output the entries to
 * \param indivs_idxs_map entries to be written
 */
void CheckpointHandler::write_indivs_idxs_map(FastaWriter& fasta_writer,
                                              const indivs_idxs_sorted_map_t& indivs_idxs_map) const {
  // The population is clonal if there's only 1 sequence in the map
  const bool is_clonal = (indivs_idxs_map.size() == 1);

  for (const auto& indiv_and_idxs : indivs_idxs_map) {
    fasta_writer.write_individual(indiv_and_idxs, is_clonal);
  }
}

}  // namespace aevol
