/**
 * \file pappsomspp/processing/cbor/psm/evalscan/psmspecpeptidoms.cpp
 * \date 19/07/2025
 * \author Olivier Langella
 * \brief compute SpecPeptidOms alignment on scan's PSM
 */

/*******************************************************************************
 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
 *
 * This file is part of PAPPSOms-tools.
 *
 *     PAPPSOms-tools is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     PAPPSOms-tools is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with PAPPSOms-tools.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/


#include "psmspecpeptidoms.h"

#include "pappsomspp/core/processing/cbor/psm/psmcborutils.h"
#include "pappsomspp/core/processing/filters/filterchargedeconvolution.h"
#include "pappsomspp/core/processing/filters/filterresample.h"
#include "pappsomspp/core/processing/filters/filterpass.h"
#include "pappsomspp/core/peptide/peptideproformaparser.h"
#include "pappsomspp/core/pappsoexception.h"
#include "pappsomspp/core/exception/exceptionoutofrange.h"
#include "pappsomspp/core/processing/specpeptidoms/spomsspectrum.h"

/** \brief build the SpecPeptidOms PSM processor from its JSON parameters
 *
 * The whole parameter object is kept so that it can be written back in the
 * output parameter map. The following keys are read here:
 * "fragment_tolerance_unit" ("dalton" or "ppm"), "fragment_tolerance",
 * "spectrum" (object holding "minimum_mz", "n_most_intense", "deisotope")
 * and "max_interpretations_per_spectrum".
 *
 * \param buffer_scan_size forwarded to PsmFileScanProcessAndCopy
 * \param cbor_output_p forwarded to PsmFileScanProcessAndCopy
 * \param parameters JSON configuration of the SpecPeptidOms step
 */
pappso::cbor::psm::PsmSpecPeptidOms::PsmSpecPeptidOms(std::size_t buffer_scan_size,
                                                      pappso::cbor::CborStreamWriter *cbor_output_p,
                                                      const QJsonObject &parameters)
  : PsmFileScanProcessAndCopy(buffer_scan_size, cbor_output_p, "SpecPeptidOms")
{
  m_specpeptidomsParameters = parameters;

  // fragment tolerance: the unit selects the precision factory to use
  const QString tolerance_unit = parameters.value("fragment_tolerance_unit").toString();
  const double tolerance_value = parameters.value("fragment_tolerance").toDouble();
  if(tolerance_unit == "dalton")
    {
      m_fragmentTolerance = pappso::PrecisionFactory::getDaltonInstance(tolerance_value);
    }
  else if(tolerance_unit == "ppm")
    {
      m_fragmentTolerance = pappso::PrecisionFactory::getPpmInstance(tolerance_value);
    }
  // NOTE(review): any other unit leaves m_fragmentTolerance untouched here;
  // confirm that the class declaration gives it a usable default.

  // spectrum preprocessing settings
  const QJsonObject spectrum_settings = parameters.value("spectrum").toObject();
  m_minimumMz                         = spectrum_settings.value("minimum_mz").toDouble();
  m_nMostIntense                      = spectrum_settings.value("n_most_intense").toInt();
  m_deisotope                         = spectrum_settings.value("deisotope").toBool();

  // register modification MOD:00397 on residue 'C' in the amino acid code
  m_aaCode.addAaModification('C', AaModification::getInstance("MOD:00397"));

  m_maxInterpretationsPerSpectrum = parameters.value("max_interpretations_per_spectrum").toInt();

  delayProteinMapInMemory();
}

/** \brief destructor: nothing to release explicitly in this class */
pappso::cbor::psm::PsmSpecPeptidOms::~PsmSpecPeptidOms() = default;

/** \brief run the base class end-of-buffer processing and report progress
 *
 * The processed scan counter is increased by the buffer size and the
 * resulting total is sent to the monitor as a status message.
 *
 * \param monitor user interface monitor receiving the status message
 */
void
pappso::cbor::psm::PsmSpecPeptidOms::processBufferScanDone(pappso::UiMonitorInterface &monitor)
{
  PsmFileScanProcessAndCopy::processBufferScanDone(monitor);

  // the counter advances by the configured buffer size, not by a per-scan count
  m_countScanProcessed += m_bufferScanSize;

  const QString status_message = QObject::tr("%1 scan processed").arg(m_countScanProcessed);
  monitor.setStatus(status_message);
}

/** \brief apply the configured preprocessing filters to a mass spectrum
 *
 * Filters are applied in place, in this order: optional charge
 * deconvolution (only when the "deisotope" setting is on), a
 * FilterResampleKeepGreater built from the minimum m/z setting, then a
 * FilterGreatestY built from the "n most intense" setting.
 *
 * \param mass_spectrum spectrum modified in place
 */
void
pappso::cbor::psm::PsmSpecPeptidOms::filterMassSpectrum(pappso::MassSpectrum &mass_spectrum) const
{
  if(m_deisotope)
    {
      pappso::FilterChargeDeconvolution charge_deconvolution(m_fragmentTolerance);
      charge_deconvolution.filter(mass_spectrum);
    }

  pappso::FilterResampleKeepGreater minimum_mz_filter(m_minimumMz);
  minimum_mz_filter.filter(mass_spectrum);

  pappso::FilterGreatestY most_intense_filter(m_nMostIntense);
  most_intense_filter.filter(mass_spectrum);
}

/** \brief hook called once the protein map is available
 *
 * When a decoy prefix is set, a reversed copy of each protein is built with
 * the prefix prepended to its accession and flagged as non target. The base
 * class implementation is then called.
 *
 * NOTE(review): each decoy copy built in the loop is neither stored nor read
 * afterwards within this function, so the loop has no visible effect here.
 * Confirm whether the decoys were meant to be added to the protein map.
 *
 * \param monitor user interface monitor forwarded to the base class
 */
void
pappso::cbor::psm::PsmSpecPeptidOms::proteinMapReady(pappso::UiMonitorInterface &monitor)
{
  const bool decoy_requested = !m_decoyPrefix.isEmpty();
  if(decoy_requested)
    {
      for(auto &protein_entry : m_proteinMap.getProteinMap())
        {
          // work on a deep copy so that the target protein is left untouched
          PsmProtein decoy_protein = protein_entry.second;
          decoy_protein.protein_sp = std::make_shared<Protein>(*decoy_protein.protein_sp);

          decoy_protein.protein_sp->reverse();
          decoy_protein.protein_sp->setAccession(m_decoyPrefix +
                                                 decoy_protein.protein_sp->getAccession());
          decoy_protein.isTarget = false;
        }
    }

  PsmFileScanProcessAndCopy::proteinMapReady(monitor);
}


/** \brief add the SpecPeptidOms parameters to the parameter map and write it
 *
 * The JSON parameters given at construction are converted to CBOR, stored
 * under the "peptidoms" key of the parameter map, and the whole map is
 * written to the CBOR output under the "parameter_map" key.
 *
 * \param monitor not used by this implementation
 */
void
pappso::cbor::psm::PsmSpecPeptidOms::parameterMapReady(pappso::UiMonitorInterface &monitor)
{
  const QCborValue parameters_as_cbor = QCborValue::fromJsonValue(m_specpeptidomsParameters);
  QCborMap peptidoms_parameter_map    = parameters_as_cbor.toMap();
  m_cborParameterMap.insert(QString("peptidoms"), peptidoms_parameter_map);

  mp_cborOutput->append("parameter_map");
  mp_cborOutput->writeCborMap(m_cborParameterMap);
}

/** \brief allocate a new scan map bound to this processor
 *
 * \return a heap allocated PsmSpecPeptidOmsScan; presumably the caller takes
 * ownership of the pointer (to be confirmed against the base class contract)
 */
pappso::cbor::psm::CborScanMapBase *
pappso::cbor::psm::PsmSpecPeptidOms::newCborScanMap()
{
  PsmSpecPeptidOmsScan *scan_map_p = new PsmSpecPeptidOmsScan(*this, m_fragmentTolerance);
  return scan_map_p;
}

/** \brief read access to the amino acid code configured in the constructor
 *
 * \return reference to the internal AaCode member
 */
const pappso::AaCode &
pappso::cbor::psm::PsmSpecPeptidOms::getAaCode() const
{
  return this->m_aaCode;
}


/** \brief build a scan map bound to its parent SpecPeptidOms processor
 *
 * Keeps a pointer to the parent processor and copies its decoy prefix.
 *
 * \param psm_specpeptidoms parent processor; only its address is stored, so
 * it has to outlive this object
 * \param fragment_tolerance not used by this constructor (the tolerance is
 * read from the parent processor when a scan is processed)
 */
pappso::cbor::psm::PsmSpecPeptidOmsScan::PsmSpecPeptidOmsScan(
  const pappso::cbor::psm::PsmSpecPeptidOms &psm_specpeptidoms,
  pappso::PrecisionPtr fragment_tolerance)
  : CborScanMapBase(psm_specpeptidoms)
{
  mp_psmSpecPeptidOms = &psm_specpeptidoms;
  m_decoyPrefix       = psm_specpeptidoms.m_decoyPrefix;
}

/** \brief destructor: the parent processor pointer is not owned, nothing to free */
pappso::cbor::psm::PsmSpecPeptidOmsScan::~PsmSpecPeptidOmsScan() = default;

void
pappso::cbor::psm::PsmSpecPeptidOmsScan::process()
{
  // qWarning() << "PsmSpecPeptidOmsScan::process " << keys();
  if(!keys().contains("id"))
    {
      throw pappso::PappsoException(QObject::tr("missing scan id"));
    }
  if(keys().contains("psm_list"))
    {
      QualifiedMassSpectrumSPtr qualified_mass_spectrum = getCurrentQualifiedMassSpectrumSPtr();

      // qWarning() << "PsmSpecPeptidOmsScan::process "
      //            << qualified_mass_spectrum.get()->getMassSpectrumId().getSpectrumIndex();

      mp_psmSpecPeptidOms->filterMassSpectrum(
        *(qualified_mass_spectrum.get()->getMassSpectrumSPtr().get()));

      pappso::specpeptidoms::SpOMSSpectrumCsp experimental_spectrum =
        std::make_shared<pappso::specpeptidoms::SpOMSSpectrum>(
          *qualified_mass_spectrum.get(),
          mp_psmSpecPeptidOms->m_fragmentTolerance,
          mp_psmSpecPeptidOms->getAaCode());


      pappso::specpeptidoms::SemiGlobalAlignment semi_global_alignment(
        mp_psmSpecPeptidOms->m_scoreValues,
        mp_psmSpecPeptidOms->m_fragmentTolerance,
        mp_psmSpecPeptidOms->m_aaCode);


      QCborArray new_psm_arr;
      for(QCborValue cbor_psm : value("psm_list").toArray())
        {
          QCborMap old_cbor_psm_map = cbor_psm.toMap();


          if(!old_cbor_psm_map.keys().contains("proforma"))
            {
              throw pappso::PappsoException(
                QObject::tr("missing proforma in psm %1").arg(old_cbor_psm_map.keys().size()));
            }
          pappso::PeptideSp peptide_sp = pappso::PeptideProFormaParser::parseString(
            old_cbor_psm_map.value("proforma").toString());


          pappso::specpeptidoms::SpOMSProtein protein(old_cbor_psm_map.value("proforma").toString(),
                                                      peptide_sp.get()->getSequence(),
                                                      mp_psmSpecPeptidOms->m_aaCode);

          sequenceAlignment(false,
                            old_cbor_psm_map,
                            new_psm_arr,
                            experimental_spectrum,
                            semi_global_alignment,
                            &protein);

          if(!m_decoyPrefix.isEmpty())
            {
              QString sequence = peptide_sp.get()->getSequence();
              std::reverse(sequence.begin(), sequence.end());

              pappso::specpeptidoms::SpOMSProtein protein_rev(
                m_decoyPrefix + old_cbor_psm_map.value("proforma").toString(),
                sequence,
                mp_psmSpecPeptidOms->m_aaCode);

              sequenceAlignment(true,
                                old_cbor_psm_map,
                                new_psm_arr,
                                experimental_spectrum,
                                semi_global_alignment,
                                &protein_rev);
            }


          // qWarning() << "new_psm_arr.size()=" << new_psm_arr.size();
        }

      // insert(QString("psm_list"), new_psm_arr);
      remove(QString("psm_list"));
      insert(QString("psm_list"), new_psm_arr);

      filterPsmListUniqueUniqueProforma();
    }
  // qWarning() << "PsmSpecPeptidOmsScan::process end";
}


void
pappso::cbor::psm::PsmSpecPeptidOmsScan::sequenceAlignment(
  bool is_reverse,
  const QCborMap &old_cbor_psm_map,
  QCborArray &new_psm_arr,
  pappso::specpeptidoms::SpOMSSpectrumCsp &experimental_spectrum,
  pappso::specpeptidoms::SemiGlobalAlignment &semi_global_alignment,
  const pappso::specpeptidoms::SpOMSProtein *protein_ptr)
{

  std::vector<pappso::specpeptidoms::Location> locations;
  std::vector<double> potential_mass_errors;
  const QString &sequence = protein_ptr->getSequence();
  // qWarning() << "PsmSpecPeptidOmsScan::process " << peptide_sequence;
  //  do not take into account peptide containing too much redundancy
  if(pappso::specpeptidoms::SemiGlobalAlignment::checkSequenceDiversity(sequence, 5, 2))
    {
      if((sequence.size() >= 8) &&
         (!pappso::specpeptidoms::SemiGlobalAlignment::checkSequenceDiversity(sequence, 8, 3)))
        return;

      semi_global_alignment.fastAlign(*experimental_spectrum.get(), protein_ptr);
      // qDebug() << "Completed fastAlign";
      locations = semi_global_alignment.getLocationSaver().getLocations();

      qDebug() << "locations.size():" << locations.size();
      for(auto loc : locations)
        {
          QCborMap new_cbor_psm;
          qDebug() << "beginning=" << loc.beginning << "length=" << loc.length
                   << "tree=" << loc.tree << "score=" << loc.score
                   << "protein=" << loc.proteinPtr->getAccession();
          semi_global_alignment.preciseAlign(
            *experimental_spectrum.get(), loc.proteinPtr, loc.beginning, loc.length);
          qDebug() << "Completed preciseAlign";

          pappso::specpeptidoms::Alignment best_alignment =
            semi_global_alignment.getBestAlignment();
          /*  qDebug() << "Best alignment" << best_alignment.interpretation
                     << best_alignment.score << "SPC" << best_alignment.SPC
                     << "beginning" << best_alignment.beginning << "end"
                     << best_alignment.end;*/
          if(best_alignment.end > (std::size_t)sequence.size())
            {
              throw pappso::ExceptionOutOfRange(QString("best_alignment.end > "
                                                        "(std::size_t)sequence.size() : %1 %2")
                                                  .arg(best_alignment.end)
                                                  .arg(sequence.size()));
            }
          if(best_alignment.begin_shift > 0 || best_alignment.end_shift > 0 ||
             best_alignment.shifts.size() > 0)
            {
              qDebug();
              potential_mass_errors =
                pappso::specpeptidoms::SemiGlobalAlignment::getPotentialMassErrors(
                  mp_psmSpecPeptidOms->m_aaCode, best_alignment, sequence);
              qDebug();
              semi_global_alignment.postProcessingAlign(*experimental_spectrum.get(),
                                                        loc.proteinPtr,
                                                        loc.beginning,
                                                        loc.length,
                                                        potential_mass_errors);

              qDebug() << "semi_global_alignment.getBestAlignment()";
              pappso::specpeptidoms::Alignment best_post_processed_alignment =
                semi_global_alignment.getBestAlignment();
              if(best_post_processed_alignment.SPC > best_alignment.SPC)
                {
                  qDebug() << "Best post-processed alignment"
                           << best_post_processed_alignment.m_peptideModel.toInterpretation()
                           << best_post_processed_alignment.score << "SPC"
                           << best_post_processed_alignment.SPC;
                  storeAlignment(is_reverse,
                                 old_cbor_psm_map,
                                 new_cbor_psm,
                                 protein_ptr->getAccession(),
                                 best_post_processed_alignment);
                }
              else
                {
                  qDebug() << "no improvement in post-processing";
                  storeAlignment(is_reverse,
                                 old_cbor_psm_map,
                                 new_cbor_psm,
                                 protein_ptr->getAccession(),
                                 best_alignment);
                }
            }
          else
            {

              storeAlignment(is_reverse,
                             old_cbor_psm_map,
                             new_cbor_psm,
                             protein_ptr->getAccession(),
                             best_alignment);
            }

          if(!new_cbor_psm.isEmpty())
            {
              new_psm_arr.push_back(new_cbor_psm);
            }
        }
    }
}

void
pappso::cbor::psm::PsmSpecPeptidOmsScan::filterAndSortPsmList()
{
  std::size_t max_psm = mp_psmSpecPeptidOms->m_maxInterpretationsPerSpectrum;


  struct sortPsmResults
  {
    qint64 score;
    QCborMap psm;
  };

  QCborArray old_psm_arr = value("psm_list").toArray();
  QCborArray new_psm_arr;
  // for(QCborValue cbor_psm : old_psm_arr) {


  std::vector<sortPsmResults> sort_psm_list;
  for(auto it_psm : old_psm_arr)
    {
      QCborMap psm_map = it_psm.toMap();
      qint64 score =
        psm_map.value("eval").toMap().value("peptidoms").toMap().value("score").toInteger();
      sort_psm_list.push_back({score, psm_map});
    }
  qDebug();
  std::sort(sort_psm_list.begin(), sort_psm_list.end(), [](sortPsmResults &a, sortPsmResults &b) {
    return a.score > b.score;
  });


  auto it_end = sort_psm_list.begin() + max_psm;


  qDebug() << sort_psm_list.size();
  for(auto it = sort_psm_list.begin(); it != sort_psm_list.end() && it != it_end; it++)
    {

      new_psm_arr.append(it->psm);
      qDebug();
    }

  // qWarning() << new_psm_arr.size();
  remove(QString("psm_list"));
  insert(QString("psm_list"), new_psm_arr);
}


/** \brief convert an alignment into a new CBOR PSM
 *
 * new_cbor_psm is left untouched when the alignment score is not strictly
 * positive or when the ProForma string of the aligned peptide model fails
 * the diversity check. Otherwise it receives:
 *  - "proforma": ProForma string of the aligned peptide model
 *  - "protein_list": copied from the source PSM, then adjusted by
 *    fixPositionStart() with the alignment start position
 *  - "eval": the "matcher" evaluation copied from the source PSM and a new
 *    "peptidoms" map holding "bracket", "spc", "score" and "nam"
 *
 * Example of a source PSM:
 * \code
 * {"proforma":"LHGAGADDADADD",
 *  "protein_list":
 *  [{ "accession": "GRMZM2G108267_P01", "positions": [ 337 ] },
 *   { "accession": "GRMZM2G108267_P02", "positions": [ 337 ] }],
 *  "eval":{ "matcher": { "score": 193077 } }
 * }
 * \endcode
 *
 * \param is_reverse true for a decoy alignment, forwarded to fixPositionStart()
 * \param old_cbor_psm source PSM
 * \param new_cbor_psm PSM to fill
 * \param accession only written to the debug output
 * \param alignment alignment to store
 */
void
pappso::cbor::psm::PsmSpecPeptidOmsScan::storeAlignment(
  bool is_reverse,
  const QCborMap &old_cbor_psm,
  QCborMap &new_cbor_psm,
  const QString &accession,
  const pappso::specpeptidoms::Alignment &alignment)
{
  qDebug() << accession;

  // only strictly positive scores are stored
  if(!(alignment.score > 0))
    {
      return;
    }

  const QString proforma_key(alignment.m_peptideModel.toProForma());

  // do not take into account peptide containing too much redundancy
  if(!pappso::specpeptidoms::SemiGlobalAlignment::checkSequenceDiversity(proforma_key, 5, 2))
    {
      return;
    }

  new_cbor_psm.insert(QString("proforma"), proforma_key);

  // copy protein list in new psm, then shift its positions
  new_cbor_psm.insert(QString("protein_list"), old_cbor_psm.value("protein_list"));
  fixPositionStart(is_reverse, new_cbor_psm, alignment.getPositionStart());

  // SpecPeptidOms evaluation of this alignment
  QCborMap peptidoms_eval;
  peptidoms_eval.insert(QString("bracket"), alignment.m_peptideModel.toInterpretation());
  peptidoms_eval.insert(QString("spc"), static_cast<qint64>(alignment.SPC));
  peptidoms_eval.insert(QString("score"), alignment.score);
  peptidoms_eval.insert(QString("nam"), alignment.getNonAlignedMass());

  QCborMap eval_map;
  // copy matcher eval in new psm
  eval_map.insert(QString("matcher"), old_cbor_psm.value("eval").toMap().value("matcher"));
  // store peptidoms eval in new psm
  eval_map.insert(QString("peptidoms"), peptidoms_eval);

  new_cbor_psm.insert(QString("eval"), eval_map);
}
void
pappso::cbor::psm::PsmSpecPeptidOmsScan::fixPositionStart(bool is_reverse,
                                                          QCborMap &new_cbor_psm,
                                                          std::size_t offset_position) const
{
  QCborArray new_protein_list;
  for(auto qcbor_protein : new_cbor_psm.value("protein_list").toArray())
    {
      QCborMap protein;

      QCborArray positions;
      if(is_reverse)
        {
          protein.insert(QString("accession"),
                         m_decoyPrefix + qcbor_protein.toMap().value("accession").toString());
          for(auto position : qcbor_protein.toMap().value("positions").toArray())
            {
              positions.append(position.toInteger() + (qint64)offset_position);
            }
        }
      else
        {

          protein.insert(QString("accession"), qcbor_protein.toMap().value("accession"));
          for(auto position : qcbor_protein.toMap().value("positions").toArray())
            {
              positions.append(position.toInteger() + (qint64)offset_position);
            }
        }

      protein.insert(QString("positions"), positions.toCborValue());

      new_protein_list.append(protein);
    }
  new_cbor_psm.insert(QString("protein_list"), new_protein_list);
}
