/* BEGIN software license
 *
 * MsXpertSuite - mass spectrometry software suite
 * -----------------------------------------------
 * Copyright(C) 2009,...,2018 Filippo Rusconi
 *
 * http://www.msxpertsuite.org
 *
 * This file is part of the MsXpertSuite project.
 *
 * The MsXpertSuite project is the successor of the massXpert project. This
 * project now includes various independent modules:
 *
 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * END software license
 */


/////////////////////// Stdlib includes


/////////////////////// Qt includes
#include <QChar>
#include <QString>


/////////////////////// Local includes
#include "MsXpS/libXpertMassCore/jsclassregistrar.h"
#include "MsXpS/libXpertMassCore/globals.hpp"
#include "MsXpS/libXpertMassCore/Formula.hpp"
#include "MsXpS/libXpertMassCore/Utils.hpp"

namespace MsXpS
{
namespace libXpertMassCore
{


/*!
\class MsXpS::libXpertMassCore::Formula
\inmodule libXpertMassCore
\ingroup PolChemDefBuildingdBlocks
\inheaderfile Formula.hpp

\brief The Formula class provides sophisticated abstractions to work with
formulas.

There are two peculiarities with this Formula implementation:

\list
\li The \e{Actionformula}
\li The \e{Title}
\endlist

\e{\b{The action-formula}}: the main textual element in this Formula
class is the \e{action-formula} (member m_actionFormula). A formula is the
description of the atomic composition of a compound. For example, the string
\e{C2H6} is a formula. While the previous \e{C2H6} example describes a static
chemical object, a Formula can also describe a dynamic chemical event, like a
reaction, by describing what chemical entities are gained by the molecule during
the chemical reaction (the "plus" component of the action-formula) and what
chemical entities are lost by the molecule (the "minus" component). For example,
an acetylation reaction can be described by the loss of \e{H2O} with gain of
\e{CH3COOH}. The net chemical gain on the molecule will be \e{CH3CO}. In this
example, one would thus define an action-formula in the following way:
\e{-H2O+CH3COOH}. The "minus" formula associated with the '-' action accounts
for the leaving group of the reaction, while the "plus" formula associated with
the '+' action accounts for the entering group of the reaction. Note that there
is no limitation on the amount of such actions, as one could have an action
formula like this \e{-H+CO2-H2O+C2H6}. An \e{action-formula} does not need to
have any action sign (+ or -), and if it has no sign, the action-formula is a
plus-signed formula by default, which is what the reader would expect for a
standard formula.

\e{\b{The title}}: the action-formula may be documented with a title: a prefix
text enclosed in double quotes, like the following: \e{"Decomposed adenine"
C5H4N5 +H}. This documentation element is called the \e{title}. Note that the
presence of a title in a formula does not change anything to its workings as
long as the \e{title} is effectively enclosed in double quotes. The title is by
no means required for an action-formula to work correctly. It is mainly used in some
particular context, like the calculator. An action-formula behaves exactly the
same as a simple formula from an end user perspective. Behind the scenes,
functions are called to separate all the '+'-associated formulas from all the
'-'-associated formulas so that masses are correctly associated to each
"leaving" or "entering" chemical groups. Formulas that are '-'-associated are
stored in the so-called "minus formula", while '+'-associated ones are stored in
the "plus formula". Note that all the formulas in Formula are QString objects.

Upon parsing of the action-formula, the m_minusFormula and the m_plusFormula
members are populated with formulas (in the  \e{-H+CO2-H2O+C2H6} example, the
"minus formula" would contain "HH2O", while the "plus formula" would contain
"CO2C2H6") and these are next used to account for the net formula.

\note A Formula instance is created in an invalid state (m_isValid is false).
Only when all the relevant data have been set and the Formula is validated
explicitly (\l{validate()}), the user of the Formula knows it is valid or not.
*/

/*!
    \enum MsXpS::libXpertMassCore::Formula::SplitResult

    This enum type specifies the result of an action-formula parsing process:

    \value NOT_SET
           The value was not set
    \value FAILURE
           The splitting work failed
    \value HAS_PLUS_COMPONENT
           The action formula has a plus component
    \value HAS_MINUS_COMPONENT
           The action formula has a minus component
    \value HAS_BOTH_COMPONENTS
           The action formula has both plus and minus components
*/


/*!
  \variable MsXpS::libXpertMassCore::Formula::m_title

  \brief String representing the title of the action-formula.

  The title is the descriptive string in double quotes that is
  associated to a formula,  like this:

  \c "Acetylation"-H2O+CH3COOH
*/

/*!
  \variable MsXpS::libXpertMassCore::Formula::m_actionFormula

  \brief String representing the action-formula.
*/

/*!
  \variable MsXpS::libXpertMassCore::Formula::m_plusFormula

  \brief String representing the "plus" component of the main m_actionFormula.

  This member datum is set upon parsing of m_actionFormula.
*/

/*!
  \variable MsXpS::libXpertMassCore::Formula::m_minusFormula

  \brief String representing the "minus" component of the main m_actionFormula.

  This member datum is set upon parsing of m_actionFormula.
*/

/*!
  \variable MsXpS::libXpertMassCore::Formula::m_symbolCountMap

  \brief Map relating the symbols (as keys) found in the formula and their
counts (atoms, in fact, as values).

  Note that the count value type is double, which allows for interesting
things to be done with Formula. Also, the count value might be negative if the
net mass of an action-formula is negative.

  \sa Formula::splitActionParts()
*/

/*!
  \variable int MsXpS::libXpertMassCore::Formula::m_forceCountIndex

  \brief The m_forceCountIndex tells if when defining a chemical composition
formula, the index '1' is required when the count of a symbol is not specified
and thus considered to be '1' by default. If true, water should be described as
"H2O1", if false, it might be described as "H2O".
*/


/*!
  \variable MsXpS::libXpertMassCore::Formula::m_isValid

  \brief The status of the formula.
 */


/*!
\brief Constructs a Formula instance having \a parent as its QObject parent.

The constructor body is empty: no member datum is set explicitly here.
*/
Formula::Formula(QObject *parent)
  : QObject(parent)
{
}

/*!
\brief Constructs a Formula instance by rendering the XML \a element according
to the \a version. \a parent is passed to the QObject base class.

If renderXmlFormulaElement() reports a failure, a critical message is issued.
The instance is constructed in any case.
*/
Formula::Formula(const QDomElement &element, int version, QObject *parent)
  : QObject(parent)
{
  if(renderXmlFormulaElement(element, version))
    return;

  qCritical()
    << "Failed rendering XML element for construction of Formula instance.";
}

/*!
   \brief Constructs a formula initialized with the \a formula_string
action-formula string.

  \a formula_string needs not be an action-formula, but it might be one. It is
copied into \c m_actionFormula, then:

  \list
  \li the title returned by removeTitle() is stored in \c m_title;
  \li removeSpaces() is called;
  \li a critical message is issued if the resulting action-formula is empty or
  if it does not pass the checkSyntax() test.
  \endlist

The status of the Formula (m_isValid) is not modified by this constructor
because the formula cannot be validated without reference isotopic data.
*/
Formula::Formula(const QString &formula_string, QObject *parent)
  : QObject(parent), m_actionFormula{formula_string}
{
  // The title, if any, is taken out of the action-formula and kept apart.
  m_title = removeTitle();

  removeSpaces();

  if(m_actionFormula.isEmpty())
    qCritical() << "Formula created empty.";

  const bool syntax_is_ok = checkSyntax();

  if(!syntax_is_ok)
    qCritical() << "Formula constructed with an action formula that does not "
                   "pass the checkSyntax test.";

  //  m_isValid is left untouched: no validation against IsotopicData was
  //  performed here.
}

/*!
   \brief Constructs a formula as a copy of \a other, with \a parent as its
QObject parent.

   The title, the action-formula, the plus- and minus-formulas, the
symbol/count map, the force-count-index flag and the status are copied from
\a other.

   After the copy, removeTitle() and removeSpaces() are called and the syntax
is checked. If a title was still found in the action-formula, if the
action-formula is empty or if it does not pass the checkSyntax() test, a
critical message is issued and the status (m_isValid) is set to false.
*/
Formula::Formula(const Formula &other, QObject *parent)
  : QObject(parent),
    m_title(other.m_title),
    m_actionFormula(other.m_actionFormula),
    m_plusFormula(other.m_plusFormula),
    m_minusFormula(other.m_minusFormula),
    m_symbolCountMap(other.m_symbolCountMap),
    m_isValid(other.m_isValid)
{
  //  checkSyntax() below reads m_forceCountIndex: it must be the one of
  //  other, otherwise the copy would be checked with a different rule than
  //  the original. It is assigned here rather than in the initializer list so
  //  that the member declaration order does not matter.
  m_forceCountIndex = other.m_forceCountIndex;

  // GCOV_EXCL_START
  //  Sanity check
  if(!removeTitle().isEmpty())
    {
      qCritical() << "The formula string still has a title.";
      m_isValid = false;
    }
  // GCOV_EXCL_STOP

  removeSpaces();

  if(m_actionFormula.isEmpty())
    {
      qCritical()
        << "Copy-constructed Formula created with empty action-formula.";
      m_isValid = false;
    }

  if(!checkSyntax())
    {
      qCritical()
        << "Formula constructed with an action formula that does not pass the "
           "checkSyntax test.";
      m_isValid = false;
    }
}

/*!
   \brief Destructs this formula.

   There is nothing to be deleted explicitly.
*/
Formula::~Formula()
{
  // Nothing to release by hand.
}

/*!
\brief Returns a newly allocated Formula, constructed with \a parent as its
parent and then initialized with \a other using initialize().
*/
Formula *
Formula::clone(const Formula &other, QObject *parent)
{
  auto *new_formula_p = new Formula(parent);

  new_formula_p->initialize(other);

  return new_formula_p;
}

/*!
\brief Initializes this Formula using \a other and returns a reference to this
Formula.

If \a other is this Formula, nothing is done. Otherwise, the title, the
action-formula, the plus- and minus-formulas, the symbol/count map, the
force-count-index flag and the status are copied from \a other.

After the copy, removeTitle() and removeSpaces() are called and the syntax is
checked. If a title was still found in the action-formula, if the
action-formula is empty or if it does not pass the checkSyntax() test, a
critical message is issued and the status (m_isValid) is set to false.
*/
Formula &
Formula::initialize(const Formula &other)
{
  if(&other == this)
    return *this;

  m_title          = other.m_title;
  m_actionFormula  = other.m_actionFormula;
  m_plusFormula    = other.m_plusFormula;
  m_minusFormula   = other.m_minusFormula;
  m_symbolCountMap = other.m_symbolCountMap;
  //  checkSyntax() below reads m_forceCountIndex: use the rule of other so
  //  that the copied action-formula is checked like the original one.
  m_forceCountIndex = other.m_forceCountIndex;
  m_isValid         = other.m_isValid;

  // GCOV_EXCL_START
  //  Sanity check
  if(!removeTitle().isEmpty())
    {
      qCritical() << "The formula string still has a title.";
      m_isValid = false;
    }
  // GCOV_EXCL_STOP

  removeSpaces();

  if(m_actionFormula.isEmpty())
    {
      qCritical() << "Formula created empty.";
      m_isValid = false;
    }

  if(!checkSyntax())
    {
      qCritical() << "Formula set to an action formula that does not pass the "
                     "checkSyntax test.";
      m_isValid = false;
    }

  return *this;
}

//////////////// THE ACTIONFORMULA /////////////////////
/*! Sets the action-formula \a formula to this Formula.

The \a formula is copied to this m_actionFormula. The title returned by
removeTitle() is stored in m_title and removeSpaces() is called.

A critical message is issued if the resulting action-formula is empty or if it
does not pass the checkSyntax() test, but the action-formula is kept as is.

Since the new formula was not validated, the status of this formula
(m_isValid) is set to false.
*/
void
Formula::setActionFormula(const QString &formula)
{
  m_actionFormula = formula;

  // The title, if any, is taken out of the action-formula and kept apart.
  m_title = removeTitle();

  removeSpaces();

  if(m_actionFormula.isEmpty())
    qCritical() << "Formula set to an empty string.";

  const bool syntax_is_ok = checkSyntax();

  if(!syntax_is_ok)
    {
      qCritical() << "Formula set to an action formula that does not pass the "
                     "checkSyntax test.";
    }

  // Whatever the outcome of the checks above, no validation was performed.
  m_isValid = false;
}

/*! Sets the action-formula from \a formula to this Formula.

  The action-formula from \a formula is copied to this m_actionFormula. Since
  the new formula was not validated,  the status of this formula (m_isValid) is
  set to false. No other processing is performed afterwards.
*/
void
Formula::setActionFormula(const Formula &formula)
{
  m_actionFormula = formula.m_actionFormula;
  m_title         = removeTitle();

  if(m_actionFormula.isEmpty())
    {
      qCritical() << "Formula set to an empty string.";
    }

  if(!checkSyntax())
    qCritical() << "Formula set to an action formula that does not pass the "
                   "checkSyntax test.";

  m_isValid = false;
}

/*!
  \brief Appends to this formula the \a action_formula.

  The \a action_formula string is first stripped of its whitespace with
  QString::simplified(). Then it is appended to the m_actionFormula only if:

   \list

   \li It is not empty (if empty the function does nothing and returns false)

   \li It passes the checkSyntax() test (if not, the function does nothing and
  returns false)

  \endlist

  The status of this Formula is set to false because there was not explicit
  validation done.

   The function returns false if nothing was changed and true if something was
  actually appended to m_actionFormula.
*/
bool
Formula::appendActionFormula(const QString &action_formula)
{
  QString local_action_formula = action_formula.simplified();

  if(local_action_formula.isEmpty())
    return false;

  // Remove the spaces before appending.
  local_action_formula.remove(QRegularExpression("\\s+"));

  // Removes the title from the action-formula
  Formula temp_formula(local_action_formula);

  if(!temp_formula.checkSyntax())
    {
      qCritical()
        << "Cannot append formula that does not pass the checkSyntax test.";

      return false;
    }
  else
    {
      //  We append the action-formula stripped of its potential title.
      m_actionFormula.append(temp_formula.getActionFormula());
    }

  //  The formula has not been validated formally.
  m_isValid = false;

  return true;
}

/*!
\brief Returns the action-formula, prefixed with the title enclosed in double
quotes if \a with_title is true and the title is not empty.

In all the other cases, the member action-formula is returned alone.
 */
QString
Formula::getActionFormula(bool with_title) const
{
  const bool prepend_title = with_title && !m_title.isEmpty();

  if(!prepend_title)
    return m_actionFormula;

  // "<title>"<action-formula>
  return QString("\"%1\"%2").arg(m_title).arg(m_actionFormula);
}

//////////////// THE TITLE /////////////////////
/*!
\brief Sets the title leading component of a formula to \a title.

A fully self-described formula might look like this:

"Acetylation"+CH3COOH-H2O

The first string between double quotes is called the title.
*/
void
Formula::setTitle(const QString &title)
{
  m_title = title;
}

/*!
\brief Returns the title of this formula (m_title).

The title is the string enclosed in double quotes that may lead a fully
self-described formula, like "Acetylation" in:

"Acetylation"+CH3COOH-H2O
 */
QString
Formula::getTitle() const
{
  return this->m_title;
}

/*!
\brief Returns the title found in the member action-formula.

The \e{title} of a formula is the string, enclosed in double quotes, that is
located in front of the actual chemical action-formula. It is searched for at
the start of the member action-formula using a QRegularExpression and returned
without the enclosing double quotes.

The member action-formula is not modified. If no title is found, an empty
string is returned.

\sa Formula::removeTitle()
 */
QString
Formula::extractTitle() const
{
  const QRegularExpression title_regexp("^\"(.*)\"");

  const QRegularExpressionMatch title_match =
    title_regexp.match(m_actionFormula);

  // The first capture group is the text between the double quotes.
  return title_match.hasMatch() ? title_match.captured(1) : QString();
}

/*!
\brief Removes the title from m_actionFormula and returns it.

The \e{title} of a formula is the string, enclosed in double quotes, that is
located in front of the actual chemical action-formula. This function removes
that \e{title} string, along with its enclosing double quotes, from the member
action-formula using a QRegularExpression.

The quoted segment is removed even if the title itself is empty (two
consecutive double quotes at the start of the action-formula), in which case
the returned string is empty.

The caller may use the returned title string to set it to m_title.

\sa Formula::extractTitle(), Formula::setTitle()
 */
QString
Formula::removeTitle()
{
  const QString title = extractTitle();

  //  The removal is not conditioned on the title being non-empty: an empty
  //  title ("") must not be left in the action-formula. If there is no quoted
  //  segment at the start, the regular expression does not match and nothing
  //  is removed.
  const QRegularExpression regexp("^(\".*\")");
  m_actionFormula.remove(regexp);

  return title;
}

//////////////// THE ATOM INDEX LOGIC /////////////////////

/*!
\brief Sets m_forceCountIndex to \a forceCountIndex.

When a formula contains a chemical element in a single copy, it is standard
practice to omit the count index: H2O is the same as H2O1. If forceCountIndex is
true, then the formula has to be in the form H2O1. This is required for some
specific calculations. The status (m_isValid) is set to false.
*/
void
Formula::setForceCountIndex(bool forceCountIndex)
{
  m_forceCountIndex = forceCountIndex;
}

/*!
\brief Returns true if single atoms should have a count index, false otherwise.

The force count index is set to true,  when a formula needs to have atom indices
set even if the count for these atoms is 1. For example,  H2O1 as compared to
H2O.
*/
bool
Formula::isForceCountIndex() const
{
  return m_forceCountIndex;
}

//////////////// THE SYNTAX CHECKING LOGIC /////////////////////

#if 0

Old version that parsed the action-formula char by char.
bool
Formula::checkSyntax(const QString &formula, bool forceCountIndex)
{
  // Static function.

  // qDebug() << "Checking syntax with formula:" << formula;

  QChar curChar;

  bool gotUpper = false;
  bool wasSign  = false;
  bool wasDigit = false;

  // Because the formula that we are analyzing might contain a title
  // and spaces , we first remove these. But make a local copy of
  // the member datum.

  QString localFormula = formula;

  // One formula can be like this:

  // "Decomposed adenine" C5H4N5 +H

  // The "Decomposed adenine" is the title
  // The C5H4N5 +H is the formula.

  localFormula.remove(QRegularExpression("\".*\""));

  // We want to remove all the possibly-existing spaces.

  localFormula.remove(QRegularExpression("\\s+"));


  for(int iter = 0; iter < localFormula.length(); ++iter)
    {
      curChar = localFormula.at(iter);

      // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
      //<< "Current character:" << curChar;

      // FIXME One improvement that would ease modelling the Averagine would
      // be to silently allow double formula indices (that is, double atom
      // counts). They would not be compulsory

      if(curChar.category() == QChar::Number_DecimalDigit)
        {
          // We are parsing a digit.

          // We may not have a digit after a +/- sign.
          if(wasSign)
            return false;

          wasSign  = false;
          wasDigit = true;

          continue;
        }
      else if(curChar.category() == QChar::Letter_Lowercase)
        {
          // Current character is lowercase, which means we are inside
          // of an atom symbol, such as Ca(the 'a') or Nob(either
          // 'o' or 'b'). Thus, gotUpper should be true !

          if(!gotUpper)
            return false;

          // We may not have a lowercase character after a +/- sign.
          if(wasSign)
            return false;

          // Let the people know that we have parsed a lowercase char
          // and not a digit.
          wasSign = false;

          wasDigit = false;
        }
      else if(curChar.category() == QChar::Letter_Uppercase)
        {
          // Current character is uppercase, which means that we are
          // at the beginning of an atom symbol.

          // There are two cases:
          // 1. We are starting for the very beginning of the formula, and
          // nothing came before this upper case character. That's fine.
          // 2. We had previously parsed a segment of the formula, and in this
          // case, we are closing a segment. If the parameter
          // obligatoryCountIndex is true, then we need to ensure that the
          // previous element had an associated number, even it the count
          // element is 1. This is required for the IsoSpec stuff in the gui
          // programs.

          if(iter > 0)
            {
              if(forceCountIndex)
                {
                  if(!wasDigit)
                    {
                      qDebug()
                        << "Returning false because upper case char was not"
                           "preceded by digit while not at the first char of "
                           "the formula";

                      return false;
                    }
                }
            }

          // Let the people know what we got:

          wasSign  = false;
          gotUpper = true;
          wasDigit = false;
        }
      else
        {
          if(curChar != '+' && curChar != '-')
            return false;
          else
            {
              // We may not have 2 +/- signs in a row.
              if(wasSign)
                return false;
            }

          wasSign  = true;
          gotUpper = false;
          wasDigit = false;
        }
    }
  // end for (int iter = 0 ; iter < localFormula.length() ; ++iter)

  // Note that if we want an obligatory count index, then, at the end of the
  // formula, *compulsorily* we must have parsed a digit.

  if(forceCountIndex && !wasDigit)
    {
      qDebug()
        << "Returning false because the formula does not end with a digit.";

      return false;
    }

  // At this point we found no error condition.
  return true;
}
#endif

/*!
  \brief Returns true if the member action-formula is syntactically valid, false
otherwise.

  \sa checkSyntax(const QString &formula, bool force_count_index)
*/
bool
Formula::checkSyntax() const
{
  // The default formula is always m_actionFormula.

  return checkSyntax(m_actionFormula, m_forceCountIndex);
}

/*!
\brief Returns true if the \a formula_string action-formula is syntactically
valid, false otherwise.

If \a force_count_index is true, the syntax check accounts for the
requirement that all the symbols in the formula must be indexed, even if that
symbol's count is 1. This means that H2O would not pass the check, while H2O1
would.

The formula is first stripped of its title (if any), then all the spaces are
removed.

MsXpS::libXpertMassCore::Formula::subFormulaRegExp is then used to extract each
"plus" and / or "minus" component while checking its syntactic validity.

\note The syntax checking code does not verify that the action-formula is
chemically valid, that is, the "Cz4" symbol / count pair would check even if
the Cz chemical element does not exist.

\sa validate()
*/
bool
Formula::checkSyntax(const QString &formula_string, bool force_count_index)
{
  // qDebug() << "Checking syntax of formula string" << formula_string;

  // Because the formula that we are analyzing might contain a title
  // and spaces , we first remove these. But make a local copy of
  // the member datum.

  QString local_formula_string = formula_string;

  // One formula can be like this:

  // "Decomposed adenine" C5H4N5 +H

  // The "Decomposed adenine" is the title
  // The C5H4N5 +H is the formula.

  local_formula_string.remove(QRegularExpression("\".*\""));

  // We want to remove all the possibly-existing spaces.

  local_formula_string.remove(QRegularExpression("\\s+"));

  if(local_formula_string.isEmpty())
    {
      qWarning() << "The action formula string is empty.";
      return false;
    }

  // qDebug() << "The formula is:" << local_formula_string;

  // The raw formula might include:
  // +/- sign before the symbol
  // then the symbol (one uppercase any lowercase)
  // then the count as an integer or a double.

  // Attention, the regular expression logic below works by finding
  // patterns that match the regexp, BUT that does not means that
  // spurious substrings at the beginning or at the end cannot be
  // present and go undetected, like this: "3Cz3H12O6N14L2", where
  // the '3' on the left is not seen.

  // We thus need to first ensure that the string never begins with
  // something else than a +/- sign (optionally) and a letter (uppercase).
  QRegularExpression start_of_formula("^[+-]?[A-Z]");

  QRegularExpressionMatch match = start_of_formula.match(local_formula_string);
  if(!match.hasMatch())
    {
      qCritical() << "Error at start of formula string" << formula_string;

      return false;
    }

  // Like wise for formulas that do not end either by [A-Z] or [a-z] or \\d.
  QRegularExpression end_of_formula("[A-Za-z\\d]$");
  match = end_of_formula.match(local_formula_string);
  if(!match.hasMatch())
    {
      qCritical() << "Error at end of formula string.";

      return false;
    }

  for(const QRegularExpressionMatch &match :
      Utils::subFormulaRegExp.globalMatch(local_formula_string))
    {
      QString full_match = match.captured(0);

      // qDebug() << "The full sub-match:" << full_match;

      QString sign         = match.captured(1);
      QString symbol       = match.captured(2);
      QString count_string = match.captured(3);

      if(!count_string.isEmpty())
        {
          bool ok = false;
          // Verify that it correctly converts to double.
          count_string.toDouble(&ok);
          if(!ok)
            return false;
        }
      else
        {
          if(force_count_index)
            {
              qCritical() << "Error: symbol" << symbol << "has no index.";

              return false;
            }
          else
            {
              // qDebug() << "Symbol" << symbol
              //          << "has no index but that is tolerated.";
            }
        }

      // qDebug() << "Sign:" << match.captured(1) << "Symbol:" <<
      // match.captured(2)
      //          << "Count:" << match.captured(3);
    }

  // qDebug() << "Returning true";

  // qDebug() << "Checked syntax of formula " << formula_string
  //          << "returning true";

  return true;
}

//////////////// OPERATORS /////////////////////
/*!
  \brief Initializes all the member data of this formula by copying to it the
data from \a other and returns a reference to this formula.

  If \a other is this formula, nothing is done. Otherwise, the title, the
action-formula, the plus- and minus-formulas, the symbol/count map, the
force-count-index flag and the status are copied from \a other.

  After the copy, removeTitle() and removeSpaces() are called and the syntax
is checked. If a title was still found in the action-formula, if the
action-formula is empty or if it does not pass the checkSyntax() test, a
critical message is issued and the status (m_isValid) is set to false.
*/
Formula &
Formula::operator=(const Formula &other)
{
  if(&other == this)
    return *this;

  m_title          = other.m_title;
  m_actionFormula  = other.m_actionFormula;
  m_plusFormula    = other.m_plusFormula;
  m_minusFormula   = other.m_minusFormula;
  m_symbolCountMap = other.m_symbolCountMap;
  //  checkSyntax() below reads m_forceCountIndex: use the rule of other so
  //  that the copied action-formula is checked like the original one.
  m_forceCountIndex = other.m_forceCountIndex;
  m_isValid         = other.m_isValid;

  // GCOV_EXCL_START
  //  Sanity check
  if(!removeTitle().isEmpty())
    {
      qCritical() << "The formula string still has a title.";
      m_isValid = false;
    }
  // GCOV_EXCL_STOP

  removeSpaces();

  if(m_actionFormula.isEmpty())
    {
      qCritical() << "Formula created empty.";
      m_isValid = false;
    }

  if(!checkSyntax())
    {
      qCritical() << "Formula set to an action formula that does not pass the "
                     "checkSyntax test.";
      m_isValid = false;
    }

  return *this;
}

/*! Returns true if this Formula and \a other are identical, false otherwise.

  Two formulas are identical if they are the same object or if both their
titles and their action-formulas are equal. The other member data, that
actually derive from the processing of the action-formula, are not compared.
*/
bool
Formula::operator==(const Formula &other) const
{
  if(this == &other)
    return true;

  const bool same_title          = (m_title == other.m_title);
  const bool same_action_formula = (m_actionFormula == other.m_actionFormula);

  return same_title && same_action_formula;
}

/*! Returns true if this Formula and \a other are different, false otherwise.

If \a other is this Formula, returns false. Otherwise, returns the negated
result of operator==().
*/
bool
Formula::operator!=(const Formula &other) const
{
  const bool is_same_object = (this == &other);

  return is_same_object ? false : !operator==(other);
}

//////////////// THE SUB-FORMULAS OPERATIONS /////////////////////


/*!
  \brief Tells the "plus" ('+') and "minus" ('-') parts in the member
action-formula.

  Parses the m_actionFormula action-formula and separates all the minus
components of that action-formula from all the plus components. The different
components are set to \a plus_formula and \a minus_formula.

  At the end of the split work, each sub-formula (\a plus_formula and \a
minus_formula) is actually parsed for validity, using the \a isotopic_data_csp
IsotopicData as reference.

  If \a times is not 1, then the accounting of the plus/minus formulas is
  compounded by this factor.

  If \a store is true, the symbol/count data obtained while
  parsing of the plus/minus action-formula components are stored in
\a symbol_count_map.

  If \a reset is true, the symbol/count data in
\a symbol_count_map are reset before the parsing operation. Setting this
parameter to false may be useful if the caller needs to "accumulate" the
accounting of the formulas.

The parsing of the action-formula is performed by performing its deconstruction
using \l{Utils::subFormulaRegExp}.

Returns SplitResult::FAILURE if the splitting failed,
SplitResult::HAS_PLUS_COMPONENT if at least one of the components of the
action-formula was found to be of type plus,
SplitResult::HAS_MINUS_COMPONENT if at least one of the components of the
action-formula was found to be of type minus. The result can be an OR'ing of
both values (SplitResult::HAS_BOTH_COMPONENTS) in the m_actionFormula
action-formula.

Because this function writes the results of the computations to the passed
variables and only updates the m_isValid status member, it is declared const.

If this function completes successfully,  then that validates it successfully
(syntax ok,  and symbols known to the reference isotopic data), and m_isValid
is set to true, otherwise m_isValid is set to false.
*/
Formula::SplitResult
Formula::splitActionParts(IsotopicDataCstSPtr isotopic_data_csp,
                          QString &plus_formula,
                          QString &minus_formula,
                          std::map<QString, double> &symbol_count_map,
                          double times,
                          bool store,
                          bool reset) const
{
  if(isotopic_data_csp == nullptr || isotopic_data_csp.get() == nullptr ||
     !isotopic_data_csp->size())
    {
      qCritical(
        "Cannot split action parts of Formula without available IsotopicData.");

      m_isValid = false;
      return SplitResult::FAILURE;
    }

  SplitResult formula_split_result = SplitResult::NOT_SET;

  // We are asked to put all the '+' components of the formula
  // into corresponding formula and the same for the '-' components.

  plus_formula.clear();
  minus_formula.clear();

  if(reset)
    symbol_count_map.clear();

#if 0

  // This old version tried to save computing work, but it then anyways
  // calls for parsing of the formula which is the most computing-intensive
  // part. Thus we now rely on the regular expression to simultaneously
// check the syntax, divide the formula into its '+' and '-' parts,
  // and finally check that the symbol is known to the isotopic data.
  ^
  // If the formula does not contain any '-' character, then we
  // can approximate that all the formula is a '+' formula, that is, a
  // plusFormula:

  if(actions() == '+')
    {
      // qDebug() << "Only plus actions.";

      plus_formula.append(formula);

      // At this point we want to make sure that we have a correct
      // formula. Remove all the occurrences of the '+' sign.
      plus_formula.replace(QString("+"), QString(""));

      if(plus_formula.length() > 0)
        {
          // qDebug() << "splitActionParts: with plus_formula:" <<
          // plus_formula;

          if(!parse(isotopic_data_csp, plus_formula, times, store, reset))
            return FormulaSplitResult::FAILURE;
          else
            return FORMULA_SPLIT_PLUS;
        }
    }
  // End of
  // if(actions() == '+')

  // If we did not return at previous block that means there are at least one
  // '-' component in the formula. we truly have to iterate in the formula...
#endif


  // See the explanations in the header file for the member datum
  // m_subFormulaRegExp and its use with globalMatch(). One thing that is
  // important to see, is that the RegExp matches a triad : [ [sign or not]
  // [symbol] [count] ], so, if we have, says, formula "-H2O", it would match:

  // First '-' 'H' '2'
  // Second <no sign> 'O' <no count = 1>

  // The problem is that at second match, the algo thinks that O1 is a +
  // formula, while in fact it is part of a larger minus formula: -H2O. So we
  // need to check if, after a '-' in a formula, there comes or not a '+'. If so
  // we close the minus formula and start a plus formula, if not, we continue
  // adding matches to the minus formula.

  // qDebug() << "Now regex parsing with globalMatch feature of formula:"
  //          << formula;

  bool was_minus_formula = false;

  Utils utils;

  for(const QRegularExpressionMatch &match :
      Utils::subFormulaRegExp.globalMatch(m_actionFormula))
    {
      QString sub_match = match.captured(0);

      // qDebug() << "Entering [+-]?<symbol><count?> sub-match:" << sub_match;

      QString sign            = match.captured(1);
      QString symbol          = match.captured(2);
      QString count_as_string = match.captured(3);
      double count_as_double  = 1.0;

      if(!count_as_string.isEmpty())
        {
          bool ok         = false;
          count_as_double = count_as_string.toDouble(&ok);
          if(!ok)
            {
              m_isValid = false;
              return SplitResult::FAILURE;
            }
        }
      else
        {
          count_as_string = "1";
        }

      // Check that the symbol is known to the isotopic data.
      // qDebug() << "The symbol:" << symbol << "with count:" <<
      // count_as_double;
      int count = 0;
      if(!isotopic_data_csp->containsSymbol(symbol, count))
        {
          m_isValid = false;
          return SplitResult::FAILURE;
        }

      // Determine if there was a sign
      if(sign == "-")
        {
          // qDebug() << "Appending found minus formula:"
          //          << QString("%1%2").arg(symbol).arg(count_as_string);

          minus_formula.append(
            QString("%1%2").arg(symbol).arg(count_as_string));

          formula_split_result |= SplitResult::HAS_MINUS_COMPONENT;

          if(store)
            {
              // qDebug() << "Accounting symbol / count pair:" << symbol << "/"
              //          << count_as_double * static_cast<double>(times);

              accountSymbolCountPair(symbol_count_map,
                                     symbol,
                                     -1 * count_as_double *
                                       static_cast<double>(times));

              // qDebug() << " ...done.";
            }

          // Let next round know that we are inside a minus formula group.
          was_minus_formula = true;
        }
      else if(sign.isEmpty() && was_minus_formula)
        {
          // qDebug() << "Appending new unsigned formula to the minus formula:"
          //          << QString("%1%2").arg(symbol).arg(count_as_string);

          minus_formula.append(
            QString("%1%2").arg(symbol).arg(count_as_string));

          if(store)
            {
              // qDebug() << "Accounting symbol / count pair:" << symbol << "/"
              //          << count_as_double * static_cast<double>(times);

              accountSymbolCountPair(symbol_count_map,
                                     symbol,
                                     -1 * count_as_double *
                                       static_cast<double>(times));

              // qDebug() << " ...done.";
            }

          // Let next round know that we are still inside a minus formula group.
          was_minus_formula = true;
        }
      else
        // Either there was a '+' sign or there was no sign, but
        // we were not continuing a minus formula, thus we are parsing
        // a true '+' formula.
        {
          // qDebug() << "Appending found plus formula:"
          //          << QString("%1%2").arg(symbol).arg(count_as_string);

          plus_formula.append(QString("%1%2").arg(symbol).arg(count_as_string));

          formula_split_result |= SplitResult::HAS_PLUS_COMPONENT;

          if(store)
            {
              // qDebug() << "Accounting symbol / count pair:" << symbol << "/"
              //          << count_as_double * static_cast<double>(times);

              accountSymbolCountPair(symbol_count_map,
                                     symbol,
                                     count_as_double *
                                       static_cast<double>(times));

              // qDebug() << " ...done.";
            }

          was_minus_formula = false;
        }
    }

  // qDebug() << "Formula" << formula << "splits"
  //<< "(+)" << plus_formula << "(-)" << minus_formula;

  m_isValid = true;

  return formula_split_result;
}

/*!
\brief Tells the "plus" ('+') and "minus" ('-') parts in the member
actionformula.

Parses the m_actionFormula action-formula and separates all the minus
components of that action-formula from all the plus components. The different
components are set to their corresponding formula (m_minusFormula and
m_plusFormula).

This function delegates its work to the other splitActionParts() overload,
passing the members m_plusFormula, m_minusFormula and m_symbolCountMap as
arguments, such that this function modifies the content of this very object.

In all the computations above, reference isotopic data are accessed at \a
isotopic_data_csp. If \a times is not 1, then that value is used to compound the
results of the computations. If \a store is true, then the results of the
computations are stored in this object. If \a reset is true, then the
intermediate computation values and objects are reset.
*/
Formula::SplitResult
Formula::splitActionParts(IsotopicDataCstSPtr isotopic_data_csp,
                          double times,
                          bool store,
                          bool reset)
{
  // Convenience overload: run the general splitting routine with this
  // object's own plus / minus formulas and symbol / count map as the
  // destination containers, and report its outcome unchanged.
  const SplitResult split_outcome = splitActionParts(isotopic_data_csp,
                                                     m_plusFormula,
                                                     m_minusFormula,
                                                     m_symbolCountMap,
                                                     times,
                                                     store,
                                                     reset);

  return split_outcome;
}

/*!
  \brief Calls actions(const QString &formula) on this Formula's
action-formula m_actionFormula. Returns '+' if it only contains "plus"
elements or '-' if at least one "minus" element was found.

If m_actionFormula contains no sign at all, then it is considered to contain
only '+' elements and the function returns '+'. If at least one element is found
associated to a '-', then the "minus" action prevails and the function returns
'-'.

  \sa actions(const QString &formula), splitActionParts()
  */
QChar
Formula::actions() const
{
  // Apply the string-based overload to this object's own action-formula.
  const QChar prevailing_action = actions(m_actionFormula);

  return prevailing_action;
}

/*!
  \brief Returns '+' if \a formula only contains "plus" elements or '-'
if at least one "minus" element was found.

  If \a formula contains no sign at all, then it is considered to contain only
  '+' elements and the function returns '+'. If at least one element is found
  associated to a '-', then the "minus" action prevails and the function
returns '-'.

  \sa actions(), splitActionParts()
  */
QChar
Formula::actions(const QString &formula)
{
  // A single '-' anywhere in the text is enough for the "minus" action to
  // prevail, so test for presence instead of counting every occurrence
  // (the count was formerly held in a double and compared against 0).
  // A text without any '-' (including an empty one) is reported as '+'.
  return formula.contains('-') ? '-' : '+';
}

/*!
  \brief Returns true if the member "minus" formula component is not empty,
false otherwise.
*/
bool
Formula::hasNetMinusPart()
{
  // The "minus" member formula is filled by splitActionParts(); any content
  // in it means that the action-formula has a "minus" part.
  return !m_minusFormula.isEmpty();
}

/*!
  \brief Returns the m_plusFormula formula.
*/
QString
Formula::getPlusFormula() const
{
  // Hand out a copy of the "plus" member formula, which is filled by
  // splitActionParts().
  return this->m_plusFormula;
}

/* ! No doc
  \brief Returns the m_minusFormula formula.
*/
QString
Formula::getMinusFormula() const
{
  // Hand out a copy of the "minus" member formula, which is filled by
  // splitActionParts().
  return this->m_minusFormula;
}

//////////////// VALIDATIONS /////////////////////
/*!
\brief Returns true if the formula validates successfully, false otherwise.

The validation uses the \a isotopic_data_csp  reference data and involves:

\list
\li Checking that the member action-formula is not empty and has a proper
syntax. Returns false otherwise;

\li Splitting the action-formula into its plus_formula and minus_formula
components using \l{splitActionParts()}. If that step fails, returns false;

\li Verifying that the plus_formula and the minus_formula are not both
empty. Returns false otherwise.
\endlist

If errors are encountered, meaningful messages are stored in \a error_list_p
(which is not cleared).

If the validation is successful,  m_isValid is set to true,  otherwise it is set
to false.
*/
bool
Formula::validate(IsotopicDataCstSPtr isotopic_data_csp,
                  ErrorList *error_list_p) const
{
  // Validates the member action-formula without modifying the member
  // plus / minus formulas or the member symbol / count map: the splitting is
  // performed into local containers. Only m_isValid is updated.
  //
  // Every failed check is logged and appended to *error_list_p; the checks
  // are not short-circuited, so a single call may append several messages.
  // Returns true only if this call appended no message.

  // The messages are optional for the caller: a null error_list_p used to be
  // dereferenced right away. Collect the messages in a local list in that
  // case so that the validation itself still runs.
  ErrorList local_error_list;
  if(error_list_p == nullptr)
    error_list_p = &local_error_list;

  // Remember how many messages were there on entry: the list is not cleared
  // and only the messages added here count against this Formula.
  const qsizetype initial_error_count = error_list_p->size();

  if(isotopic_data_csp == nullptr || !isotopic_data_csp->size())
    {
      qCritical()
        << "Cannot validate a Formula if the isotopic data are unavailable.";
      error_list_p->push_back(
        "Cannot validate a Formula if the isotopic data are unavailable");
    }

  if(m_actionFormula.isEmpty())
    {
      qCritical() << "Cannot validate a Formula that is empty.";
      error_list_p->push_back("Cannot validate a Formula that is empty");
    }

  if(!checkSyntax())
    {
      qCritical()
        << "Cannot validate a Formula that fails the syntax check test.";
      error_list_p->push_back(
        "Cannot validate a Formula that fails the syntax check test");
    }

  // Split into local containers so that the member data stay untouched.
  // splitActionParts() itself reports FAILURE when the isotopic data are
  // unavailable, so calling it after a failed check above is safe.
  QString plus_formula;
  QString minus_formula;
  std::map<QString, double> symbol_count_map;

  const SplitResult split_result = splitActionParts(isotopic_data_csp,
                                                    plus_formula,
                                                    minus_formula,
                                                    symbol_count_map,
                                                    1,
                                                    /*store*/ false,
                                                    /*reset*/ true);

  if(split_result == SplitResult::FAILURE)
    {
      qCritical() << "Failed splitting the Formula.";
      error_list_p->push_back("Failed splitting the Formula");
    }

  // At least one of the two action formulas must have some content.
  if(plus_formula.isEmpty() && minus_formula.isEmpty())
    {
      qCritical() << "Both the plus and minus formulas are empty.";
      error_list_p->push_back("Both the plus and minus formulas are empty");
    }

  // If we added errors, then that means that the Formula was not valid.
  m_isValid = (error_list_p->size() <= initial_error_count);

  return m_isValid;
}

/*!
\brief Returns true if the formula validates successfully, false otherwise.

See the other validation function for the validation logic.

This function allows to store in member data the results of the validation
process if \a store is set to true. If \a reset is true,  the member data are
first cleared.

If the validation is successful,  m_isValid is set to true,  otherwise it is set
to false.
*/
bool
Formula::validate(IsotopicDataCstSPtr isotopic_data_csp,
                  bool store,
                  bool reset,
                  ErrorList *error_list_p)
{
  // Validates the member action-formula, splitting it directly into the
  // member plus / minus formulas and the member symbol / count map. The
  // store and reset flags are forwarded as is to splitActionParts().
  //
  // Every failed check is logged and appended to *error_list_p; the checks
  // are not short-circuited, so a single call may append several messages.
  // Returns true only if this call appended no message.

  // The messages are optional for the caller: a null error_list_p used to be
  // dereferenced right away. Collect the messages in a local list in that
  // case so that the validation itself still runs.
  ErrorList local_error_list;
  if(error_list_p == nullptr)
    error_list_p = &local_error_list;

  // Remember how many messages were there on entry: the list is not cleared
  // and only the messages added here count against this Formula.
  const qsizetype initial_error_count = error_list_p->size();

  if(isotopic_data_csp == nullptr || !isotopic_data_csp->size())
    {
      qCritical()
        << "Cannot validate a Formula if the isotopic data are unavailable.";
      error_list_p->push_back(
        "Cannot validate a Formula if the isotopic data are unavailable");
    }

  if(m_actionFormula.isEmpty())
    {
      qCritical() << "Cannot validate a Formula that is empty.";
      error_list_p->push_back("Cannot validate a Formula that is empty");
    }

  if(!checkSyntax())
    {
      qCritical()
        << "Cannot validate a Formula that fails the syntax check test.";
      error_list_p->push_back(
        "Cannot validate a Formula that fails the syntax check test");
    }

  // splitActionParts() itself reports FAILURE when the isotopic data are
  // unavailable, so calling it after a failed check above is safe.
  const SplitResult split_result = splitActionParts(isotopic_data_csp,
                                                    m_plusFormula,
                                                    m_minusFormula,
                                                    m_symbolCountMap,
                                                    1,
                                                    /*store*/ store,
                                                    /*reset*/ reset);

  if(split_result == SplitResult::FAILURE)
    {
      qCritical() << "Failed splitting the Formula.";
      error_list_p->push_back("Failed splitting the Formula");
    }

  // At least one of the two action formulas must have some content.
  if(m_plusFormula.isEmpty() && m_minusFormula.isEmpty())
    {
      qCritical() << "Both the plus and minus formulas are empty.";
      error_list_p->push_back("Both the plus and minus formulas are empty");
    }

  // If we added errors, then that means that the Formula was not valid.
  m_isValid = (error_list_p->size() <= initial_error_count);

  return m_isValid;
}

/*!
\brief Returns the status of the formula,  that is, the result of validate().
 */
bool
Formula::isValid() const
{
  return m_isValid;
}

//////////////// THE SYMBOLS-COUNT OPERATIONS /////////////////////

/*!
  \brief Returns a const reference to the std::map<QString, double> container
  that relates chemical symbols with corresponding counts.
*/
const std::map<QString, double> &
Formula::getSymbolCountMapCstRef() const
{
  return m_symbolCountMap;
}

/*!
  \brief Returns the count value associated with key \a symbol in the symbol /
count member map m_symbolCountMap.
*/
double
Formula::symbolCount(const QString &symbol) const
{
  // Return the symbol index.

  std::map<QString, double>::const_iterator iter_end = m_symbolCountMap.cend();

  std::map<QString, double>::const_iterator iter =
    m_symbolCountMap.find(symbol);

  if(iter == iter_end)
    return 0;

  return iter->second;
}

/*!
\brief Accounts this Formula's action-formula (m_actionFormula) in the symbol /
count member container (m_symbolCountMap).

Calls splitActionParts() to actually parse m_actionFormula and account its
components to m_symbolCountMap. The accounting of the symbol / count can be
compounded by the \a times factor.

While splitting the "plus" and "minus" components of the action-formula, their
validity is checked against the reference isotopic data \a isotopic_data_csp.

This function is used when processively accounting many different formulas
into the symbol / count map. The formula is set to a new value and this
function is called without resetting the symbol / count map, effectively adding
formulas onto formulas sequentially.

Returns true if no error was encountered, false otherwise.

\sa splitActionParts(), Polymer::elementalComposition
*/
bool
Formula::accountSymbolCounts(IsotopicDataCstSPtr isotopic_data_csp, int times)
{
  // GCOV_EXCL_START
  const bool isotopic_data_unusable = isotopic_data_csp == nullptr ||
                                      isotopic_data_csp.get() == nullptr ||
                                      !isotopic_data_csp->size();
  if(isotopic_data_unusable)
    qFatal("Programming error. The isotopic data pointer cannot be nullptr.");
  // GCOV_EXCL_STOP

  // Parse the member action-formula with storage enabled and WITHOUT reset,
  // so that the symbol / count pairs (compounded by 'times') pile up on top
  // of whatever the member map already holds.
  const bool split_failed =
    (splitActionParts(
       isotopic_data_csp, times, /*store*/ true, /*reset*/ false) ==
     SplitResult::FAILURE);

  if(!split_failed)
    return true;

  m_isValid = false;
  return false;
}

/*!
  \brief Accounts for \a symbol and corresponding \a count in the \a
symbol_count_map map.

  The symbol_count_map relates each atom (chemical element) symbol with its
occurrence count as encountered while parsing the member action-formula.

  If the symbol was not encountered yet, a new key/value pair is created.
Otherwise, the count value is updated.

  Returns the new count status for \a symbol.
*/
double
Formula::accountSymbolCountPair(std::map<QString, double> &symbol_count_map,
                                const QString &symbol,
                                double count) const
{
  // We receive a symbol and we need to account for it count count in the member
  // symbol count map (count might be < 0).

  // Try to insert the new symbol/count pairinto the map. Check if that was done
  // or not. If the result.second is false, then that means the the insert
  // function did not perform because a pair by that symbol existed already. In
  // that case we just need to increment the count for the the pair.

  // qDebug() << "Accounting symbol:" << symbol << "for count:" << count;

  double new_count = 0;

  std::pair<std::map<QString, double>::iterator, bool> res =
    symbol_count_map.insert(std::pair<QString, double>(symbol, count));

  if(!res.second)
    {
      // qDebug() << "The symbol was already in the symbol/count map. with
      // count:"
      //          << res.first->second;

      // One pair by that symbol key existed already, just update the count and
      // store that value for reporting.
      res.first->second += count;
      new_count          = res.first->second;
      // new_count might be <= 0.

      // qDebug() << "For symbol" << symbol << "the new count:" << new_count;
    }
  else
    {
      // qDebug() << "Symbol" << symbol
      //          << "was not there already, setting the count to:" << count;

      // We just effectively added during the insert call above a new pair to
      // the map by key symbol with value count.
      new_count = count;
    }

  // We should check if the symbol has now a count of 0. In that case, we remove
  // the symbol altogether because we do not want to list naught symbols in the
  // final formula.

  if(!new_count)
    {
      // qDebug() << "For symbol" << symbol
      //<< "the new count is 0. Thus we erase the map item altogether.";

      symbol_count_map.erase(symbol);
    }

  // Update what's the text of the formula to represent what is in
  // atomCount list.

  // qDebug() << "The formula now can be reduced to:" << elementalComposition();

  return new_count;
}

/*!
  \brief Accounts for \a symbol and corresponding \a count in the member map.

  The m_symbolCountMap relates each atom (chemical element) symbol with its
occurrence count as encountered while parsing the member action-formula.

  If the symbol was not encountered yet, a new key/value pair is created.
Otherwise, the count value is updated.

  Returns the new count status for \a symbol.
*/
double
Formula::accountSymbolCountPair(const QString &symbol, double count)
{
  // Same accounting as the map-taking overload, applied to the member
  // symbol / count map.
  const double updated_count =
    accountSymbolCountPair(m_symbolCountMap, symbol, count);

  return updated_count;
}

/*!
  \brief Accounts into this Formula the \a formula_string action-formula using
\a isotopic_data_csp as reference data using \a times as a compounding factor.
The result of the operation is set to \a ok.

  The \a formula_string formula is converted into a temporary Formula and
processed:

  \list
  \li First validate() is called on the temporary formula, with storage of the
symbol/count data;
  \li The symbol/count data thus generated is used to update the member map.
  \li The m_actionFormula is set to the result of elementalComposition().
  \endlist

  Returns the size of the member symbol/count m_symbolCountMap, or 0 if
\a formula_string failed to validate (in which case \a ok is set to false).
*/
std::size_t
Formula::accountFormula(const QString &formula_string,
                        IsotopicDataCstSPtr isotopic_data_csp,
                        double times,
                        bool &ok)
{
  qDebug() << "Accounting in this formula:" << m_actionFormula
           << "the external formula:" << formula_string;

  // qDebug() << "Before having merged the external formula's map into this one,
  // " "this one has size:"
  //<< m_symbolCountMap.size();

  // We get a formula as an elemental composition text string and we want to
  // account for that formula in *this formula.

  // First off, validate the formula text,  and take advantage to store
  //  the resulting symbol/count pairs we'll use later to update *this Formula.

  Formula temp_formula(formula_string);
  ErrorList error_list;
  if(!temp_formula.validate(isotopic_data_csp,
                            /*store*/ true,
                            /*reset*/ true,
                            &error_list))
    {
      qCritical() << "Formula:" << formula_string
                  << "failed to validate with errors:"
                  << Utils::joinErrorList(error_list, ", ");
      ok = false;
      return 0;
    }

  // Now, for each item in the formula's symbol/count map, aggregate the found
  // data to *this symbol/count map. We'll have "merged" or "aggreated" the
  // other formula into *this one.

  std::map<QString, double>::const_iterator the_iterator_cst =
    temp_formula.m_symbolCountMap.cbegin();
  std::map<QString, double>::const_iterator the_end_iterator_cst =
    temp_formula.m_symbolCountMap.cend();

  while(the_iterator_cst != the_end_iterator_cst)
    {
      // qDebug() << "Iterating in symbol/count pair:" <<  iter->first << "-" <<
      // iter->second;

      accountSymbolCountPair((*the_iterator_cst).first,
                             (*the_iterator_cst).second * times);
      ++the_iterator_cst;
    }

  // qDebug() << "After having merged the external formula's map into this one,
  // " "this one has size:"
  //<< m_symbolCountMap.size();

  // Update what's the text of the action-formula to represent what is in
  // atomCount list.
  m_actionFormula = elementalComposition();
  m_title         = "Has changed";

  qDebug() << "And now this formula has text: " << m_actionFormula;

  ok = true;
  return m_symbolCountMap.size();
}

//////////////// ELEMENTAL COMPOSITION /////////////////////
/*!
\brief Returns a formula matching the contents of the member symbol / count
map.

The returned formula is formatted according to the IUPAC convention about the
ordering of the chemical elements: CxxHxxNxxOxxSxxPxx.

The "plus" components are output first and the "minus" components after.

If \a symbol_count_pairs_p is not nullptr, each symbol / count pair is added
to it.
*/
QString
Formula::elementalComposition(
  std::vector<std::pair<QString, double>> *symbol_count_pairs_p) const
{
  // Builds the text of an elemental composition out of the member
  // symbol / count map.
  //
  // The text lists the symbols having a count >= 0 first (the "plus"
  // component) and then, prefixed with a single '-', the symbols having a
  // count < 0 (the "minus" component, written with the magnitude of the
  // count). Inside each component the symbols C, H, N, O, S, P come first,
  // in that order, and all the other symbols follow in sorted order.
  //
  // If symbol_count_pairs_p is not nullptr, one symbol / count pair is
  // appended to it for each symbol, in the order of the returned text.
  //
  // NOTE(review): for C, H, N, O, S, P the pair carries the signed count read
  // from the member map, while for the other symbols it carries the
  // magnitude parsed back from the text. For the "minus" component these two
  // conventions differ in sign -- confirm what the callers expect.

  // Render each map item as "<symbol><count>" into the list matching the
  // sign of its count.
  QStringList positive_items;
  QStringList negative_items;

  for(const auto &[symbol, count] : m_symbolCountMap)
    {
      if(count < 0)
        negative_items.append(QString("%1%2").arg(symbol).arg(-1 * count));
      else
        positive_items.append(QString("%1%2").arg(symbol).arg(count));
    }

  // Sorting now means that, once the leading symbols are extracted below,
  // whatever remains can simply be appended in list order.
  negative_items.sort();
  positive_items.sort();

  // The symbols that conventionally open an elemental composition.
  const QString leading_symbols[] = {QStringLiteral("C"),
                                     QStringLiteral("H"),
                                     QStringLiteral("N"),
                                     QStringLiteral("O"),
                                     QStringLiteral("S"),
                                     QStringLiteral("P")};

  // Splits a "<symbol><count>" item back into its two parts.
  const QRegularExpression item_regexp("([A-Z][a-z]*)(\\d*[\\.]?\\d*)");

  QString positive_component;
  QString negative_component;

  // The two components are crafted exactly the same way, only the source
  // list and the destination text differ. The "plus" component must be
  // processed first because this determines the order of the pairs appended
  // to symbol_count_pairs_p.
  struct ComponentPass
  {
    QStringList *items_p;
    QString *text_p;
  };

  const ComponentPass passes[] = {{&positive_items, &positive_component},
                                  {&negative_items, &negative_component}};

  for(const ComponentPass &pass : passes)
    {
      QStringList &items = *pass.items_p;
      QString &text      = *pass.text_p;

      // First the leading symbols. indexOf() looks for an item that matches
      // the pattern exactly, so that "C" does not pick up "Ca" or "Cl" items.
      // Each item found is moved from the list to the text.
      for(const QString &leading_symbol : leading_symbols)
        {
          const QString pattern =
            leading_symbol + QStringLiteral("\\d*[\\.]?\\d*");

          const auto item_index = items.indexOf(QRegularExpression(pattern));

          if(item_index == -1)
            continue;

          text += items.at(item_index);
          items.removeAt(item_index);

          if(symbol_count_pairs_p)
            symbol_count_pairs_p->push_back(std::pair<QString, double>(
              leading_symbol, m_symbolCountMap.at(leading_symbol)));
        }

      // Go on with all the other ones, if any, in their sorted order.
      const QStringList &remaining_items = items;

      for(const QString &item : remaining_items)
        {
          text += item;

          const QRegularExpressionMatch match = item_regexp.match(item);

          if(!match.hasMatch())
            continue;

          const QString symbol = match.captured(1);

          // Counts are doubles and may be fractional, so they must be parsed
          // as doubles for BOTH components. The "minus" component used to be
          // parsed with toInt(), which fails on any fractional count and
          // thus aborted the program below.
          bool ok            = false;
          const double count = match.captured(2).toDouble(&ok);

          if(!count && !ok)
            qFatal(
              "Fatal error at %s@%d -- %s(). "
              "Failed to parse an atom count."
              "Program aborted.",
              __FILE__,
              __LINE__,
              __FUNCTION__);

          if(symbol_count_pairs_p)
            symbol_count_pairs_p->push_back(
              std::pair<QString, double>(symbol, count));
        }
    }

  // Create the final elemental formula: first the positive component and
  // then the negative one, prepended with '-', only if it is non-empty.
  QString composition = positive_component;

  if(!negative_component.isEmpty())
    composition += QString("-%1").arg(negative_component);

  return composition;
}

//////////////// MASS OPERATIONS /////////////////////

/*!
\brief Accounts this formula's monoisotopic and average masses into \a mono
and \a avg, using \a times as a compounding factor.

The member action-formula is first checked for syntax and then split into
its symbol/count pairs, with \a times being taken into account as the
compounding factor when these pairs are created. The mass contribution of
each pair is then computed using data from \a isotopic_data_csp and added to
\a mono and \a avg.

\a mono and \a avg are incremented only if the masses of all the symbols
could be determined. If any step fails, they are left untouched.

Sets \a ok to false if the calculation failed, to true otherwise. The
validity status of this formula is set to false if the syntax check or the
splitting fails, and to true if the whole calculation succeeds.

\a isotopic_data_csp cannot be nullptr nor be empty (fatal error).

Returns this object.

\sa splitActionParts()
*/
Formula &
Formula::accountMasses(bool &ok,
                       IsotopicDataCstSPtr isotopic_data_csp,
                       double &mono,
                       double &avg,
                       double times)
{
  // GCOV_EXCL_START
  if(isotopic_data_csp == nullptr || !isotopic_data_csp->size())
    qFatal("Programming error. The isotopic data pointer cannot be nullptr.");
  // GCOV_EXCL_STOP

  if(!checkSyntax())
    {
      qDebug() << "The checkSyntax test failed for " << m_actionFormula;

      m_isValid = false;
      ok        = false;
      return *this;
    }

  // Note the 'times' param below that ensures we create proper symbol/count
  // map items by taking that compounding factor into account.

  if(splitActionParts(
       isotopic_data_csp, times, true /* store */, true /* reset */) ==
     SplitResult::FAILURE)
    {
      qDebug() << "The formula splitting into actions failed.";

      m_isValid = false;
      ok        = false;
      return *this;
    }

  // At this point m_symbolCountMap has all the symbol/count pairs needed to
  // account for the masses.

  // Accumulate into local copies so that the caller's variables are not left
  // half-updated if a mass lookup fails. The copies are seeded with the
  // caller's values so that the summation order (and thus the rounding) is
  // the same as when incrementing mono and avg directly.
  double updated_mono = mono;
  double updated_avg  = avg;

  bool res = false;

  for(const auto &[symbol, count] : m_symbolCountMap)
    {
      const double mono_mass =
        isotopic_data_csp->getMonoMassBySymbol(symbol, res);
      if(!res)
        {
          qWarning() << "Failed to get the mono mass.";
          ok = false;
          return *this;
        }

      const double avg_mass =
        isotopic_data_csp->getAvgMassBySymbol(symbol, res);
      if(!res)
        {
          qWarning() << "Failed to get the avg mass.";
          ok = false;
          return *this;
        }

      updated_mono += mono_mass * count;
      updated_avg += avg_mass * count;
    }

  // All the lookups succeeded: commit the results.
  mono = updated_mono;
  avg  = updated_avg;

  ok        = true;
  m_isValid = true;

  return *this;
}

/*!
\brief Accounts the \a formula monoisotopic and average masses into \a mono
and \a avg, using \a times as a compounding factor.

The action-formula of \a formula is first checked for syntax and then split
into its symbol/count pairs (stored in \a formula), with \a times being taken
into account as the compounding factor when these pairs are created. The mass
contribution of each pair is then computed using data from \a
isotopic_data_csp and added to \a mono and \a avg.

\a mono and \a avg are incremented only if the masses of all the symbols
could be determined. If any step fails, they are left untouched.

Sets \a ok to false if the calculation failed, to true otherwise. The
validity status of \a formula is set to false if the syntax check or the
splitting fails, and to true if the whole calculation succeeds.

\a isotopic_data_csp cannot be nullptr nor be empty (fatal error).

Returns \a formula.

\sa splitActionParts()
*/
Formula &
Formula::accountMasses(Formula &formula,
                       bool &ok,
                       IsotopicDataCstSPtr isotopic_data_csp,
                       double &mono,
                       double &avg,
                       double times)
{
  // GCOV_EXCL_START
  if(isotopic_data_csp == nullptr || !isotopic_data_csp->size())
    qFatal("Programming error. The isotopic data pointer cannot be nullptr.");
  // GCOV_EXCL_STOP

  if(!formula.checkSyntax())
    {
      qDebug() << "The checkSyntax test failed for " << formula.m_actionFormula;

      formula.m_isValid = false;
      ok                = false;
      return formula;
    }

  // Note the 'times' param below that ensures we create proper symbol/count
  // map items by taking that compounding factor into account.

  if(formula.splitActionParts(isotopic_data_csp,
                              formula.m_plusFormula,
                              formula.m_minusFormula,
                              formula.m_symbolCountMap,
                              times,
                              true /* store */,
                              true /* reset */) == SplitResult::FAILURE)
    {
      qDebug() << "The formula splitting into actions failed.";

      formula.m_isValid = false;
      ok                = false;
      return formula;
    }

  // At this point formula.m_symbolCountMap has all the symbol/count pairs
  // needed to account for the masses.

  // Accumulate into local copies so that the caller's variables are not left
  // half-updated if a mass lookup fails. The copies are seeded with the
  // caller's values so that the summation order (and thus the rounding) is
  // the same as when incrementing mono and avg directly.
  double updated_mono = mono;
  double updated_avg  = avg;

  bool res = false;

  for(const auto &[symbol, count] : formula.m_symbolCountMap)
    {
      const double mono_mass =
        isotopic_data_csp->getMonoMassBySymbol(symbol, res);
      if(!res)
        {
          qWarning() << "Failed to get the mono mass.";
          ok = false;
          return formula;
        }

      const double avg_mass =
        isotopic_data_csp->getAvgMassBySymbol(symbol, res);
      if(!res)
        {
          qWarning() << "Failed to get the avg mass.";
          ok = false;
          return formula;
        }

      updated_mono += mono_mass * count;
      updated_avg += avg_mass * count;
    }

  // All the lookups succeeded: commit the results.
  mono = updated_mono;
  avg  = updated_avg;

  ok                = true;
  formula.m_isValid = true;

  return formula;
}

//////////////// XML DATA LOADING WRITING /////////////////////
/*!
\brief Parses a formula XML \a element according to \a version and sets its
text as the member action-formula, then checks the syntax of that
action-formula.

The \a element must have the "formula" tag name. \a version is currently
unused.

Returns true if parsing and syntax checking were successful, false
otherwise.

\sa checkSyntax(), setActionFormula()
*/
bool
Formula::renderXmlFormulaElement(const QDomElement &element,
                                 [[maybe_unused]] int version)
{
  const bool is_formula_element = (element.tagName() == "formula");

  if(!is_formula_element)
    {
      qCritical() << "The element tag is not 'formula'";
      return false;
    }

  // The element text might hold a title along with the action-formula:
  // setActionFormula() takes care of removing the title and setting it to
  // m_title.
  setActionFormula(element.text());

  // Syntax checking does not mean that the chemical validity is assessed.
  // For example "+HWKPTR" would pass the checkSyntax() test.
  const bool syntax_ok = checkSyntax();

  if(!syntax_ok)
    qCritical() << "Failed to check syntax for formula:" << m_actionFormula;

  return syntax_ok;
}

/*!
\brief Returns a string containing a formula XML element
documenting this Formula instance.

The element is a single newline-terminated line holding the action-formula
along with its title. That line is prefixed with a lead made of the \a indent
string repeated \a offset times.
*/
QString
Formula::formatXmlFormulaElement(int offset, const QString &indent)
{
  // Prepare the lead: 'indent' repeated 'offset' times (empty if offset < 1).
  const QString lead = indent.repeated(offset);

  return QString("%1<formula>%2</formula>\n")
    .arg(lead)
    .arg(getActionFormula(/*with title*/ true));
}

//////////////// UTILS /////////////////////

/*!
  \brief Removes \e{all} the space characters from the member action-formula.

  Spaces can be placed anywhere in formula for more readability. However, it
  might be required that these character spaces be removed. This function does
  just this, using a QRegularExpression.

  Returns the number of removed characters.
*/
int
Formula::removeSpaces()
{
  int length = m_actionFormula.length();

  // We want to remove all the possibly-existing spaces.

  m_actionFormula.remove(QRegularExpression("\\s+"));

  // Return the number of removed characters.
  return (length - m_actionFormula.length());
}

/*!
\brief Returns the total count of symbols (atoms) in this formula.

The determination is performed by summing up all the count values for all the
symbols in the member symbol / count pairs in the member map m_symbolCountMap.
*/
double
Formula::totalAtoms() const
{
  double total_atom_count = 0;

  std::map<QString, double>::const_iterator iter = m_symbolCountMap.cbegin();
  std::map<QString, double>::const_iterator iter_end = m_symbolCountMap.cend();

  while(iter != iter_end)
    {
      total_atom_count += iter->second;
      ++iter;
    }

  return total_atom_count;
}

/*!
\brief Returns the total count of isotopes in this formula using \a
isotopic_data_csp as the reference isotopic data.

For each symbol / count pair in the member map m_symbolCountMap, the count is
multiplied by the isotope count that \a isotopic_data_csp reports for the
symbol. The returned value is the sum of these products.

Returns 0 if the map is empty. If there is at least one symbol to look up,
\a isotopic_data_csp cannot be nullptr (fatal error).
*/
double
Formula::totalIsotopes(IsotopicDataCstSPtr isotopic_data_csp) const
{
  // Nothing to look up: the isotopic data are not needed at all.
  if(m_symbolCountMap.empty())
    return 0;

  // GCOV_EXCL_START
  // The pointer is dereferenced below: fail with a diagnostic rather than
  // crash on a null pointer dereference.
  if(isotopic_data_csp == nullptr)
    qFatal("Programming error. The isotopic data pointer cannot be nullptr.");
  // GCOV_EXCL_STOP

  double total_isotope_count = 0;

  for(const auto &[symbol, count] : m_symbolCountMap)
    total_isotope_count +=
      count * isotopic_data_csp->getIsotopeCountBySymbol(symbol);

  return total_isotope_count;
}

/*!
  \brief Clears \e{all} the formula member data.

  The title, the action-formula, the plus- and minus-formulas and the
  symbol / count map are emptied. The force-count-index flag and the validity
  status are both set to false.
*/
void
Formula::clear()
{
  // Flags first.
  m_isValid         = false;
  m_forceCountIndex = false;

  // Then the textual members.
  m_title.clear();
  m_actionFormula.clear();
  m_plusFormula.clear();
  m_minusFormula.clear();

  // And finally the symbol / count pairs.
  m_symbolCountMap.clear();
}

/*!
  \brief Registers the Formula meta object as a constructor in \a engine.

  The meta object is set as the "Formula" property of the global object of
  \a engine. If \a engine is nullptr, a warning is issued and nothing is
  registered.
*/
void
Formula::registerJsConstructor(QJSEngine *engine)
{
  if(engine == nullptr)
    {
      qWarning() << "Cannot register class: engine is null";
      return;
    }

  // Wrap the meta object in a JS value and expose it under the class name.
  const QJSValue constructor =
    engine->newQMetaObject(&Formula::staticMetaObject);

  engine->globalObject().setProperty("Formula", constructor);
}

//////////////// PRIVATE FUNCTIONS /////////////////////
//////////////// PRIVATE FUNCTIONS /////////////////////
//////////////// PRIVATE FUNCTIONS /////////////////////

// GCOV_EXCL_START
/*!
  \brief Sets the m_plusFormula formula to \a formula.

  The \a formula string is stored as is: no syntax checking is performed here
  and no other member datum is modified.
*/
void
Formula::setPlusFormula(const QString &formula)
{
  m_plusFormula = formula;
}

/*!
  \brief Sets the m_minusFormula formula to \a formula.

  The \a formula string is stored as is: no syntax checking is performed here
  and no other member datum is modified.
*/
void
Formula::setMinusFormula(const QString &formula)
{
  m_minusFormula = formula;
}

// GCOV_EXCL_STOP


// NOTE(review): presumably registers the Formula class of the
// MsXpS::libXpertMassCore namespace with the JS class registrar; the macro is
// not defined in this file (likely jsclassregistrar.h) -- confirm there.
MSXPS_REGISTER_JS_CLASS(MsXpS::libXpertMassCore, Formula)

} // namespace libXpertMassCore
} // namespace MsXpS
