% -----------------------------------------------------------------------------
%  (C) Altran Praxis Limited
% -----------------------------------------------------------------------------
% 
%  The SPARK toolset is free software; you can redistribute it and/or modify it
%  under terms of the GNU General Public License as published by the Free
%  Software Foundation; either version 3, or (at your option) any later
%  version. The SPARK toolset is distributed in the hope that it will be
%  useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
%  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
%  Public License for more details. You should have received a copy of the GNU
%  General Public License distributed with the SPARK toolset; see file
%  COPYING3. If not, go to http://www.gnu.org/licenses for a complete copy of
%  the license.
% 
% =============================================================================

%###############################################################################
% PURPOSE
%-------------------------------------------------------------------------------
% Provides utilities that support parsing lists of characters.
%###############################################################################

%###############################################################################
% MODULE
%###############################################################################
:- module(parseutilities, [parse_all_to_nothing/2,
                           parse_nothing_to_all/2,
                           parse_atom/5,
                           parse_atom_silent/4,
                           parse_line/3,
                           parse_char_sep_atom_list/6,
                           parse_number/3,
                           parse_natural_int/3,
                           parse_possibly_signed_atom/4,
                           inside_selected_character_class/2,
                           atom_to_lower_case/2]).


%###############################################################################
% DEPENDENCIES
%###############################################################################

:- use_module('data__formats.pro',
              [add_type/2]).
:- use_module('newutilities.pro',
              [atom_to_integer/2]).
:- use_module('ioutilities.pro',
              [throw_error/2]).

%###############################################################################
% TYPES
%###############################################################################

:- add_type('SelectCharClass',
            [not('CharClass'),
             'CharClass']).

:- add_type('CharClass',
            [% Internal character classes.
             lower_case_char('Index_Int'),
             upper_case_char('Index_Int'),

             % Low level character classes.
             numeric,
             under_score,
             asterisk,
             space,
             newline,
             semicolon,
             colon,
             period,
             forwardslash,
             backwardslash,

             % Higher level character classes (entirely based on character
             % classes above).
             lower_case_char,
             upper_case_char,
             alpha,
             alpha_numeric]).

:- add_type('ParseRequest',
            [% Must parse one character.
             one,

             % Must parse at least one character.
             oneormore,

             % Need not parse any characters.
             zeroormore]).

%###############################################################################
% DATA
%###############################################################################

%###############################################################################
% PREDICATES
%###############################################################################


:- set_prolog_flag(double_quotes, chars).

%===============================================================================
% character(?CharClass, ?Char).
%-------------------------------------------------------------------------------
% The intention is that this character predicate will never be accessed
% outside this module. Instead, this module has a small collection of
% general predicates for performing character related operations.
%===============================================================================

character(numeric, '1').
character(numeric, '2').
character(numeric, '3').
character(numeric, '4').
character(numeric, '5').
character(numeric, '6').
character(numeric, '7').
character(numeric, '8').
character(numeric, '9').
character(numeric, '0').

character(lower_case_char(1),  'a').
character(lower_case_char(2),  'b').
character(lower_case_char(3),  'c').
character(lower_case_char(4),  'd').
character(lower_case_char(5),  'e').
character(lower_case_char(6),  'f').
character(lower_case_char(7),  'g').
character(lower_case_char(8),  'h').
character(lower_case_char(9),  'i').
character(lower_case_char(10), 'j').
character(lower_case_char(11), 'k').
character(lower_case_char(12), 'l').
character(lower_case_char(13), 'm').
character(lower_case_char(14), 'n').
character(lower_case_char(15), 'o').
character(lower_case_char(16), 'p').
character(lower_case_char(17), 'q').
character(lower_case_char(18), 'r').
character(lower_case_char(19), 's').
character(lower_case_char(20), 't').
character(lower_case_char(21), 'u').
character(lower_case_char(22), 'v').
character(lower_case_char(23), 'w').
character(lower_case_char(24), 'x').
character(lower_case_char(25), 'y').
character(lower_case_char(26), 'z').

character(upper_case_char(1),  'A').
character(upper_case_char(2),  'B').
character(upper_case_char(3),  'C').
character(upper_case_char(4),  'D').
character(upper_case_char(5),  'E').
character(upper_case_char(6),  'F').
character(upper_case_char(7),  'G').
character(upper_case_char(8),  'H').
character(upper_case_char(9),  'I').
character(upper_case_char(10), 'J').
character(upper_case_char(11), 'K').
character(upper_case_char(12), 'L').
character(upper_case_char(13), 'M').
character(upper_case_char(14), 'N').
character(upper_case_char(15), 'O').
character(upper_case_char(16), 'P').
character(upper_case_char(17), 'Q').
character(upper_case_char(18), 'R').
character(upper_case_char(19), 'S').
character(upper_case_char(20), 'T').
character(upper_case_char(21), 'U').
character(upper_case_char(22), 'V').
character(upper_case_char(23), 'W').
character(upper_case_char(24), 'X').
character(upper_case_char(25), 'Y').
character(upper_case_char(26), 'Z').

character(under_score, '_').
character(asterisk, '*').
character(space, ' ').
character(newline, '\n').
character(semicolon, ';').
character(colon, ':').
character(period, '.').
character(forwardslash, '/').
character(backwardslash, '\\').
character(hyphen, '-').

%===============================================================================
% inside_selected_character_class(+SelectCharClassList, +Char).
%-------------------------------------------------------------------------------
% Is successful where the provided character (Char) is within one of the
% selected character classes in (SelectCharClassList). Raises an error if
% an unaccepted character class is provided.
%===============================================================================

inside_selected_character_class(SelectCharClassList, Char):-
    member(SelectCharClass, SelectCharClassList),
    inside_selected_character_class_x(SelectCharClass, Char),
    !.

%-------------------------------------------------------------------------------

%Consider negated character class selection.
inside_selected_character_class_x(not(CharClass), Char):-
    \+ inside_character_class(CharClass, Char),
    !.

%Otherwise, consider as regular character class selection.
inside_selected_character_class_x(CharClass, Char):-
    \+ CharClass=not(_CharClass),
    inside_character_class(CharClass, Char),
    !.
%-------------------------------------------------------------------------------

% numeric.
inside_character_class(numeric, Char):-
    !,
    character(numeric, Char),
    !.

% under_score.
inside_character_class(under_score, Char):-
    !,
    character(under_score, Char),
    !.

% asterisk.
inside_character_class(asterisk, Char):-
    !,
    character(asterisk, Char),
    !.

% space.
inside_character_class(space, Char):-
    !,
    character(space, Char),
    !.

% newline.
inside_character_class(newline, Char):-
    !,
    character(newline, Char),
    !.

% semicolon.
inside_character_class(semicolon, Char):-
    !,
    character(semicolon, Char),
    !.

% colon.
inside_character_class(colon, Char):-
    !,
    character(colon, Char),
    !.

% period.
inside_character_class(period, Char):-
    !,
    character(period, Char),
    !.

% hyphen
inside_character_class(hyphen, Char):-
    !,
    character(hyphen, Char),
    !.

% forwardslash.
inside_character_class(forwardslash, Char):-
    !,
    character(forwardslash, Char),
    !.

% backwardslash.
inside_character_class(backwardslash, Char):-
    !,
    character(backwardslash, Char),
    !.

% lower_case_char.
inside_character_class(lower_case_char, Char):-
    !,
    character(lower_case_char(_Index_Int), Char),
    !.

% upper_case_char.
inside_character_class(upper_case_char, Char):-
    !,
    character(upper_case_char(_Index_Int), Char),
    !.

% alpha.
inside_character_class(alpha, Char):-
    !,
    inside_character_class_alpha(Char),
    !.

% alpha_numeric.
inside_character_class(alpha_numeric, Char):-
    !,
    inside_character_class_alpha_numeric(Char),
    !.

%From above, provided character class is not accepted. Raise an error.
inside_character_class(CharClass, _Char):-
    throw_error('Attempted to parse with an unaccepted character class: ~k',
                [CharClass]).

%-------------------------------------------------------------------------------

inside_character_class_alpha(Char):-
    character(lower_case_char(_Index_Int), Char),
    !.

inside_character_class_alpha(Char):-
    character(upper_case_char(_Index_Int), Char),
    !.

%-------------------------------------------------------------------------------

inside_character_class_alpha_numeric(Char):-
    character(lower_case_char(_Index_Int), Char),
    !.

inside_character_class_alpha_numeric(Char):-
    character(upper_case_char(_Index_Int), Char),
    !.

inside_character_class_alpha_numeric(Char):-
    character(numeric, Char),
    !.

%===============================================================================
% parse_all_to_nothing.
%-------------------------------------------------------------------------------
% Initially parses all characters, then increasingly parses one less
% character.
%===============================================================================

% Collect everything.
parse_all_to_nothing -->
    [_Char],
    parse_all_to_nothing.

% End of string.
parse_all_to_nothing -->
    [].

%===============================================================================
% parse_nothing_to_all.
%-------------------------------------------------------------------------------
% Initially parses zero characters, then increasingly parses an additional
% character.
%===============================================================================

parse_nothing_to_all -->
    [].

parse_nothing_to_all -->
    [_Char],
    parse_nothing_to_all.

%===============================================================================
% parse_atom(+SelectCharClassList, +ParseRequest, -Atom).
%-------------------------------------------------------------------------------
% Parses following characters that conform to at least one of the character
% classes in the provided list (CharClassList) as an atom (Atom) taking
% into account the parse request (ParseRequest).  If ParseRequest is 'one'
% then the predicate seeks to parse one character.  If ParseRequest is
% 'oneormore' then the predicate seeks to parse as many characters as
% possible, but at least one.  If ParseRequest is 'zeroormore' then the
% predicate seeks to parse seeks to parse as many characters as possible,
% but is still successful if zero are parsed.
%===============================================================================

% Parse one character.
parse_atom(SelectCharClassList, one, Atom) -->
    [H_Char],
    {inside_selected_character_class(SelectCharClassList, H_Char)},
    {atom_chars(Atom, [H_Char])},
    !.

% Parse many (at least one) characters.
parse_atom(SelectCharClassList, oneormore, Atom) -->
    parse_atom_one_or_more(SelectCharClassList, CharList),
    {atom_chars(Atom, CharList)},
    !.

% Parse many (even zero) characters.
parse_atom(SelectCharClassList, zeroormore, Atom) -->
    parse_atom_zero_or_more(SelectCharClassList, CharList),
    {atom_chars(Atom, CharList)},
    !.

%-------------------------------------------------------------------------------

% First character.
parse_atom_one_or_more(SelectCharClassList, [H_Char | T_CharList]) -->
    [H_Char],
    {inside_selected_character_class(SelectCharClassList, H_Char)},
    parse_atom_zero_or_more(SelectCharClassList, T_CharList),
    !.

%-------------------------------------------------------------------------------

% Subsequent characters.
parse_atom_zero_or_more(SelectCharClassList, [H_Char | T_CharList]) -->
    [H_Char],
    {inside_selected_character_class(SelectCharClassList, H_Char)},
    parse_atom_zero_or_more(SelectCharClassList, T_CharList),
    !.

% From above, is end of atom.
parse_atom_zero_or_more(_CharClassList, []) -->
    !.

%===============================================================================
% parse_atom_silent(+SelectCharClassList, +ParseRequest).
%-------------------------------------------------------------------------------
% Exactly the same behaviour as parse_atom, except that the parsed result
% is silently discarded.
%===============================================================================

parse_atom_silent(SelectCharClassList, ParseRequest) -->
   parse_atom(SelectCharClassList, ParseRequest, _Atom),
    !.

%===============================================================================
%parse_char_sep_atom_list(+Item_SelectCharClassList,
%                         +Between_SelectCharClassList,
%                         +Char,
%                         -AtomList).
%-------------------------------------------------------------------------------
%Parses character separated atoms. Each atom must contain at least one
%character from the provided character class list
%(Item_SelectCharClassList). Between the atoms and provided character,
%atoms of the provided character class list (Between_SelectCharClassList)
%may be present.  The character separated atoms parsed are returned as an
%atom list (AtomList). At least one atom must be parsed for the predicate
%to be successful.
%===============================================================================

% Continuation.
parse_char_sep_atom_list(Item_SelectCharClassList,
                         Between_SelectCharClassList,
                         Char,
                         [H_Atom | T_AtomList]) -->
    parse_atom_silent(Between_SelectCharClassList, zeroormore),
    parse_atom(Item_SelectCharClassList, oneormore, H_Atom),
    parse_atom_silent(Between_SelectCharClassList, zeroormore),
    [Char],
    parse_atom_silent(Between_SelectCharClassList, zeroormore),
    !,
    parse_char_sep_atom_list(Item_SelectCharClassList,
                             Between_SelectCharClassList,
                             Char,
                             T_AtomList),
    !.

% Final.
parse_char_sep_atom_list(Item_SelectCharClassList,
                         Between_SelectCharClassList,
                         _Char,
                         [H_Atom]) -->
    parse_atom_silent(Between_SelectCharClassList, zeroormore),
    parse_atom(Item_SelectCharClassList, oneormore, H_Atom),
    parse_atom_silent(Between_SelectCharClassList, zeroormore),
    !.

%===============================================================================
% parse_line(Line_Atom).
%-------------------------------------------------------------------------------
%Parse and return a line as any number of any characters up to a newline. The
%newline is removed from the input stream.
%===============================================================================

parse_line(Line_Atom) -->
    parse_atom([not(newline)], zeroormore, Line_Atom),
    parse_atom_silent([newline], one),
    !.

%===============================================================================
% parse_number(-Int).
%-------------------------------------------------------------------------------
%Parse an integer number, accepting both a negative and positive prefix.
%Where successful, the parsed number is returned as (Int).
%===============================================================================

% Negative number.
parse_number(Int) -->
    parse_atom_silent([space, newline], zeroormore),
    "-",
    parse_atom_silent([space, newline], zeroormore),
    parse_natural_int(Natural_Int),
    parse_atom_silent([space, newline], zeroormore),
    {Int is -(Natural_Int)},
    !.

% Positive number (by default).
parse_number(Int) -->
    parse_atom_silent([space, newline], zeroormore),
    parse_natural_int(Natural_Int),
    parse_atom_silent([space, newline], zeroormore),
    {Int is Natural_Int},
    !.

% Positive number.
parse_number(Int) -->
    parse_atom_silent([space, newline], zeroormore),
    "+",
    parse_atom_silent([space, newline], zeroormore),
    parse_natural_int(Natural_Int),
    parse_atom_silent([space, newline], zeroormore),
    {Int is Natural_Int},
    !.

%===============================================================================
% parse_natural_int(-Natural_Int).
%-------------------------------------------------------------------------------
% Parse an natural integer. Where successful, the number is returned as
% (Natural_Int).
%===============================================================================

%Make this call visible to the spxref tool.
:- public newutilities:atom_to_integer/2.

parse_natural_int(Natural_Int) -->
    parse_atom([numeric], oneormore, Atom),
    {atom_to_integer(Atom, Natural_Int)},
    !.

%===============================================================================
% parse_possibly_signed_atom(CharClassList, Term).
%-------------------------------------------------------------------------------
% Parse an atom, possibly prefixed by '-' or '+' signs. Where the '-' sign
% is present, the atom is returned in the term structure: -(X). If
% possible, the atom is converted to an integer.
%===============================================================================







% Negative number.
parse_possibly_signed_atom(CharClassList, -(Atom)) -->
    parse_atom_silent([space, newline], zeroormore),
    "-",
    parse_atom_silent([space, newline], zeroormore),
    parse_atom(CharClassList, oneormore, Signed_Atom),
    parse_atom_silent([space, newline], zeroormore),

    {modify_type(Signed_Atom, Atom)},
    !.

% Positive number (by default).
parse_possibly_signed_atom(CharClassList, Atom) -->
    parse_atom_silent([space, newline], zeroormore),
    parse_atom(CharClassList, oneormore, Signed_Atom),
    parse_atom_silent([space, newline], zeroormore),

    {modify_type(Signed_Atom, Atom)},
    !.

% Positive number.
parse_possibly_signed_atom(CharClassList, Atom) -->
    parse_atom_silent([space, newline], zeroormore),
    "+",
    parse_atom_silent([space, newline], zeroormore),
    parse_atom(CharClassList, oneormore, Signed_Atom),
    parse_atom_silent([space, newline], zeroormore),

    {modify_type(Signed_Atom, Atom)},
    !.

%-------------------------------------------------------------------------------





modify_type(Signed_Atom, Term):-
    name(Signed_Atom,Signed_CodeList),
    name(Term,Signed_CodeList).

%===============================================================================
% atom_to_lower_case(+MixedCase_Atom, -LowerCase_Atom).
%-------------------------------------------------------------------------------
%Convert all upper chase characters in the input atom (MixedCase_Atom) into
%their lower case equivalent in (LowerCase_Atom).
%===============================================================================

atom_to_lower_case(MixedCase_Atom, LowerCase_Atom):-
    atom_chars(MixedCase_Atom, MixedCase_CharList),
    atom_to_lower_case_x(MixedCase_CharList, LowerCase_CharList),
    atom_chars(LowerCase_Atom, LowerCase_CharList),
    !.

%-------------------------------------------------------------------------------

atom_to_lower_case_x([], []):-
    !.

% Convert uppercase char to lowercase equivalent.
atom_to_lower_case_x([H_MixedCase_Char | T_MixedCase_CharList],
                     [H_LowerCase_Char | T_LowerCase_CharList]):-
    character(upper_case_char(Index_Int), H_MixedCase_Char),
    character(lower_case_char(Index_Int), H_LowerCase_Char),
    atom_to_lower_case_x(T_MixedCase_CharList,
                         T_LowerCase_CharList).

% Copy over all other characters.
atom_to_lower_case_x([H_MixedCase__LowerCase__Char | T_MixedCase_CharList],
                     [H_MixedCase__LowerCase__Char | T_LowerCase_CharList]):-
    atom_to_lower_case_x(T_MixedCase_CharList,
                         T_LowerCase_CharList).


:- set_prolog_flag(double_quotes, codes).

%###############################################################################
% END-OF-FILE
