%---------------------------------------------------------------------------% % vim: ft=mercury ts=4 sw=4 et %---------------------------------------------------------------------------% % Copyright (C) 2009-2012 The University of Melbourne. % Copyright (C) 2014-2018 The Mercury team. % This file is distributed under the terms specified in COPYING.LIB. %---------------------------------------------------------------------------% % % File: parsing_utils.m % Authors: Ralph Becket , maclarty % Stability: low % % Utilities for recursive descent parsers. Parsers take at least three % arguments: a source (src) containing the input string, and an input/output % pair of parser states (ps) tracking the current offset into the input. % % Call parse(InputString, SkipWS, Parser, Result) to parse an input string % and return an error context and message if parsing failed. % The SkipWS predicate is used by the primitive parsers to skip over any % following whitespace (providing a skipping predicate allows users to define % comments as whitespace). % Alternatively, a new src and ps can be constructed by calling % new_src_and_ps(InputString, SkipWS, Src, !:PS). % % Parsing predicates are semidet and typically take the form % p(...parameters..., Src, Result, !PS). A parser matching variable % assignments of the form `x = 42' might be defined like this: % % var_assignment(Src, {Var, Value}, !PS) :- % var(Src, Var, !PS), % punct(Src, "=", !PS), % expr(Src, Expr, !PS). % % where var/4 and expr/4 are parsers for variables and expressions respectively % and punct/4 is provided by this module for matching punctuation. % %---------------------------------------------------------------------------% %---------------------------------------------------------------------------% :- module parsing_utils. :- interface. :- import_module char. :- import_module list. :- import_module maybe. :- import_module unit. %---------------------------------------------------------------------------% % The parser source (input string). % :- type src. % The parser "state", passed around in DCG arguments. % :- type ps. % These types and insts are useful for specifying "standard" parser % signatures. % :- type parser(T) == pred(src, T, ps, ps). :- inst parser == (pred(in, out, in, out) is semidet). % The following are for parsers that also transform a separate state value. % :- type parser_with_state(T, S) == pred(src, T, S, S, ps, ps). :- inst parser_with_state == (pred(in, out, in, out, in, out) is semidet). % Predicates of this type are used to skip whitespace in the primitive % parsers provided by this module. % :- type skip_whitespace_pred == parser(unit). :- type parse_result(T) ---> ok(T) ; error( error_message :: maybe(string), error_line :: int, error_col :: int ). % parse(Input, SkipWS, Parser, Result). % Try to parse Input using Parser and SkipWS to consume whitespace. % If Parser succeeds then return ok with the parsed value, % otherwise return error. If there were any calls to fail_with_message % without any subsequent progress being made, then the error message % passed to the last call to fail_with_message will be returned in the % error result. Otherwise no message is returned and the furthest % position the parser got in the input string is returned. % :- pred parse(string::in, skip_whitespace_pred::in(parser), parser(T)::in(parser), parse_result(T)::out) is cc_multi. % As above but using the default whitespace parser, whitespace/4. % :- pred parse(string::in, parser(T)::in(parser), parse_result(T)::out) is cc_multi. %---------------------% % Construct a new parser source and state from a string, also specifying % a predicate for skipping over whitespace (several primitive parsers % use this predicate to consume whitespace after a token; this argument % allows the user to specify a predicate for, say, skipping over comments % as well). % :- pred new_src_and_ps(string::in, skip_whitespace_pred::in(parser), src::out, ps::out) is det. % Construct a new parser source and state from a string. % The default whitespace parser, whitespace/4, is used. % :- pred new_src_and_ps(string::in, src::out, ps::out) is det. %---------------------% % Return the input string and its length from the parser source. % :- pred input_string(src::in, string::out, int::out) is det. % Obtain the current offset from the start of the input string % (the first character in the input has offset 0). % :- pred current_offset(src::in, int::out, ps::in, ps::out) is det. % Return the parser to skip over whitespace from the parser source. % :- pred get_skip_whitespace_pred(src::in, skip_whitespace_pred::out(parser)) is det. %---------------------% % input_substring(Src, StartOffset, EndOffsetPlusOne, Substring): % Copy the substring from the input occupying the offsets % [StartOffset, EndOffsetPlusOne). % :- pred input_substring(src::in, int::in, int::in, string::out) is semidet. %---------------------% :- type line_numbers. % Compute a structure from the parser source which can be used to % convert offsets into line numbers and positions in the file (this % is useful for error reporting). % :- func src_to_line_numbers(src) = line_numbers. % Convert an offset into a line number and position within the line % (the first line is number 1; the first character in a line is % position 1). % :- pred offset_to_line_number_and_position(line_numbers::in, int::in, int::out, int::out) is det. %---------------------% % Read the next char. % :- pred next_char(src::in, char::out, ps::in, ps::out) is semidet. % Read the next char but do not record progress information. % This is more efficient than next_char, but may produce less informative % error messages in case of a parse error. % :- pred next_char_no_progress(src::in, char::out, ps::in, ps::out) is semidet. %---------------------% % Match a char from the given string. % :- pred char_in_class(string::in, src::in, char::out, ps::in, ps::out) is semidet. %---------------------% % Match a string exactly and any subsequent whitespace. % :- pred punct(string::in, src::in, unit::out, ps::in, ps::out) is semidet. % keyword(IdChars, Keyword, Src, _, !PS) matches Keyword exactly (i.e., it % must not be followed by any character in IdChars) and any subsequent % whitespace. % :- pred keyword(string::in, string::in, src::in, unit::out, ps::in, ps::out) is semidet. % ikeyword(IdChars, Keyword, Src, _, !PS) % Case-insensitive version of keyword/6. % Only uppercase and lowercase letters in the ASCII range (A-Z, a-z) % are compared case insensitively. % :- pred ikeyword(string::in, string::in, src::in, unit::out, ps::in, ps::out) is semidet. % identifier(InitIdChars, IdChars, Src, Identifier, !PS) matches the next % identifier (result in Identifier) comprising a char from InitIdChars % followed by zero or more chars from IdChars. Any subsequent whitespace % is consumed. % :- pred identifier(string::in, string::in, src::in, string::out, ps::in, ps::out) is semidet. % Consume any whitespace (defined as a sequence of characters % satisfying char.is_whitespace). % :- pred whitespace(src::in, unit::out, ps::in, ps::out) is semidet. %---------------------% % Consume any input up to, and including, the next newline character % marking the end of the current line. % :- pred skip_to_eol(src::in, unit::out, ps::in, ps::out) is semidet. % Succeed if we have reached the end of the input. % :- pred eof(src::in, unit::out, ps::in, ps::out) is semidet. %---------------------% % Parse a float literal matching [-][0-9]+[.][0-9]+([Ee][-+][0-9]+)? % followed by any whitespace. The float_literal_as_string version simply % returns the matched string. The float_literal version uses % string.to_float to convert the output of float_literal_as_string; this % may return an approximate answer since not all floating point numbers % can be perfectly represented as Mercury floats. % :- pred float_literal_as_string(src::in, string::out, ps::in, ps::out) is semidet. :- pred float_literal(src::in, float::out, ps::in, ps::out) is semidet. % Parse an int literal matching [-][0-9]+, not followed by [.][0-9]+, % followed by any whitespace. The int_literal_as_string version simply % returns the matched string. The int_literal version uses string.to_int % to convert the output of int_literal_as_string; this may fail if the % number in question cannot be represented as a Mercury int. % :- pred int_literal_as_string(src::in, string::out, ps::in, ps::out) is semidet. :- pred int_literal(src::in, int::out, ps::in, ps::out) is semidet. % Parse an string literal. The string argument is the quote character. % A backslash (\) character in the string makes the next character % literal (e.g., for embedding quotes). These 'escaped' characters % are included as-is in the result, along with the preceding backslash. % Any following whitespace is also consumed. % :- pred string_literal(char::in, src::in, string::out, ps::in, ps::out) is semidet. %---------------------------------------------------------------------------% % Each basic parser combinators has a version that has a separate state % argument is threaded through the computation, for parsers that e.g. % incrementally construct a symbol table. % optional(P, Src, Result, !PS) returns Result = yes(X) if P(Src, X, !PS), % or Result = no if P does not succeed. % :- pred optional(parser(T)::in(parser), src::in, maybe(T)::out, ps::in, ps::out) is semidet. % optional(P, Src, Result, !S, !PS) returns Result = yes(X) % if P(Src, X, !S, !PS), or Result = no if P does not succeed. % :- pred optional(parser_with_state(T, S)::in(parser_with_state), src::in, maybe(T)::out, S::in, S::out, ps::in, ps::out) is semidet. %---------------------% % zero_or_more(P, Src, Xs, !PS) returns the list of results Xs obtained % by repeatedly applying P until P fails. The nth item in Xs is % the result from the nth application of P. % :- pred zero_or_more(parser(T)::in(parser), src::in, list(T)::out, ps::in, ps::out) is semidet. % zero_or_more(P, Src, Xs, !S, !PS) returns the list of results Xs obtained % by repeatedly applying P until P fails. The nth item in Xs is % the result from the nth application of P. % :- pred zero_or_more(parser_with_state(T, S)::in(parser_with_state), src::in, list(T)::out, S::in, S::out, ps::in, ps::out) is semidet. %---------------------% % one_or_more(P, Src, Xs, !PS) returns the list of results Xs obtained % by repeatedly applying P until P fails. The nth item in Xs is % the result from the nth application of P. P must succeed at least once. % :- pred one_or_more(parser(T)::in(parser), src::in, list(T)::out, ps::in, ps::out) is semidet. % one_or_more(P, Src, Xs, !S, !PS) returns the list of results Xs obtained % by repeatedly applying P until P fails. The nth item in Xs is % the result from the nth application of P. P must succeed at least once. % :- pred one_or_more(parser_with_state(T, S)::in(parser_with_state), src::in, list(T)::out, S::in, S::out, ps::in, ps::out) is semidet. %---------------------% % brackets(L, R, P, Src, X, !PS) is equivalent to % punct(L, Src, _, !PS), P(Src, X, !PS), punct(R, Src, _, !PS). % :- pred brackets(string::in, string::in, parser(T)::in(parser), src::in, T::out, ps::in, ps::out) is semidet. % brackets(L, R, P, Src, X, !S, !PS) is equivalent to % punct(L, Src, _, !PS), P(Src, X, !S, !PS), punct(R, Src, _, !PS). % :- pred brackets(string::in, string::in, parser_with_state(T, S)::in(parser_with_state), src::in, T::out, S::in, S::out, ps::in, ps::out) is semidet. %---------------------% % separated_list(Separator, P, Src, Xs, !PS) is like % zero_or_more(P, Src, Xs, !PS) except that successive applications of % P must be separated by punct(Separator, Src, _, !PS). % :- pred separated_list(string::in, parser(T)::in(parser), src::in, list(T)::out, ps::in, ps::out) is semidet. % separated_list(Separator, P, Src, Xs, !S, !PS) is like % zero_or_more(P, Src, Xs, !S, !PS) except that successive applications of % P must be separated by punct(Separator, Src, _, !PS). % :- pred separated_list(string::in, parser_with_state(T, S)::in(parser_with_state), src::in, list(T)::out, S::in, S::out, ps::in, ps::out) is semidet. %---------------------% % comma_separated_list(P, Src, Xs) is the same as % separated_list(",", P, Src, Xs). % :- pred comma_separated_list(parser(T)::in(parser), src::in, list(T)::out, ps::in, ps::out) is semidet. % comma_separated_list(P, Src, Xs, !S, !PS) is the same as % separated_list(",", P, Src, Xs, !S, !PS). % :- pred comma_separated_list(parser_with_state(T, S)::in(parser_with_state), src::in, list(T)::out, S::in, S::out, ps::in, ps::out) is semidet. %---------------------------------------------------------------------------% % Declaratively this predicate is equivalent to false. Operationally, % it will record an error message that will be returned by parse/4 % if no further progress is made and then fail. % :- pred fail_with_message(string::in, src::in, T::out, ps::in, ps::out) is semidet. % As above, but use the given offset for the context of the message. % :- pred fail_with_message(string::in, int::in, src::in, T::out, ps::in, ps::out) is semidet. %---------------------------------------------------------------------------% %---------------------------------------------------------------------------% :- implementation. :- import_module array. :- import_module int. :- import_module mutvar. :- import_module string. % The parser "state" is just the offset into the input string. % :- type ps == int. :- type src ---> src( input_length :: int, input_string :: string, skip_ws_pred :: skip_whitespace_pred, furthest_offset :: mutvar(int), % This mutable records the progress of the parser % through the input string. last_fail_message :: mutvar(fail_message_info) % This mutable is used to record messages passed to % fail_with_message and their context. ). :- type fail_message_info ---> fail_message_info(int, maybe(string)). %---------------------------------------------------------------------------% parse(InputString, SkipWS, Parser, Result) :- % This is pure, because it will always return the same results for % the same inputs (the mutable in Src cannot be accessed outside % of the promise_pure scope below). promise_pure ( new_src_and_ps(InputString, SkipWS, Src, PS0), ( if Parser(Src, Val, PS0, _) then Result = ok(Val) else impure get_mutvar(Src ^ last_fail_message, Info), impure get_mutvar(Src ^ furthest_offset, FurthestOffset), Info = fail_message_info(MessageOffset, LastFailMsg), ( if MessageOffset < FurthestOffset then Msg = no, Offset = FurthestOffset else Msg = LastFailMsg, Offset = MessageOffset ), offset_to_line_number_and_position(src_to_line_numbers(Src), Offset, Line, Col), Result0 = error(Msg, Line, Col), % We make parse/4 cc_multi because declaratively % parse(Str, SkipWS, Parser, error(MaybeMsg, Line, Col)) is true % for all MaybeMsg, Line and Col iff % new_src_and_ps(Str, SkipWS, Src, PS0), % Parser(Src, _, PS0, _) % is false, but operationally MaybeMsg, Line and Col are % restricted to one value each. cc_multi_equal(Result0, Result) ) ). parse(InputString, Parser, Result) :- parse(InputString, whitespace, Parser, Result). %---------------------------------------------------------------------------% new_src_and_ps(InputString, SkipWS, Src, PS) :- promise_pure ( impure new_mutvar(fail_message_info(0, no), ErrorInfoMutVar), impure new_mutvar(0, FurthestOffsetMutvar), Src = src(string.length(InputString), InputString, SkipWS, FurthestOffsetMutvar, ErrorInfoMutVar), PS = 0 ). new_src_and_ps(InputString, Src, PS) :- new_src_and_ps(InputString, whitespace, Src, PS). %---------------------------------------------------------------------------% input_string(Src, Src ^ input_string, Src ^ input_length). current_offset(_Src, Offset, !PS) :- Offset = !.PS. get_skip_whitespace_pred(Src, SkipWS) :- SkipWS0 = Src ^ skip_ws_pred, unsafe_skip_ws_pred_cast(SkipWS0, SkipWS). :- pred unsafe_skip_ws_pred_cast(skip_whitespace_pred::in, skip_whitespace_pred::out(parser)) is det. :- pragma foreign_proc("C", unsafe_skip_ws_pred_cast(SkipWS0::in, SkipWS::out(parser)), [will_not_call_mercury, promise_pure, thread_safe], " SkipWS = SkipWS0; "). :- pragma foreign_proc("C#", unsafe_skip_ws_pred_cast(SkipWS0::in, SkipWS::out(parser)), [will_not_call_mercury, promise_pure, thread_safe], " SkipWS = SkipWS0; "). :- pragma foreign_proc("Java", unsafe_skip_ws_pred_cast(SkipWS0::in, SkipWS::out(parser)), [will_not_call_mercury, promise_pure, thread_safe], " SkipWS = SkipWS0; "). :- pragma foreign_proc("Erlang", unsafe_skip_ws_pred_cast(SkipWS0::in, SkipWS::out(parser)), [will_not_call_mercury, promise_pure, thread_safe], " SkipWS = SkipWS0 "). %---------------------------------------------------------------------------% input_substring(Src, Start, EndPlusOne, Substring) :- promise_pure ( EndPlusOne =< Src ^ input_length, Substring = unsafe_between(Src ^ input_string, Start, EndPlusOne), impure record_progress(Src, Start) ). %---------------------------------------------------------------------------% % For a source string Src, the following array contains the positions % of all the newline characters in the string Src ++ "\n". % :- type line_numbers == array(int). %---------------------% src_to_line_numbers(Src) = LineNos :- Str = Src ^ input_string, src_to_line_numbers_loop(Str, 0, [], RevLineNosList), LineNos = array.from_reverse_list(RevLineNosList). :- pred src_to_line_numbers_loop(string::in, int::in, list(int)::in, list(int)::out) is det. src_to_line_numbers_loop(Str, Pos0, !RevLineNosList) :- ( if string.unsafe_index_next(Str, Pos0, Pos, Char) then ( if Char = '\n' then !:RevLineNosList = [Pos0 | !.RevLineNosList] else true ), src_to_line_numbers_loop(Str, Pos, !RevLineNosList) else !:RevLineNosList = [Pos0 | !.RevLineNosList] ). %---------------------% offset_to_line_number_and_position(LineNos, Offset, LineNo, Pos) :- Lo = 0, Hi = array.size(LineNos) - 1, offset_to_line_number_and_position_2(LineNos, Lo, Hi, Offset, LineNo, Pos). :- pred offset_to_line_number_and_position_2(line_numbers::in, int::in, int::in, int::in, int::out, int::out) is det. % Perform a binary search looking for the offset of the line number % of the line containing Offset. % offset_to_line_number_and_position_2(LineNos, Lo, Hi, Offset, LineNo, Pos) :- ( if Lo < Hi then Mid = (Lo + Hi) / 2, MidOffset = LineNos ^ elem(Mid), ( if MidOffset < Offset then offset_to_line_number_and_position_2(LineNos, Mid + 1, Hi, Offset, LineNo, Pos) else offset_to_line_number_and_position_2(LineNos, Lo, Mid, Offset, LineNo, Pos) ) else % Lo is the index of the newline that terminates the line that Offset % is on. We compute LineBegin as the offset of the first character % of the line Offset is on. ( if Lo = 0 then LineBegin = 0 else LineBegin = LineNos ^ elem(Lo - 1) + 1 ), LineNo = 1 + Lo, Pos = 1 + Offset - LineBegin ). %---------------------------------------------------------------------------% next_char(Src, Char, !PS) :- promise_pure ( current_offset(Src, Offset, !.PS, _), Offset < Src ^ input_length, string.unsafe_index_next(Src ^ input_string, Offset, NextOffset, Char), impure record_progress(Src, Offset), !:PS = NextOffset ). next_char_no_progress(Src, Char, !PS) :- current_offset(Src, Offset, !.PS, _), Offset < Src ^ input_length, string.unsafe_index_next(Src ^ input_string, Offset, NextOffset, Char), !:PS = NextOffset. %---------------------------------------------------------------------------% char_in_class(CharClass, Src, Char, !PS) :- next_char(Src, Char, !PS), string.contains_char(CharClass, Char). %---------------------------------------------------------------------------% %---------------------------------------------------------------------------% punct(Punct, Src, unit, !PS) :- match_string(Punct, Src, !PS), skip_whitespace(Src, !PS). %---------------------------------------------------------------------------% keyword(IdChars, Keyword, Src, unit, !PS) :- match_string(Keyword, Src, !PS), not char_in_class(IdChars, Src, _, !.PS, _), skip_whitespace(Src, !PS). ikeyword(IdChars, Keyword, Src, unit, !PS) :- imatch_string(Keyword, Src, !PS), not char_in_class(IdChars, Src, _, !.PS, _), skip_whitespace(Src, !PS). %---------------------------------------------------------------------------% identifier(InitIdChars, IdChars, Src, Identifier, !PS) :- current_offset(Src, Start, !PS), char_in_class(InitIdChars, Src, _, !PS), identifier_2(IdChars, Src, _, !PS), current_offset(Src, EndPlusOne, !PS), skip_whitespace(Src, !PS), input_substring(Src, Start, EndPlusOne, Identifier). :- pred identifier_2(string::in, src::in, unit::out, ps::in, ps::out) is semidet. identifier_2(IdChars, Src, unit, !PS) :- ( if char_in_class(IdChars, Src, _, !PS) then identifier_2(IdChars, Src, _, !PS) else true ). %---------------------------------------------------------------------------% whitespace(Src, unit, !PS) :- ( if next_char(Src, C, !PS), char.is_whitespace(C) then whitespace(Src, _, !PS) else semidet_true ). %---------------------------------------------------------------------------% skip_to_eol(Src, unit, !PS) :- next_char(Src, C, !PS), ( if C = ('\n') then true else skip_to_eol(Src, _, !PS) ). %---------------------------------------------------------------------------% eof(Src, unit, !PS) :- current_offset(Src, Offset, !PS), Offset = Src ^ input_length. %---------------------------------------------------------------------------% %---------------------------------------------------------------------------% float_literal_as_string(Src, FloatStr, !PS) :- current_offset(Src, Start, !PS), ( if next_char(Src, ('-'), !PS) then true else true ), digits(10, Src, _, !PS), next_char(Src, ('.'), !PS), digits(10, Src, _, !PS), ( if char_in_class("eE", Src, _, !PS) then optional_sign(Src, !PS), digits(10, Src, _, !PS) else true ), current_offset(Src, EndPlusOne, !PS), skip_whitespace(Src, !PS), input_substring(Src, Start, EndPlusOne, FloatStr). :- pred optional_sign(src::in, ps::in, ps::out) is det. optional_sign(Src, !PS) :- ( if next_char(Src, Char, !PS), ( Char = ('-') ; Char = ('+') ) then true else true ). float_literal(Src, Float, !PS) :- float_literal_as_string(Src, FloatStr, !PS), string.to_float(FloatStr, Float). %---------------------------------------------------------------------------% int_literal_as_string(Src, IntStr, !PS) :- current_offset(Src, Start, !PS), optional(char_in_class("-"), Src, _, !PS), digits(10, Src, _, !PS), not ( next_char(Src, ('.'), !PS), digits(10, Src, _, !.PS, _) ), current_offset(Src, EndPlusOne, !PS), skip_whitespace(Src, !PS), input_substring(Src, Start, EndPlusOne, IntStr). int_literal(Src, Int, !PS) :- int_literal_as_string(Src, IntStr, !PS), string.to_int(IntStr, Int). %---------------------------------------------------------------------------% :- pred digits(int::in, src::in, unit::out, ps::in, ps::out) is semidet. digits(Base, Src, unit, !PS) :- next_char(Src, C, !PS), char.is_base_digit(Base, C), digits_2(Base, Src, _, !PS). :- pred digits_2(int::in, src::in, unit::out, ps::in, ps::out) is semidet. digits_2(Base, Src, unit, !PS) :- ( if next_char(Src, C, !PS), char.is_base_digit(Base, C) then digits_2(Base, Src, _, !PS) else true ). %---------------------------------------------------------------------------% string_literal(QuoteChar, Src, String, !PS) :- next_char(Src, QuoteChar, !PS), current_offset(Src, Start, !PS), string_literal_2(Src, QuoteChar, _, !PS), current_offset(Src, EndPlusOne, !PS), string.unsafe_prev_index(Src ^ input_string, EndPlusOne, End, QuoteChar), skip_whitespace(Src, !PS), input_substring(Src, Start, End, String). :- pred string_literal_2(src::in, char::in, unit::out, ps::in, ps::out) is semidet. string_literal_2(Src, QuoteChar, unit, !PS) :- next_char(Src, C, !PS), ( if C = QuoteChar then true else if C = ('\\') then next_char(Src, _, !PS), string_literal_2(Src, QuoteChar, _, !PS) else string_literal_2(Src, QuoteChar, _, !PS) ). %---------------------------------------------------------------------------% %---------------------------------------------------------------------------% optional(P, Src, Result, !PS) :- ( if P(Src, X, !PS) then Result = yes(X) else Result = no, semidet_true ). %---------------------------------------------------------------------------% optional(P, Src, Result, !S, !PS) :- ( if P(Src, X, !S, !PS) then Result = yes(X) else Result = no, semidet_true ). %---------------------------------------------------------------------------% zero_or_more(P, Src, Result, !PS) :- zero_or_more_rev_acc(P, Src, [], RevResult, !PS), Result = list.reverse(RevResult). % We use an auxiliary predicate to make this tail recursive. % This can be an issue with long sequences. % :- pred zero_or_more_rev_acc(parser(T)::in(parser), src::in, list(T)::in, list(T)::out, ps::in, ps::out) is semidet. zero_or_more_rev_acc(P, Src, !RevResult, !PS) :- ( if P(Src, X, !PS) then list.cons(X, !RevResult), zero_or_more_rev_acc(P, Src, !RevResult, !PS) else semidet_true ). zero_or_more(P, Src, Result, !S, !PS) :- zero_or_more_rev_acc(P, Src, [], RevResult, !S, !PS), Result = list.reverse(RevResult). :- pred zero_or_more_rev_acc(parser_with_state(T, S)::in(parser_with_state), src::in, list(T)::in, list(T)::out, S::in, S::out, ps::in, ps::out) is semidet. zero_or_more_rev_acc(P, Src, !RevResult, !S, !PS) :- ( if P(Src, X, !S, !PS) then list.cons(X, !RevResult), zero_or_more_rev_acc(P, Src, !RevResult, !S, !PS) else semidet_true ). %---------------------------------------------------------------------------% one_or_more(P, Src, Result, !PS) :- P(Src, X, !PS), zero_or_more(P, Src, Xs, !PS), Result = [X | Xs]. one_or_more(P, Src, Result, !S, !PS) :- P(Src, X, !S, !PS), zero_or_more(P, Src, Xs, !S, !PS), Result = [X | Xs]. %---------------------------------------------------------------------------% brackets(L, R, P, Src, Result, !PS) :- punct(L, Src, _, !PS), P(Src, Result, !PS), punct(R, Src, _, !PS). brackets(L, R, P, Src, Result, !S, !PS) :- punct(L, Src, _, !PS), P(Src, Result, !S, !PS), punct(R, Src, _, !PS). %---------------------------------------------------------------------------% separated_list(Separator, P, Src, Result, !PS) :- CommaP = ( pred(CommaPSrc::in, CommaPX::out, !.PS::in, !:PS::out) is semidet :- punct(Separator, CommaPSrc, _, !PS), P(CommaPSrc, CommaPX, !PS) ), ( if P(Src, X, !PS) then zero_or_more(CommaP, Src, Xs, !PS), Result = [X | Xs] else Result = [] ). separated_list(Separator, P, Src, Result, !S, !PS) :- CommaP = ( pred(CommaPSrc::in, CommaPX::out, !.S::in, !:S::out, !.PS::in, !:PS::out) is semidet :- punct(Separator, CommaPSrc, _, !PS), P(CommaPSrc, CommaPX, !S, !PS) ), ( if P(Src, X, !S, !PS) then zero_or_more(CommaP, Src, Xs, !S, !PS), Result = [X | Xs] else Result = [] ). %---------------------------------------------------------------------------% comma_separated_list(P, Src, Result, !PS) :- separated_list(",", P, Src, Result, !PS). comma_separated_list(P, Src, Result, !S, !PS) :- separated_list(",", P, Src, Result, !S, !PS). %---------------------------------------------------------------------------% fail_with_message(Msg, Src, Val, !PS) :- % This is pure, because the mutable can only be accessed via % the parse/4 predicate which will always return the same results % for the same inputs. promise_pure ( impure set_mutvar(Src ^ last_fail_message, fail_message_info(!.PS, yes(Msg))), impure set_mutvar(Src ^ furthest_offset, !.PS), ( if semidet_fail then dynamic_cast(0, Val) % unreachable else fail ) ). fail_with_message(Msg, Offset, Src, Val, _, PS) :- fail_with_message(Msg, Src, Val, Offset, PS). %---------------------------------------------------------------------------% %---------------------------------------------------------------------------% :- pred match_string(string::in, src::in, ps::in, ps::out) is semidet. match_string(MatchStr, Src, PS, PS + N) :- promise_pure ( impure record_progress(Src, PS), N = string.length(MatchStr), PS + N =< Src ^ input_length, match_string_2(N, 0, MatchStr, PS, Src ^ input_string) ). :- pred match_string_2(int::in, int::in, string::in, int::in, string::in) is semidet. match_string_2(N, I, MatchStr, Offset, Str) :- ( if I < N then string.unsafe_index_code_unit(MatchStr, I, CodeUnit), string.unsafe_index_code_unit(Str, Offset + I, CodeUnit), match_string_2(N, I + 1, MatchStr, Offset, Str) else true ). :- pred imatch_string(string::in, src::in, ps::in, ps::out) is semidet. imatch_string(MatchStr, Src, PS, PS + N) :- promise_pure ( impure record_progress(Src, PS), N = string.length(MatchStr), PS + N =< Src ^ input_length, imatch_string_2(N, 0, MatchStr, PS, Src ^ input_string) ). :- pred imatch_string_2(int::in, int::in, string::in, int::in, string::in) is semidet. imatch_string_2(N, I, MatchStr, Offset, Str) :- ( if I < N then % We can compare by code units because char.to_upper only converts % letters in the ASCII range, and ASCII characters are always encoded % in a single code unit. string.unsafe_index_code_unit(MatchStr, I, CodeUnit1), string.unsafe_index_code_unit(Str, Offset + I, CodeUnit2), char.det_from_int(CodeUnit1, Chr1), char.det_from_int(CodeUnit2, Chr2), char.to_upper(Chr1) = char.to_upper(Chr2) : char, imatch_string_2(N, I + 1, MatchStr, Offset, Str) else true ). %---------------------------------------------------------------------------% :- pred skip_whitespace(src::in, ps::in, ps::out) is semidet. skip_whitespace(Src, PS0, PS) :- get_skip_whitespace_pred(Src, SkipWS), SkipWS(Src, _, PS0, PS). %---------------------------------------------------------------------------% % Update the furthest_offset field if any progress has been made. % :- impure pred record_progress(src::in, ps::in) is det. record_progress(Src, PS) :- MutVar = Src ^ furthest_offset, impure get_mutvar(MutVar, OS0), ( if PS > OS0 then impure set_mutvar(MutVar, PS) else true ). %---------------------------------------------------------------------------% %---------------------------------------------------------------------------%