Files
mercury/library/parsing_utils.m
Mark Brown d465fa53cb Update the COPYING.LIB file and references to it.
Discussion of these changes can be found on the Mercury developers
mailing list archives from June 2018.

COPYING.LIB:
    Add a special linking exception to the LGPL.

*:
    Update references to COPYING.LIB.

    Clean up some minor errors that have accumulated in copyright
    messages.
2018-06-09 17:43:12 +10:00

971 lines
33 KiB
Mathematica

%---------------------------------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%---------------------------------------------------------------------------%
% Copyright (C) 2009-2012 The University of Melbourne.
% Copyright (C) 2014-2018 The Mercury team.
% This file is distributed under the terms specified in COPYING.LIB.
%---------------------------------------------------------------------------%
%
% File: parsing_utils.m
% Authors: Ralph Becket <rafe@csse.unimelb.edu.au>, maclarty
% Stability: low
%
% Utilities for recursive descent parsers. Parsers take at least three
% arguments: a source (src) containing the input string, and an input/output
% pair of parser states (ps) tracking the current offset into the input.
%
% Call parse(InputString, SkipWS, Parser, Result) to parse an input string
% and return an error context and message if parsing failed.
% The SkipWS predicate is used by the primitive parsers to skip over any
% following whitespace (providing a skipping predicate allows users to define
% comments as whitespace).
% Alternatively, a new src and ps can be constructed by calling
% new_src_and_ps(InputString, SkipWS, Src, !:PS).
%
% Parsing predicates are semidet and typically take the form
% p(...parameters..., Src, Result, !PS). A parser matching variable
% assignments of the form `x = 42' might be defined like this:
%
% var_assignment(Src, {Var, Value}, !PS) :-
% var(Src, Var, !PS),
% punct(Src, "=", !PS),
% expr(Src, Expr, !PS).
%
% where var/4 and expr/4 are parsers for variables and expressions respectively
% and punct/4 is provided by this module for matching punctuation.
%
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
:- module parsing_utils.
:- interface.
:- import_module char.
:- import_module list.
:- import_module maybe.
:- import_module unit.
%---------------------------------------------------------------------------%
% The parser source (input string).
%
:- type src.
% The parser "state", passed around in DCG arguments.
%
:- type ps.
% These types and insts are useful for specifying "standard" parser
% signatures.
%
:- type parser(T) == pred(src, T, ps, ps).
:- inst parser == (pred(in, out, in, out) is semidet).
% The following are for parsers that also transform a separate state value.
%
:- type parser_with_state(T, S) == pred(src, T, S, S, ps, ps).
:- inst parser_with_state == (pred(in, out, in, out, in, out) is semidet).
% Predicates of this type are used to skip whitespace in the primitive
% parsers provided by this module.
%
:- type skip_whitespace_pred == parser(unit).
:- type parse_result(T)
---> ok(T)
; error(
error_message :: maybe(string),
error_line :: int,
error_col :: int
).
% parse(Input, SkipWS, Parser, Result).
% Try to parse Input using Parser and SkipWS to consume whitespace.
% If Parser succeeds then return ok with the parsed value,
% otherwise return error. If there were any calls to fail_with_message
% without any subsequent progress being made, then the error message
% passed to the last call to fail_with_message will be returned in the
% error result. Otherwise no message is returned and the furthest
% position the parser got in the input string is returned.
%
:- pred parse(string::in, skip_whitespace_pred::in(parser),
parser(T)::in(parser), parse_result(T)::out) is cc_multi.
% As above but using the default whitespace parser, whitespace/4.
%
:- pred parse(string::in, parser(T)::in(parser), parse_result(T)::out)
is cc_multi.
%---------------------%
% Construct a new parser source and state from a string, also specifying
% a predicate for skipping over whitespace (several primitive parsers
% use this predicate to consume whitespace after a token; this argument
% allows the user to specify a predicate for, say, skipping over comments
% as well).
%
:- pred new_src_and_ps(string::in, skip_whitespace_pred::in(parser),
src::out, ps::out) is det.
% Construct a new parser source and state from a string.
% The default whitespace parser, whitespace/4, is used.
%
:- pred new_src_and_ps(string::in, src::out, ps::out) is det.
%---------------------%
% Return the input string and its length from the parser source.
%
:- pred input_string(src::in, string::out, int::out) is det.
% Obtain the current offset from the start of the input string
% (the first character in the input has offset 0).
%
:- pred current_offset(src::in, int::out, ps::in, ps::out) is det.
% Return the parser to skip over whitespace from the parser source.
%
:- pred get_skip_whitespace_pred(src::in, skip_whitespace_pred::out(parser))
is det.
%---------------------%
% input_substring(Src, StartOffset, EndOffsetPlusOne, Substring):
% Copy the substring from the input occupying the offsets
% [StartOffset, EndOffsetPlusOne).
%
:- pred input_substring(src::in, int::in, int::in, string::out) is semidet.
%---------------------%
:- type line_numbers.
% Compute a structure from the parser source which can be used to
% convert offsets into line numbers and positions in the file (this
% is useful for error reporting).
%
:- func src_to_line_numbers(src) = line_numbers.
% Convert an offset into a line number and position within the line
% (the first line is number 1; the first character in a line is
% position 1).
%
:- pred offset_to_line_number_and_position(line_numbers::in, int::in,
int::out, int::out) is det.
%---------------------%
% Read the next char.
%
:- pred next_char(src::in, char::out, ps::in, ps::out) is semidet.
% Read the next char but do not record progress information.
% This is more efficient than next_char, but may produce less informative
% error messages in case of a parse error.
%
:- pred next_char_no_progress(src::in, char::out, ps::in, ps::out) is semidet.
%---------------------%
% Match a char from the given string.
%
:- pred char_in_class(string::in, src::in, char::out,
ps::in, ps::out) is semidet.
%---------------------%
% Match a string exactly and any subsequent whitespace.
%
:- pred punct(string::in, src::in, unit::out, ps::in, ps::out) is semidet.
% keyword(IdChars, Keyword, Src, _, !PS) matches Keyword exactly (i.e., it
% must not be followed by any character in IdChars) and any subsequent
% whitespace.
%
:- pred keyword(string::in, string::in, src::in, unit::out,
ps::in, ps::out) is semidet.
% ikeyword(IdChars, Keyword, Src, _, !PS)
% Case-insensitive version of keyword/6.
% Only uppercase and lowercase letters in the ASCII range (A-Z, a-z)
% are compared case insensitively.
%
:- pred ikeyword(string::in, string::in, src::in, unit::out,
ps::in, ps::out) is semidet.
% identifier(InitIdChars, IdChars, Src, Identifier, !PS) matches the next
% identifier (result in Identifier) comprising a char from InitIdChars
% followed by zero or more chars from IdChars. Any subsequent whitespace
% is consumed.
%
:- pred identifier(string::in, string::in, src::in, string::out,
ps::in, ps::out) is semidet.
% Consume any whitespace (defined as a sequence of characters
% satisfying char.is_whitespace).
%
:- pred whitespace(src::in, unit::out,
ps::in, ps::out) is semidet.
%---------------------%
% Consume any input up to, and including, the next newline character
% marking the end of the current line.
%
:- pred skip_to_eol(src::in, unit::out,
ps::in, ps::out) is semidet.
% Succeed if we have reached the end of the input.
%
:- pred eof(src::in, unit::out, ps::in, ps::out) is semidet.
%---------------------%
% Parse a float literal matching [-][0-9]+[.][0-9]+([Ee][-+][0-9]+)?
% followed by any whitespace. The float_literal_as_string version simply
% returns the matched string. The float_literal version uses
% string.to_float to convert the output of float_literal_as_string; this
% may return an approximate answer since not all floating point numbers
% can be perfectly represented as Mercury floats.
%
:- pred float_literal_as_string(src::in, string::out,
ps::in, ps::out) is semidet.
:- pred float_literal(src::in, float::out,
ps::in, ps::out) is semidet.
% Parse an int literal matching [-][0-9]+, not followed by [.][0-9]+,
% followed by any whitespace. The int_literal_as_string version simply
% returns the matched string. The int_literal version uses string.to_int
% to convert the output of int_literal_as_string; this may fail if the
% number in question cannot be represented as a Mercury int.
%
:- pred int_literal_as_string(src::in, string::out,
ps::in, ps::out) is semidet.
:- pred int_literal(src::in, int::out,
ps::in, ps::out) is semidet.
% Parse an string literal. The string argument is the quote character.
% A backslash (\) character in the string makes the next character
% literal (e.g., for embedding quotes). These 'escaped' characters
% are included as-is in the result, along with the preceding backslash.
% Any following whitespace is also consumed.
%
:- pred string_literal(char::in, src::in, string::out,
ps::in, ps::out) is semidet.
%---------------------------------------------------------------------------%
% Each basic parser combinators has a version that has a separate state
% argument is threaded through the computation, for parsers that e.g.
% incrementally construct a symbol table.
% optional(P, Src, Result, !PS) returns Result = yes(X) if P(Src, X, !PS),
% or Result = no if P does not succeed.
%
:- pred optional(parser(T)::in(parser), src::in, maybe(T)::out,
ps::in, ps::out) is semidet.
% optional(P, Src, Result, !S, !PS) returns Result = yes(X)
% if P(Src, X, !S, !PS), or Result = no if P does not succeed.
%
:- pred optional(parser_with_state(T, S)::in(parser_with_state), src::in,
maybe(T)::out, S::in, S::out, ps::in, ps::out) is semidet.
%---------------------%
% zero_or_more(P, Src, Xs, !PS) returns the list of results Xs obtained
% by repeatedly applying P until P fails. The nth item in Xs is
% the result from the nth application of P.
%
:- pred zero_or_more(parser(T)::in(parser), src::in, list(T)::out,
ps::in, ps::out) is semidet.
% zero_or_more(P, Src, Xs, !S, !PS) returns the list of results Xs obtained
% by repeatedly applying P until P fails. The nth item in Xs is
% the result from the nth application of P.
%
:- pred zero_or_more(parser_with_state(T, S)::in(parser_with_state), src::in,
list(T)::out, S::in, S::out, ps::in, ps::out) is semidet.
%---------------------%
% one_or_more(P, Src, Xs, !PS) returns the list of results Xs obtained
% by repeatedly applying P until P fails. The nth item in Xs is
% the result from the nth application of P. P must succeed at least once.
%
:- pred one_or_more(parser(T)::in(parser), src::in, list(T)::out,
ps::in, ps::out) is semidet.
% one_or_more(P, Src, Xs, !S, !PS) returns the list of results Xs obtained
% by repeatedly applying P until P fails. The nth item in Xs is
% the result from the nth application of P. P must succeed at least once.
%
:- pred one_or_more(parser_with_state(T, S)::in(parser_with_state), src::in,
list(T)::out, S::in, S::out, ps::in, ps::out) is semidet.
%---------------------%
% brackets(L, R, P, Src, X, !PS) is equivalent to
% punct(L, Src, _, !PS), P(Src, X, !PS), punct(R, Src, _, !PS).
%
:- pred brackets(string::in, string::in, parser(T)::in(parser), src::in,
T::out, ps::in, ps::out) is semidet.
% brackets(L, R, P, Src, X, !S, !PS) is equivalent to
% punct(L, Src, _, !PS), P(Src, X, !S, !PS), punct(R, Src, _, !PS).
%
:- pred brackets(string::in, string::in,
parser_with_state(T, S)::in(parser_with_state), src::in,
T::out, S::in, S::out, ps::in, ps::out) is semidet.
%---------------------%
% separated_list(Separator, P, Src, Xs, !PS) is like
% zero_or_more(P, Src, Xs, !PS) except that successive applications of
% P must be separated by punct(Separator, Src, _, !PS).
%
:- pred separated_list(string::in, parser(T)::in(parser), src::in,
list(T)::out, ps::in, ps::out) is semidet.
% separated_list(Separator, P, Src, Xs, !S, !PS) is like
% zero_or_more(P, Src, Xs, !S, !PS) except that successive applications of
% P must be separated by punct(Separator, Src, _, !PS).
%
:- pred separated_list(string::in,
parser_with_state(T, S)::in(parser_with_state),
src::in, list(T)::out, S::in, S::out, ps::in, ps::out) is semidet.
%---------------------%
% comma_separated_list(P, Src, Xs) is the same as
% separated_list(",", P, Src, Xs).
%
:- pred comma_separated_list(parser(T)::in(parser), src::in, list(T)::out,
ps::in, ps::out) is semidet.
% comma_separated_list(P, Src, Xs, !S, !PS) is the same as
% separated_list(",", P, Src, Xs, !S, !PS).
%
:- pred comma_separated_list(parser_with_state(T, S)::in(parser_with_state),
src::in, list(T)::out, S::in, S::out, ps::in, ps::out) is semidet.
%---------------------------------------------------------------------------%
% Declaratively this predicate is equivalent to false. Operationally,
% it will record an error message that will be returned by parse/4
% if no further progress is made and then fail.
%
:- pred fail_with_message(string::in, src::in, T::out, ps::in, ps::out)
is semidet.
% As above, but use the given offset for the context of the message.
%
:- pred fail_with_message(string::in, int::in, src::in, T::out,
ps::in, ps::out) is semidet.
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
:- implementation.
:- import_module array.
:- import_module int.
:- import_module mutvar.
:- import_module string.
% The parser "state" is just the offset into the input string.
%
:- type ps == int.
:- type src
---> src(
input_length :: int,
input_string :: string,
skip_ws_pred :: skip_whitespace_pred,
furthest_offset :: mutvar(int),
% This mutable records the progress of the parser
% through the input string.
last_fail_message :: mutvar(fail_message_info)
% This mutable is used to record messages passed to
% fail_with_message and their context.
).
:- type fail_message_info
---> fail_message_info(int, maybe(string)).
%---------------------------------------------------------------------------%
parse(InputString, SkipWS, Parser, Result) :-
% This is pure, because it will always return the same results for
% the same inputs (the mutable in Src cannot be accessed outside
% of the promise_pure scope below).
promise_pure (
new_src_and_ps(InputString, SkipWS, Src, PS0),
( if Parser(Src, Val, PS0, _) then
Result = ok(Val)
else
impure get_mutvar(Src ^ last_fail_message, Info),
impure get_mutvar(Src ^ furthest_offset, FurthestOffset),
Info = fail_message_info(MessageOffset, LastFailMsg),
( if MessageOffset < FurthestOffset then
Msg = no,
Offset = FurthestOffset
else
Msg = LastFailMsg,
Offset = MessageOffset
),
offset_to_line_number_and_position(src_to_line_numbers(Src),
Offset, Line, Col),
Result0 = error(Msg, Line, Col),
% We make parse/4 cc_multi because declaratively
% parse(Str, SkipWS, Parser, error(MaybeMsg, Line, Col)) is true
% for all MaybeMsg, Line and Col iff
% new_src_and_ps(Str, SkipWS, Src, PS0),
% Parser(Src, _, PS0, _)
% is false, but operationally MaybeMsg, Line and Col are
% restricted to one value each.
cc_multi_equal(Result0, Result)
)
).
parse(InputString, Parser, Result) :-
parse(InputString, whitespace, Parser, Result).
%---------------------------------------------------------------------------%
new_src_and_ps(InputString, SkipWS, Src, PS) :-
promise_pure (
impure new_mutvar(fail_message_info(0, no), ErrorInfoMutVar),
impure new_mutvar(0, FurthestOffsetMutvar),
Src = src(string.length(InputString), InputString, SkipWS,
FurthestOffsetMutvar, ErrorInfoMutVar),
PS = 0
).
new_src_and_ps(InputString, Src, PS) :-
new_src_and_ps(InputString, whitespace, Src, PS).
%---------------------------------------------------------------------------%
input_string(Src, Src ^ input_string, Src ^ input_length).
current_offset(_Src, Offset, !PS) :-
Offset = !.PS.
get_skip_whitespace_pred(Src, SkipWS) :-
SkipWS0 = Src ^ skip_ws_pred,
unsafe_skip_ws_pred_cast(SkipWS0, SkipWS).
:- pred unsafe_skip_ws_pred_cast(skip_whitespace_pred::in,
skip_whitespace_pred::out(parser)) is det.
:- pragma foreign_proc("C",
unsafe_skip_ws_pred_cast(SkipWS0::in, SkipWS::out(parser)),
[will_not_call_mercury, promise_pure, thread_safe],
"
SkipWS = SkipWS0;
").
:- pragma foreign_proc("C#",
unsafe_skip_ws_pred_cast(SkipWS0::in, SkipWS::out(parser)),
[will_not_call_mercury, promise_pure, thread_safe],
"
SkipWS = SkipWS0;
").
:- pragma foreign_proc("Java",
unsafe_skip_ws_pred_cast(SkipWS0::in, SkipWS::out(parser)),
[will_not_call_mercury, promise_pure, thread_safe],
"
SkipWS = SkipWS0;
").
:- pragma foreign_proc("Erlang",
unsafe_skip_ws_pred_cast(SkipWS0::in, SkipWS::out(parser)),
[will_not_call_mercury, promise_pure, thread_safe],
"
SkipWS = SkipWS0
").
%---------------------------------------------------------------------------%
input_substring(Src, Start, EndPlusOne, Substring) :-
promise_pure (
EndPlusOne =< Src ^ input_length,
Substring = unsafe_between(Src ^ input_string, Start, EndPlusOne),
impure record_progress(Src, Start)
).
%---------------------------------------------------------------------------%
% For a source string Src, the following array contains the positions
% of all the newline characters in the string Src ++ "\n".
%
:- type line_numbers == array(int).
%---------------------%
src_to_line_numbers(Src) = LineNos :-
Str = Src ^ input_string,
src_to_line_numbers_loop(Str, 0, [], RevLineNosList),
LineNos = array.from_reverse_list(RevLineNosList).
:- pred src_to_line_numbers_loop(string::in, int::in,
list(int)::in, list(int)::out) is det.
src_to_line_numbers_loop(Str, Pos0, !RevLineNosList) :-
( if string.unsafe_index_next(Str, Pos0, Pos, Char) then
( if Char = '\n' then
!:RevLineNosList = [Pos0 | !.RevLineNosList]
else
true
),
src_to_line_numbers_loop(Str, Pos, !RevLineNosList)
else
!:RevLineNosList = [Pos0 | !.RevLineNosList]
).
%---------------------%
offset_to_line_number_and_position(LineNos, Offset, LineNo, Pos) :-
Lo = 0,
Hi = array.size(LineNos) - 1,
offset_to_line_number_and_position_2(LineNos, Lo, Hi, Offset, LineNo, Pos).
:- pred offset_to_line_number_and_position_2(line_numbers::in, int::in,
int::in, int::in, int::out, int::out) is det.
% Perform a binary search looking for the offset of the line number
% of the line containing Offset.
%
offset_to_line_number_and_position_2(LineNos, Lo, Hi, Offset, LineNo, Pos) :-
( if Lo < Hi then
Mid = (Lo + Hi) / 2,
MidOffset = LineNos ^ elem(Mid),
( if MidOffset < Offset then
offset_to_line_number_and_position_2(LineNos, Mid + 1, Hi, Offset,
LineNo, Pos)
else
offset_to_line_number_and_position_2(LineNos, Lo, Mid, Offset,
LineNo, Pos)
)
else
% Lo is the index of the newline that terminates the line that Offset
% is on. We compute LineBegin as the offset of the first character
% of the line Offset is on.
( if Lo = 0 then
LineBegin = 0
else
LineBegin = LineNos ^ elem(Lo - 1) + 1
),
LineNo = 1 + Lo,
Pos = 1 + Offset - LineBegin
).
%---------------------------------------------------------------------------%
next_char(Src, Char, !PS) :-
promise_pure (
current_offset(Src, Offset, !.PS, _),
Offset < Src ^ input_length,
string.unsafe_index_next(Src ^ input_string, Offset, NextOffset, Char),
impure record_progress(Src, Offset),
!:PS = NextOffset
).
next_char_no_progress(Src, Char, !PS) :-
current_offset(Src, Offset, !.PS, _),
Offset < Src ^ input_length,
string.unsafe_index_next(Src ^ input_string, Offset, NextOffset, Char),
!:PS = NextOffset.
%---------------------------------------------------------------------------%
char_in_class(CharClass, Src, Char, !PS) :-
next_char(Src, Char, !PS),
string.contains_char(CharClass, Char).
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
punct(Punct, Src, unit, !PS) :-
match_string(Punct, Src, !PS),
skip_whitespace(Src, !PS).
%---------------------------------------------------------------------------%
keyword(IdChars, Keyword, Src, unit, !PS) :-
match_string(Keyword, Src, !PS),
not char_in_class(IdChars, Src, _, !.PS, _),
skip_whitespace(Src, !PS).
ikeyword(IdChars, Keyword, Src, unit, !PS) :-
imatch_string(Keyword, Src, !PS),
not char_in_class(IdChars, Src, _, !.PS, _),
skip_whitespace(Src, !PS).
%---------------------------------------------------------------------------%
identifier(InitIdChars, IdChars, Src, Identifier, !PS) :-
current_offset(Src, Start, !PS),
char_in_class(InitIdChars, Src, _, !PS),
identifier_2(IdChars, Src, _, !PS),
current_offset(Src, EndPlusOne, !PS),
skip_whitespace(Src, !PS),
input_substring(Src, Start, EndPlusOne, Identifier).
:- pred identifier_2(string::in, src::in, unit::out,
ps::in, ps::out) is semidet.
identifier_2(IdChars, Src, unit, !PS) :-
( if char_in_class(IdChars, Src, _, !PS) then
identifier_2(IdChars, Src, _, !PS)
else
true
).
%---------------------------------------------------------------------------%
whitespace(Src, unit, !PS) :-
( if
next_char(Src, C, !PS),
char.is_whitespace(C)
then
whitespace(Src, _, !PS)
else
semidet_true
).
%---------------------------------------------------------------------------%
skip_to_eol(Src, unit, !PS) :-
next_char(Src, C, !PS),
( if C = ('\n') then true else skip_to_eol(Src, _, !PS) ).
%---------------------------------------------------------------------------%
eof(Src, unit, !PS) :-
current_offset(Src, Offset, !PS),
Offset = Src ^ input_length.
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
float_literal_as_string(Src, FloatStr, !PS) :-
current_offset(Src, Start, !PS),
( if next_char(Src, ('-'), !PS) then true else true ),
digits(10, Src, _, !PS),
next_char(Src, ('.'), !PS),
digits(10, Src, _, !PS),
( if char_in_class("eE", Src, _, !PS) then
optional_sign(Src, !PS),
digits(10, Src, _, !PS)
else
true
),
current_offset(Src, EndPlusOne, !PS),
skip_whitespace(Src, !PS),
input_substring(Src, Start, EndPlusOne, FloatStr).
:- pred optional_sign(src::in, ps::in, ps::out) is det.
optional_sign(Src, !PS) :-
( if
next_char(Src, Char, !PS),
( Char = ('-')
; Char = ('+')
)
then
true
else
true
).
float_literal(Src, Float, !PS) :-
float_literal_as_string(Src, FloatStr, !PS),
string.to_float(FloatStr, Float).
%---------------------------------------------------------------------------%
int_literal_as_string(Src, IntStr, !PS) :-
current_offset(Src, Start, !PS),
optional(char_in_class("-"), Src, _, !PS),
digits(10, Src, _, !PS),
not (
next_char(Src, ('.'), !PS),
digits(10, Src, _, !.PS, _)
),
current_offset(Src, EndPlusOne, !PS),
skip_whitespace(Src, !PS),
input_substring(Src, Start, EndPlusOne, IntStr).
int_literal(Src, Int, !PS) :-
int_literal_as_string(Src, IntStr, !PS),
string.to_int(IntStr, Int).
%---------------------------------------------------------------------------%
:- pred digits(int::in, src::in, unit::out, ps::in, ps::out) is semidet.
digits(Base, Src, unit, !PS) :-
next_char(Src, C, !PS),
char.is_base_digit(Base, C),
digits_2(Base, Src, _, !PS).
:- pred digits_2(int::in, src::in, unit::out, ps::in, ps::out) is semidet.
digits_2(Base, Src, unit, !PS) :-
( if
next_char(Src, C, !PS),
char.is_base_digit(Base, C)
then
digits_2(Base, Src, _, !PS)
else
true
).
%---------------------------------------------------------------------------%
string_literal(QuoteChar, Src, String, !PS) :-
next_char(Src, QuoteChar, !PS),
current_offset(Src, Start, !PS),
string_literal_2(Src, QuoteChar, _, !PS),
current_offset(Src, EndPlusOne, !PS),
string.unsafe_prev_index(Src ^ input_string, EndPlusOne, End, QuoteChar),
skip_whitespace(Src, !PS),
input_substring(Src, Start, End, String).
:- pred string_literal_2(src::in, char::in, unit::out,
ps::in, ps::out) is semidet.
string_literal_2(Src, QuoteChar, unit, !PS) :-
next_char(Src, C, !PS),
( if C = QuoteChar then
true
else if C = ('\\') then
next_char(Src, _, !PS),
string_literal_2(Src, QuoteChar, _, !PS)
else
string_literal_2(Src, QuoteChar, _, !PS)
).
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
optional(P, Src, Result, !PS) :-
( if P(Src, X, !PS) then
Result = yes(X)
else
Result = no,
semidet_true
).
%---------------------------------------------------------------------------%
optional(P, Src, Result, !S, !PS) :-
( if P(Src, X, !S, !PS) then
Result = yes(X)
else
Result = no,
semidet_true
).
%---------------------------------------------------------------------------%
zero_or_more(P, Src, Result, !PS) :-
zero_or_more_rev_acc(P, Src, [], RevResult, !PS),
Result = list.reverse(RevResult).
% We use an auxiliary predicate to make this tail recursive.
% This can be an issue with long sequences.
%
:- pred zero_or_more_rev_acc(parser(T)::in(parser), src::in,
list(T)::in, list(T)::out, ps::in, ps::out) is semidet.
zero_or_more_rev_acc(P, Src, !RevResult, !PS) :-
( if P(Src, X, !PS) then
list.cons(X, !RevResult),
zero_or_more_rev_acc(P, Src, !RevResult, !PS)
else
semidet_true
).
zero_or_more(P, Src, Result, !S, !PS) :-
zero_or_more_rev_acc(P, Src, [], RevResult, !S, !PS),
Result = list.reverse(RevResult).
:- pred zero_or_more_rev_acc(parser_with_state(T, S)::in(parser_with_state),
src::in, list(T)::in, list(T)::out, S::in, S::out, ps::in, ps::out)
is semidet.
zero_or_more_rev_acc(P, Src, !RevResult, !S, !PS) :-
( if P(Src, X, !S, !PS) then
list.cons(X, !RevResult),
zero_or_more_rev_acc(P, Src, !RevResult, !S, !PS)
else
semidet_true
).
%---------------------------------------------------------------------------%
one_or_more(P, Src, Result, !PS) :-
P(Src, X, !PS),
zero_or_more(P, Src, Xs, !PS),
Result = [X | Xs].
one_or_more(P, Src, Result, !S, !PS) :-
P(Src, X, !S, !PS),
zero_or_more(P, Src, Xs, !S, !PS),
Result = [X | Xs].
%---------------------------------------------------------------------------%
brackets(L, R, P, Src, Result, !PS) :-
punct(L, Src, _, !PS),
P(Src, Result, !PS),
punct(R, Src, _, !PS).
brackets(L, R, P, Src, Result, !S, !PS) :-
punct(L, Src, _, !PS),
P(Src, Result, !S, !PS),
punct(R, Src, _, !PS).
%---------------------------------------------------------------------------%
separated_list(Separator, P, Src, Result, !PS) :-
CommaP =
( pred(CommaPSrc::in, CommaPX::out, !.PS::in, !:PS::out) is semidet :-
punct(Separator, CommaPSrc, _, !PS),
P(CommaPSrc, CommaPX, !PS)
),
( if P(Src, X, !PS) then
zero_or_more(CommaP, Src, Xs, !PS),
Result = [X | Xs]
else
Result = []
).
separated_list(Separator, P, Src, Result, !S, !PS) :-
CommaP =
( pred(CommaPSrc::in, CommaPX::out, !.S::in, !:S::out,
!.PS::in, !:PS::out) is semidet :-
punct(Separator, CommaPSrc, _, !PS),
P(CommaPSrc, CommaPX, !S, !PS)
),
( if P(Src, X, !S, !PS) then
zero_or_more(CommaP, Src, Xs, !S, !PS),
Result = [X | Xs]
else
Result = []
).
%---------------------------------------------------------------------------%
comma_separated_list(P, Src, Result, !PS) :-
separated_list(",", P, Src, Result, !PS).
comma_separated_list(P, Src, Result, !S, !PS) :-
separated_list(",", P, Src, Result, !S, !PS).
%---------------------------------------------------------------------------%
fail_with_message(Msg, Src, Val, !PS) :-
% This is pure, because the mutable can only be accessed via
% the parse/4 predicate which will always return the same results
% for the same inputs.
promise_pure (
impure set_mutvar(Src ^ last_fail_message,
fail_message_info(!.PS, yes(Msg))),
impure set_mutvar(Src ^ furthest_offset, !.PS),
( if semidet_fail then
dynamic_cast(0, Val) % unreachable
else
fail
)
).
fail_with_message(Msg, Offset, Src, Val, _, PS) :-
fail_with_message(Msg, Src, Val, Offset, PS).
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
:- pred match_string(string::in, src::in, ps::in, ps::out) is semidet.
match_string(MatchStr, Src, PS, PS + N) :-
promise_pure (
impure record_progress(Src, PS),
N = string.length(MatchStr),
PS + N =< Src ^ input_length,
match_string_2(N, 0, MatchStr, PS, Src ^ input_string)
).
:- pred match_string_2(int::in, int::in, string::in, int::in, string::in)
is semidet.
match_string_2(N, I, MatchStr, Offset, Str) :-
( if I < N then
string.unsafe_index_code_unit(MatchStr, I, CodeUnit),
string.unsafe_index_code_unit(Str, Offset + I, CodeUnit),
match_string_2(N, I + 1, MatchStr, Offset, Str)
else
true
).
:- pred imatch_string(string::in, src::in, ps::in, ps::out) is semidet.
imatch_string(MatchStr, Src, PS, PS + N) :-
promise_pure (
impure record_progress(Src, PS),
N = string.length(MatchStr),
PS + N =< Src ^ input_length,
imatch_string_2(N, 0, MatchStr, PS, Src ^ input_string)
).
:- pred imatch_string_2(int::in, int::in, string::in, int::in, string::in)
is semidet.
imatch_string_2(N, I, MatchStr, Offset, Str) :-
( if I < N then
% We can compare by code units because char.to_upper only converts
% letters in the ASCII range, and ASCII characters are always encoded
% in a single code unit.
string.unsafe_index_code_unit(MatchStr, I, CodeUnit1),
string.unsafe_index_code_unit(Str, Offset + I, CodeUnit2),
char.det_from_int(CodeUnit1, Chr1),
char.det_from_int(CodeUnit2, Chr2),
char.to_upper(Chr1) = char.to_upper(Chr2) : char,
imatch_string_2(N, I + 1, MatchStr, Offset, Str)
else
true
).
%---------------------------------------------------------------------------%
:- pred skip_whitespace(src::in, ps::in, ps::out) is semidet.
skip_whitespace(Src, PS0, PS) :-
get_skip_whitespace_pred(Src, SkipWS),
SkipWS(Src, _, PS0, PS).
%---------------------------------------------------------------------------%
% Update the furthest_offset field if any progress has been made.
%
:- impure pred record_progress(src::in, ps::in) is det.
record_progress(Src, PS) :-
MutVar = Src ^ furthest_offset,
impure get_mutvar(MutVar, OS0),
( if PS > OS0 then
impure set_mutvar(MutVar, PS)
else
true
).
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%