Files
mercury/library/lexer.m
2015-12-01 05:35:29 +11:00

2661 lines
91 KiB
Mathematica

%---------------------------------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%---------------------------------------------------------------------------%
% Copyright (C) 1993-2000, 2003-2008, 2011-2012 The University of Melbourne.
% This file may only be copied under the terms of the GNU Library General
% Public License - see the file COPYING.LIB in the Mercury distribution.
%---------------------------------------------------------------------------%
%
% File: lexer.m.
% Main author: fjh.
% Stability: high.
%
% Lexical analysis. This module defines the representation of tokens
% and exports predicates for reading in tokens from an input stream.
%
% See ISO Prolog 6.4. Also see the comments at the top of parser.m.
%
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
:- module lexer.
:- interface.
:- import_module char.
:- import_module io.
:- import_module integer.
%---------------------------------------------------------------------------%
:- type token
---> name(string)
; variable(string)
; integer(int)
; big_integer(integer_base, integer)
% An integer that is too big for `int'.
; float(float)
; string(string) % "...."
; implementation_defined(string) % $name
; open % '('
; open_ct % '(' without any preceding whitespace
; close % ')'
; open_list % '['
; close_list % ']'
; open_curly % '{'
; close_curly % '}'
; ht_sep % '|'
; comma % ','
; end % '.'
; junk(char) % junk character in the input stream
; error(string) % some other invalid token
; io_error(io.error) % error reading from the input stream
; eof % end-of-file
; integer_dot(int).
% The lexer will never return integer_dot. This token is used
% internally in the lexer, to keep the grammar LL(1) so that
% only one character of pushback is needed. But the lexer will
% convert integer_dot/1 tokens to integer/1 tokens before
% returning them.
:- type integer_base
---> base_2
; base_8
; base_10
; base_16.
% For every token, we record the line number of the line on
% which the token occurred.
%
:- type token_context == int. % line number
% This "fat list" representation is more efficient than a list of pairs.
%
:- type token_list
---> token_cons(token, token_context, token_list)
; token_nil.
% Read a list of tokens from the current input stream.
% Keep reading until we encounter either an `end' token
% (i.e. a full stop followed by whitespace) or the end-of-file.
%
:- pred get_token_list(token_list::out, io::di, io::uo) is det.
% The type `offset' represents a (zero-based) offset into a string.
%
:- type offset == int.
% string_get_token_list_max(String, MaxOffset, Tokens,
% InitialPos, FinalPos):
%
% Scan a list of tokens from a string, starting at the current offset
% specified by InitialPos. Keep scanning until either we encounter either
% an `end' token (i.e. a full stop followed by whitespace) or until we
% reach MaxOffset. (MaxOffset must be =< the length of the string.)
% Return the tokens scanned in Tokens, and return the position one
% character past the end of the last token in FinalPos.
%
:- pred string_get_token_list_max(string::in, offset::in, token_list::out,
posn::in, posn::out) is det.
% string_get_token_list(String, Tokens, InitialPos, FinalPos):
%
% calls string_get_token_list_max above with MaxPos = length of String.
%
:- pred string_get_token_list(string::in, token_list::out,
posn::in, posn::out) is det.
% Convert a token to a human-readable string describing the token.
%
:- pred token_to_string(token::in, string::out) is det.
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
:- implementation.
%---------------------------------------------------------------------------%
:- interface.
% graphic_token_char(Char): true iff `Char'
% is "graphic token char" (ISO Prolog 6.4.2).
% This is exported for use by term_io.quote_atom.
%
:- pred graphic_token_char(char::in) is semidet.
%---------------------------------------------------------------------------%
:- implementation.
:- import_module int.
:- import_module list.
:- import_module require.
:- import_module string.
% Note that there are two implementations of most predicates here:
% one that deals with strings, and another that deals with io.states.
% We can't write the io.state version in terms of the string version
% because we don't know how much string to slurp up until after we have
% lexically analysed it. Some interactive applications require the old
% Prolog behaviour of stopping after an end token (i.e. `.' plus whitespace)
% rather than reading in whole lines. Conversely, we can't write the string
% version using the io.state version, since that would require either
% cheating with the io.state or ruining the string interface.
%
% An alternative would be to write both versions in terms of a generic
% "char_stream" typeclass, with instances for io.states and for strings.
% However, for this to be acceptably efficient it would require the compiler
% to specialize the code, which currently (13 May 98) it is not capable
% of doing.
%
% In fact, the string version is still not as efficient as I would like.
% The compiler ought to (but currently doesn't) unfold all the instances
% of the `posn' type. We could do this type unfolding by hand, but
% it would be very tedious and it would make the code less readable.
% If and when there is compiler support for this, we should also think about
% moving the `String' and `Len' arguments into the posn (or making a new
% `lexer_state' struct which contains both the posn and the String and Len
% arguments).
%---------------------------------------------------------------------------%
token_to_string(Token, String) :-
(
Token = name(Name),
string.append_list(["token '", Name, "'"], String)
;
Token = variable(Var),
string.append_list(["variable `", Var, "'"], String)
;
Token = integer(Int),
string.int_to_string(Int, IntString),
string.append_list(["integer `", IntString, "'"], String)
;
Token = big_integer(Base, Integer),
(
Base = base_2,
BaseInt = 2,
Prefix = "0b"
;
Base = base_8,
BaseInt = 8,
Prefix = "0o"
;
Base = base_10,
BaseInt = 10,
Prefix = ""
;
Base = base_16,
BaseInt = 16,
Prefix = "0x"
),
IntString = integer.to_base_string(Integer, BaseInt),
string.append_list(["integer `", Prefix, IntString, "'"], String)
;
Token = float(Float),
string.float_to_string(Float, FloatString),
string.append_list(["float `", FloatString, "'"], String)
;
Token = string(TokenString),
string.append_list(["string """, TokenString, """"], String)
;
Token = implementation_defined(Name),
string.append_list(["implementation-defined `$", Name, "'"], String)
;
Token = open,
String = "token ` ('"
;
Token = open_ct,
String = "token `('"
;
Token = close,
String = "token `)'"
;
Token = open_list,
String = "token `['"
;
Token = close_list,
String = "token `]'"
;
Token = open_curly,
String = "token `{'"
;
Token = close_curly,
String = "token `}'"
;
Token = ht_sep,
String = "token `|'"
;
Token = comma,
String = "token `,'"
;
Token = end,
String = "token `. '"
;
Token = eof,
String = "end-of-file"
;
Token = junk(JunkChar),
char.to_int(JunkChar, Code),
string.int_to_base_string(Code, 16, Hex),
string.append_list(["illegal character <<0x", Hex, ">>"], String)
;
Token = io_error(IO_Error),
io.error_message(IO_Error, IO_ErrorMessage),
string.append("I/O error: ", IO_ErrorMessage, String)
;
Token = error(Message),
string.append_list(["illegal token (", Message, ")"], String)
;
Token = integer_dot(Int),
string.int_to_string(Int, IntString),
string.append_list(["integer `", IntString, "'."], String)
).
get_token_list(Tokens, !IO) :-
% We build the tokens up as lists of characters in reverse order.
% When we get to the end of each token, we call `rev_char_list_to_string/2'
% to convert that representation into a string.
%
% Comments of the form
% foo --> bar . baz
% mean that we are parsing a `foo', and we've already scanned past
% the `bar', so now we need to match with a `baz'.
io.input_stream(Stream, !IO),
get_token(Stream, Token, Context, !IO),
get_token_list_2(Stream, Token, Context, Tokens, !IO).
:- pred get_token_list_2(io.input_stream::in, token::in, token_context::in,
token_list::out, io::di, io::uo) is det.
get_token_list_2(Stream, Token0, Context0, Tokens, !IO) :-
(
Token0 = eof,
Tokens = token_nil
;
( Token0 = end
; Token0 = error(_)
; Token0 = io_error(_)
),
Tokens = token_cons(Token0, Context0, token_nil)
;
Token0 = integer_dot(Int),
get_context(Stream, Context1, !IO),
get_dot(Stream, Token1, !IO),
get_token_list_2(Stream, Token1, Context1, Tokens1, !IO),
Tokens = token_cons(integer(Int), Context0, Tokens1)
;
( Token0 = float(_)
; Token0 = string(_)
; Token0 = variable(_)
; Token0 = integer(_)
; Token0 = big_integer(_, _)
; Token0 = implementation_defined(_)
; Token0 = junk(_)
; Token0 = name(_)
; Token0 = open
; Token0 = open_ct
; Token0 = close
; Token0 = open_list
; Token0 = close_list
; Token0 = open_curly
; Token0 = close_curly
; Token0 = comma
; Token0 = ht_sep
),
get_token(Stream, Token1, Context1, !IO),
get_token_list_2(Stream, Token1, Context1, Tokens1, !IO),
Tokens = token_cons(Token0, Context0, Tokens1)
).
string_get_token_list(String, Tokens, !Posn) :-
string.length(String, Len),
string_get_token_list_max(String, Len, Tokens, !Posn).
string_get_token_list_max(String, Len, Tokens, !Posn) :-
string_get_token(String, Len, Token, Context, !Posn),
(
Token = eof,
Tokens = token_nil
;
( Token = end
; Token = error(_)
; Token = io_error(_)
),
Tokens = token_cons(Token, Context, token_nil)
;
( Token = float(_)
; Token = string(_)
; Token = variable(_)
; Token = integer(_)
; Token = big_integer(_, _)
; Token = integer_dot(_)
; Token = implementation_defined(_)
; Token = junk(_)
; Token = name(_)
; Token = open
; Token = open_ct
; Token = close
; Token = open_list
; Token = close_list
; Token = open_curly
; Token = close_curly
; Token = comma
; Token = ht_sep
),
Tokens = token_cons(Token, Context, Tokens1),
string_get_token_list_max(String, Len, Tokens1, !Posn)
).
%---------------------------------------------------------------------------%
%
% Some low-level routines.
%
:- pred get_context(io.input_stream::in, token_context::out, io::di, io::uo)
is det.
get_context(Stream, Context, !IO) :-
io.get_line_number(Stream, Context, !IO).
:- type string_token_context == token_context.
:- pred string_get_context(posn::in, string_token_context::out,
posn::in, posn::out) is det.
string_get_context(StartPosn, Context, !Posn) :-
StartPosn = posn(StartLineNum, _, _),
Context = StartLineNum.
% In future, we might want to modify this code to read something like this:
%
% posn_to_line_and_column(StartPosn, StartLineNum, StartColumn),
% posn_to_line_and_column(!.Posn, EndLineNum, EndColumn),
% Context = detailed(StartLine, StartColumn, EndLine, EndColumn).
:- pred string_read_char(string::in, int::in, char::out,
posn::in, posn::out) is semidet.
:- pragma inline(string_read_char/5).
string_read_char(String, Len, Char, Posn0, Posn) :-
Posn0 = posn(LineNum0, LineOffset0, Offset0),
Offset0 < Len,
string.unsafe_index_next(String, Offset0, Offset, Char),
( if Char = '\n' then
LineNum = LineNum0 + 1,
Posn = posn(LineNum, Offset, Offset)
else
Posn = posn(LineNum0, LineOffset0, Offset)
).
:- pred string_ungetchar(string::in, posn::in, posn::out) is det.
string_ungetchar(String, Posn0, Posn) :-
Posn0 = posn(LineNum0, LineOffset0, Offset0),
( if string.unsafe_prev_index(String, Offset0, Offset, Char) then
( if Char = '\n' then
LineNum = LineNum0 - 1,
Posn = posn(LineNum, Offset, Offset)
else
Posn = posn(LineNum0, LineOffset0, Offset)
)
else
Posn = Posn0
).
:- pred grab_string(string::in, posn::in, string::out,
posn::in, posn::out) is det.
grab_string(String, Posn0, SubString, Posn, Posn) :-
Posn0 = posn(_, _, Offset0),
Posn = posn(_, _, Offset),
string.unsafe_between(String, Offset0, Offset, SubString).
:- pred string_set_line_number(int::in, posn::in, posn::out) is det.
string_set_line_number(LineNumber, Posn0, Posn) :-
Posn0 = posn(_, _, Offset),
Posn = posn(LineNumber, Offset, Offset).
%---------------------------------------------------------------------------%
:- type get_token_action
---> action_whitespace
; action_alpha_lower
; action_alpha_upper_uscore
; action_zero
; action_nonzero_digit
; action_special_token
; action_dot
; action_percent
; action_quote
; action_slash
; action_hash
; action_backquote
; action_dollar
; action_graphic_token.
:- type scanned_past_whitespace
---> scanned_past_whitespace
; not_scanned_past_whitespace.
:- pred get_token(io.input_stream::in, token::out, token_context::out,
io::di, io::uo) is det.
get_token(Stream, Token, Context, !IO) :-
get_token_2(Stream, not_scanned_past_whitespace, Token, Context, !IO).
% If passed `scanned_past_whitespace' then we have already scanned past
% some whitespace, so '(' gets scanned as `open' rather than `open_ct'.
%
% `get_token_2' must be inlined into `execute_get_token_action' so that
% the recursive call can be compiled to a loop on backends that cannot
% eliminate tail calls in general.
%
:- pragma inline(get_token_2/6).
:- pred get_token_2(io.input_stream::in, scanned_past_whitespace::in,
token::out, token_context::out, io::di, io::uo) is det.
get_token_2(Stream, ScannedPastWhiteSpace, Token, Context, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
get_context(Stream, Context, !IO),
Token = io_error(Error)
;
Result = eof,
get_context(Stream, Context, !IO),
Token = eof
;
Result = ok,
( if lookup_token_action(Char, Action) then
execute_get_token_action(Stream, Char, Action,
ScannedPastWhiteSpace, Token, Context, !IO)
else
get_context(Stream, Context, !IO),
Token = junk(Char)
)
).
:- pred string_get_token(string::in, int::in, token::out,
token_context::out, posn::in, posn::out) is det.
string_get_token(String, Len, Token, Context, !Posn) :-
string_get_token_2(String, Len, not_scanned_past_whitespace,
Token, Context, !Posn).
:- pragma inline(string_get_token_2/7). % see get_token_2
:- pred string_get_token_2(string::in, int::in, scanned_past_whitespace::in,
token::out, token_context::out, posn::in, posn::out) is det.
string_get_token_2(String, Len, ScannedPastWhiteSpace, Token, Context, !Posn)
:-
Posn0 = !.Posn,
( if string_read_char(String, Len, Char, !Posn) then
( if lookup_token_action(Char, Action) then
execute_string_get_token_action(String, Len, Posn0, Char, Action,
ScannedPastWhiteSpace, Token, Context, !Posn)
else
string_get_context(Posn0, Context, !Posn),
Token = junk(Char)
)
else
string_get_context(Posn0, Context, !Posn),
Token = eof
).
% Decide on how the given character should be treated. Note that
% performance suffers significantly if this predicate is not inlined.
%
:- pred lookup_token_action(char::in, get_token_action::out) is semidet.
:- pragma inline(lookup_token_action/2).
lookup_token_action(Char, Action) :-
% The body of this predicate should be turned into a single table lookup
% by the compiler.
(
% This list of characters comes from the code of char.is_whitespace.
% Any update here will also require an update there.
( Char = ' '
; Char = '\t'
; Char = '\n'
; Char = '\r'
; Char = '\f'
; Char = '\v'
),
Action = action_whitespace
;
% This list of characters comes from char.is_alnum_or_underscore and
% char.lower_upper.
( Char = 'a' ; Char = 'b' ; Char = 'c' ; Char = 'd'
; Char = 'e' ; Char = 'f' ; Char = 'g' ; Char = 'h'
; Char = 'i' ; Char = 'j' ; Char = 'k' ; Char = 'l'
; Char = 'm' ; Char = 'n' ; Char = 'o' ; Char = 'p'
; Char = 'q' ; Char = 'r' ; Char = 's' ; Char = 't'
; Char = 'u' ; Char = 'v' ; Char = 'w' ; Char = 'x'
; Char = 'y' ; Char = 'z'
),
Action = action_alpha_lower
;
% This list of characters comes from char.is_alnum_or_underscore and
% char.lower_upper.
( Char = '_'
; Char = 'A' ; Char = 'B' ; Char = 'C' ; Char = 'D'
; Char = 'E' ; Char = 'F' ; Char = 'G' ; Char = 'H'
; Char = 'I' ; Char = 'J' ; Char = 'K' ; Char = 'L'
; Char = 'M' ; Char = 'N' ; Char = 'O' ; Char = 'P'
; Char = 'Q' ; Char = 'R' ; Char = 'S' ; Char = 'T'
; Char = 'U' ; Char = 'V' ; Char = 'W' ; Char = 'X'
; Char = 'Y' ; Char = 'Z'
),
Action = action_alpha_upper_uscore
;
Char = '0',
Action = action_zero
;
% This list of characters comes from char.is_alnum_or_underscore and
% char.is_digit.
( Char = '1' ; Char = '2' ; Char = '3' ; Char = '4'
; Char = '5' ; Char = '6' ; Char = '7' ; Char = '8'
; Char = '9'
),
Action = action_nonzero_digit
;
% These are the characters for which special_token succeeds.
( Char = ('(')
; Char = (')')
; Char = ('[')
; Char = (']')
; Char = ('{')
; Char = ('}')
; Char = ('|')
; Char = (',')
; Char = (';')
),
Action = action_special_token
;
Char = ('.'),
Action = action_dot
;
Char = ('%'),
Action = action_percent
;
( Char = '"'
; Char = ''''
),
Action = action_quote
;
Char = ('/'),
Action = action_slash
;
Char = ('#'),
Action = action_hash
;
Char = ('`'),
Action = action_backquote
;
Char = ('$'),
Action = action_dollar
;
% These are the characters for which graphic_token_char succeeds.
% The ones that are commented out have their own actions.
( Char = ('!')
% ; Char = ('#') handled as action_hash
% ; Char = ('$') handled as action_dollar
; Char = ('&')
; Char = ('*')
; Char = ('+')
; Char = ('-')
% ; Char = ('.') handled as action_dot
% ; Char = ('/') handled as action_slash
; Char = (':')
; Char = ('<')
; Char = ('=')
; Char = ('>')
; Char = ('?')
; Char = ('@')
; Char = ('^')
; Char = ('~')
; Char = ('\\')
),
Action = action_graphic_token
).
%---------------------------------------------------------------------------%
% Some descendant predicates of `execute_get_token_action' have the job of
% consuming input that does not correspond to a token, e.g. skip_to_eol
% skips to the end of line and does not produce a token unless it
% encounters the end-of-file or an I/O error.
%
% If a descendant predicate does not produce a token, then it must return
% an indication back to `execute_get_token_action' that it did not, then
% `execute_get_token_action' will call itself recursively to get the next
% token.
%
% An alternative would be for the descendant predicate which has not
% produced a token to call `execute_get_token_action' (indirectly) to get
% the next token. However, `execute_get_token_action' calling itself is
% preferable as the direct recursion can be compiled to a loop by backends
% that cannot otherwise eliminate tail calls.
%
% We would like to define a type to represent token values being produced
% or not:
%
% :- type maybe_token
% ---> yes(token, token_context)
% ; no.
%
% but the heap allocation required to return "yes(Token, Context)" would be
% a significant overhead. Instead, each predicate that might not produce a
% token returns two values, of type `token' and `maybe_have_valid_token'
% (below).
%
% If the predicate does produce a token then it returns the token and the
% context. This corresponds to the "yes(Token, Context)" case.
%
% If the predicate does not produce a token then it returns a dummy token
% value (that must be ignored) and an invalid context, i.e. one for which
% have_token_with_context fails. This corresponds to the "no" case.
%
:- type maybe_have_valid_token
---> maybe_have_valid_token(token_context).
:- pred have_token(io.input_stream::in, maybe_have_valid_token::out,
io::di, io::uo) is det.
have_token(Stream, maybe_have_valid_token(Context), !IO) :-
get_context(Stream, Context, !IO).
:- pred string_have_token(posn::in, maybe_have_valid_token::out,
posn::in, posn::out) is det.
string_have_token(Posn0, maybe_have_valid_token(Context), !Posn) :-
string_get_context(Posn0, Context, !Posn).
:- pred do_not_have_token(token::out, maybe_have_valid_token::out) is det.
do_not_have_token(Token, HaveToken) :-
Token = eof, % dummy
HaveToken = maybe_have_valid_token(-1). % invalid context
:- pred have_token_with_context(maybe_have_valid_token::in, token_context::out)
is semidet.
have_token_with_context(maybe_have_valid_token(Context), Context) :-
Context \= -1.
%---------------------------------------------------------------------------%
% Handle the character we just read the way lookup_token_action decided
% it should be treated. Note that inlining this predicate does not
% significantly affect performance.
%
:- pred execute_get_token_action(io.input_stream::in, char::in,
get_token_action::in, scanned_past_whitespace::in, token::out,
token_context::out, io::di, io::uo) is det.
% :- pragma inline(execute_get_token_action/8).
execute_get_token_action(Stream, Char, Action, ScannedPastWhiteSpace,
Token, Context, !IO) :-
(
Action = action_whitespace,
get_token_2(Stream, scanned_past_whitespace, Token, Context, !IO)
;
Action = action_alpha_upper_uscore,
get_context(Stream, Context, !IO),
get_variable(Stream, [Char], Token, !IO)
;
Action = action_alpha_lower,
get_context(Stream, Context, !IO),
get_name(Stream, [Char], Token, !IO)
;
Action = action_zero,
get_context(Stream, Context, !IO),
get_zero(Stream, Token, !IO)
;
Action = action_nonzero_digit,
get_context(Stream, Context, !IO),
get_number(Stream, [Char], Token, !IO)
;
Action = action_special_token,
get_context(Stream, Context, !IO),
handle_special_token(Char, ScannedPastWhiteSpace, Token)
;
Action = action_dot,
get_context(Stream, Context, !IO),
get_dot(Stream, Token, !IO)
;
Action = action_quote,
get_context(Stream, Context, !IO),
start_quoted_name(Stream, Char, [], Token, !IO)
;
(
Action = action_percent,
skip_to_eol(Stream, Token0, HaveToken0, !IO)
;
Action = action_slash,
get_slash(Stream, Token0, HaveToken0, !IO)
),
( if have_token_with_context(HaveToken0, Context0) then
Token = Token0,
Context = Context0
else
get_token_2(Stream, scanned_past_whitespace, Token, Context, !IO)
)
;
Action = action_hash,
get_source_line_number(Stream, [], Token0, HaveToken0, !IO),
( if have_token_with_context(HaveToken0, Context0) then
Token = Token0,
Context = Context0
else
get_token_2(Stream, not_scanned_past_whitespace, Token, Context,
!IO)
)
;
Action = action_backquote,
get_context(Stream, Context, !IO),
Token = name("`")
;
Action = action_dollar,
get_context(Stream, Context, !IO),
get_implementation_defined_literal_rest(Stream, Token, !IO)
;
Action = action_graphic_token,
get_context(Stream, Context, !IO),
get_graphic(Stream, [Char], Token, !IO)
).
% The string version of execute_get_token_action.
%
:- pred execute_string_get_token_action(string::in, int::in, posn::in,
char::in, get_token_action::in, scanned_past_whitespace::in, token::out,
token_context::out, posn::in, posn::out) is det.
% :- pragma inline(execute_string_get_token_action/10).
execute_string_get_token_action(String, Len, Posn0, Char, Action,
ScannedPastWhiteSpace, Token, Context, !Posn) :-
(
Action = action_whitespace,
string_get_token_2(String, Len, scanned_past_whitespace,
Token, Context, !Posn)
;
Action = action_alpha_upper_uscore,
string_get_variable(String, Len, Posn0, Token, Context, !Posn)
;
Action = action_alpha_lower,
string_get_name(String, Len, Posn0, Token, Context, !Posn)
;
Action = action_zero,
string_get_zero(String, Len, Posn0, Token, Context, !Posn)
;
Action = action_nonzero_digit,
string_get_number(String, Len, Posn0, Token, Context, !Posn)
;
Action = action_special_token,
string_get_context(Posn0, Context, !Posn),
handle_special_token(Char, ScannedPastWhiteSpace, Token)
;
Action = action_dot,
string_get_dot(String, Len, Posn0, Token, Context, !Posn)
;
Action = action_quote,
string_start_quoted_name(String, Len, Char, [], Posn0, Token,
Context, !Posn)
;
(
Action = action_percent,
string_skip_to_eol(String, Len, Token0, HaveToken0, !Posn)
;
Action = action_slash,
string_get_slash(String, Len, Posn0, Token0, HaveToken0, !Posn)
),
( if have_token_with_context(HaveToken0, Context0) then
Token = Token0,
Context = Context0
else
string_get_token_2(String, Len, scanned_past_whitespace,
Token, Context, !Posn)
)
;
Action = action_hash,
string_get_source_line_number(String, Len, !.Posn, Token0, HaveToken0,
!Posn),
( if have_token_with_context(HaveToken0, Context0) then
Token = Token0,
Context = Context0
else
string_get_token_2(String, Len, not_scanned_past_whitespace,
Token, Context, !Posn)
)
;
Action = action_backquote,
string_get_context(Posn0, Context, !Posn),
Token = name("`")
;
Action = action_dollar,
string_get_implementation_defined_literal_rest(String, Len, Posn0,
Token, Context, !Posn)
;
Action = action_graphic_token,
string_get_graphic(String, Len, Posn0, Token, Context, !Posn)
).
%---------------------------------------------------------------------------%
% Decide what to do for a token which consists of a special character.
% The reason for inlining this predicate is that each caller has a
% specific value for ScannedPastWhiteSpace, and thus after inlining,
% the compiler should be able to eliminate the switch on
% ScannedPastWhiteSpace.
%
:- pred handle_special_token(char::in, scanned_past_whitespace::in, token::out)
is det.
:- pragma inline(handle_special_token/3).
handle_special_token(Char, ScannedPastWhiteSpace, Token) :-
( if special_token(Char, SpecialToken) then
(
ScannedPastWhiteSpace = not_scanned_past_whitespace,
( if SpecialToken = open then
Token = open_ct
else
Token = SpecialToken
)
;
ScannedPastWhiteSpace = scanned_past_whitespace,
Token = SpecialToken
)
else
error("lexer.m, handle_special_token: unknown special token")
).
:- pred special_token(char::in, token::out) is semidet.
% The list of characters here is duplicated in lookup_token_action above.
special_token('(', open). % May get converted to open_ct above.
special_token(')', close).
special_token('[', open_list).
special_token(']', close_list).
special_token('{', open_curly).
special_token('}', close_curly).
special_token('|', ht_sep).
special_token(',', comma).
special_token(';', name(";")).
% The list of characters here is duplicated in lookup_token_action above.
graphic_token_char('!').
graphic_token_char('#').
graphic_token_char('$').
graphic_token_char('&').
graphic_token_char('*').
graphic_token_char('+').
graphic_token_char('-').
graphic_token_char('.').
graphic_token_char('/').
graphic_token_char(':').
graphic_token_char('<').
graphic_token_char('=').
graphic_token_char('>').
graphic_token_char('?').
graphic_token_char('@').
graphic_token_char('^').
graphic_token_char('~').
graphic_token_char('\\').
%---------------------------------------------------------------------------%
:- pred get_dot(io.input_stream::in, token::out, io::di, io::uo) is det.
get_dot(Stream, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = end
;
Result = ok,
( if whitespace_after_dot(Char) then
io.putback_char(Stream, Char, !IO),
Token = end
else if graphic_token_char(Char) then
get_graphic(Stream, [Char, '.'], Token, !IO)
else
io.putback_char(Stream, Char, !IO),
Token = name(".")
)
).
:- pred string_get_dot(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_dot(String, Len, Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if whitespace_after_dot(Char) then
string_ungetchar(String, !Posn),
string_get_context(Posn0, Context, !Posn),
Token = end
else if graphic_token_char(Char) then
string_get_graphic(String, Len, Posn0, Token, Context, !Posn)
else
string_ungetchar(String, !Posn),
string_get_context(Posn0, Context, !Posn),
Token = name(".")
)
else
string_get_context(Posn0, Context, !Posn),
Token = end
).
:- pred whitespace_after_dot(char::in) is semidet.
whitespace_after_dot(Char) :-
( char.is_whitespace(Char)
; Char = '%'
).
%---------------------------------------------------------------------------%
%
% Comments.
%
:- pred skip_to_eol(io.input_stream::in, token::out,
maybe_have_valid_token::out, io::di, io::uo) is det.
skip_to_eol(Stream, Token, HaveToken, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
have_token(Stream, HaveToken, !IO),
Token = io_error(Error)
;
Result = eof,
have_token(Stream, HaveToken, !IO),
Token = eof
;
Result = ok,
( if Char = '\n' then
do_not_have_token(Token, HaveToken)
else
skip_to_eol(Stream, Token, HaveToken, !IO)
)
).
:- pred string_skip_to_eol(string::in, int::in, token::out,
maybe_have_valid_token::out, posn::in, posn::out) is det.
string_skip_to_eol(String, Len, Token, HaveToken, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if Char = '\n' then
do_not_have_token(Token, HaveToken)
else
string_skip_to_eol(String, Len, Token, HaveToken, !Posn)
)
else
string_have_token(!.Posn, HaveToken, !Posn),
Token = eof
).
:- pred get_slash(io.input_stream::in, token::out, maybe_have_valid_token::out,
io::di, io::uo) is det.
get_slash(Stream, Token, HaveToken, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
have_token(Stream, HaveToken, !IO),
Token = io_error(Error)
;
Result = eof,
have_token(Stream, HaveToken, !IO),
Token = name("/")
;
Result = ok,
( if Char = ('*') then
get_comment(Stream, Token, HaveToken, !IO)
else if graphic_token_char(Char) then
get_graphic(Stream, [Char, '/'], Token, !IO),
have_token(Stream, HaveToken, !IO)
else
io.putback_char(Stream, Char, !IO),
have_token(Stream, HaveToken, !IO),
Token = name("/")
)
).
:- pred string_get_slash(string::in, int::in, posn::in, token::out,
maybe_have_valid_token::out, posn::in, posn::out) is det.
string_get_slash(String, Len, Posn0, Token, HaveToken, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if Char = ('*') then
string_get_comment(String, Len, Posn0, Token, HaveToken, !Posn)
else if graphic_token_char(Char) then
string_get_graphic(String, Len, Posn0, Token, Context, !Posn),
HaveToken = maybe_have_valid_token(Context)
else
string_ungetchar(String, !Posn),
string_have_token(Posn0, HaveToken, !Posn),
Token = name("/")
)
else
string_have_token(Posn0, HaveToken, !Posn),
Token = name("/")
).
:- pred get_comment(io.input_stream::in, token::out,
maybe_have_valid_token::out, io::di, io::uo) is det.
get_comment(Stream, Token, HaveToken, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
have_token(Stream, HaveToken, !IO),
Token = io_error(Error)
;
Result = eof,
have_token(Stream, HaveToken, !IO),
Token = error("unterminated '/*' comment")
;
Result = ok,
( if Char = ('*') then
get_comment_2(Stream, Token, HaveToken, !IO)
else
get_comment(Stream, Token, HaveToken, !IO)
)
).
:- pred string_get_comment(string::in, int::in, posn::in, token::out,
maybe_have_valid_token::out, posn::in, posn::out) is det.
string_get_comment(String, Len, Posn0, Token, HaveToken, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if Char = ('*') then
string_get_comment_2(String, Len, Posn0, Token, HaveToken, !Posn)
else
string_get_comment(String, Len, Posn0, Token, HaveToken, !Posn)
)
else
string_have_token(Posn0, HaveToken, !Posn),
Token = error("unterminated '/*' comment")
).
:- pred get_comment_2(io.input_stream::in, token::out,
maybe_have_valid_token::out, io::di, io::uo) is det.
get_comment_2(Stream, Token, HaveToken, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
have_token(Stream, HaveToken, !IO),
Token = io_error(Error)
;
Result = eof,
have_token(Stream, HaveToken, !IO),
Token = error("unterminated '/*' comment")
;
Result = ok,
( if Char = ('/') then
% end of /* ... */ comment, so get next token
do_not_have_token(Token, HaveToken)
else if Char = ('*') then
get_comment_2(Stream, Token, HaveToken, !IO)
else
get_comment(Stream, Token, HaveToken, !IO)
)
).
:- pred string_get_comment_2(string::in, int::in, posn::in, token::out,
maybe_have_valid_token::out, posn::in, posn::out) is det.
string_get_comment_2(String, Len, Posn0, Token, HaveToken, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if Char = ('/') then
% end of /* ... */ comment, so get next token
do_not_have_token(Token, HaveToken)
else if Char = ('*') then
string_get_comment_2(String, Len, Posn0, Token, HaveToken, !Posn)
else
string_get_comment(String, Len, Posn0, Token, HaveToken, !Posn)
)
else
string_have_token(Posn0, HaveToken, !Posn),
Token = error("unterminated '/*' comment")
).
%---------------------------------------------------------------------------%
%
% Quoted names and quoted strings.
%
:- pred start_quoted_name(io.input_stream::in, char::in, list(char)::in,
token::out, io::di, io::uo) is det.
start_quoted_name(Stream, QuoteChar, !.RevChars, Token, !IO) :-
get_quoted_name(Stream, QuoteChar, !.RevChars, Token0, !IO),
( if Token0 = error(_) then
% Skip to the end of the string or name.
start_quoted_name(Stream, QuoteChar, !.RevChars, _, !IO),
Token = Token0
else if Token0 = eof then
Token = error("unterminated quote")
else
Token = Token0
).
:- pred string_start_quoted_name(string::in, int::in, char::in,
list(char)::in, posn::in, token::out, string_token_context::out,
posn::in, posn::out) is det.
string_start_quoted_name(String, Len, QuoteChar, !.RevChars, Posn0,
Token, Context, !Posn) :-
string_get_quoted_name(String, Len, QuoteChar, !.RevChars, Posn0,
Token0, Context, !Posn),
( if Token0 = error(_) then
% Skip to the end of the string or name.
string_start_quoted_name(String, Len, QuoteChar, !.RevChars,
Posn0, _, _, !Posn),
Token = Token0
else if Token0 = eof then
Token = error("unterminated quote")
else
Token = Token0
).
:- pred get_quoted_name(io.input_stream::in, char::in, list(char)::in,
token::out, io::di, io::uo) is det.
get_quoted_name(Stream, QuoteChar, !.RevChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = eof
;
Result = ok,
( if Char = QuoteChar then
get_quoted_name_quote(Stream, QuoteChar, !.RevChars, Token, !IO)
else if Char = ('\\') then
get_quoted_name_escape(Stream, QuoteChar, !.RevChars, Token, !IO)
else
!:RevChars = [Char | !.RevChars],
get_quoted_name(Stream, QuoteChar, !.RevChars, Token, !IO)
)
).
:- pred string_get_quoted_name(string::in, int::in, char::in,
list(char)::in, posn::in, token::out, string_token_context::out,
posn::in, posn::out) is det.
string_get_quoted_name(String, Len, QuoteChar, !.RevChars,
Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if Char = QuoteChar then
string_get_quoted_name_quote(String, Len, QuoteChar, !.RevChars,
Posn0, Token, Context, !Posn)
else if Char = ('\\') then
string_get_quoted_name_escape(String, Len, QuoteChar, !.RevChars,
Posn0, Token, Context, !Posn)
else
!:RevChars = [Char | !.RevChars],
string_get_quoted_name(String, Len, QuoteChar, !.RevChars,
Posn0, Token, Context, !Posn)
)
else
string_get_context(Posn0, Context, !Posn),
Token = eof
).
:- pred get_quoted_name_quote(io.input_stream::in, char::in, list(char)::in,
token::out, io::di, io::uo) is det.
get_quoted_name_quote(Stream, QuoteChar, !.RevChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
finish_quoted_name(QuoteChar, !.RevChars, Token)
;
Result = ok,
( if Char = QuoteChar then
!:RevChars = [Char | !.RevChars],
get_quoted_name(Stream, QuoteChar, !.RevChars, Token, !IO)
else
io.putback_char(Stream, Char, !IO),
finish_quoted_name(QuoteChar, !.RevChars, Token)
)
).
:- pred string_get_quoted_name_quote(string::in, int::in, char::in,
list(char)::in, posn::in, token::out, string_token_context::out,
posn::in, posn::out) is det.
string_get_quoted_name_quote(String, Len, QuoteChar, !.RevChars,
Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if Char = QuoteChar then
!:RevChars = [Char | !.RevChars],
string_get_quoted_name(String, Len, QuoteChar, !.RevChars,
Posn0, Token, Context, !Posn)
else
string_ungetchar(String, !Posn),
string_get_context(Posn0, Context, !Posn),
finish_quoted_name(QuoteChar, !.RevChars, Token)
)
else
string_get_context(Posn0, Context, !Posn),
finish_quoted_name(QuoteChar, !.RevChars, Token)
).
:- pred finish_quoted_name(char::in, list(char)::in, token::out) is det.
finish_quoted_name(QuoteChar, RevChars, Token) :-
( if rev_char_list_to_string(RevChars, String) then
( if QuoteChar = '''' then
Token = name(String)
else if QuoteChar = '"' then
Token = string(String)
else
error("lexer.m: unknown quote character")
)
else
Token = error("invalid character in quoted name")
).
:- pred get_quoted_name_escape(io.input_stream::in, char::in, list(char)::in,
token::out, io::di, io::uo) is det.
get_quoted_name_escape(Stream, QuoteChar, !.RevChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = eof
;
Result = ok,
( if Char = '\n' then
get_quoted_name(Stream, QuoteChar, !.RevChars, Token, !IO)
else if Char = '\r' then
% Files created on Windows may have an extra return character.
get_quoted_name_escape(Stream, QuoteChar, !.RevChars, Token, !IO)
else if escape_char(Char, EscapedChar) then
!:RevChars = [EscapedChar | !.RevChars],
get_quoted_name(Stream, QuoteChar, !.RevChars, Token, !IO)
else if Char = 'x' then
get_hex_escape(Stream, QuoteChar, !.RevChars, [], Token, !IO)
else if Char = 'u' then
get_unicode_escape(Stream, 4, QuoteChar, !.RevChars, [],
Token, !IO)
else if Char = 'U' then
get_unicode_escape(Stream, 8, QuoteChar, !.RevChars, [],
Token, !IO)
else if char.is_octal_digit(Char) then
get_octal_escape(Stream, QuoteChar, !.RevChars, [Char], Token, !IO)
else
Token = error("invalid escape character")
)
).
:- pred string_get_quoted_name_escape(string::in, int::in, char::in,
list(char)::in, posn::in, token::out, string_token_context::out,
posn::in, posn::out) is det.
string_get_quoted_name_escape(String, Len, QuoteChar, !.RevChars, Posn0,
Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if Char = '\n' then
string_get_quoted_name(String, Len, QuoteChar,
!.RevChars, Posn0, Token, Context, !Posn)
else if Char = '\r' then
% Files created on Windows may have an extra return character.
string_get_quoted_name_escape(String, Len, QuoteChar,
!.RevChars, Posn0, Token, Context, !Posn)
else if escape_char(Char, EscapedChar) then
!:RevChars = [EscapedChar | !.RevChars],
string_get_quoted_name(String, Len, QuoteChar,
!.RevChars, Posn0, Token, Context, !Posn)
else if Char = 'x' then
string_get_hex_escape(String, Len, QuoteChar,
!.RevChars, [], Posn0, Token, Context, !Posn)
else if Char = 'u' then
string_get_unicode_escape(4, String, Len, QuoteChar,
!.RevChars, [], Posn0, Token, Context, !Posn)
else if Char = 'U' then
string_get_unicode_escape(8, String, Len, QuoteChar,
!.RevChars, [], Posn0, Token, Context, !Posn)
else if char.is_octal_digit(Char) then
string_get_octal_escape(String, Len, QuoteChar,
!.RevChars, [Char], Posn0, Token, Context, !Posn)
else
string_get_context(!.Posn, Context, !Posn),
Token = error("invalid escape character")
)
else
string_get_context(Posn0, Context, !Posn),
Token = eof
).
:- pred escape_char(char::in, char::out) is semidet.
escape_char('a', '\a').
escape_char('b', '\b').
escape_char('r', '\r').
escape_char('f', '\f').
escape_char('t', '\t').
escape_char('n', '\n').
escape_char('v', '\v').
escape_char('\\', '\\').
escape_char('''', '''').
escape_char('"', '"').
escape_char('`', '`').
:- pred get_unicode_escape(io.input_stream::in, int::in, char::in,
list(char)::in, list(char)::in, token::out, io::di, io::uo) is det.
get_unicode_escape(Stream, NumHexChars, QuoteChar, !.RevChars, !.RevHexChars,
Token, !IO) :-
( if NumHexChars = list.length(!.RevHexChars) then
( if
rev_char_list_to_string(!.RevHexChars, HexString),
string.base_string_to_int(16, HexString, UnicodeCharCode),
allowed_unicode_char_code(UnicodeCharCode),
char.from_int(UnicodeCharCode, UnicodeChar)
then
( if UnicodeCharCode = 0 then
Token = null_character_error
else
!:RevChars = [UnicodeChar | !.RevChars],
get_quoted_name(Stream, QuoteChar, !.RevChars, Token, !IO)
)
else
Token = error("invalid Unicode character code")
)
else
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = eof
;
Result = ok,
( if char.is_hex_digit(Char) then
!:RevHexChars = [Char | !.RevHexChars],
get_unicode_escape(Stream, NumHexChars, QuoteChar,
!.RevChars, !.RevHexChars, Token, !IO)
else
Token = error("invalid hex character in Unicode escape")
)
)
).
:- pred string_get_unicode_escape(int::in, string::in, int::in, char::in,
list(char)::in, list(char)::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_unicode_escape(NumHexChars, String, Len, QuoteChar,
!.RevChars, !.RevHexChars, Posn0, Token, Context, !Posn) :-
( if NumHexChars = list.length(!.RevHexChars) then
( if
rev_char_list_to_string(!.RevHexChars, HexString),
string.base_string_to_int(16, HexString, UnicodeCharCode),
allowed_unicode_char_code(UnicodeCharCode),
char.from_int(UnicodeCharCode, UnicodeChar)
then
( if UnicodeCharCode = 0 then
string_get_context(Posn0, Context, !Posn),
Token = null_character_error
else
!:RevChars = [UnicodeChar | !.RevChars],
string_get_quoted_name(String, Len, QuoteChar, !.RevChars,
Posn0, Token, Context, !Posn)
)
else
string_get_context(Posn0, Context, !Posn),
Token = error("invalid Unicode character code")
)
else
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_hex_digit(Char) then
!:RevHexChars = [Char | !.RevHexChars],
string_get_unicode_escape(NumHexChars, String, Len, QuoteChar,
!.RevChars, !.RevHexChars, Posn0, Token, Context, !Posn)
else
string_get_context(Posn0, Context, !Posn),
Token = error("invalid hex character in Unicode escape")
)
else
string_get_context(Posn0, Context, !Posn),
Token = eof
)
).
% Succeeds if the give code point is a legal Unicode code point
% (regardless of whether it is reserved for private use or not).
%
:- pred allowed_unicode_char_code(int::in) is semidet.
allowed_unicode_char_code(Code) :-
Code >= 0,
Code =< 0x10FFFF,
% The following range is reserved for surrogates.
not (
Code >= 0xD800, Code =< 0xDFFF
).
:- pred get_hex_escape(io.input_stream::in, char::in, list(char)::in,
list(char)::in, token::out, io::di, io::uo) is det.
get_hex_escape(Stream, QuoteChar, !.RevChars, !.RevHexChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = eof
;
Result = ok,
( if char.is_hex_digit(Char) then
!:RevHexChars = [Char | !.RevHexChars],
get_hex_escape(Stream, QuoteChar, !.RevChars, !.RevHexChars,
Token, !IO)
else if Char = ('\\') then
finish_hex_escape(Stream, QuoteChar, !.RevChars, !.RevHexChars,
Token, !IO)
else
Token = error("unterminated hex escape")
)
).
:- pred string_get_hex_escape(string::in, int::in, char::in,
list(char)::in, list(char)::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_hex_escape(String, Len, QuoteChar, !.RevChars, !.RevHexChars,
Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_hex_digit(Char) then
!:RevHexChars = [Char | !.RevHexChars],
string_get_hex_escape(String, Len, QuoteChar,
!.RevChars, !.RevHexChars, Posn0, Token, Context, !Posn)
else if Char = ('\\') then
string_finish_hex_escape(String, Len, QuoteChar, !.RevChars,
!.RevHexChars, Posn0, Token, Context, !Posn)
else
string_get_context(Posn0, Context, !Posn),
Token = error("unterminated hex escape")
)
else
string_get_context(Posn0, Context, !Posn),
Token = eof
).
:- pred finish_hex_escape(io.input_stream::in, char::in, list(char)::in,
list(char)::in, token::out, io::di, io::uo) is det.
finish_hex_escape(Stream, QuoteChar, !.RevChars, !.RevHexChars, Token, !IO) :-
(
!.RevHexChars = [],
Token = error("empty hex escape")
;
!.RevHexChars = [_ | _],
( if
rev_char_list_to_string(!.RevHexChars, HexString),
string.base_string_to_int(16, HexString, Int),
char.to_int(Char, Int)
then
( if Int = 0 then
Token = null_character_error
else
!:RevChars = [Char | !.RevChars],
get_quoted_name(Stream, QuoteChar, !.RevChars, Token, !IO)
)
else
Token = error("invalid hex escape")
)
).
:- pred string_finish_hex_escape(string::in, int::in, char::in,
list(char)::in, list(char)::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_finish_hex_escape(String, Len, QuoteChar, !.RevChars, !.RevHexChars,
Posn0, Token, Context, !Posn) :-
(
!.RevHexChars = [],
string_get_context(Posn0, Context, !Posn),
Token = error("empty hex escape")
;
!.RevHexChars = [_ | _],
( if
rev_char_list_to_string(!.RevHexChars, HexString),
string.base_string_to_int(16, HexString, Int),
char.to_int(Char, Int)
then
( if Int = 0 then
Token = null_character_error,
string_get_context(Posn0, Context, !Posn)
else
!:RevChars = [Char | !.RevChars],
string_get_quoted_name(String, Len, QuoteChar, !.RevChars,
Posn0, Token, Context, !Posn)
)
else
string_get_context(Posn0, Context, !Posn),
Token = error("invalid hex escape")
)
).
:- pred get_octal_escape(io.input_stream::in, char::in, list(char)::in,
list(char)::in, token::out, io::di, io::uo) is det.
get_octal_escape(Stream, QuoteChar, !.RevChars, !.RevOctalChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = eof
;
Result = ok,
( if char.is_octal_digit(Char) then
!:RevOctalChars = [Char | !.RevOctalChars],
get_octal_escape(Stream, QuoteChar, !.RevChars, !.RevOctalChars,
Token, !IO)
else if Char = ('\\') then
finish_octal_escape(Stream, QuoteChar, !.RevChars, !.RevOctalChars,
Token, !IO)
else
Token = error("unterminated octal escape")
)
).
:- pred string_get_octal_escape(string::in, int::in, char::in,
list(char)::in, list(char)::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_octal_escape(String, Len, QuoteChar, !.RevChars, !.RevOctalChars,
Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_octal_digit(Char) then
!:RevOctalChars = [Char | !.RevOctalChars],
string_get_octal_escape(String, Len, QuoteChar,
!.RevChars, !.RevOctalChars, Posn0, Token, Context, !Posn)
else if Char = ('\\') then
string_finish_octal_escape(String, Len, QuoteChar,
!.RevChars, !.RevOctalChars, Posn0, Token, Context, !Posn)
else
string_get_context(Posn0, Context, !Posn),
Token = error("unterminated octal escape")
)
else
Token = eof,
string_get_context(Posn0, Context, !Posn)
).
:- pred finish_octal_escape(io.input_stream::in, char::in, list(char)::in,
list(char)::in, token::out, io::di, io::uo) is det.
finish_octal_escape(Stream, QuoteChar, !.RevChars, !.RevOctalChars,
Token, !IO) :-
(
!.RevOctalChars = [],
Token = error("empty octal escape")
;
!.RevOctalChars = [_ | _],
( if
rev_char_list_to_string(!.RevOctalChars, OctalString),
string.base_string_to_int(8, OctalString, Int),
char.to_int(Char, Int)
then
( if Int = 0 then
Token = null_character_error
else
!:RevChars = [Char | !.RevChars],
get_quoted_name(Stream, QuoteChar, !.RevChars, Token, !IO)
)
else
Token = error("invalid octal escape")
)
).
:- pred string_finish_octal_escape(string::in, int::in, char::in,
list(char)::in, list(char)::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_finish_octal_escape(String, Len, QuoteChar, !.RevChars, !.RevOctalChars,
Posn0, Token, Context, !Posn) :-
(
!.RevOctalChars = [],
Token = error("empty octal escape"),
string_get_context(Posn0, Context, !Posn)
;
!.RevOctalChars = [_ | _],
( if
rev_char_list_to_string(!.RevOctalChars, OctalString),
string.base_string_to_int(8, OctalString, Int),
char.to_int(Char, Int)
then
( if Int = 0 then
Token = null_character_error,
string_get_context(Posn0, Context, !Posn)
else
!:RevChars = [Char | !.RevChars],
string_get_quoted_name(String, Len, QuoteChar, !.RevChars,
Posn0, Token, Context, !Posn)
)
else
Token = error("invalid octal escape"),
string_get_context(Posn0, Context, !Posn)
)
).
%---------------------------------------------------------------------------%
%
% Names and variables.
%
:- pred get_name(io.input_stream::in, list(char)::in, token::out,
io::di, io::uo) is det.
get_name(Stream, !.RevChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
( if rev_char_list_to_string(!.RevChars, Name) then
Token = name(Name)
else
Token = error("invalid character in name")
)
;
Result = ok,
( if char.is_alnum_or_underscore(Char) then
!:RevChars = [Char | !.RevChars],
get_name(Stream, !.RevChars, Token, !IO)
else
io.putback_char(Stream, Char, !IO),
( if rev_char_list_to_string(!.RevChars, Name) then
Token = name(Name)
else
Token = error("invalid character in name")
)
)
).
:- pred string_get_name(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_name(String, Len, Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_alnum_or_underscore(Char) then
string_get_name(String, Len, Posn0, Token, Context, !Posn)
else
string_ungetchar(String, !Posn),
grab_string(String, Posn0, Name, !Posn),
Token = name(Name),
string_get_context(Posn0, Context, !Posn)
)
else
grab_string(String, Posn0, Name, !Posn),
Token = name(Name),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_implementation_defined_literal_rest(io.input_stream::in,
token::out, io::di, io::uo) is det.
get_implementation_defined_literal_rest(Stream, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = name("$")
;
Result = ok,
( if char.is_lower(Char) then
get_name(Stream, [Char], Token0, !IO),
( if Token0 = name(S) then
Token = implementation_defined(S)
else
Token = Token0
)
else if graphic_token_char(Char) then
get_graphic(Stream, [Char, '$'], Token, !IO)
else
io.putback_char(Stream, Char, !IO),
Token = name("$")
)
).
:- pred string_get_implementation_defined_literal_rest(string::in, int::in,
posn::in, token::out, string_token_context::out, posn::in, posn::out)
is det.
string_get_implementation_defined_literal_rest(String, Len, Posn0,
Token, Context, !Posn) :-
Posn1 = !.Posn,
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_lower(Char) then
string_get_name(String, Len, Posn1, Token0, Context, !Posn),
( if Token0 = name(S) then
Token = implementation_defined(S)
else
Token = Token0
)
else if graphic_token_char(Char) then
string_get_graphic(String, Len, Posn0, Token, Context, !Posn)
else
string_ungetchar(String, !Posn),
Token = name("$"),
string_get_context(Posn0, Context, !Posn)
)
else
Token = name("$"),
string_get_context(Posn0, Context, !Posn)
).
% A line number directive token is `#' followed by an integer
% (specifying the line number) followed by a newline.
% Such a token sets the source line number for the next line, but it is
% otherwise ignored. This means that line number directives may appear
% anywhere that a token may appear, including in the middle of terms.
% (The source file name can be set with a `:- pragma source_file'
% declaration.)
%
:- pred get_source_line_number(io.input_stream::in, list(char)::in, token::out,
maybe_have_valid_token::out, io::di, io::uo) is det.
get_source_line_number(Stream, !.RevChars, Token, HaveToken, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
have_token(Stream, HaveToken, !IO),
Token = io_error(Error)
;
Result = eof,
have_token(Stream, HaveToken, !IO),
Token = error("unexpected end-of-file in `#' line number directive")
;
Result = ok,
( if char.is_digit(Char) then
!:RevChars = [Char | !.RevChars],
get_source_line_number(Stream, !.RevChars, Token, HaveToken, !IO)
else if Char = '\n' then
( if rev_char_list_to_string(!.RevChars, String) then
( if
string.base_string_to_int(10, String, Int),
Int > 0
then
io.set_line_number(Stream, Int, !IO),
do_not_have_token(Token, HaveToken)
else
have_token(Stream, HaveToken, !IO),
string.append_list(["invalid line number `", String,
"' in `#' line number directive"], Message),
Token = error(Message)
)
else
have_token(Stream, HaveToken, !IO),
Token = error("invalid character in `#' line number directive")
)
else
have_token(Stream, HaveToken, !IO),
( if char.to_int(Char, 0) then
String = "NUL"
else
string.from_char_list([Char], String)
),
string.append_list(["invalid character `", String,
"' in `#' line number directive"], Message),
Token = error(Message)
)
).
:- pred string_get_source_line_number(string::in, int::in, posn::in,
token::out, maybe_have_valid_token::out, posn::in, posn::out) is det.
string_get_source_line_number(String, Len, Posn1, Token, HaveToken, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_digit(Char) then
string_get_source_line_number(String, Len, Posn1, Token, HaveToken,
!Posn)
else if Char = '\n' then
grab_string(String, Posn1, LineNumString, !Posn),
( if
string.base_string_to_int(10, LineNumString, LineNum),
LineNum > 0
then
string_set_line_number(LineNum, !Posn),
do_not_have_token(Token, HaveToken)
else
string_have_token(Posn1, HaveToken, !Posn),
string.append_list(["invalid line number `", LineNumString,
"' in `#' line number directive"], Message),
Token = error(Message)
)
else
string_have_token(Posn1, HaveToken, !Posn),
( if char.to_int(Char, 0) then
DirectiveString = "NUL"
else
string.from_char_list([Char], DirectiveString)
),
string.append_list(["invalid character `", DirectiveString,
"' in `#' line number directive"], Message),
Token = error(Message)
)
else
string_have_token(Posn1, HaveToken, !Posn),
Token = error("unexpected end-of-file in `#' line number directive")
).
:- pred get_graphic(io.input_stream::in, list(char)::in, token::out,
io::di, io::uo) is det.
get_graphic(Stream, !.RevChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
( if rev_char_list_to_string(!.RevChars, Name) then
Token = name(Name)
else
Token = error("invalid character in graphic token")
)
;
Result = ok,
( if graphic_token_char(Char) then
!:RevChars = [Char | !.RevChars],
get_graphic(Stream, !.RevChars, Token, !IO)
else
io.putback_char(Stream, Char, !IO),
( if rev_char_list_to_string(!.RevChars, Name) then
Token = name(Name)
else
Token = error("invalid character in graphic token")
)
)
).
:- pred string_get_graphic(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_graphic(String, Len, Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if graphic_token_char(Char) then
string_get_graphic(String, Len, Posn0, Token, Context, !Posn)
else
string_ungetchar(String, !Posn),
grab_string(String, Posn0, Name, !Posn),
Token = name(Name),
string_get_context(Posn0, Context, !Posn)
)
else
grab_string(String, Posn0, Name, !Posn),
string_get_context(Posn0, Context, !Posn),
Token = name(Name)
).
:- pred get_variable(io.input_stream::in, list(char)::in, token::out,
io::di, io::uo) is det.
get_variable(Stream, !.RevChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
( if rev_char_list_to_string(!.RevChars, VariableName) then
Token = variable(VariableName)
else
Token = error("invalid character in variable")
)
;
Result = ok,
( if char.is_alnum_or_underscore(Char) then
!:RevChars = [Char | !.RevChars],
get_variable(Stream, !.RevChars, Token, !IO)
else
io.putback_char(Stream, Char, !IO),
( if rev_char_list_to_string(!.RevChars, VariableName) then
Token = variable(VariableName)
else
Token = error("invalid character in variable")
)
)
).
:- pred string_get_variable(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_variable(String, Len, Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_alnum_or_underscore(Char) then
string_get_variable(String, Len, Posn0, Token, Context, !Posn)
else
string_ungetchar(String, !Posn),
grab_string(String, Posn0, VariableName, !Posn),
Token = variable(VariableName),
string_get_context(Posn0, Context, !Posn)
)
else
grab_string(String, Posn0, VariableName, !Posn),
Token = variable(VariableName),
string_get_context(Posn0, Context, !Posn)
).
%---------------------------------------------------------------------------%
%
% Integer and float literals.
%
:- pred get_zero(io.input_stream::in, token::out, io::di, io::uo) is det.
get_zero(Stream, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = integer(0)
;
Result = ok,
( if char.is_digit(Char) then
get_number(Stream, [Char], Token, !IO)
else if Char = '''' then
get_char_code(Stream, Token, !IO)
else if Char = 'b' then
get_binary(Stream, Token, !IO)
else if Char = 'o' then
get_octal(Stream, Token, !IO)
else if Char = 'x' then
get_hex(Stream, Token, !IO)
else if Char = ('.') then
get_int_dot(Stream, ['0'], Token, !IO)
else if ( Char = 'e' ; Char = 'E' ) then
get_float_exponent(Stream, [Char, '0'], Token, !IO)
else
io.putback_char(Stream, Char, !IO),
Token = integer(0)
)
).
:- pred string_get_zero(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_zero(String, Len, Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_digit(Char) then
string_get_number(String, Len, Posn0, Token, Context, !Posn)
else if Char = '''' then
string_get_char_code(String, Len, Posn0, Token, Context, !Posn)
else if Char = 'b' then
string_get_binary(String, Len, Posn0, Token, Context, !Posn)
else if Char = 'o' then
string_get_octal(String, Len, Posn0, Token, Context, !Posn)
else if Char = 'x' then
string_get_hex(String, Len, Posn0, Token, Context, !Posn)
else if Char = ('.') then
string_get_int_dot(String, Len, Posn0, Token, Context, !Posn)
else if ( Char = 'e' ; Char = 'E' ) then
string_get_float_exponent(String, Len, Posn0, Token, Context,
!Posn)
else
string_ungetchar(String, !Posn),
string_get_context(Posn0, Context, !Posn),
Token = integer(0)
)
else
string_get_context(Posn0, Context, !Posn),
Token = integer(0)
).
:- pred get_char_code(io.input_stream::in, token::out, io::di, io::uo) is det.
get_char_code(Stream, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = error("unterminated char code constant")
;
Result = ok,
char.to_int(Char, CharCode),
Token = integer(CharCode)
).
:- pred string_get_char_code(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_char_code(String, Len, Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
char.to_int(Char, CharCode),
Token = integer(CharCode),
string_get_context(Posn0, Context, !Posn)
else
Token = error("unterminated char code constant"),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_binary(io.input_stream::in, token::out, io::di, io::uo) is det.
get_binary(Stream, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = error("unterminated binary constant")
;
Result = ok,
( if char.is_binary_digit(Char) then
get_binary_2(Stream, [Char], Token, !IO)
else
io.putback_char(Stream, Char, !IO),
Token = error("unterminated binary constant")
)
).
:- pred string_get_binary(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_binary(String, Len, Posn0, Token, Context, !Posn) :-
Posn1 = !.Posn,
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_binary_digit(Char) then
string_get_binary_2(String, Len, Posn1, Token, Context, !Posn)
else
string_ungetchar(String, !Posn),
Token = error("unterminated binary constant"),
string_get_context(Posn0, Context, !Posn)
)
else
Token = error("unterminated binary constant"),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_binary_2(io.input_stream::in, list(char)::in, token::out,
io::di, io::uo) is det.
get_binary_2(Stream, !.RevChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_int(!.RevChars, base_2, Token)
;
Result = ok,
( if char.is_binary_digit(Char) then
!:RevChars = [Char | !.RevChars],
get_binary_2(Stream, !.RevChars, Token, !IO)
else
io.putback_char(Stream, Char, !IO),
rev_char_list_to_int(!.RevChars, base_2, Token)
)
).
:- pred string_get_binary_2(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_binary_2(String, Len, Posn1, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_binary_digit(Char) then
string_get_binary_2(String, Len, Posn1, Token, Context, !Posn)
else
string_ungetchar(String, !Posn),
grab_string(String, Posn1, BinaryString, !Posn),
conv_string_to_int(BinaryString, base_2, Token),
string_get_context(Posn1, Context, !Posn)
)
else
grab_string(String, Posn1, BinaryString, !Posn),
conv_string_to_int(BinaryString, base_2, Token),
string_get_context(Posn1, Context, !Posn)
).
:- pred get_octal(io.input_stream::in, token::out, io::di, io::uo) is det.
get_octal(Stream, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = error("unterminated octal constant")
;
Result = ok,
( if char.is_octal_digit(Char) then
get_octal_2(Stream, [Char], Token, !IO)
else
io.putback_char(Stream, Char, !IO),
Token = error("unterminated octal constant")
)
).
:- pred string_get_octal(string::in, int::in, posn::in, token::out,
string_token_context::out,
posn::in, posn::out) is det.
string_get_octal(String, Len, Posn0, Token, Context, !Posn) :-
Posn1 = !.Posn,
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_octal_digit(Char) then
string_get_octal_2(String, Len, Posn1, Token, Context, !Posn)
else
string_ungetchar(String, !Posn),
Token = error("unterminated octal constant"),
string_get_context(Posn0, Context, !Posn)
)
else
Token = error("unterminated octal constant"),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_octal_2(io.input_stream::in, list(char)::in, token::out,
io::di, io::uo) is det.
get_octal_2(Stream, !.RevChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_int(!.RevChars, base_8, Token)
;
Result = ok,
( if char.is_octal_digit(Char) then
!:RevChars = [Char | !.RevChars],
get_octal_2(Stream, !.RevChars, Token, !IO)
else
io.putback_char(Stream, Char, !IO),
rev_char_list_to_int(!.RevChars, base_8, Token)
)
).
:- pred string_get_octal_2(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_octal_2(String, Len, Posn1, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_octal_digit(Char) then
string_get_octal_2(String, Len, Posn1, Token, Context, !Posn)
else
string_ungetchar(String, !Posn),
grab_string(String, Posn1, BinaryString, !Posn),
conv_string_to_int(BinaryString, base_8, Token),
string_get_context(Posn1, Context, !Posn)
)
else
grab_string(String, Posn1, BinaryString, !Posn),
conv_string_to_int(BinaryString, base_8, Token),
string_get_context(Posn1, Context, !Posn)
).
:- pred get_hex(io.input_stream::in, token::out, io::di, io::uo) is det.
get_hex(Stream, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = error("unterminated hex constant")
;
Result = ok,
( if char.is_hex_digit(Char) then
get_hex_2(Stream, [Char], Token, !IO)
else
io.putback_char(Stream, Char, !IO),
Token = error("unterminated hex constant")
)
).
:- pred string_get_hex(string::in, int::in, posn::in, token::out,
string_token_context::out,
posn::in, posn::out) is det.
string_get_hex(String, Len, Posn0, Token, Context, !Posn) :-
Posn1 = !.Posn,
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_hex_digit(Char) then
string_get_hex_2(String, Len, Posn1, Token, Context, !Posn)
else
string_ungetchar(String, !Posn),
Token = error("unterminated hex constant"),
string_get_context(Posn0, Context, !Posn)
)
else
Token = error("unterminated hex constant"),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_hex_2(io.input_stream::in, list(char)::in, token::out,
io::di, io::uo) is det.
get_hex_2(Stream, !.RevChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_int(!.RevChars, base_16, Token)
;
Result = ok,
( if char.is_hex_digit(Char) then
!:RevChars = [Char | !.RevChars],
get_hex_2(Stream, !.RevChars, Token, !IO)
else
io.putback_char(Stream, Char, !IO),
rev_char_list_to_int(!.RevChars, base_16, Token)
)
).
:- pred string_get_hex_2(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_hex_2(String, Len, Posn1, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_hex_digit(Char) then
string_get_hex_2(String, Len, Posn1, Token, Context, !Posn)
else
string_ungetchar(String, !Posn),
grab_string(String, Posn1, BinaryString, !Posn),
conv_string_to_int(BinaryString, base_16, Token),
string_get_context(Posn1, Context, !Posn)
)
else
grab_string(String, Posn1, BinaryString, !Posn),
conv_string_to_int(BinaryString, base_16, Token),
string_get_context(Posn1, Context, !Posn)
).
:- pred get_number(io.input_stream::in, list(char)::in, token::out,
io::di, io::uo) is det.
get_number(Stream, !.RevChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_int(!.RevChars, base_10, Token)
;
Result = ok,
( if char.is_digit(Char) then
!:RevChars = [Char | !.RevChars],
get_number(Stream, !.RevChars, Token, !IO)
else if Char = ('.') then
get_int_dot(Stream, !.RevChars, Token, !IO)
else if ( Char = 'e' ; Char = 'E' ) then
!:RevChars = [Char | !.RevChars],
get_float_exponent(Stream, !.RevChars, Token, !IO)
else
io.putback_char(Stream, Char, !IO),
rev_char_list_to_int(!.RevChars, base_10, Token)
)
).
:- pred string_get_number(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_number(String, Len, Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_digit(Char) then
string_get_number(String, Len, Posn0, Token, Context, !Posn)
else if Char = ('.') then
string_get_int_dot(String, Len, Posn0, Token, Context, !Posn)
else if ( Char = 'e' ; Char = 'E' ) then
string_get_float_exponent(String, Len, Posn0, Token, Context,
!Posn)
else
string_ungetchar(String, !Posn),
grab_string(String, Posn0, NumberString, !Posn),
conv_string_to_int(NumberString, base_10, Token),
string_get_context(Posn0, Context, !Posn)
)
else
grab_string(String, Posn0, NumberString, !Posn),
conv_string_to_int(NumberString, base_10, Token),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_int_dot(io.input_stream::in, list(char)::in, token::out,
io::di, io::uo) is det.
get_int_dot(Stream, !.RevChars, Token, !IO) :-
% XXX The float literal syntax doesn't match ISO Prolog.
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
io.putback_char(Stream, '.', !IO),
rev_char_list_to_int(!.RevChars, base_10, Token)
;
Result = ok,
( if char.is_digit(Char) then
!:RevChars = [Char, '.' | !.RevChars],
get_float_decimals(Stream, !.RevChars, Token, !IO)
else
io.putback_char(Stream, Char, !IO),
% We can't putback the ".", because io.putback_char only
% guarantees one character of pushback. So instead, we return
% an `integer_dot' token; the main loop of get_token_list_2 will
% handle this appropriately.
rev_char_list_to_int(!.RevChars, base_10, Token0),
( if Token0 = integer(Int) then
Token = integer_dot(Int)
else
Token = Token0
)
)
).
:- pred string_get_int_dot(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_int_dot(String, Len, Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_digit(Char) then
string_get_float_decimals(String, Len, Posn0, Token, Context,
!Posn)
else
string_ungetchar(String, !Posn),
string_ungetchar(String, !Posn),
grab_string(String, Posn0, NumberString, !Posn),
conv_string_to_int(NumberString, base_10, Token),
string_get_context(Posn0, Context, !Posn)
)
else
string_ungetchar(String, !Posn),
grab_string(String, Posn0, NumberString, !Posn),
conv_string_to_int(NumberString, base_10, Token),
string_get_context(Posn0, Context, !Posn)
).
% We have read past the decimal point, so now get the decimals.
%
:- pred get_float_decimals(io.input_stream::in, list(char)::in, token::out,
io::di, io::uo) is det.
get_float_decimals(Stream, !.RevChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_float(!.RevChars, Token)
;
Result = ok,
( if char.is_digit(Char) then
!:RevChars = [Char | !.RevChars],
get_float_decimals(Stream, !.RevChars, Token, !IO)
else if ( Char = 'e' ; Char = 'E' ) then
!:RevChars = [Char | !.RevChars],
get_float_exponent(Stream, !.RevChars, Token, !IO)
else
io.putback_char(Stream, Char, !IO),
rev_char_list_to_float(!.RevChars, Token)
)
).
:- pred string_get_float_decimals(string::in, int::in, posn::in,
token::out, string_token_context::out, posn::in, posn::out) is det.
string_get_float_decimals(String, Len, Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_digit(Char) then
string_get_float_decimals(String, Len, Posn0, Token, Context,
!Posn)
else if ( Char = 'e' ; Char = 'E' ) then
string_get_float_exponent(String, Len, Posn0, Token, Context,
!Posn)
else
string_ungetchar(String, !Posn),
grab_string(String, Posn0, FloatString, !Posn),
conv_to_float(FloatString, Token),
string_get_context(Posn0, Context, !Posn)
)
else
grab_string(String, Posn0, FloatString, !Posn),
conv_to_float(FloatString, Token),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_float_exponent(io.input_stream::in, list(char)::in, token::out,
io::di, io::uo) is det.
get_float_exponent(Stream, !.RevChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_float(!.RevChars, Token)
;
Result = ok,
( if ( Char = ('+') ; Char = ('-') ) then
!:RevChars = [Char | !.RevChars],
get_float_exponent_2(Stream, !.RevChars, Token, !IO)
else if char.is_digit(Char) then
!:RevChars = [Char | !.RevChars],
get_float_exponent_3(Stream, !.RevChars, Token, !IO)
else
io.putback_char(Stream, Char, !IO),
Token = error("unterminated exponent in float token")
)
).
:- pred string_get_float_exponent(string::in, int::in, posn::in,
token::out, string_token_context::out, posn::in, posn::out) is det.
string_get_float_exponent(String, Len, Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if ( Char = ('+') ; Char = ('-') ) then
string_get_float_exponent_2(String, Len, Posn0, Token, Context,
!Posn)
else if char.is_digit(Char) then
string_get_float_exponent_3(String, Len, Posn0, Token, Context,
!Posn)
else
string_ungetchar(String, !Posn),
Token = error("unterminated exponent in float token"),
string_get_context(Posn0, Context, !Posn)
)
else
grab_string(String, Posn0, FloatString, !Posn),
conv_to_float(FloatString, Token),
string_get_context(Posn0, Context, !Posn)
).
% We have read past the E signalling the start of the exponent -
% make sure that there's at least one digit following,
% and then get the remaining digits.
%
:- pred get_float_exponent_2(io.input_stream::in, list(char)::in, token::out,
io::di, io::uo) is det.
get_float_exponent_2(Stream, !.RevChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = error("unterminated exponent in float token")
;
Result = ok,
( if char.is_digit(Char) then
!:RevChars = [Char | !.RevChars],
get_float_exponent_3(Stream, !.RevChars, Token, !IO)
else
io.putback_char(Stream, Char, !IO),
Token = error("unterminated exponent in float token")
)
).
% We have read past the E signalling the start of the exponent -
% make sure that there's at least one digit following,
% and then get the remaining digits.
%
:- pred string_get_float_exponent_2(string::in, int::in, posn::in,
token::out, string_token_context::out, posn::in, posn::out) is det.
string_get_float_exponent_2(String, Len, Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_digit(Char) then
string_get_float_exponent_3(String, Len, Posn0, Token, Context,
!Posn)
else
string_ungetchar(String, !Posn),
Token = error("unterminated exponent in float token"),
string_get_context(Posn0, Context, !Posn)
)
else
Token = error("unterminated exponent in float token"),
string_get_context(Posn0, Context, !Posn)
).
% We have read past the first digit of the exponent -
% now get the remaining digits.
%
:- pred get_float_exponent_3(io.input_stream::in, list(char)::in, token::out,
io::di, io::uo) is det.
get_float_exponent_3(Stream, !.RevChars, Token, !IO) :-
io.read_char_unboxed(Stream, Result, Char, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_float(!.RevChars, Token)
;
Result = ok,
( if char.is_digit(Char) then
!:RevChars = [Char | !.RevChars],
get_float_exponent_3(Stream, !.RevChars, Token, !IO)
else
io.putback_char(Stream, Char, !IO),
rev_char_list_to_float(!.RevChars, Token)
)
).
:- pred string_get_float_exponent_3(string::in, int::in, posn::in,
token::out, string_token_context::out, posn::in, posn::out) is det.
string_get_float_exponent_3(String, Len, Posn0, Token, Context, !Posn) :-
( if string_read_char(String, Len, Char, !Posn) then
( if char.is_digit(Char) then
string_get_float_exponent_3(String, Len, Posn0, Token, Context,
!Posn)
else
string_ungetchar(String, !Posn),
grab_string(String, Posn0, FloatString, !Posn),
conv_to_float(FloatString, Token),
string_get_context(Posn0, Context, !Posn)
)
else
grab_string(String, Posn0, FloatString, !Posn),
conv_to_float(FloatString, Token),
string_get_context(Posn0, Context, !Posn)
).
%---------------------------------------------------------------------------%
%
% Utility routines.
%
:- pred rev_char_list_to_int(list(char)::in, integer_base::in, token::out)
is det.
rev_char_list_to_int(RevChars, Base, Token) :-
( if rev_char_list_to_string(RevChars, String) then
conv_string_to_int(String, Base, Token)
else
Token = error("invalid character in int")
).
:- pred conv_string_to_int(string::in, integer_base::in, token::out) is det.
conv_string_to_int(String, Base, Token) :-
BaseInt = integer_base_int(Base),
( if string.base_string_to_int(BaseInt, String, Int) then
Token = integer(Int)
else if integer.from_base_string(BaseInt, String, Integer) then
Token = big_integer(Base, Integer)
else
Token = error("invalid character in int")
).
:- func integer_base_int(integer_base) = int.
integer_base_int(base_2) = 2.
integer_base_int(base_8) = 8.
integer_base_int(base_10) = 10.
integer_base_int(base_16) = 16.
:- pred rev_char_list_to_float(list(char)::in, token::out) is det.
rev_char_list_to_float(RevChars, Token) :-
( if rev_char_list_to_string(RevChars, String) then
conv_to_float(String, Token)
else
Token = error("invalid character in int")
).
:- pred conv_to_float(string::in, token::out) is det.
conv_to_float(String, Token) :-
( if string.to_float(String, Float) then
Token = float(Float)
else
Token = error("invalid float token")
).
:- pred rev_char_list_to_string(list(char)::in, string::out) is semidet.
rev_char_list_to_string(RevChars, String) :-
string.semidet_from_rev_char_list(RevChars, String).
:- func null_character_error = token.
null_character_error =
error("null character is illegal in strings and names").
%---------------------------------------------------------------------------%