Files
mercury/library/lexer.m
Julien Fischer e0f5ac47db Make it easier for vi to jump past the initial comments
Estimated hours taken: 0.1
Branches: main

library/*.m:
	Make it easier for vi to jump past the initial comments
	at the head of a module.
2006-04-19 05:18:00 +00:00

1995 lines
67 KiB
Mathematica

%---------------------------------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et wm=0 tw=0
%---------------------------------------------------------------------------%
% Copyright (C) 1993-2000, 2003-2006 The University of Melbourne.
% This file may only be copied under the terms of the GNU Library General
% Public License - see the file COPYING.LIB in the Mercury distribution.
%---------------------------------------------------------------------------%
%
% File: lexer.m.
% Main author: fjh.
% Stability: high.
%
% Lexical analysis. This module defines the representation of tokens
% and exports predicates for reading in tokens from an input stream.
%
% See ISO Prolog 6.4. Also see the comments at the top of parser.m.
%
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
:- module lexer.
:- interface.
:- import_module char.
:- import_module io.
%-----------------------------------------------------------------------------%
:- type token
---> name(string)
; variable(string)
; integer(int)
; float(float)
; string(string) % "...."
; open % '('
; open_ct % '(' without any preceding whitespace
; close % ')'
; open_list % '['
; close_list % ']'
; open_curly % '{'
; close_curly % '}'
; ht_sep % '|'
; comma % ','
; end % '.'
; junk(char) % junk character in the input stream
; error(string) % some other invalid token
; io_error(io.error) % error reading from the input stream
; eof % end-of-file
; integer_dot(int). % the lexer will never return this.
% The integer_dot/1 token is used
% internally in the lexer, to keep
% the grammar LL(1) so that only one
% character of pushback is needed.
% But the lexer will convert
% integer_dot/1 tokens to integer/1
% tokens before returning them.
% For every token, we record the line number of the line on
% which the token occurred.
%
:- type token_context == int. % line number
% This "fat list" representation is more efficient than a list of pairs.
%
:- type token_list
---> token_cons(token, token_context, token_list)
; token_nil.
% Read a list of tokens from the current input stream.
% Keep reading until we encounter either an `end' token
% (i.e. a full stop followed by whitespace) or the end-of-file.
%
:- pred get_token_list(token_list::out, io::di, io::uo) is det.
% The type `offset' represents a (zero-based) offset into a string.
%
:- type offset == int.
% string_get_token_list(String, MaxOffset, Tokens, InitialPos, FinalPos):
%
% Scan a list of tokens from a string, starting at the current offset
% specified by InitialPos. Keep scanning until either we encounter either
% an `end' token (i.e. a full stop followed by whitespace) or until we
% reach MaxOffset. (MaxOffset must be =< the length of the string.)
% Return the tokens scanned in Tokens, and return the position one
% character past the end of the last token in FinalPos.
%
:- pred string_get_token_list(string::in, offset::in, token_list::out,
posn::in, posn::out) is det.
% string_get_token_list(String, Tokens, InitialPos, FinalPos):
% calls string_get_token_list/5 above with MaxPos = length of String.
%
:- pred string_get_token_list(string::in, token_list::out,
posn::in, posn::out) is det.
% Convert a token to a human-readable string describing the token.
%
:- pred token_to_string(token::in, string::out) is det.
%-----------------------------------------------------------------------------%
:- implementation.
%-----------------------------------------------------------------------------%
:- interface.
% graphic_token_char(Char): true iff `Char'
% is "graphic token char" (ISO Prolog 6.4.2).
% This is exported for use by term_io.quote_atom.
%
:- pred graphic_token_char(char::in) is semidet.
%-----------------------------------------------------------------------------%
:- implementation.
:- import_module int.
:- import_module list.
:- import_module require.
:- import_module string.
:- import_module term.
% Note that there are two implementations of most predicates here:
% one which deals with strings, the other that deals with io.states.
% We can't write the io.state version in terms of the string
% version because we don't know how much string to slurp up
% until after we've lexically analysed it. Some interactive
% applications require the old Prolog behaviour of stopping
% after an end token (i.e. `.' plus whitespace) rather than
% reading in whole lines.
% Conversely, we can't write the string version using the io.state
% version, since that would require either cheating with the io.state
% or ruining the string interface.
%
% An alternative would be to write both versions in terms
% of a generic "char_stream" typeclass, with instances
% for io.states and for strings.
% However, for this to be acceptably efficient it would
% require the compiler to specialize the code, which
% currently (13 May 98) it is not capable of doing.
%
% In fact, the string version is still not as efficient as I would
% like. The compiler ought to (but currently doesn't) unfold all
% the instances of the `posn' type. We could do this type unfolding
% by hand, but it would be very tedious and it would make the code
% less readable. If/when there is compiler support for this, we
% should also think about moving the `String' and `Len' arguments
% into the posn (or making a new `lexer_state' struct which contains
% both the posn and the String and Len arguments).
%-----------------------------------------------------------------------------%
token_to_string(name(Name), String) :-
string.append_list(["token '", Name, "'"], String).
token_to_string(variable(Var), String) :-
string.append_list(["variable `", Var, "'"], String).
token_to_string(integer(Int), String) :-
string.int_to_string(Int, IntString),
string.append_list(["integer `", IntString, "'"], String).
token_to_string(float(Float), String) :-
string.float_to_string(Float, FloatString),
string.append_list(["float `", FloatString, "'"], String).
token_to_string(string(Token), String) :-
string.append_list(["string """, Token, """"], String).
token_to_string(open, "token ` ('").
token_to_string(open_ct, "token `('").
token_to_string(close, "token `)'").
token_to_string(open_list, "token `['").
token_to_string(close_list, "token `]'").
token_to_string(open_curly, "token `{'").
token_to_string(close_curly, "token `}'").
token_to_string(ht_sep, "token `|'").
token_to_string(comma, "token `,'").
token_to_string(end, "token `. '").
token_to_string(eof, "end-of-file").
token_to_string(junk(JunkChar), String) :-
char.to_int(JunkChar, Code),
string.int_to_base_string(Code, 16, Hex),
string.append_list(["illegal character <<0x", Hex, ">>"], String).
token_to_string(io_error(IO_Error), String) :-
io.error_message(IO_Error, IO_ErrorMessage),
string.append("I/O error: ", IO_ErrorMessage, String).
token_to_string(error(Message), String) :-
string.append_list(["illegal token (", Message, ")"], String).
token_to_string(integer_dot(Int), String) :-
string.int_to_string(Int, IntString),
string.append_list(["integer `", IntString, "'."], String).
% We build the tokens up as lists of characters in reverse order.
% When we get to the end of each token, we call
% `rev_char_list_to_string/2' to convert that representation
% into a string.
% Comments of the form
% foo --> bar . baz
% mean that we are parsing a `foo', and we've already scanned
% past the `bar', so now we need to match with a `baz'.
get_token_list(Tokens, !IO) :-
get_token(Token, Context, !IO),
get_token_list_2(Token, Context, Tokens, !IO).
:- pred get_token_list_2(token::in, token_context::in, token_list::out,
io::di, io::uo) is det.
get_token_list_2(Token0, Context0, Tokens, !IO) :-
( Token0 = eof ->
Tokens = token_nil
; ( Token0 = end ; Token0 = error(_) ; Token0 = io_error(_) ) ->
Tokens = token_cons(Token0, Context0, token_nil)
; Token0 = integer_dot(Int) ->
get_context(Context1, !IO),
get_dot(Token1, !IO),
get_token_list_2(Token1, Context1, Tokens1, !IO),
Tokens = token_cons(integer(Int), Context0, Tokens1)
;
get_token(Token1, Context1, !IO),
get_token_list_2(Token1, Context1, Tokens1, !IO),
Tokens = token_cons(Token0, Context0, Tokens1)
).
string_get_token_list(String, Tokens, !Posn) :-
string.length(String, Len),
string_get_token_list(String, Len, Tokens, !Posn).
string_get_token_list(String, Len, Tokens, !Posn) :-
string_get_token(String, Len, Token, Context, !Posn),
( Token = eof ->
Tokens = token_nil
; ( Token = end ; Token = error(_) ; Token = io_error(_) ) ->
Tokens = token_cons(Token, Context, token_nil)
;
Tokens = token_cons(Token, Context, Tokens1),
string_get_token_list(String, Len, Tokens1, !Posn)
).
%-----------------------------------------------------------------------------%
%
% Some low-level routines.
:- pred get_context(token_context::out, io::di, io::uo) is det.
get_context(Context, !IO) :-
io.get_line_number(Context, !IO).
:- type string_token_context == token_context.
:- pred string_get_context(posn::in, string_token_context::out,
posn::in, posn::out) is det.
string_get_context(StartPosn, Context, !Posn) :-
StartPosn = posn(StartLineNum, _, _),
Context = StartLineNum.
% In future, we might want to modify this code to read something
% like this:
%
% posn_to_line_and_column(StartPosn, StartLineNum, StartColumn),
% posn_to_line_and_column(!.Posn, EndLineNum, EndColumn),
% Context = detailed(StartLine, StartColumn, EndLine, EndColumn).
:- pred string_read_char(string::in, int::in, char::out,
posn::in, posn::out) is semidet.
:- pragma inline(string_read_char/5).
string_read_char(String, Len, Char, Posn0, Posn) :-
Posn0 = posn(LineNum0, LineOffset0, Offset0),
Offset0 < Len,
string.unsafe_index(String, Offset0, Char),
Offset = Offset0 + 1,
( Char = '\n' ->
LineNum = LineNum0 + 1,
Posn = posn(LineNum, Offset, Offset)
;
Posn = posn(LineNum0, LineOffset0, Offset)
).
:- pred string_ungetchar(string::in, posn::in, posn::out) is det.
string_ungetchar(String, Posn0, Posn) :-
Posn0 = posn(LineNum0, LineOffset0, Offset0),
Offset = Offset0 - 1,
string.unsafe_index(String, Offset, Char),
( Char = '\n' ->
LineNum = LineNum0 - 1,
Posn = posn(LineNum, Offset, Offset)
;
Posn = posn(LineNum0, LineOffset0, Offset)
).
:- pred grab_string(string::in, posn::in, string::out,
posn::in, posn::out) is det.
grab_string(String, Posn0, SubString, Posn, Posn) :-
Posn0 = posn(_, _, Offset0),
Posn = posn(_, _, Offset),
Count = Offset - Offset0,
string.unsafe_substring(String, Offset0, Count, SubString).
:- pred string_set_line_number(int::in, posn::in, posn::out) is det.
string_set_line_number(LineNumber, Posn0, Posn) :-
Posn0 = posn(_, _, Offset),
Posn = posn(LineNumber, Offset, Offset).
%-----------------------------------------------------------------------------%
:- pred get_token(token::out, token_context::out, io::di, io::uo)
is det.
get_token(Token, Context, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
get_context(Context, !IO),
Token = io_error(Error)
;
Result = eof,
get_context(Context, !IO),
Token = eof
;
Result = ok(Char),
( char.is_whitespace(Char) ->
get_token_2(Token, Context, !IO)
; ( char.is_upper(Char) ; Char = '_' ) ->
get_context(Context, !IO),
get_variable([Char], Token, !IO)
; char.is_lower(Char) ->
get_context(Context, !IO),
get_name([Char], Token, !IO)
; Char = '0' ->
get_context(Context, !IO),
get_zero(Token, !IO)
; char.is_digit(Char) ->
get_context(Context, !IO),
get_number([Char], Token, !IO)
; special_token(Char, SpecialToken) ->
get_context(Context, !IO),
( SpecialToken = open ->
Token = open_ct
;
Token = SpecialToken
)
; Char = ('.') ->
get_context(Context, !IO),
get_dot(Token, !IO)
; Char = ('%') ->
skip_to_eol(Token, Context, !IO)
; ( Char = '"' ; Char = '''' ) ->
get_context(Context, !IO),
get_quoted_name(Char, [], Token, !IO)
; Char = ('/') ->
get_slash(Token, Context, !IO)
; Char = ('#') ->
get_source_line_number([], Token, Context, !IO)
; Char = ('`') ->
get_context(Context, !IO),
Token = name("`")
; graphic_token_char(Char) ->
get_context(Context, !IO),
get_graphic([Char], Token, !IO)
;
get_context(Context, !IO),
Token = junk(Char)
)
).
:- pred string_get_token(string::in, int::in, token::out,
token_context::out, posn::in, posn::out) is det.
string_get_token(String, Len, Token, Context, !Posn) :-
Posn0 = !.Posn,
( string_read_char(String, Len, Char, !Posn) ->
( char.is_whitespace(Char) ->
string_get_token_2(String, Len, Token, Context, !Posn)
; ( char.is_upper(Char) ; Char = '_' ) ->
string_get_variable(String, Len, Posn0, Token, Context, !Posn)
; char.is_lower(Char) ->
string_get_name(String, Len, Posn0, Token, Context, !Posn)
; Char = '0' ->
string_get_zero(String, Len, Posn0, Token, Context, !Posn)
; char.is_digit(Char) ->
string_get_number(String, Len, Posn0, Token, Context, !Posn)
; special_token(Char, SpecialToken) ->
string_get_context(Posn0, Context, !Posn),
( SpecialToken = open ->
Token = open_ct
;
Token = SpecialToken
)
; Char = ('.') ->
string_get_dot(String, Len, Posn0, Token, Context, !Posn)
; Char = ('%') ->
string_skip_to_eol(String, Len, Token, Context, !Posn)
; ( Char = '"' ; Char = '''' ) ->
string_get_quoted_name(String, Len, Char, [], Posn0, Token,
Context, !Posn)
; Char = ('/') ->
string_get_slash(String, Len, Posn0, Token, Context, !Posn)
; Char = ('#') ->
string_get_source_line_number(String, Len, !.Posn, Token, Context,
!Posn)
; Char = ('`') ->
string_get_context(Posn0, Context, !Posn),
Token = name("`")
; graphic_token_char(Char) ->
string_get_graphic(String, Len, Posn0, Token, Context, !Posn)
;
string_get_context(Posn0, Context, !Posn),
Token = junk(Char)
)
;
string_get_context(Posn0, Context, !Posn),
Token = eof
).
%-----------------------------------------------------------------------------%
% This is just like get_token, except that we have already scanned past
% some whitespace, so '(' gets scanned as `open' rather than `open_ct'.
%
:- pred get_token_2(token::out, token_context::out, io::di, io::uo)
is det.
get_token_2(Token, Context, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
get_context(Context, !IO),
Token = io_error(Error)
;
Result = eof,
get_context(Context, !IO),
Token = eof
;
Result = ok(Char),
( char.is_whitespace(Char) ->
get_token_2(Token, Context, !IO)
; ( char.is_upper(Char) ; Char = '_' ) ->
get_context(Context, !IO),
get_variable([Char], Token, !IO)
; char.is_lower(Char) ->
get_context(Context, !IO),
get_name([Char], Token, !IO)
; Char = '0' ->
get_context(Context, !IO),
get_zero(Token, !IO)
; char.is_digit(Char) ->
get_context(Context, !IO),
get_number([Char], Token, !IO)
; special_token(Char, SpecialToken) ->
get_context(Context, !IO),
Token = SpecialToken
; Char = ('.') ->
get_context(Context, !IO),
get_dot(Token, !IO)
; Char = ('%') ->
skip_to_eol(Token, Context, !IO)
; ( Char = '"' ; Char = '''' ) ->
get_context(Context, !IO),
get_quoted_name(Char, [], Token, !IO)
; Char = ('/') ->
get_slash(Token, Context, !IO)
; Char = ('#') ->
get_source_line_number([], Token, Context, !IO)
; Char = ('`') ->
get_context(Context, !IO),
Token = name("`")
; graphic_token_char(Char) ->
get_context(Context, !IO),
get_graphic([Char], Token, !IO)
;
get_context(Context, !IO),
Token = junk(Char)
)
).
:- pred string_get_token_2(string::in, int::in, token::out,
token_context::out, posn::in, posn::out) is det.
string_get_token_2(String, Len, Token, Context, !Posn) :-
Posn0 = !.Posn,
( string_read_char(String, Len, Char, !Posn) ->
( char.is_whitespace(Char) ->
string_get_token_2(String, Len, Token, Context, !Posn)
; ( char.is_upper(Char) ; Char = '_' ) ->
string_get_variable(String, Len, Posn0, Token, Context, !Posn)
; char.is_lower(Char) ->
string_get_name(String, Len, Posn0, Token, Context, !Posn)
; Char = '0' ->
string_get_zero(String, Len, Posn0, Token, Context, !Posn)
; char.is_digit(Char) ->
string_get_number(String, Len, Posn0, Token, Context, !Posn)
; special_token(Char, SpecialToken) ->
string_get_context(Posn0, Context, !Posn),
Token = SpecialToken
; Char = ('.') ->
string_get_dot(String, Len, Posn0, Token, Context, !Posn)
; Char = ('%') ->
string_skip_to_eol(String, Len, Token, Context, !Posn)
; ( Char = '"' ; Char = '''' ) ->
string_get_quoted_name(String, Len, Char, [], Posn0, Token,
Context, !Posn)
; Char = ('/') ->
string_get_slash(String, Len, Posn0, Token, Context, !Posn)
; Char = ('#') ->
string_get_source_line_number(String, Len, !.Posn, Token, Context,
!Posn)
; Char = ('`') ->
string_get_context(Posn0, Context, !Posn),
Token = name("`")
; graphic_token_char(Char) ->
string_get_graphic(String, Len, Posn0, Token, Context, !Posn)
;
string_get_context(Posn0, Context, !Posn),
Token = junk(Char)
)
;
string_get_context(Posn0, Context, !Posn),
Token = eof
).
%-----------------------------------------------------------------------------%
:- pred special_token(char::in, token::out) is semidet.
special_token('(', open). % May get converted to open_ct
special_token(')', close).
special_token('[', open_list).
special_token(']', close_list).
special_token('{', open_curly).
special_token('}', close_curly).
special_token('|', ht_sep).
special_token(',', comma).
special_token(';', name(";")).
graphic_token_char('!').
graphic_token_char('#').
graphic_token_char('$').
graphic_token_char('&').
graphic_token_char('*').
graphic_token_char('+').
graphic_token_char('-').
graphic_token_char('.').
graphic_token_char('/').
graphic_token_char(':').
graphic_token_char('<').
graphic_token_char('=').
graphic_token_char('>').
graphic_token_char('?').
graphic_token_char('@').
graphic_token_char('^').
graphic_token_char('~').
graphic_token_char('\\').
%-----------------------------------------------------------------------------%
:- pred get_dot(token::out, io::di, io::uo) is det.
get_dot(Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = end
;
Result = ok(Char),
( whitespace_after_dot(Char) ->
io.putback_char(Char, !IO),
Token = end
; graphic_token_char(Char) ->
get_graphic([Char, '.'], Token, !IO)
;
io.putback_char(Char, !IO),
Token = name(".")
)
).
:- pred string_get_dot(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_dot(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( whitespace_after_dot(Char) ->
string_ungetchar(String, !Posn),
string_get_context(Posn0, Context, !Posn),
Token = end
; graphic_token_char(Char) ->
string_get_graphic(String, Len, Posn0, Token, Context, !Posn)
;
string_ungetchar(String, !Posn),
string_get_context(Posn0, Context, !Posn),
Token = name(".")
)
;
string_get_context(Posn0, Context, !Posn),
Token = end
).
:- pred whitespace_after_dot(char::in) is semidet.
whitespace_after_dot(Char) :-
( char.is_whitespace(Char)
; Char = '%'
).
%-----------------------------------------------------------------------------%
%
% Comments.
:- pred skip_to_eol(token::out, token_context::out, io::di, io::uo)
is det.
skip_to_eol(Token, Context, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
get_context(Context, !IO),
Token = io_error(Error)
;
Result = eof,
get_context(Context, !IO),
Token = eof
;
Result = ok(Char),
( Char = '\n' ->
get_token_2(Token, Context, !IO)
;
skip_to_eol(Token, Context, !IO)
)
).
:- pred string_skip_to_eol(string::in, int::in, token::out,
token_context::out, posn::in, posn::out) is det.
string_skip_to_eol(String, Len, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( Char = '\n' ->
string_get_token_2(String, Len, Token, Context, !Posn)
;
string_skip_to_eol(String, Len, Token, Context, !Posn)
)
;
string_get_context(!.Posn, Context, !Posn),
Token = eof
).
:- pred get_slash(token::out, token_context::out, io::di, io::uo) is det.
get_slash(Token, Context, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
get_context(Context, !IO),
Token = io_error(Error)
;
Result = eof,
get_context(Context, !IO),
Token = name("/")
;
Result = ok(Char),
( Char = ('*') ->
get_comment(Token, Context, !IO)
; graphic_token_char(Char) ->
get_context(Context, !IO),
get_graphic([Char, '/'], Token, !IO)
;
io.putback_char(Char, !IO),
get_context(Context, !IO),
Token = name("/")
)
).
:- pred string_get_slash(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_slash(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( Char = ('*') ->
string_get_comment(String, Len, Posn0, Token, Context, !Posn)
; graphic_token_char(Char) ->
string_get_graphic(String, Len, Posn0, Token, Context, !Posn)
;
string_ungetchar(String, !Posn),
string_get_context(Posn0, Context, !Posn),
Token = name("/")
)
;
string_get_context(Posn0, Context, !Posn),
Token = name("/")
).
:- pred get_comment(token::out, token_context::out,
io::di, io::uo) is det.
get_comment(Token, Context, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
get_context(Context, !IO),
Token = io_error(Error)
;
Result = eof,
get_context(Context, !IO),
Token = error("unterminated '/*' comment")
;
Result = ok(Char),
( Char = ('*') ->
get_comment_2(Token, Context, !IO)
;
get_comment(Token, Context, !IO)
)
).
:- pred string_get_comment(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_comment(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( Char = ('*') ->
string_get_comment_2(String, Len, Posn0, Token, Context, !Posn)
;
string_get_comment(String, Len, Posn0, Token, Context, !Posn)
)
;
string_get_context(Posn0, Context, !Posn),
Token = error("unterminated '/*' comment")
).
:- pred get_comment_2(token::out, token_context::out, io::di, io::uo) is det.
get_comment_2(Token, Context, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
get_context(Context, !IO),
Token = io_error(Error)
;
Result = eof,
get_context(Context, !IO),
Token = error("unterminated '/*' comment")
;
Result = ok(Char),
( Char = ('/') ->
% end of /* ... */ comment, so get next token
get_token_2(Token, Context, !IO)
; Char = ('*') ->
get_comment_2(Token, Context, !IO)
;
get_comment(Token, Context, !IO)
)
).
:- pred string_get_comment_2(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_comment_2(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( Char = ('/') ->
% end of /* ... */ comment, so get next token
string_get_token_2(String, Len, Token, Context, !Posn)
; Char = ('*') ->
string_get_comment_2(String, Len, Posn0, Token, Context, !Posn)
;
string_get_comment(String, Len, Posn0, Token, Context, !Posn)
)
;
string_get_context(Posn0, Context, !Posn),
Token = error("unterminated '/*' comment")
).
%-----------------------------------------------------------------------------%
%
% Quoted names and quoted strings.
:- pred get_quoted_name(char::in, list(char)::in, token::out,
io::di, io::uo) is det.
get_quoted_name(QuoteChar, Chars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = error("unterminated quote")
;
Result = ok(Char),
( Char = QuoteChar ->
get_quoted_name_quote(QuoteChar, Chars, Token, !IO)
; Char = ('\\') ->
get_quoted_name_escape(QuoteChar, Chars, Token, !IO)
;
get_quoted_name(QuoteChar, [Char | Chars], Token, !IO)
)
).
:- pred string_get_quoted_name(string::in, int::in, char::in,
list(char)::in, posn::in, token::out, string_token_context::out,
posn::in, posn::out) is det.
string_get_quoted_name(String, Len, QuoteChar, Chars, Posn0, Token, Context,
!Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( Char = QuoteChar ->
string_get_quoted_name_quote(String, Len, QuoteChar, Chars,
Posn0, Token, Context, !Posn)
; Char = ('\\') ->
string_get_quoted_name_escape(String, Len, QuoteChar, Chars,
Posn0, Token, Context, !Posn)
;
string_get_quoted_name(String, Len, QuoteChar, [Char | Chars],
Posn0, Token, Context, !Posn)
)
;
string_get_context(Posn0, Context, !Posn),
Token = error("unterminated quote")
).
:- pred get_quoted_name_quote(char::in, list(char)::in, token::out,
io::di, io::uo) is det.
get_quoted_name_quote(QuoteChar, Chars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
finish_quoted_name(QuoteChar, Chars, Token)
;
Result = ok(Char),
( Char = QuoteChar ->
get_quoted_name(QuoteChar, [Char | Chars], Token, !IO)
;
io.putback_char(Char, !IO),
finish_quoted_name(QuoteChar, Chars, Token)
)
).
:- pred string_get_quoted_name_quote(string::in, int::in, char::in,
list(char)::in, posn::in, token::out, string_token_context::out,
posn::in, posn::out) is det.
string_get_quoted_name_quote(String, Len, QuoteChar, Chars, Posn0,
Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( Char = QuoteChar ->
string_get_quoted_name(String, Len, QuoteChar, [Char | Chars],
Posn0, Token, Context, !Posn)
;
string_ungetchar(String, !Posn),
string_get_context(Posn0, Context, !Posn),
finish_quoted_name(QuoteChar, Chars, Token)
)
;
string_get_context(Posn0, Context, !Posn),
finish_quoted_name(QuoteChar, Chars, Token)
).
:- pred finish_quoted_name(char::in, list(char)::in, token::out) is det.
finish_quoted_name(QuoteChar, Chars, Token) :-
rev_char_list_to_string(Chars, String),
( QuoteChar = '''' ->
Token = name(String)
; QuoteChar = '"' ->
Token = string(String)
;
error("lexer.m: unknown quote character")
).
:- pred get_quoted_name_escape(char::in, list(char)::in, token::out,
io::di, io::uo) is det.
get_quoted_name_escape(QuoteChar, Chars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = error("unterminated quoted name")
;
Result = ok(Char),
( Char = '\n' ->
get_quoted_name(QuoteChar, Chars, Token, !IO)
; Char = '\r' ->
% Files created on Windows may have an extra return character.
get_quoted_name_escape(QuoteChar, Chars, Token, !IO)
; escape_char(Char, EscapedChar) ->
Chars1 = [EscapedChar | Chars],
get_quoted_name(QuoteChar, Chars1, Token, !IO)
; Char = 'x' ->
get_hex_escape(QuoteChar, Chars, [], Token, !IO)
; char.is_octal_digit(Char) ->
get_octal_escape(QuoteChar, Chars, [Char], Token, !IO)
;
Token = error("invalid escape character")
)
).
:- pred string_get_quoted_name_escape(string::in, int::in, char::in,
list(char)::in, posn::in, token::out, string_token_context::out,
posn::in, posn::out) is det.
string_get_quoted_name_escape(String, Len, QuoteChar, Chars, Posn0,
Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( Char = '\n' ->
string_get_quoted_name(String, Len, QuoteChar, Chars,
Posn0, Token, Context, !Posn)
; Char = '\r' ->
% Files created on Windows may have an extra return character.
string_get_quoted_name_escape(String, Len, QuoteChar, Chars,
Posn0, Token, Context, !Posn)
; escape_char(Char, EscapedChar) ->
Chars1 = [EscapedChar | Chars],
string_get_quoted_name(String, Len, QuoteChar, Chars1,
Posn0, Token, Context, !Posn)
; Char = 'x' ->
string_get_hex_escape(String, Len, QuoteChar, Chars, [],
Posn0, Token, Context, !Posn)
; char.is_octal_digit(Char) ->
string_get_octal_escape(String, Len, QuoteChar, Chars, [Char],
Posn0, Token, Context, !Posn)
;
string_get_context(!.Posn, Context, !Posn),
Token = error("invalid escape character")
)
;
string_get_context(Posn0, Context, !Posn),
Token = error("unterminated quoted name")
).
:- pred escape_char(char::in, char::out) is semidet.
escape_char('a', '\a').
escape_char('b', '\b').
escape_char('r', '\r').
escape_char('f', '\f').
escape_char('t', '\t').
escape_char('n', '\n').
escape_char('v', '\v').
escape_char('\\', '\\').
escape_char('''', '''').
escape_char('"', '"').
escape_char('`', '`').
:- pred get_hex_escape(char::in, list(char)::in, list(char)::in,
token::out, io::di, io::uo) is det.
get_hex_escape(QuoteChar, Chars, HexChars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = error("unterminated quote")
;
Result = ok(Char),
( char.is_hex_digit(Char) ->
get_hex_escape(QuoteChar, Chars, [Char | HexChars], Token, !IO)
; Char = ('\\') ->
finish_hex_escape(QuoteChar, Chars, HexChars, Token, !IO)
;
Token = error("unterminated hex escape")
)
).
:- pred string_get_hex_escape(string::in, int::in, char::in,
list(char)::in, list(char)::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_hex_escape(String, Len, QuoteChar, Chars, HexChars, Posn0,
Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_hex_digit(Char) ->
string_get_hex_escape(String, Len, QuoteChar, Chars,
[Char | HexChars], Posn0, Token, Context, !Posn)
; Char = ('\\') ->
string_finish_hex_escape(String, Len, QuoteChar, Chars,
HexChars, Posn0, Token, Context, !Posn)
;
string_get_context(Posn0, Context, !Posn),
Token = error("unterminated hex escape")
)
;
string_get_context(Posn0, Context, !Posn),
Token = error("unterminated quote")
).
:- pred finish_hex_escape(char::in, list(char)::in, list(char)::in,
token::out, io::di, io::uo) is det.
finish_hex_escape(QuoteChar, Chars, HexChars, Token, !IO) :-
( HexChars = [] ->
Token = error("empty hex escape")
;
rev_char_list_to_string(HexChars, HexString),
(
string.base_string_to_int(16, HexString, Int),
char.to_int(Char, Int)
->
get_quoted_name(QuoteChar, [Char|Chars], Token, !IO)
;
Token = error("invalid hex escape")
)
).
:- pred string_finish_hex_escape(string::in, int::in, char::in,
list(char)::in, list(char)::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_finish_hex_escape(String, Len, QuoteChar, Chars, HexChars, Posn0,
Token, Context, !Posn) :-
(
HexChars = [],
string_get_context(Posn0, Context, !Posn),
Token = error("empty hex escape")
;
HexChars = [_ | _],
rev_char_list_to_string(HexChars, HexString),
(
string.base_string_to_int(16, HexString, Int),
char.to_int(Char, Int)
->
string_get_quoted_name(String, Len, QuoteChar, [Char | Chars],
Posn0, Token, Context, !Posn)
;
string_get_context(Posn0, Context, !Posn),
Token = error("invalid hex escape")
)
).
:- pred get_octal_escape(char::in, list(char)::in, list(char)::in,
token::out, io::di, io::uo) is det.
get_octal_escape(QuoteChar, Chars, OctalChars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = error("unterminated quote")
;
Result = ok(Char),
( char.is_octal_digit(Char) ->
get_octal_escape(QuoteChar, Chars, [Char | OctalChars], Token, !IO)
; Char = ('\\') ->
finish_octal_escape(QuoteChar, Chars,
OctalChars, Token, !IO)
;
% XXX We don't report this as an error since we need bug-for-bug
% compatibility with NU-Prolog.
% Token = error("unterminated octal escape")
io.putback_char(Char, !IO),
finish_octal_escape(QuoteChar, Chars, OctalChars, Token, !IO)
)
).
:- pred string_get_octal_escape(string::in, int::in, char::in,
list(char)::in, list(char)::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_octal_escape(String, Len, QuoteChar, Chars, OctalChars,
Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_octal_digit(Char) ->
string_get_octal_escape(String, Len, QuoteChar, Chars,
[Char | OctalChars], Posn0, Token, Context, !Posn)
; Char = ('\\') ->
string_finish_octal_escape(String, Len, QuoteChar, Chars,
OctalChars, Posn0, Token, Context, !Posn)
;
% XXX We don't report this as an error since we need bug-for-bug
% compatibility with NU-Prolog.
% Token = error("unterminated octal escape")
string_ungetchar(String, !Posn),
string_finish_octal_escape(String, Len, QuoteChar, Chars,
OctalChars, Posn0, Token, Context, !Posn)
)
;
Token = error("unterminated quote"),
string_get_context(Posn0, Context, !Posn)
).
:- pred finish_octal_escape(char::in, list(char)::in, list(char)::in,
token::out, io::di, io::uo) is det.
finish_octal_escape(QuoteChar, Chars, OctalChars, Token, !IO) :-
( OctalChars = [] ->
Token = error("empty octal escape")
;
rev_char_list_to_string(OctalChars, OctalString),
(
string.base_string_to_int(8, OctalString, Int),
char.to_int(Char, Int)
->
get_quoted_name(QuoteChar, [Char | Chars], Token, !IO)
;
Token = error("invalid octal escape")
)
).
:- pred string_finish_octal_escape(string::in, int::in, char::in,
list(char)::in, list(char)::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_finish_octal_escape(String, Len, QuoteChar, Chars, OctalChars,
Posn0, Token, Context, !Posn) :-
(
OctalChars = [],
Token = error("empty octal escape"),
string_get_context(Posn0, Context, !Posn)
;
OctalChars = [_ | _],
rev_char_list_to_string(OctalChars, OctalString),
(
string.base_string_to_int(8, OctalString, Int),
char.to_int(Char, Int)
->
string_get_quoted_name(String, Len, QuoteChar, [Char | Chars],
Posn0, Token, Context, !Posn)
;
Token = error("invalid octal escape"),
string_get_context(Posn0, Context, !Posn)
)
).
%-----------------------------------------------------------------------------%
%
% Names and variables.
:- pred get_name(list(char)::in, token::out, io::di, io::uo) is det.
get_name(Chars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_string(Chars, Name),
Token = name(Name)
;
Result = ok(Char),
( char.is_alnum_or_underscore(Char) ->
get_name([Char | Chars], Token, !IO)
;
io.putback_char(Char, !IO),
rev_char_list_to_string(Chars, Name),
Token = name(Name)
)
).
:- pred string_get_name(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_name(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_alnum_or_underscore(Char) ->
string_get_name(String, Len, Posn0, Token, Context, !Posn)
;
string_ungetchar(String, !Posn),
grab_string(String, Posn0, Name, !Posn),
Token = name(Name),
string_get_context(Posn0, Context, !Posn)
)
;
grab_string(String, Posn0, Name, !Posn),
Token = name(Name),
string_get_context(Posn0, Context, !Posn)
).
% A line number directive token is `#' followed by an integer
% (specifying the line number) followed by a newline.
% Such a token sets the source line number for the next
% line, but it is otherwise ignored. This means that line number
% directives may appear anywhere that a token may appear, including
% in the middle of terms.
% (The source file name can be set with a `:- pragma source_file'
% declaration.)
%
:- pred get_source_line_number(list(char)::in, token::out,
token_context::out, io::di, io::uo) is det.
get_source_line_number(Chars, Token, Context, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
get_context(Context, !IO),
Token = io_error(Error)
;
Result = eof,
get_context(Context, !IO),
Token = error("unexpected end-of-file in `#' line number directive")
;
Result = ok(Char),
( char.is_digit(Char) ->
get_source_line_number([Char | Chars], Token, Context, !IO)
; Char = '\n' ->
rev_char_list_to_string(Chars, String),
(
string.base_string_to_int(10, String, Int),
Int > 0
->
io.set_line_number(Int, !IO),
get_token(Token, Context, !IO)
;
get_context(Context, !IO),
string.append_list(["invalid line number `", String,
"' in `#' line number directive"], Message),
Token = error(Message)
)
;
get_context(Context, !IO),
string.from_char_list([Char], String),
string.append_list(["invalid character `", String,
"' in `#' line number directive"], Message),
Token = error(Message)
)
).
:- pred string_get_source_line_number(string::in, int::in, posn::in,
token::out, token_context::out, posn::in, posn::out) is det.
string_get_source_line_number(String, Len, Posn1, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_digit(Char) ->
string_get_source_line_number(String, Len, Posn1, Token, Context,
!Posn)
; Char = '\n' ->
grab_string(String, Posn1, LineNumString, !Posn),
(
string.base_string_to_int(10, LineNumString, LineNum),
LineNum > 0
->
string_set_line_number(LineNum, !Posn),
string_get_token(String, Len, Token, Context, !Posn)
;
string_get_context(Posn1, Context, !Posn),
string.append_list(["invalid line number `", LineNumString,
"' in `#' line number directive"], Message),
Token = error(Message)
)
;
string_get_context(Posn1, Context, !Posn),
string.from_char_list([Char], DirectiveString),
string.append_list(["invalid character `", DirectiveString,
"' in `#' line number directive"], Message),
Token = error(Message)
)
;
string_get_context(Posn1, Context, !Posn),
Token = error("unexpected end-of-file in `#' line number directive")
).
:- pred get_graphic(list(char)::in, token::out, io::di, io::uo) is det.
get_graphic(Chars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_string(Chars, Name),
Token = name(Name)
;
Result = ok(Char),
( graphic_token_char(Char) ->
get_graphic([Char | Chars], Token, !IO)
;
io.putback_char(Char, !IO),
rev_char_list_to_string(Chars, Name),
Token = name(Name)
)
).
:- pred string_get_graphic(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_graphic(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( graphic_token_char(Char) ->
string_get_graphic(String, Len, Posn0, Token, Context, !Posn)
;
string_ungetchar(String, !Posn),
grab_string(String, Posn0, Name, !Posn),
Token = name(Name),
string_get_context(Posn0, Context, !Posn)
)
;
grab_string(String, Posn0, Name, !Posn),
string_get_context(Posn0, Context, !Posn),
Token = name(Name)
).
:- pred get_variable(list(char)::in, token::out, io::di, io::uo) is det.
get_variable(Chars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_string(Chars, VariableName),
Token = variable(VariableName)
;
Result = ok(Char),
( char.is_alnum_or_underscore(Char) ->
get_variable([Char | Chars], Token, !IO)
;
io.putback_char(Char, !IO),
rev_char_list_to_string(Chars, VariableName),
Token = variable(VariableName)
)
).
:- pred string_get_variable(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_variable(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_alnum_or_underscore(Char) ->
string_get_variable(String, Len, Posn0, Token, Context, !Posn)
;
string_ungetchar(String, !Posn),
grab_string(String, Posn0, VariableName, !Posn),
Token = variable(VariableName),
string_get_context(Posn0, Context, !Posn)
)
;
grab_string(String, Posn0, VariableName, !Posn),
Token = variable(VariableName),
string_get_context(Posn0, Context, !Posn)
).
%-----------------------------------------------------------------------------%
%
% Integer and float literals.
:- pred get_zero(token::out, io::di, io::uo) is det.
get_zero(Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = integer(0)
;
Result = ok(Char),
( char.is_digit(Char) ->
get_number([Char], Token, !IO)
; Char = '''' ->
get_char_code(Token, !IO)
; Char = 'b' ->
get_binary(Token, !IO)
; Char = 'o' ->
get_octal(Token, !IO)
; Char = 'x' ->
get_hex(Token, !IO)
; Char = ('.') ->
get_int_dot(['0'], Token, !IO)
; ( Char = 'e' ; Char = 'E' ) ->
get_float_exponent([Char, '0'], Token, !IO)
;
io.putback_char(Char, !IO),
Token = integer(0)
)
).
:- pred string_get_zero(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_zero(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_digit(Char) ->
string_get_number(String, Len, Posn0, Token, Context, !Posn)
; Char = '''' ->
string_get_char_code(String, Len, Posn0, Token, Context, !Posn)
; Char = 'b' ->
string_get_binary(String, Len, Posn0, Token, Context, !Posn)
; Char = 'o' ->
string_get_octal(String, Len, Posn0, Token, Context, !Posn)
; Char = 'x' ->
string_get_hex(String, Len, Posn0, Token, Context, !Posn)
; Char = ('.') ->
string_get_int_dot(String, Len, Posn0, Token, Context, !Posn)
; ( Char = 'e' ; Char = 'E' ) ->
string_get_float_exponent(String, Len, Posn0, Token, Context,
!Posn)
;
string_ungetchar(String, !Posn),
string_get_context(Posn0, Context, !Posn),
Token = integer(0)
)
;
string_get_context(Posn0, Context, !Posn),
Token = integer(0)
).
:- pred get_char_code(token::out, io::di, io::uo) is det.
get_char_code(Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = error("unterminated char code constant")
;
Result = ok(Char),
char.to_int(Char, CharCode),
Token = integer(CharCode)
).
:- pred string_get_char_code(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_char_code(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
char.to_int(Char, CharCode),
Token = integer(CharCode),
string_get_context(Posn0, Context, !Posn)
;
Token = error("unterminated char code constant"),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_binary(token::out, io::di, io::uo) is det.
get_binary(Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = error("unterminated binary constant")
;
Result = ok(Char),
( char.is_binary_digit(Char) ->
get_binary_2([Char], Token, !IO)
;
io.putback_char(Char, !IO),
Token = error("unterminated binary constant")
)
).
:- pred string_get_binary(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_binary(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_binary_digit(Char) ->
string_get_binary_2(String, Len, Posn0, Token, Context, !Posn)
;
string_ungetchar(String, !Posn),
Token = error("unterminated binary constant"),
string_get_context(Posn0, Context, !Posn)
)
;
Token = error("unterminated binary constant"),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_binary_2(list(char)::in, token::out, io::di, io::uo) is det.
get_binary_2(Chars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_int(Chars, 2, Token)
;
Result = ok(Char),
( char.is_binary_digit(Char) ->
get_binary_2([Char | Chars], Token, !IO)
;
io.putback_char(Char, !IO),
rev_char_list_to_int(Chars, 2, Token)
)
).
:- pred string_get_binary_2(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_binary_2(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_binary_digit(Char) ->
string_get_binary_2(String, Len, Posn0, Token, Context, !Posn)
;
string_ungetchar(String, !Posn),
grab_string(String, Posn0, BinaryString, !Posn),
conv_string_to_int(BinaryString, 2, Token),
string_get_context(Posn0, Context, !Posn)
)
;
grab_string(String, Posn0, BinaryString, !Posn),
conv_string_to_int(BinaryString, 2, Token),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_octal(token::out, io::di, io::uo) is det.
get_octal(Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = error("unterminated octal constant")
;
Result = ok(Char),
( char.is_octal_digit(Char) ->
get_octal_2([Char], Token, !IO)
;
io.putback_char(Char, !IO),
Token = error("unterminated octal constant")
)
).
:- pred string_get_octal(string::in, int::in, posn::in, token::out,
string_token_context::out,
posn::in, posn::out) is det.
string_get_octal(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_octal_digit(Char) ->
string_get_octal_2(String, Len, Posn0, Token, Context, !Posn)
;
string_ungetchar(String, !Posn),
Token = error("unterminated octal constant"),
string_get_context(Posn0, Context, !Posn)
)
;
Token = error("unterminated octal constant"),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_octal_2(list(char)::in, token::out, io::di, io::uo) is det.
get_octal_2(Chars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_int(Chars, 8, Token)
;
Result = ok(Char),
( char.is_octal_digit(Char) ->
get_octal_2([Char | Chars], Token, !IO)
;
io.putback_char(Char, !IO),
rev_char_list_to_int(Chars, 8, Token)
)
).
:- pred string_get_octal_2(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_octal_2(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_octal_digit(Char) ->
string_get_octal_2(String, Len, Posn0, Token, Context, !Posn)
;
string_ungetchar(String, !Posn),
grab_string(String, Posn0, BinaryString, !Posn),
conv_string_to_int(BinaryString, 8, Token),
string_get_context(Posn0, Context, !Posn)
)
;
grab_string(String, Posn0, BinaryString, !Posn),
conv_string_to_int(BinaryString, 8, Token),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_hex(token::out, io::di, io::uo) is det.
get_hex(Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = error("unterminated hex constant")
;
Result = ok(Char),
( char.is_hex_digit(Char) ->
get_hex_2([Char], Token, !IO)
;
io.putback_char(Char, !IO),
Token = error("unterminated hex constant")
)
).
:- pred string_get_hex(string::in, int::in, posn::in, token::out,
string_token_context::out,
posn::in, posn::out) is det.
string_get_hex(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_hex_digit(Char) ->
string_get_hex_2(String, Len, Posn0, Token, Context, !Posn)
;
string_ungetchar(String, !Posn),
Token = error("unterminated hex constant"),
string_get_context(Posn0, Context, !Posn)
)
;
Token = error("unterminated hex constant"),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_hex_2(list(char)::in, token::out, io::di, io::uo) is det.
get_hex_2(Chars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_int(Chars, 16, Token)
;
Result = ok(Char),
( char.is_hex_digit(Char) ->
get_hex_2([Char | Chars], Token, !IO)
;
io.putback_char(Char, !IO),
rev_char_list_to_int(Chars, 16, Token)
)
).
:- pred string_get_hex_2(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_hex_2(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_hex_digit(Char) ->
string_get_hex_2(String, Len, Posn0, Token, Context, !Posn)
;
string_ungetchar(String, !Posn),
grab_string(String, Posn0, BinaryString, !Posn),
conv_string_to_int(BinaryString, 16, Token),
string_get_context(Posn0, Context, !Posn)
)
;
grab_string(String, Posn0, BinaryString, !Posn),
conv_string_to_int(BinaryString, 16, Token),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_number(list(char)::in, token::out, io::di, io::uo) is det.
get_number(Chars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_int(Chars, 10, Token)
;
Result = ok(Char),
( char.is_digit(Char) ->
get_number([Char | Chars], Token, !IO)
; Char = ('.') ->
get_int_dot(Chars, Token, !IO)
; ( Char = 'e' ; Char = 'E' ) ->
get_float_exponent([Char | Chars], Token, !IO)
;
io.putback_char(Char, !IO),
rev_char_list_to_int(Chars, 10, Token)
)
).
:- pred string_get_number(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_number(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_digit(Char) ->
string_get_number(String, Len, Posn0, Token, Context, !Posn)
; Char = ('.') ->
string_get_int_dot(String, Len, Posn0, Token, Context, !Posn)
; ( Char = 'e' ; Char = 'E' ) ->
string_get_float_exponent(String, Len, Posn0, Token, Context,
!Posn)
;
string_ungetchar(String, !Posn),
grab_string(String, Posn0, NumberString, !Posn),
conv_string_to_int(NumberString, 10, Token),
string_get_context(Posn0, Context, !Posn)
)
;
grab_string(String, Posn0, NumberString, !Posn),
conv_string_to_int(NumberString, 10, Token),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_int_dot(list(char)::in, token::out, io::di, io::uo) is det.
get_int_dot(Chars, Token, !IO) :-
% XXX The float literal syntax doesn't match ISO Prolog.
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
io.putback_char('.', !IO),
rev_char_list_to_int(Chars, 10, Token)
;
Result = ok(Char),
( char.is_digit(Char) ->
get_float_decimals([Char, '.' | Chars], Token, !IO)
;
io.putback_char(Char, !IO),
% We can't putback the ".", because io.putback_char only
% guarantees one character of pushback. So instead, we return
% an `integer_dot' token; the main loop of get_token_list_2 will
% handle this appropriately.
rev_char_list_to_int(Chars, 10, Token0),
( Token0 = integer(Int) ->
Token = integer_dot(Int)
;
Token = Token0
)
)
).
:- pred string_get_int_dot(string::in, int::in, posn::in, token::out,
string_token_context::out, posn::in, posn::out) is det.
string_get_int_dot(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_digit(Char) ->
string_get_float_decimals(String, Len, Posn0, Token, Context,
!Posn)
;
string_ungetchar(String, !Posn),
string_ungetchar(String, !Posn),
grab_string(String, Posn0, NumberString, !Posn),
conv_string_to_int(NumberString, 10, Token),
string_get_context(Posn0, Context, !Posn)
)
;
string_ungetchar(String, !Posn),
grab_string(String, Posn0, NumberString, !Posn),
conv_string_to_int(NumberString, 10, Token),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_float_decimals(list(char)::in, token::out,
io::di, io::uo) is det.
% We've read past the decimal point, so now get the decimals.
%
get_float_decimals(Chars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_float(Chars, Token)
;
Result = ok(Char),
( char.is_digit(Char) ->
get_float_decimals([Char | Chars], Token, !IO)
; ( Char = 'e' ; Char = 'E' ) ->
get_float_exponent([Char | Chars], Token, !IO)
;
io.putback_char(Char, !IO),
rev_char_list_to_float(Chars, Token)
)
).
:- pred string_get_float_decimals(string::in, int::in, posn::in,
token::out, string_token_context::out, posn::in, posn::out) is det.
string_get_float_decimals(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_digit(Char) ->
string_get_float_decimals(String, Len, Posn0, Token, Context,
!Posn)
; ( Char = 'e' ; Char = 'E' ) ->
string_get_float_exponent(String, Len, Posn0, Token, Context,
!Posn)
;
string_ungetchar(String, !Posn),
grab_string(String, Posn0, FloatString, !Posn),
conv_to_float(FloatString, Token),
string_get_context(Posn0, Context, !Posn)
)
;
grab_string(String, Posn0, FloatString, !Posn),
conv_to_float(FloatString, Token),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_float_exponent(list(char)::in, token::out,
io::di, io::uo) is det.
get_float_exponent(Chars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_float(Chars, Token)
;
Result = ok(Char),
( ( Char = ('+') ; Char = ('-') ) ->
get_float_exponent_2([Char | Chars], Token, !IO)
; char.is_digit(Char) ->
get_float_exponent_3([Char | Chars], Token, !IO)
;
io.putback_char(Char, !IO),
Token = error("unterminated exponent in float token")
)
).
:- pred string_get_float_exponent(string::in, int::in, posn::in,
token::out, string_token_context::out, posn::in, posn::out) is det.
string_get_float_exponent(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( ( Char = ('+') ; Char = ('-') ) ->
string_get_float_exponent_2(String, Len, Posn0, Token, Context,
!Posn)
; char.is_digit(Char) ->
string_get_float_exponent_3(String, Len, Posn0, Token, Context,
!Posn)
;
string_ungetchar(String, !Posn),
Token = error("unterminated exponent in float token"),
string_get_context(Posn0, Context, !Posn)
)
;
grab_string(String, Posn0, FloatString, !Posn),
conv_to_float(FloatString, Token),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_float_exponent_2(list(char)::in, token::out,
io::di, io::uo) is det.
% We've read past the E signalling the start of the exponent -
% make sure that there's at least one digit following,
% and then get the remaining digits.
%
get_float_exponent_2(Chars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
Token = error("unterminated exponent in float token")
;
Result = ok(Char),
( char.is_digit(Char) ->
get_float_exponent_3([Char | Chars], Token, !IO)
;
io.putback_char(Char, !IO),
Token = error("unterminated exponent in float token")
)
).
:- pred string_get_float_exponent_2(string::in, int::in, posn::in,
token::out, string_token_context::out, posn::in, posn::out) is det.
% We've read past the E signalling the start of the exponent -
% make sure that there's at least one digit following,
% and then get the remaining digits.
%
string_get_float_exponent_2(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_digit(Char) ->
string_get_float_exponent_3(String, Len, Posn0, Token, Context,
!Posn)
;
string_ungetchar(String, !Posn),
Token = error("unterminated exponent in float token"),
string_get_context(Posn0, Context, !Posn)
)
;
Token = error("unterminated exponent in float token"),
string_get_context(Posn0, Context, !Posn)
).
:- pred get_float_exponent_3(list(char)::in, token::out,
io::di, io::uo) is det.
% We've read past the first digit of the exponent -
% now get the remaining digits.
%
get_float_exponent_3(Chars, Token, !IO) :-
io.read_char(Result, !IO),
(
Result = error(Error),
Token = io_error(Error)
;
Result = eof,
rev_char_list_to_float(Chars, Token)
;
Result = ok(Char),
( char.is_digit(Char) ->
get_float_exponent_3([Char | Chars], Token, !IO)
;
io.putback_char(Char, !IO),
rev_char_list_to_float(Chars, Token)
)
).
:- pred string_get_float_exponent_3(string::in, int::in, posn::in,
token::out, string_token_context::out, posn::in, posn::out) is det.
string_get_float_exponent_3(String, Len, Posn0, Token, Context, !Posn) :-
( string_read_char(String, Len, Char, !Posn) ->
( char.is_digit(Char) ->
string_get_float_exponent_3(String, Len, Posn0, Token, Context,
!Posn)
;
string_ungetchar(String, !Posn),
grab_string(String, Posn0, FloatString, !Posn),
conv_to_float(FloatString, Token),
string_get_context(Posn0, Context, !Posn)
)
;
grab_string(String, Posn0, FloatString, !Posn),
conv_to_float(FloatString, Token),
string_get_context(Posn0, Context, !Posn)
).
%-----------------------------------------------------------------------------%
%
% Utility routines.
:- pred rev_char_list_to_int(list(char)::in, int::in, token::out) is det.
rev_char_list_to_int(RevChars, Base, Token) :-
rev_char_list_to_string(RevChars, String),
conv_string_to_int(String, Base, Token).
:- pred conv_string_to_int(string::in, int::in, token::out) is det.
conv_string_to_int(String, Base, Token) :-
( string.base_string_to_int(Base, String, Int) ->
Token = integer(Int)
;
Token = error("invalid integer token")
).
:- pred rev_char_list_to_float(list(char)::in, token::out) is det.
rev_char_list_to_float(RevChars, Token) :-
rev_char_list_to_string(RevChars, String),
conv_to_float(String, Token).
:- pred conv_to_float(string::in, token::out) is det.
conv_to_float(String, Token) :-
( string.to_float(String, Float) ->
Token = float(Float)
;
Token = error("invalid float token")
).
:- pred rev_char_list_to_string(list(char)::in, string::out) is det.
rev_char_list_to_string(RevChars, String) :-
string.from_rev_char_list(RevChars, String).
%-----------------------------------------------------------------------------%