Files
mercury/extras/lex/samples/lex_demo.m
Mark Brown d465fa53cb Update the COPYING.LIB file and references to it.
Discussion of these changes can be found on the Mercury developers
mailing list archives from June 2018.

COPYING.LIB:
    Add a special linking exception to the LGPL.

*:
    Update references to COPYING.LIB.

    Clean up some minor errors that have accumulated in copyright
    messages.
2018-06-09 17:43:12 +10:00

184 lines
5.5 KiB
Mercury

%-----------------------------------------------------------------------------%
% lex_demo.m
% Sun Aug 20 18:11:42 BST 2000
%
% vim: ts=4 sw=4 et tw=0 wm=0 ft=mercury
%
% Copyright (C) 2001-2002 The University of Melbourne
% Copyright (C) 2001 The Rationalizer Intelligent Software AG
% The changes made by Rationalizer are contributed under the terms
% of the GNU General Public License - see the file COPYING in the
% Mercury Distribution.
% Copyright (C) 2014, 2018 The Mercury team.
% This file is distributed under the terms specified in COPYING.LIB.
%
%-----------------------------------------------------------------------------%
:- module lex_demo.
:- interface.
:- import_module io.
:- pred main(io::di, io::uo) is det.
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
:- implementation.
:- import_module lex.
:- import_module exception.
:- import_module float.
:- import_module int.
:- import_module list.
:- import_module maybe.
:- import_module string.
%-----------------------------------------------------------------------------%
main(!IO) :-
io.print("\
I recognise the following words:
""cat"", ""dog"", ""rat"", ""mat"", ""sat"", ""caught"", ""chased"",
""and"", ""then"", ""the"", ""it"", ""them"", ""to"", ""on"".
I also recognise Unicode characters:
"""", """", """", """"
I also recognise Mercury-style and C++ style comments comments, integers
and floating point numbers, and a variety of punctuation symbols.
Try me...
", !IO),
prompt_loop(!IO).
:- pred prompt_loop(io::di, io::uo) is det.
prompt_loop(!IO) :-
io.write_string("> ", !IO),
io.read_line_as_string(Result, !IO),
(
Result = ok(Input0),
Lexer = lex.init(lexemes, lex.read_from_string, ignore(space)),
some [!State] (
copy(Input0, Input),
!:State = lex.start(Lexer, Input),
tokenise(MaybeError, [], Tokens, !State),
_ = lex.stop(!.State),
(
MaybeError = yes(Error),
io.format("Error on line %d: %s\n",
[i(Error ^ re_line), s(Error ^ re_message)], !IO)
;
MaybeError = no
),
io.format("Tokens: %s\n", [s(string(Tokens))], !IO)
),
prompt_loop(!IO)
;
Result = eof
% Just exit
;
Result = error(Error),
io.format(io.stdout_stream, "Error while reading input: %s",
[s(error_message(Error))], !IO),
io.set_exit_status(1, !IO)
).
%-----------------------------------------------------------------------------%
    % A lexing failure as reported by lex.read: the error message and
    % the line number on which the error occurred.
    %
:- type read_error
---> read_error(
re_message :: string,
re_line :: int
).
:- pred tokenise(maybe(read_error)::out, list(token)::in, list(token)::out,
lexer_state(token, string)::di, lexer_state(token, string)::uo) is det.
tokenise(MaybeError, !Tokens, !LS) :-
lex.read(Result, !LS),
(
Result = ok(Token),
!:Tokens = [Token | !.Tokens],
tokenise(MaybeError, !Tokens, !LS)
;
Result = eof,
reverse(!Tokens),
MaybeError = no
;
Result = error(Message, Line),
MaybeError = yes(read_error(Message, Line))
).
%-----------------------------------------------------------------------------%
    % The tokens produced by the demo lexer.  Each constructor (except
    % punc and space) carries the matched string or its converted
    % numeric value; see lexemes below for the regexp that yields each
    % token.  unrecognised/1 is the catch-all for any single character
    % no other lexeme matches.
    %
:- type token
---> noun(string)
; comment(string)
; integer(int)
; real(float)
; verb(string)
; conj(string)
; prep(string)
; adverb(string)
; punc
; space
; word(string)
; unrecognised(string).
    % The lexeme table for the demo: each entry pairs a regular
    % expression (built from the lex library's combinators: ++ is
    % concatenation, `or'/\/ are alternation, + is one-or-more, any/1
    % matches any listed character, junk/dot/whitespace/signed_int/real
    % are predefined regexps) with a function mapping the matched
    % string to a token.
    %
    % NOTE(review): which lexeme wins when several match is decided by
    % the lex library (not visible here) — confirm against lex.m before
    % relying on entry order.
    %
:- func lexemes = list(lexeme(token)).
lexemes = [
( "%" ++ junk -> (func(Match) = comment(Match)) ),
( '/'*2 ++ junk -> (func(Match) = comment(Match)) ),
( signed_int -> (func(Match) = integer(string.det_to_int(Match))) ),
( real -> (func(Match) = real(string.det_to_float(Match))) ),
% Multiple regexps can match the same token constructor.
%
( "cat" -> (func(Match) = noun(Match)) ),
( "dog" -> (func(Match) = noun(Match)) ),
( "rat" -> (func(Match) = noun(Match)) ),
( "mat" -> (func(Match) = noun(Match)) ),
% Here we use `or', rather than multiple lexemes.
%
( "sat" or
"caught" or
"chased" -> (func(Match) = verb(Match)) ),
( "and" or
"then" -> (func(Match) = conj(Match)) ),
% `\/' is a synonym for `or'.  Tell us which you prefer...
%
( "the" \/
"it" \/
"them" \/
"to" \/
"on" -> (func(Match) = prep(Match)) ),
% Lexemes work on Unicode strings too.
( "我" or "中文" -> func(Match) = noun(Match) ),
( "会" -> func(Match) = adverb(Match) ),
( "说" -> func(Match) = verb(Match) ),
% return/1 can be used when you don't care what string was matched.
%
( any("~!@#$%^&*()_+`-={}|[]\\:"";'<>?,./")
-> return(punc) ),
( whitespace -> return(space) ),
% One or more ASCII letters: any word not listed above.
( +(range('a', 'z') or
range('A', 'Z')
) -> func(Match) = word(Match) ),
% dot matches any single character: the catch-all case.
( dot -> func(Match) = unrecognised(Match) )
].
%-----------------------------------------------------------------------------%
:- end_module lex_demo.
%-----------------------------------------------------------------------------%