From 05745a70bbbe8e5578f3c93a54c1f58865a93e67 Mon Sep 17 00:00:00 2001 From: Sebastian Godelet Date: Tue, 6 May 2014 12:34:09 +0200 Subject: [PATCH] Added the times operator for regular expressions, such that one can express /[a-z]{10}/ in this way: `Regex = range('a', 'z') * 10'. extras/lex/lex.m: Removed unused and unsafe str_foldr function, added (T * int) = regexp function. extras/lex/samples/lex_demo.m: Removed whitespace in comments, added an input prompt, added a lexeme for '//' C++ comments using the new '*' operator. --- extras/lex/lex.m | 19 ++++++++++++------- extras/lex/samples/lex_demo.m | 15 ++++++++------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/extras/lex/lex.m b/extras/lex/lex.m index 7570e3222..4abed1c1e 100644 --- a/extras/lex/lex.m +++ b/extras/lex/lex.m @@ -117,6 +117,7 @@ :- func ?(T) = regexp <= regexp(T). % ?(R) = R or null :- func +(T) = regexp <= regexp(T). % +(R) = R ++ *(R) :- func range(char, char) = regexp. % range('a', 'z') = any("ab...xyz") +:- func (T * int) = regexp <= regexp(T). % R * N = R ++ ... ++ R % Some useful single-char regexps. % @@ -837,19 +838,23 @@ anybut(S) = R :- ExcludedChars = sparse_bitset.list_to_set(string.to_char_list(S)), R = re(sparse_bitset.difference(valid_unicode_chars, ExcludedChars)). -:- func str_foldr(func(char, T) = T, string, T, int) = T. - -str_foldr(Fn, S, X, I) = - ( if I < 0 then X - else str_foldr(Fn, S, Fn(string.det_index(S, I), X), I - 1) - ). - ?(R) = (R or null). +(R) = (R ++ *(R)). range(Start, End) = re(charset(char.to_int(Start), char.to_int(End))). +R * N = Result :- + ( N < 0 -> + unexpected($file, $pred, "N must be a non-negative number") + ; N = 0 -> + Result = null + ; N = 1 -> + Result = re(R) + ; + Result = conc(re(R), (R * (N - 1))) + ). + %-----------------------------------------------------------------------------% % Some useful single-char regexps. 
diff --git a/extras/lex/samples/lex_demo.m b/extras/lex/samples/lex_demo.m index 80a50d09a..d22054bce 100644 --- a/extras/lex/samples/lex_demo.m +++ b/extras/lex/samples/lex_demo.m @@ -6,7 +6,7 @@ % % Copyright (C) 2001-2002 The University of Melbourne % Copyright (C) 2001 The Rationalizer Intelligent Software AG -% The changes made by Rationalizer are contributed under the terms +% The changes made by Rationalizer are contributed under the terms % of the GNU General Public License - see the file COPYING in the % Mercury Distribution. % @@ -46,8 +46,8 @@ I recognise the following words: ""and"", ""then"", ""the"", ""it"", ""them"", ""to"", ""on"". I also recognise Unicode characters: ""我"", ""会"", ""说"", ""中文"" -I also recognise Mercury-style comments, integers and floating point -numbers, and a variety of punctuation symbols. +I also recognise Mercury-style and C++ style comments, integers +and floating point numbers, and a variety of punctuation symbols. Try me... @@ -55,7 +55,8 @@ Try me... Lexer = lex.init(lexemes, lex.read_from_stdin, ignore(space)), State0 = lex.start(Lexer, !.IO), - tokenise_stdin(State0, State), + lex.manipulate_source(io.print("> "), State0, State1), + tokenise_stdin(State1, State), !:IO = lex.stop(State). %-----------------------------------------------------------------------------% @@ -65,8 +66,7 @@ Try me... tokenise_stdin(!LS) :- lex.read(Result, !LS), - lex.manipulate_source(io.print(Result), !LS), - lex.manipulate_source(io.nl, !LS), + lex.manipulate_source(io.print_line(Result), !LS), ( Result = ok(_), tokenise_stdin(!LS) @@ -97,6 +97,7 @@ tokenise_stdin(!LS) :- lexemes = [ ( "%" ++ junk -> (func(Match) = comment(Match)) ), + ( '/'*2 ++ junk -> (func(Match) = comment(Match)) ), ( signed_int -> (func(Match) = integer(string.det_to_int(Match))) ), ( real -> (func(Match) = real(string.det_to_float(Match))) ), @@ -117,7 +118,7 @@ lexemes = [ "then" -> (func(Match) = conj(Match)) ), % `\/' is a synonym for `or'. 
Tell us which you prefer... - % + % ( "the" \/ "it" \/ "them" \/