Added the times operator for regular expressions, such that one can express

/[a-z]{10}/ in this way: `Regex = range('a', 'z') * 10'.

extras/lex/lex.m:
    Removed the unused and unsafe str_foldr function; added the
    (T * int) = regexp function.

extras/lex/samples/lex_demo.m:
    Removed trailing whitespace in comments, added an input prompt, and
    added a lexeme for '//' C++ comments using the new '*' operator.
2025-12-13 04:44:39 +00:00 · 2014-05-06 12:34:09 +02:00
parent 39caed9793
commit 05745a70bb
2 changed files with 20 additions and 14 deletions
--- a/extras/lex/lex.m
+++ b/extras/lex/lex.m
@@ -117,6 +117,7 @@
 :- func ?(T) = regexp <= regexp(T).  % ?(R)       = R or null
 :- func +(T) = regexp <= regexp(T).  % +(R)       = R ++ *(R)
 :- func range(char, char) = regexp.  % range('a', 'z') = any("ab...xyz")
+:- func (T * int) = regexp <= regexp(T). % R * N = R ++ ... ++ R

    % Some useful single-char regexps.
    %
@@ -837,19 +838,23 @@ anybut(S) = R :-
    ExcludedChars = sparse_bitset.list_to_set(string.to_char_list(S)),
    R = re(sparse_bitset.difference(valid_unicode_chars, ExcludedChars)).

-:- func str_foldr(func(char, T) = T, string, T, int) = T.
-
-str_foldr(Fn, S, X, I) =
-    ( if I < 0 then X
-               else str_foldr(Fn, S, Fn(string.det_index(S, I), X), I - 1)
-    ).
-
 ?(R) = (R or null).

 +(R) = (R ++ *(R)).

 range(Start, End) = re(charset(char.to_int(Start), char.to_int(End))).

+R * N = Result :-
+    ( N < 0 ->
+        unexpected($file, $pred, "N must be a non-negative number")
+    ; N = 0 ->
+        Result = null
+    ; N = 1 ->
+        Result = re(R)
+    ;
+        Result = conc(re(R), (R * (N - 1)))
+    ).
+
 %-----------------------------------------------------------------------------%
 % Some useful single-char regexps.

--- a/extras/lex/samples/lex_demo.m
+++ b/extras/lex/samples/lex_demo.m
@@ -6,7 +6,7 @@
 %
 % Copyright (C) 2001-2002 The University of Melbourne
 % Copyright (C) 2001 The Rationalizer Intelligent Software AG
-%   The changes made by Rationalizer are contributed under the terms 
+%   The changes made by Rationalizer are contributed under the terms
 %   of the GNU General Public License - see the file COPYING in the
 %   Mercury Distribution.
 %
@@ -46,8 +46,8 @@ I recognise the following words:
 ""and"", ""then"", ""the"", ""it"", ""them"", ""to"", ""on"".
 I also recognise Unicode characters:
 ""我"", ""会"", ""说"", ""中文""
-I also recognise Mercury-style comments, integers and floating point
-numbers, and a variety of punctuation symbols.
+I also recognise Mercury-style and C++-style comments, integers
+and floating point numbers, and a variety of punctuation symbols.

 Try me...

@@ -55,7 +55,8 @@ Try me...

    Lexer  = lex.init(lexemes, lex.read_from_stdin, ignore(space)),
    State0 = lex.start(Lexer, !.IO),
-    tokenise_stdin(State0, State),
+    lex.manipulate_source(io.print("> "), State0, State1),
+    tokenise_stdin(State1, State),
    !:IO = lex.stop(State).

 %-----------------------------------------------------------------------------%
@@ -65,8 +66,7 @@ Try me...

 tokenise_stdin(!LS) :-
    lex.read(Result, !LS),
-    lex.manipulate_source(io.print(Result), !LS),
-    lex.manipulate_source(io.nl, !LS),
+    lex.manipulate_source(io.print_line(Result), !LS),
    (
        Result = ok(_),
        tokenise_stdin(!LS)
@@ -97,6 +97,7 @@ tokenise_stdin(!LS) :-
 lexemes = [

    ( "%" ++ junk       -> (func(Match) = comment(Match)) ),
+    ( '/'*2 ++ junk     -> (func(Match) = comment(Match)) ),
    ( signed_int        -> (func(Match) = integer(string.det_to_int(Match))) ),
    ( real              -> (func(Match) = real(string.det_to_float(Match))) ),

@@ -117,7 +118,7 @@ lexemes = [
      "then"            -> (func(Match) = conj(Match)) ),

        % `\/' is a synonym for `or'.  Tell us which you prefer...
-        % 
+        %
    ( "the" \/
      "it" \/
      "them" \/