mirror of
https://github.com/Mercury-Language/mercury.git
synced 2025-12-13 04:44:39 +00:00
Added the times operator for regular expressions, such that one can express
/[a-z]{10}/ in this way: `Regex = range('a', 'z') * 10'.
extras/lex/lex.m:
Removed unused and unsafe str_foldr function,
added (T * int) = regexp function.
extras/lex/samples/lex_demo.m:
Removed whitespace in comments,
added an input prompt,
added a lexeme for '//' C++ comments using the new '*' operator.
This commit is contained in:
committed by
Paul Bone
parent
39caed9793
commit
05745a70bb
@@ -117,6 +117,7 @@
|
||||
:- func ?(T) = regexp <= regexp(T). % ?(R) = R or null
|
||||
:- func +(T) = regexp <= regexp(T). % +(R) = R ++ *(R)
|
||||
:- func range(char, char) = regexp. % range('a', 'z') = any("ab...xyz")
|
||||
:- func (T * int) = regexp <= regexp(T). % R * N = R ++ ... ++ R
|
||||
|
||||
% Some useful single-char regexps.
|
||||
%
|
||||
@@ -837,19 +838,23 @@ anybut(S) = R :-
|
||||
ExcludedChars = sparse_bitset.list_to_set(string.to_char_list(S)),
|
||||
R = re(sparse_bitset.difference(valid_unicode_chars, ExcludedChars)).
|
||||
|
||||
:- func str_foldr(func(char, T) = T, string, T, int) = T.
|
||||
|
||||
str_foldr(Fn, S, X, I) =
|
||||
( if I < 0 then X
|
||||
else str_foldr(Fn, S, Fn(string.det_index(S, I), X), I - 1)
|
||||
).
|
||||
|
||||
?(R) = (R or null).
|
||||
|
||||
+(R) = (R ++ *(R)).
|
||||
|
||||
range(Start, End) = re(charset(char.to_int(Start), char.to_int(End))).
|
||||
|
||||
R * N = Result :-
|
||||
( N < 0 ->
|
||||
unexpected($file, $pred, "N must be a non-negative number")
|
||||
; N = 0 ->
|
||||
Result = null
|
||||
; N = 1 ->
|
||||
Result = re(R)
|
||||
;
|
||||
Result = conc(re(R), (R * (N - 1)))
|
||||
).
|
||||
|
||||
%-----------------------------------------------------------------------------%
|
||||
% Some useful single-char regexps.
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
%
|
||||
% Copyright (C) 2001-2002 The University of Melbourne
|
||||
% Copyright (C) 2001 The Rationalizer Intelligent Software AG
|
||||
% The changes made by Rationalizer are contributed under the terms
|
||||
% The changes made by Rationalizer are contributed under the terms
|
||||
% of the GNU General Public License - see the file COPYING in the
|
||||
% Mercury Distribution.
|
||||
%
|
||||
@@ -46,8 +46,8 @@ I recognise the following words:
|
||||
""and"", ""then"", ""the"", ""it"", ""them"", ""to"", ""on"".
|
||||
I also recognise Unicode characters:
|
||||
""我"", ""会"", ""说"", ""中文""
|
||||
I also recognise Mercury-style comments, integers and floating point
|
||||
numbers, and a variety of punctuation symbols.
|
||||
I also recognise Mercury-style and C++ style comments, integers
|
||||
and floating point numbers, and a variety of punctuation symbols.
|
||||
|
||||
Try me...
|
||||
|
||||
@@ -55,7 +55,8 @@ Try me...
|
||||
|
||||
Lexer = lex.init(lexemes, lex.read_from_stdin, ignore(space)),
|
||||
State0 = lex.start(Lexer, !.IO),
|
||||
tokenise_stdin(State0, State),
|
||||
lex.manipulate_source(io.print("> "), State0, State1),
|
||||
tokenise_stdin(State1, State),
|
||||
!:IO = lex.stop(State).
|
||||
|
||||
%-----------------------------------------------------------------------------%
|
||||
@@ -65,8 +66,7 @@ Try me...
|
||||
|
||||
tokenise_stdin(!LS) :-
|
||||
lex.read(Result, !LS),
|
||||
lex.manipulate_source(io.print(Result), !LS),
|
||||
lex.manipulate_source(io.nl, !LS),
|
||||
lex.manipulate_source(io.print_line(Result), !LS),
|
||||
(
|
||||
Result = ok(_),
|
||||
tokenise_stdin(!LS)
|
||||
@@ -97,6 +97,7 @@ tokenise_stdin(!LS) :-
|
||||
lexemes = [
|
||||
|
||||
( "%" ++ junk -> (func(Match) = comment(Match)) ),
|
||||
( '/'*2 ++ junk -> (func(Match) = comment(Match)) ),
|
||||
( signed_int -> (func(Match) = integer(string.det_to_int(Match))) ),
|
||||
( real -> (func(Match) = real(string.det_to_float(Match))) ),
|
||||
|
||||
@@ -117,7 +118,7 @@ lexemes = [
|
||||
"then" -> (func(Match) = conj(Match)) ),
|
||||
|
||||
% `\/' is a synonym for `or'. Tell us which you prefer...
|
||||
%
|
||||
%
|
||||
( "the" \/
|
||||
"it" \/
|
||||
"them" \/
|
||||
|
||||
Reference in New Issue
Block a user