mirror of
https://github.com/Mercury-Language/mercury.git
synced 2026-04-24 22:04:13 +00:00
extras/lex/lex.buf.m:
extras/lex/lex.lexeme.m:
extras/lex/lex.convert_NFA_to_DFA.m:
As above.
196 lines
7.1 KiB
Mathematica
196 lines
7.1 KiB
Mathematica
%----------------------------------------------------------------------------
|
|
% vim: ts=4 sw=4 et tw=0 wm=0 ff=unix ft=mercury
|
|
%----------------------------------------------------------------------------
|
|
%
|
|
% lex.lexeme.m
|
|
% Sat Aug 19 08:22:32 BST 2000
|
|
% Copyright (C) 2001 Ralph Becket <rbeck@microsoft.com>
|
|
% Copyright (C) 2001 The Rationalizer Intelligent Software AG.
|
|
% The changes made by Rationalizer are contributed under the terms
|
|
% of the GNU Lesser General Public License, see the file COPYING.LGPL
|
|
% in this directory.
|
|
% Copyright (C) 2002, 2010-2011 The University of Melbourne.
|
|
% Copyright (C) 2017-2019, 2023 The Mercury team.
|
|
% This file is distributed under the terms specified in COPYING.LIB.
|
|
%
|
|
% A lexeme combines a token with a regexp. The lexer compiles lexemes
|
|
% and returns the longest successful parse in the input stream,
|
|
% or an error if no match occurs.
|
|
%
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
:- module lex.lexeme.
|
|
:- interface.
|
|
|
|
:- import_module array.
|
|
:- import_module bool.
|
|
:- import_module bitmap.
|
|
:- import_module char.
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
% A lexeme in compiled (table-driven) form, as built by compile_lexeme/1.
%
%   token           the token creator taken from the source lexeme;
%   state           a state number; compile_lexeme/1 sets it to the start
%                   state of the DFA, and in_accepting_state/1 tests it;
%   transition_map  the DFA's accepting states and per-state transitions.
%
:- type compiled_lexeme(T)
|

---> compiled_lexeme(
|

token :: token_creator(T),
|

state :: state_no,
|

transition_map :: transition_map
|

).
|
|
|
|
% The inst of a compiled_lexeme/1 value: the `token' field carries the
% token_creator inst, while the `state' and `transition_map' fields are
% plain ground.
%
:- inst compiled_lexeme for compiled_lexeme/1
|

---> compiled_lexeme(token_creator, ground, ground).
|
|
|
|
% The tables describing a compiled DFA.
%
%   accepting_states  a bitmap indexed by state number; compile_lexeme/1
%                     sets the bit of every DFA stop state;
%   rows              an array indexed by state number; next_state/5 looks
%                     up the row of the current state to find the
%                     transition for a character.
%
:- type transition_map
|

---> transition_map(
|

accepting_states :: bitmap,
|

rows :: array(row)
|

).
|
|
|
|
% A transition row is an array of packed_transitions.
|

% There is one row per state number: the row at index I of a
% transition_map's `rows' array holds the transitions whose source
% state is I.
%
|

:- type row == array(packed_transition).
|
|
|
|
% A packed_transition combines a target state_no and the transition char
|

% codepoint for which the transition is valid.
|

% Each packed_transition covers exactly one character: when a row is
% compiled, a DFA transition over a set of characters is expanded into
% one packed_transition per character in that set.
%
|

:- type packed_transition
|

---> packed_transition(btr_state :: state_no, char :: char).
|
|
|
|
% A list of packed_transitions. This is the form in which the transitions
% of one state are accumulated before being converted into a row.
%
:- type packed_transitions == list(packed_transition).
|
|
|
|
% compile_lexeme(Lexeme) = CompiledLexeme:
%
% Compile the regular expression of Lexeme into a DFA and pack that DFA
% into a transition_map. The `token' field of CompiledLexeme is the token
% creator of Lexeme, and its `state' field is the start state of the DFA.
%
:- func compile_lexeme(lexeme(T)) = compiled_lexeme(T).
|
|
|
|
% next_state(CLXM, CurrentState, Char, NextState, IsAccepting)
|

% succeeds iff there is a transition in CLXM from CurrentState
|

% to NextState via Char; IsAccepting is `yes' iff NextState is
|

% an accepting state.
|

% The source state is given by the CurrentState argument; the `state'
% field of CLXM is not consulted.
%
|

:- pred next_state(compiled_lexeme(T)::in(compiled_lexeme),
|

state_no::in, char::in, state_no::out, bool::out) is semidet.
|
|
|
|
% Succeeds iff the state recorded in the `state' field of the
% compiled_lexeme is one of its accepting states.
|

%
|

:- pred in_accepting_state(compiled_lexeme(T)::in(compiled_lexeme)) is semidet.
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
:- implementation.
|
|
|
|
:- import_module lex.automata.
|
|
:- import_module lex.regexp.
|
|
:- import_module lex.convert_NFA_to_DFA.
|
|
|
|
:- import_module list.
|
|
:- import_module set.
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
% Build the table-driven form of a lexeme.
%
% The regexp is turned into an NFA, the NFA's null transitions are removed,
% and the result is converted into a DFA. The DFA is then packed into
% a bitmap of accepting states plus one transition row per state.
%
compile_lexeme(Lexeme) = CompiledLexeme :-
    Lexeme = (RegExp - TokenCreator),
    DFA = convert_NFA_to_DFA(remove_null_transitions(regexp_to_NFA(RegExp))),
    DFATransitions = DFA ^ smc_state_transitions,

    % The tables cover state numbers 0 .. TopState inclusive, where TopState
    % is the largest state number mentioned in any transition.
    NumStates = 1 + find_top_state(DFATransitions),

    NoAcceptingStates = bitmap.init(NumStates, no),
    AcceptingStates =
        set_accepting_states(DFA ^ smc_stop_states, NoAcceptingStates),
    RowArray = array(set_up_rows(0, NumStates, DFATransitions)),

    CompiledLexeme = compiled_lexeme(
        TokenCreator,
        DFA ^ smc_start_state,
        transition_map(AcceptingStates, RowArray)
    ).
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
% find_top_state(Transitions) = TopState:
%
% TopState is the largest state number occurring as the source or the
% target of any transition in Transitions, or 0 if Transitions is empty.
%
:- func find_top_state(transitions::in(atom_transitions)) = (int::out) is det.

find_top_state(Transitions) = TopState :-
    (
        Transitions = [],
        TopState = 0
    ;
        Transitions = [trans(From, _, To) | Rest],
        TopOfRest = find_top_state(Rest),
        TopState = max(From, max(To, TopOfRest))
    ).
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
% set_accepting_states(States, Bitmap0) = Bitmap:
%
% Bitmap is Bitmap0 with the bit of every state number in States set.
%
:- func set_accepting_states(set(state_no)::in, bitmap::bitmap_di)
    = (bitmap::bitmap_uo) is det.

set_accepting_states(States, Bitmap0) = Bitmap :-
    StateList = set.to_sorted_list(States),
    Bitmap = set_accepting_states_0(StateList, Bitmap0).
|
|
|
|
% set_accepting_states_0(States, Bitmap0) = Bitmap:
%
% Worker for set_accepting_states/2. Walks the list of state numbers,
% setting the corresponding bit for each one in turn.
%
:- func set_accepting_states_0(list(state_no)::in, bitmap::bitmap_di)
    = (bitmap::bitmap_uo) is det.

set_accepting_states_0(States, Bitmap0) = Bitmap :-
    (
        States = [],
        Bitmap = Bitmap0
    ;
        States = [State | OtherStates],
        Bitmap1 = bitmap.set(Bitmap0, State),
        Bitmap = set_accepting_states_0(OtherStates, Bitmap1)
    ).
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
% set_up_rows(I, N, Transitions) = Rows:
%
% Rows holds the transition rows for states I, I + 1, ..., N - 1,
% in that order. It is empty if I >= N. Each row is compiled from
% the complete list of Transitions.
%
:- func set_up_rows(int::in, int::in, transitions::in(atom_transitions))
    = (list(row)::out) is det.

set_up_rows(I, N, Transitions) = Rows :-
    ( if I < N then
        RowI = compile_transitions_for_state(I, [], Transitions),
        LaterRows = set_up_rows(I + 1, N, Transitions),
        Rows = [RowI | LaterRows]
    else
        Rows = []
    ).
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
% compile_transitions_for_state(I, IBTs0, Transitions) = Row:
%
% Row is the transition row for state I. IBTs0 is the accumulator of
% packed transitions found so far. Every transition in Transitions whose
% source state is I contributes one packed_transition per character in its
% character set, each prepended to the accumulator. Transitions with any
% other source state are skipped. Once Transitions is exhausted,
% the accumulated list is converted to an array.
%
:- func compile_transitions_for_state(int, packed_transitions, transitions)
    = row.
:- mode compile_transitions_for_state(in, in, in(atom_transitions))
    = array_uo is det.

compile_transitions_for_state(I, IBTs0, Transitions) = Row :-
    (
        Transitions = [],
        Row = array(IBTs0)
    ;
        Transitions = [Transition | OtherTransitions],
        ( if Transition = trans(I, Charset, Target) then
            IBTs = sparse_bitset.foldl(
                func(Char, Acc) = [packed_transition(Target, Char) | Acc],
                Charset, IBTs0)
        else
            IBTs = IBTs0
        ),
        Row = compile_transitions_for_state(I, IBTs, OtherTransitions)
    ).
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
% Look up the row of CurrentState, search it for a transition on Char,
% and report whether the state reached is marked in the accepting_states
% bitmap. Fails if the row has no transition on Char.
%
next_state(CLXM, CurrentState, Char, NextState, IsAccepting) :-
    TransitionMap = CLXM ^ transition_map,
    Row = TransitionMap ^ rows ^ elem(CurrentState),
    find_next_state(Char, Row, NextState),
    IsAccepting = TransitionMap ^ accepting_states ^ bit(NextState).
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
% find_next_state(Char, PackedTransitions, State):
%
% Succeeds iff PackedTransitions contains a transition on Char, in which
% case State is the target state of the first such transition. The search
% covers the whole array, from its lowest to its highest index.
%
:- pred find_next_state(char::in, array(packed_transition)::in,
    state_no::out) is semidet.

find_next_state(Char, PackedTransitions, State) :-
    find_next_state_0(array.min(PackedTransitions),
        array.max(PackedTransitions), Char, PackedTransitions, State).
|
|
|
|
% find_next_state_0(Lo, Hi, Char, PackedTransitions, State):
%
% Scan PackedTransitions linearly from index Lo up to and including
% index Hi. Succeeds with the target state of the first transition on Char.
% Fails if Lo > Hi, i.e. when the range has been exhausted without a match.
%
:- pred find_next_state_0(int::in, int::in, char::in,
    array(packed_transition)::in, state_no::out) is semidet.

find_next_state_0(Index, Last, Char, PackedTransitions, State) :-
    Index =< Last,
    packed_transition(Target, TransitionChar) =
        PackedTransitions ^ elem(Index),
    ( if TransitionChar = Char then
        State = Target
    else
        find_next_state_0(Index + 1, Last, Char, PackedTransitions, State)
    ).
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
% Test the bit for the lexeme's recorded state in the bitmap of
% accepting states.
%
in_accepting_state(CLXM) :-
    AcceptingStates = CLXM ^ transition_map ^ accepting_states,
    RecordedState = CLXM ^ state,
    bitmap.is_set(AcceptingStates, RecordedState).
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
:- end_module lex.lexeme.
|
|
%-----------------------------------------------------------------------------%
|