Files
mercury/extras/lex/lex.lexeme.m
Julien Fischer 93dba486ec Update programming style in extras/lex.
extras/lex/lex.buf.m:
extras/lex/lex.lexeme.m:
extras/lex/lex.convert_NFA_to_DFA.m:
    As above.
2023-03-12 14:36:49 +11:00

196 lines
7.1 KiB
Mathematica

%----------------------------------------------------------------------------
% vim: ts=4 sw=4 et tw=0 wm=0 ff=unix ft=mercury
%----------------------------------------------------------------------------
%
% lex.lexeme.m
% Sat Aug 19 08:22:32 BST 2000
% Copyright (C) 2001 Ralph Becket <rbeck@microsoft.com>
% Copyright (C) 2001 The Rationalizer Intelligent Software AG.
% The changes made by Rationalizer are contributed under the terms
% of the GNU Lesser General Public License, see the file COPYING.LGPL
% in this directory.
% Copyright (C) 2002, 2010-2011 The University of Melbourne.
% Copyright (C) 2017-2019, 2023 The Mercury team.
% This file is distributed under the terms specified in COPYING.LIB.
%
% A lexeme combines a token with a regexp. The lexer compiles lexemes
% and returns the longest successful parse in the input stream,
% or an error if no match occurs.
%
%-----------------------------------------------------------------------------%
:- module lex.lexeme.
:- interface.
:- import_module array.
:- import_module bool.
:- import_module bitmap.
:- import_module char.
%-----------------------------------------------------------------------------%
% A compiled_lexeme is a lexeme whose regexp has been compiled into a
% table-driven automaton (see compile_lexeme/1):
% - token is the token creator taken from the original lexeme;
% - state is the state_no the lexeme is currently in; compile_lexeme/1
%   sets it to the start state of the automaton;
% - transition_map holds the transition table and the accepting states.
%
:- type compiled_lexeme(T)
---> compiled_lexeme(
token :: token_creator(T),
state :: state_no,
transition_map :: transition_map
).
% The inst used for compiled lexemes in the modes below: the token field
% has inst token_creator (not defined in this file), the remaining two
% fields are ground.
%
:- inst compiled_lexeme for compiled_lexeme/1
---> compiled_lexeme(token_creator, ground, ground).
% A transition_map is the packed form of an automaton:
% - accepting_states has the bit for a state number set iff that state
%   is an accepting state;
% - rows is indexed by state number and holds the transitions out of
%   each state.
%
:- type transition_map
---> transition_map(
accepting_states :: bitmap,
rows :: array(row)
).
% A transition row is an array of packed_transitions: all of the
% transitions out of one particular state.
%
:- type row == array(packed_transition).
% A packed_transition pairs the char for which a transition is valid
% with the target state_no (btr_state) that the transition leads to.
%
:- type packed_transition
---> packed_transition(btr_state :: state_no, char :: char).
% A list of packed_transitions; used as the accumulator while the
% contents of a row are being collected, before conversion to an array.
%
:- type packed_transitions == list(packed_transition).
% compile_lexeme(Lexeme) = CompiledLexeme:
%
% CompiledLexeme is Lexeme with its regexp compiled into a
% transition_map. It keeps the token creator of Lexeme and starts out
% in the start state of the compiled automaton.
%
:- func compile_lexeme(lexeme(T)) = compiled_lexeme(T).
% next_state(CLXM, CurrentState, Char, NextState, IsAccepting):
%
% Succeeds iff there is a transition in CLXM from CurrentState
% to NextState via Char; IsAccepting is `yes' iff NextState is
% an accepting state_no. Fails if CurrentState has no transition
% on Char.
%
% CurrentState is supplied by the caller; the state field of CLXM
% is not consulted.
%
:- pred next_state(compiled_lexeme(T)::in(compiled_lexeme),
state_no::in, char::in, state_no::out, bool::out) is semidet.
% in_accepting_state(CLXM):
%
% Succeeds iff the state recorded in CLXM (its state field) is an
% accepting state_no.
%
:- pred in_accepting_state(compiled_lexeme(T)::in(compiled_lexeme)) is semidet.
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
:- implementation.
:- import_module lex.automata.
:- import_module lex.regexp.
:- import_module lex.convert_NFA_to_DFA.
:- import_module list.
:- import_module set.
%-----------------------------------------------------------------------------%
% Compile the regexp half of a lexeme into a DFA and pack that DFA into
% the table form consulted by next_state/5. The result keeps the token
% creator of the lexeme and starts out in the start state of the DFA.
%
compile_lexeme(RegExp - TokenCreator) = CompiledLexeme :-
    DFA = convert_NFA_to_DFA(remove_null_transitions(regexp_to_NFA(RegExp))),
    Transitions = DFA ^ smc_state_transitions,

    % The rows and the accepting-state bits are indexed from 0 up to the
    % highest state number, hence the "+ 1".
    % NOTE(review): the table size is derived from the transitions alone.
    % This presumably relies on every start/stop state either occurring
    % in some transition or being state 0 -- confirm against
    % convert_NFA_to_DFA.
    NumStates = find_top_state(Transitions) + 1,

    AcceptingStates = set_accepting_states(DFA ^ smc_stop_states,
        bitmap.init(NumStates, no)),
    Rows = array(set_up_rows(0, NumStates, Transitions)),

    CompiledLexeme = compiled_lexeme(
        TokenCreator,
        DFA ^ smc_start_state,
        transition_map(AcceptingStates, Rows)
    ).
%-----------------------------------------------------------------------------%
% find_top_state(Transitions) = TopState:
%
% TopState is the largest state number that occurs as the source or
% the target of a transition in Transitions. It is 0 when Transitions
% is empty, and is never less than 0.
%
:- func find_top_state(transitions) = int.
:- mode find_top_state(in(atom_transitions)) = out is det.

find_top_state(Transitions) = TopState :-
    (
        Transitions = [],
        TopState = 0
    ;
        Transitions = [trans(From, _, To) | Rest],
        TopState = int.max(int.max(From, To), find_top_state(Rest))
    ).
%-----------------------------------------------------------------------------%
% set_accepting_states(States, Bitmap0) = Bitmap:
%
% Bitmap is Bitmap0 with the bit for every state number in States set.
%
:- func set_accepting_states(set(state_no), bitmap) = bitmap.
:- mode set_accepting_states(in, bitmap_di) = bitmap_uo is det.

set_accepting_states(States, Bitmap0) = Bitmap :-
    StateList = set.to_sorted_list(States),
    Bitmap = set_accepting_states_0(StateList, Bitmap0).

% As above, but working on the states as a list; the bits are set
% one at a time, threading the bitmap through the recursion.
%
:- func set_accepting_states_0(list(state_no), bitmap) = bitmap.
:- mode set_accepting_states_0(in, bitmap_di) = bitmap_uo is det.

set_accepting_states_0(StateList, Bitmap0) = Bitmap :-
    (
        StateList = [],
        Bitmap = Bitmap0
    ;
        StateList = [State | Rest],
        Bitmap1 = bitmap.set(Bitmap0, State),
        Bitmap = set_accepting_states_0(Rest, Bitmap1)
    ).
%-----------------------------------------------------------------------------%
% set_up_rows(State, NumStates, Transitions) = Rows:
%
% Rows contains, in ascending order of state number, the transition row
% of each state from State up to NumStates - 1. Rows is empty when
% State >= NumStates.
%
:- func set_up_rows(int, int, transitions) = list(row).
:- mode set_up_rows(in, in, in(atom_transitions)) = out is det.

set_up_rows(State, NumStates, Transitions) = Rows :-
    ( if State < NumStates then
        Row = compile_transitions_for_state(State, [], Transitions),
        Rows = [Row | set_up_rows(State + 1, NumStates, Transitions)]
    else
        Rows = []
    ).
%-----------------------------------------------------------------------------%
% compile_transitions_for_state(State, Packed0, Transitions) = Row:
%
% Scan Transitions for those whose source is State. For each of them,
% add one packed_transition per char in its charset to the accumulator
% Packed0. Once the whole list has been scanned, the accumulator is
% converted into the array Row.
%
% New entries are consed onto the front of the accumulator, so entries
% from later transitions precede those from earlier ones in Row.
%
:- func compile_transitions_for_state(int::in, packed_transitions::in,
    transitions::in(atom_transitions)) = (row::array_uo) is det.

compile_transitions_for_state(State, Packed0, Transitions) = Row :-
    (
        Transitions = [],
        Row = array(Packed0)
    ;
        Transitions = [Transition | Rest],
        ( if Transition = trans(State, Charset, Target) then
            Packed = sparse_bitset.foldl(
                func(Char, Acc) = [packed_transition(Target, Char) | Acc],
                Charset, Packed0)
        else
            % Not a transition out of State; leave the accumulator alone.
            Packed = Packed0
        ),
        Row = compile_transitions_for_state(State, Packed, Rest)
    ).
%-----------------------------------------------------------------------------%
% Look Char up in the row belonging to CurrentState; if a transition
% is found, report its target state and whether the bit for that
% target is set in the accepting-states bitmap.
%
next_state(CLXM, CurrentState, Char, NextState, IsAccepting) :-
    TransitionMap = CLXM ^ transition_map,
    Row = TransitionMap ^ rows ^ elem(CurrentState),
    find_next_state(Char, Row, NextState),
    IsAccepting = TransitionMap ^ accepting_states ^ bit(NextState).
%-----------------------------------------------------------------------------%
% find_next_state(Char, PackedTransitions, State):
%
% Succeeds iff PackedTransitions contains a transition on Char, in which
% case State is the target of the first such transition in the array.
%
:- pred find_next_state(char, array(packed_transition), state_no).
:- mode find_next_state(in, in, out) is semidet.

find_next_state(Char, PackedTransitions, State) :-
    find_next_state_0(array.min(PackedTransitions),
        array.max(PackedTransitions), Char, PackedTransitions, State).

% find_next_state_0(Index, Last, Char, PackedTransitions, State):
%
% Linear search of the slots Index .. Last of PackedTransitions for
% a transition on Char. Fails once Index has moved past Last.
%
:- pred find_next_state_0(int, int, char, array(packed_transition), state_no).
:- mode find_next_state_0(in, in, in, in, out) is semidet.

find_next_state_0(Index, Last, Char, PackedTransitions, State) :-
    Index =< Last,
    packed_transition(Target, TransitionChar) =
        PackedTransitions ^ elem(Index),
    ( if TransitionChar = Char then
        State = Target
    else
        find_next_state_0(Index + 1, Last, Char, PackedTransitions, State)
    ).
%-----------------------------------------------------------------------------%
% Test the bit for the state recorded in CLXM in the accepting-states
% bitmap of its transition map.
%
in_accepting_state(CLXM) :-
    AcceptingStates = CLXM ^ transition_map ^ accepting_states,
    CurrentState = CLXM ^ state,
    bitmap.is_set(AcceptingStates, CurrentState).
%-----------------------------------------------------------------------------%
:- end_module lex.lexeme.
%-----------------------------------------------------------------------------%