Files
mercury/extras/lex/lex.buf.m
Zoltan Somogyi af23a48a2b Minor style improvements to extras.
extras/gator/gator:
    Update programming style. Fix indentation. Add a vim modeline.

extras/gator/genotype.m:
extras/gator/phenotype.m:
    Fix indentation.

extras/lex/regex.m:
    Replace if-then-else chain with a switch.

extras/base64/base64.m:
extras/cgi/cgi.m:
extras/cgi/form_test.m:
extras/cgi/html.m:
extras/cgi/mercury_www.m:
extras/complex_numbers/complex_numbers.complex.m:
extras/complex_numbers/complex_numbers.complex_float.m:
extras/complex_numbers/complex_numbers.complex_imag.m:
extras/complex_numbers/complex_numbers.float_complex.m:
extras/complex_numbers/complex_numbers.float_imag.m:
extras/complex_numbers/complex_numbers.imag.m:
extras/complex_numbers/complex_numbers.imag_complex.m:
extras/complex_numbers/complex_numbers.imag_float.m:
extras/complex_numbers/complex_numbers.m:
extras/curs/curs.m:
extras/dynamic_linking/dl_test.m:
extras/dynamic_linking/dl_test2.m:
extras/dynamic_linking/hello.m:
extras/error/error.m:
extras/fixed/fixed.m:
extras/fixed/mercury_fixed.m:
extras/java_extras/make_temp.m:
extras/lex/lex.automata.m:
extras/lex/lex.buf.m:
extras/lex/lex.convert_NFA_to_DFA.m:
extras/lex/lex.lexeme.m:
extras/lex/lex.m:
extras/lex/lex.regexp.m:
extras/logged_output/logged_output.m:
extras/logged_output/main.m:
extras/monte/doit.m:
extras/monte/dots.m:
extras/monte/geom.m:
extras/monte/hg.m:
extras/monte/monte.m:
extras/monte/rnd.m:
extras/mopenssl/mopenssl.m:
extras/odbc/mercury_odbc.m:
extras/odbc/odbc.m:
extras/odbc/odbc_test.m:
extras/posix/posix.chdir.m:
extras/posix/posix.closedir.m:
extras/posix/posix.dup.m:
extras/posix/posix.exec.m:
extras/posix/posix.fork.m:
extras/posix/posix.getpid.m:
extras/posix/posix.kill.m:
extras/posix/posix.lseek.m:
extras/posix/posix.m:
extras/posix/posix.mkdir.m:
extras/posix/posix.open.m:
extras/posix/posix.opendir.m:
extras/posix/posix.pipe.m:
extras/posix/posix.read.m:
extras/posix/posix.readdir.m:
extras/posix/posix.realpath.m:
extras/posix/posix.rmdir.m:
extras/posix/posix.select.m:
extras/posix/posix.sleep.m:
extras/posix/posix.socket.m:
extras/posix/posix.stat.m:
extras/posix/posix.strerror.m:
extras/posix/posix.wait.m:
extras/posix/posix.write.m:
extras/quickcheck/qcheck.m:
extras/quickcheck/rnd.m:
extras/quickcheck/test_qcheck.m:
extras/show_ops/show_ops.m:
extras/split_file/split_file.m:
extras/windows_installer_generator/wix.m:
extras/windows_installer_generator/wix_files.m:
extras/windows_installer_generator/wix_gui.m:
extras/windows_installer_generator/wix_installer.m:
extras/windows_installer_generator/wix_util.m:
    Apply tools/stdlines to all these files.
2025-05-05 08:08:50 +10:00

308 lines
11 KiB
Mathematica

%---------------------------------------------------------------------------%
% vim: ts=4 sw=4 et tw=0 wm=0 ff=unix ft=mercury
%---------------------------------------------------------------------------%
%
% lex.buf.m
% Copyright (C) 2001 Ralph Becket <rbeck@microsoft.com>
% Copyright (C) 2002, 2010 The University of Melbourne.
% Copyright (C) 2017-2019, 2023, 2025 The Mercury team.
% This file is distributed under the terms specified in COPYING.LIB.
%
% Sat Aug 19 16:56:30 BST 2000
%
% This module implements the rolling char buffer. The char buffer
% is optimised for efficiency.
%
% The buffer stores chars read from an input source (e.g. IO or string).
% Because the lexer can want to `unread' chars (when a long candidate lexeme
% fails), the buffer may contain `read ahead' chars. The structure of the
% buffer is as follows:
%
% buf[0] buf[len]
% | len = end - start |
% v v
% +---------------------------------------------------+
% |.|.|.|.|.|a|b|c|d|e|f|g|h|i|j|k|l| | | | | | | | | |
% +---------------------------------------------------+
%  ^         ^           ^           ^                 ^
%  |         |           |           |                 |
%  origin    start       cursor      end               terminus
%
% origin, start etc. are all recorded in terms of offsets (number of chars)
% from the start of the input stream, counting the first char read as
% being at offset 1. Hence, the char at the cursor is at buf[cursor - origin].
%
% READING CHARS
%
% * In the diagram, `g' is the next char that will be read.
%
% The cursor marks the point of the next char to be read in.
%
% If the cursor advances to the end, then a new char is read from the input
% and inserted into the buffer at the end and the end marker is incremented.
%
% If the end marker advances to the terminus, then the buffer is extended
% and the terminus adjusted appropriately. The buffer may take this opportunity
% to garbage collect the inaccessible chars between the origin and
% the start marker.
%
% EOF
%
% * In the diagram, if EOF had been detected then the end marker
% would give the offset at which it occurred.
%
% When EOF is read from the input stream, a special eof flag is set
% (and the end marker, of course, will give its offset). Any attempt to read
% at or past this point will cause the buffer to return the EOF signal.
%
% REWINDING
%
% * In the diagram, the cursor may be rewound to any point
% between the start marker and itself, inclusive.
%
% At any point, the cursor may be reset to any point between
% itself and the start marker inclusive.
%
% At any point, the user may ask for the offset of the cursor.
%
% STRING EXTRACTION
%
% * In the diagram, the string read in so far is "abcdef".
%
% The buffer provides a facility to return the substring consisting of
% the chars between the start marker and up to, but not including,
% that under the cursor.
%
% COMMITS
%
% * In the diagram, a commit will move the start marker to be the same
% as the cursor.
%
% The user can issue a commit order to the buffer which moves the start pointer
% to where the cursor is, preventing rewinds back past this point.
% This is important since it means that the region prior to the cursor
% in the buffer is now available for garbage collection.
%
%---------------------------------------------------------------------------%
:- module lex.buf.
:- interface.
:- import_module array.
:- import_module bool.
:- import_module char.
:- import_module string.
%---------------------------------------------------------------------------%
% The storage behind the rolling buffer: an array of chars in which
% the char at stream offset O lives in slot (O - buf_origin).
%
% XXX We need a char and/or byte array datatype;
% array(char) uses one word for each char, which is rather wasteful.
%
:- type buf == array(char).
% The bookkeeping that accompanies a buf.
%
% T is the type of the input source (typically io.state or string);
% the user must initialise the buffer by specifying an appropriate
% read predicate.
%
% All the offset fields count chars from the start of the input stream.
%
:- type buf_state(T)
---> buf_state(
% Stream offset of the char held in slot 0 of the buf array.
buf_origin :: offset,
% Earliest offset the cursor may be rewound to; moved forward
% to the cursor by commit/1.
buf_start :: offset,
% Offset of the next char that read/7 will return.
buf_cursor :: offset,
% Offset at which the next char obtained from the input source
% will be stored; chars at offsets below this are buffered.
buf_end :: offset,
% Offset just past the last slot of the buf array; when buf_end
% reaches it, the array must be compacted and/or grown.
buf_terminus :: offset,
% If `yes', then buf_end has the offset.
buf_eof_seen :: bool,
% Called with an offset and the input source to obtain
% the next char.
buf_read_pred :: read_pred(T)
).
% The inst that goes with buf_state/1: the six data fields are ground,
% and the final field (buf_read_pred) carries the read_pred inst so that
% the stored closure can be called after it is extracted from the state.
% NOTE(review): the read_pred inst is not declared in this module;
% presumably it comes from the parent lex module -- confirm.
%
:- inst buf_state for buf_state/1
---> buf_state(
ground,
ground,
ground,
ground,
ground,
ground,
read_pred
).
% init(ReadPred, BufState, Buf):
%
% Returns an empty buffer and an initialised buf_state.
% ReadPred is stored in the state and is the predicate read/7 calls
% whenever it needs a char that is not already in the buffer.
% Every offset in the new state is 0 and the eof flag is `no'.
%
:- pred init(read_pred(T)::in(read_pred), buf_state(T)::out(buf_state),
buf::array_uo) is det.
% read(Result, !BufState, !Buf, !Src):
%
% Reads the next char and advances the cursor.
% Updates the buf_state, the buf and the input.
%
% If the char under the cursor is already buffered (e.g. after a rewind),
% it is returned from the buffer and the input is left untouched.
% Otherwise the read predicate held in the buf_state is called. If that
% yields anything other than ok(Char), the result is passed back as is
% and the cursor does not move.
%
:- pred read(read_result::out,
buf_state(T)::in(buf_state), buf_state(T)::out(buf_state),
buf::array_di, buf::array_uo, T::di, T::uo) is det.
% Returns the offset of the start marker. This is the earliest offset
% that rewind_cursor/2 will accept.
%
:- func start_offset(buf_state(T)) = offset.
:- mode start_offset(in(buf_state)) = out is det.
% Returns the offset of the cursor, i.e. the offset of the char that
% the next call to read/7 will attempt to return.
%
:- func cursor_offset(buf_state(T)) = offset.
:- mode cursor_offset(in(buf_state)) = out is det.
% rewind_cursor(Offset, BufState):
%
% Rewinds the buffer by moving the cursor back to Offset.
% An exception is raised if the offset provided is not legitimate,
% i.e. if it is before the start marker or after the current cursor.
% Rewinding to the current cursor position is allowed.
%
:- func rewind_cursor(offset, buf_state(T)) = buf_state(T).
:- mode rewind_cursor(in, in(buf_state)) = out(buf_state) is det.
% Extracts the string of chars between the start and cursor: from the
% char at the start marker up to, but not including, the char under
% the cursor. Neither the buf_state nor the buf is modified.
%
:- func string_to_cursor(buf_state(T), buf) = string.
:- mode string_to_cursor(in(buf_state), array_ui) = out is det.
% Advances the start marker to the cursor. Rewinds to any point before
% the current cursor position will therefore no longer be possible, and
% the buffer slots before that point become reclaimable.
%
:- func commit(buf_state(T)) = buf_state(T).
:- mode commit(in(buf_state)) = out(buf_state) is det.
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
:- implementation.
:- import_module exception.
% The amount the buffer is grown by if (a) more space is required
% and (b) the available space is smaller than this amount.
% ("Available space" here is the number of reclaimable slots in front
% of the start marker.)
%
:- func low_water_mark = int.
low_water_mark = 256.
% The number of slots in the buffer array created by init/3.
%
:- func initial_buf_size = int.
initial_buf_size = 1024.
% XXX Debugging values.
% %
% :- func low_water_mark = int.
% low_water_mark = 16.
%
% :- func initial_buf_size = int.
% initial_buf_size = 32.
%---------------------------------------------------------------------------%
% Build a fresh buffer of initial_buf_size slots, plus a state in which
% origin, start, cursor and end all sit at offset 0, the terminus is
% the size of the array, and no EOF has been seen.
%
init(BufReadPred, BufState, Buf) :-
    Size = initial_buf_size,
    % '@' is only a placeholder; slots are overwritten before being read.
    Buf = array.init(Size, ('@')),
    BufState = buf_state(0, 0, 0, 0, Size, no, BufReadPred).
%---------------------------------------------------------------------------%
% read(Result, !BufState, !Buf, !Src):
%
% Returns the char under the cursor and advances the cursor by one.
% There are four cases.
%
% 1. The char is already buffered (the cursor is behind the end marker,
%    e.g. after a rewind): return it without touching the input.
% 2. The cursor is at the end marker and EOF has been recorded:
%    return eof without calling the read predicate again.
% 3. There is a free slot in the buffer: call the read predicate.
%    On ok(Char), store the char and advance both cursor and end marker.
%    On eof, record that in buf_eof_seen so that later reads at this
%    point take case 2. Any other result is passed back with the state
%    unchanged.
% 4. The buffer is full: compact and/or grow it, then retry.
%
read(Result, BufState0, BufState, !Buf, !Src) :-
    Origin = BufState0 ^ buf_origin,
    Start = BufState0 ^ buf_start,
    Cursor = BufState0 ^ buf_cursor,
    End = BufState0 ^ buf_end,
    Terminus = BufState0 ^ buf_terminus,
    EOFSeen = BufState0 ^ buf_eof_seen,
    ReadP = BufState0 ^ buf_read_pred,
    ( if Cursor < End then
        % Slot indices are offsets relative to the origin.
        Result = ok(array.lookup(!.Buf, Cursor - Origin)),
        BufState = ( BufState0 ^ buf_cursor := Cursor + 1 )
    else if EOFSeen = yes then
        % Here Cursor = End, and End is the offset at which EOF occurred.
        Result = eof,
        BufState = BufState0
    else if End < Terminus then
        ReadP(Cursor, Result, !Src),
        ( if Result = ok(Char) then
            array.set(End - Origin, Char, !Buf),
            BufState = (( BufState0
                ^ buf_cursor := Cursor + 1 )
                ^ buf_end := End + 1 )
        else if Result = eof then
            % Remember the EOF, as promised in the module header;
            % buf_end already holds the offset at which it occurred.
            % Without this the flag would never be set and the input
            % source would be consulted again on every later read.
            BufState = ( BufState0 ^ buf_eof_seen := yes )
        else
            BufState = BufState0
        )
    else
        % Need to GC and/or extend the buffer.
        % Everything between the origin and the start marker is garbage.
        GarbageLength = Start - Origin,
        adjust_buf(GarbageLength, ExtraLength, !Buf),
        NewOrigin = Origin + GarbageLength,
        NewTerminus = Terminus + GarbageLength + ExtraLength,
        BufState1 = (( BufState0
            ^ buf_origin := NewOrigin )
            ^ buf_terminus := NewTerminus ),
        read(Result, BufState1, BufState, !Buf, !Src)
    ).
%---------------------------------------------------------------------------%
% adjust_buf(GarbageLength, ExtraLength, !Buf):
%
% Discards the first GarbageLength slots of the buffer (the chars between
% the origin and the start marker) by sliding the remaining chars down
% to slot 0. If fewer than low_water_mark slots would be reclaimed,
% the chars are moved into a new array that is low_water_mark slots
% larger; otherwise the existing array is compacted in place.
% ExtraLength is the number of slots added (low_water_mark or 0).
%
:- pred adjust_buf(int::in, int::out, buf::array_di, buf::array_uo) is det.
adjust_buf(GarbageLength, ExtraLength, OldBuf, NewBuf) :-
    OldSize = array.size(OldBuf),
    LiveLength = OldSize - GarbageLength,
    ( if GarbageLength >= low_water_mark then
        % Reclaiming the garbage frees enough room by itself.
        ExtraLength = 0,
        Target = OldBuf
    else
        % We need to grow the buffer.
        ExtraLength = low_water_mark,
        Target = array.init(OldSize + low_water_mark, ('@'))
    ),
    NewBuf = shift_buf(0, LiveLength, GarbageLength, OldBuf, Target).
%---------------------------------------------------------------------------%
% shift_buf(I, Hi, Disp, Src, Tgt) = Buf:
%
% Copies Src[J + Disp] into Tgt[J] for each J in I .. Hi - 1, in ascending
% order, and returns the updated target. Because each slot is read from
% a higher index than the one written, this is also safe when Src and
% Tgt are the same array.
%
:- func shift_buf(int, int, int, buf, buf) = buf.
:- mode shift_buf(in, in, in, array_ui, array_di) = array_uo is det.
shift_buf(Idx, Limit, Offset, From, To0) = To :-
    ( if Idx >= Limit then
        To = To0
    else
        Elem = array.lookup(From, Idx + Offset),
        To1 = array.set(To0, Idx, Elem),
        To = shift_buf(Idx + 1, Limit, Offset, From, To1)
    ).
%---------------------------------------------------------------------------%
% The start marker is stored directly in the state.
start_offset(BufState) = Start :-
    Start = BufState ^ buf_start.
%---------------------------------------------------------------------------%
% The cursor is stored directly in the state.
cursor_offset(BufState) = Cursor :-
    Cursor = BufState ^ buf_cursor.
%---------------------------------------------------------------------------%
% Move the cursor back to Offset, which must lie in the closed range
% from the start marker to the current cursor; anything else is a
% caller error and is reported by throwing a string exception.
%
rewind_cursor(Offset, BufState) = NewBufState :-
    Start = BufState ^ buf_start,
    Cursor = BufState ^ buf_cursor,
    ( if Start =< Offset, Offset =< Cursor then
        NewBufState = ( BufState ^ buf_cursor := Offset )
    else
        throw("buf: rewind/2: offset arg outside valid range")
    ).
%---------------------------------------------------------------------------%
% Collect the chars from the start marker up to, but not including,
% the cursor. Offsets are converted to array indices by subtracting
% the origin; array.fetch_items takes an inclusive index range, so
% the last index is that of the char just before the cursor.
%
string_to_cursor(BufState, Buf) = String :-
    Origin = BufState ^ buf_origin,
    FirstIndex = BufState ^ buf_start - Origin,
    LastIndex = BufState ^ buf_cursor - 1 - Origin,
    Chars = array.fetch_items(Buf, FirstIndex, LastIndex),
    String = string.from_char_list(Chars).
%---------------------------------------------------------------------------%
% Pull the start marker up to the cursor; nothing else changes.
commit(BufState0) = BufState :-
    Cursor = BufState0 ^ buf_cursor,
    BufState = ( BufState0 ^ buf_start := Cursor ).
%---------------------------------------------------------------------------%
:- end_module lex.buf.
%---------------------------------------------------------------------------%