Carve io.text_read.m out of io.m, and ...

... move the implementation of flush_*output to io.stream_ops.m.

In both cases, improve the predicate names a bit.

Add a distinguishing prefix to the names of the function symbols of a type,
to reduce the number of times the function symbol names ok, error and eof
are overloaded from ridiculous to slighly less ridiculous :-(

Move related predicate arguments next to each other.

library/io.text_read.m:
    New submodule containing the code that implement the predicates
    that read values of non-primitive types, all but one of which are text.

library/io.stream_ops.m:
    Move the implementation of flushing output here from io.m.

library/io.m:
    Delete the code moved to the above modules.

library/MODULES_UNDOC:
library/library.m:
    List the new submodule as undocumented.

library/Mercury.options:
    Allow io.text_read.m, instead of io.m, to define an inst for a type
    that has a definition in a foreign language as well as Mercury.
    This diff moves this inst definition to io.text_read.m.
This commit is contained in:
Zoltan Somogyi
2022-03-15 01:12:07 +11:00
parent b7ca428cef
commit e163ea91b6
6 changed files with 785 additions and 750 deletions

View File

@@ -3,6 +3,7 @@ io.primitives_read.m
io.primitives_write.m
io.stream_db.m
io.stream_ops.m
io.text_read.m
mer_std.m
mutvar.m
par_builtin.m

View File

@@ -53,11 +53,11 @@ MCFLAGS-string += --no-warn-unknown-format-calls
MCFLAGS-mer_std += --no-warn-nothing-exported
# Avoid warnings about insts with non-existent function symbols in their
# bound lists. The non-existent function symbols are used here to represent
# bound lists. The non-existent function symbols are used here to represent
# insts for foreign types.
MCFLAGS-array += --no-warn-insts-without-matching-type
MCFLAGS-bitmap += --no-warn-insts-without-matching-type
MCFLAGS-io += --no-warn-insts-without-matching-type
MCFLAGS-io.text_read += --no-warn-insts-without-matching-type
# Avoid warnings about unused imports.
MCFLAGS-builtin += --no-warn-unused-imports

View File

@@ -2098,6 +2098,7 @@
:- include_module primitives_write. % Include exported for benchmarking.m.
:- include_module stream_db. % Include exported for browser/browse.m.
:- include_module stream_ops. % Include exported for benchmarking.m.
:- include_module text_read. % Include exported for symmetry.
%---------------------%
%
@@ -2246,6 +2247,7 @@
:- import_module io.primitives_write.
:- import_module io.stream_db.
:- import_module io.stream_ops.
:- import_module io.text_read.
:- import_module mercury_term_parser.
:- import_module require.
:- import_module stream.string_writer.
@@ -3372,43 +3374,12 @@ read_word(Stream, Result, !IO) :-
read_word_2(Stream, Result, !IO)
).
:- pred read_word_2(io.input_stream::in, io.result(list(char))::out,
io::di, io::uo) is det.
read_word_2(Stream, Result, !IO) :-
read_char(Stream, CharResult, !IO),
(
CharResult = error(Error),
Result = error(Error)
;
CharResult = eof,
Result = eof
;
CharResult = ok(Char),
( if char.is_whitespace(Char) then
putback_char(Stream, Char, !IO),
Result = ok([])
else
read_word_2(Stream, Result0, !IO),
(
Result0 = ok(Chars),
Result = ok([Char | Chars])
;
Result0 = error(_),
Result = Result0
;
Result0 = eof,
Result = ok([Char])
)
)
).
read_line(Result, !IO) :-
input_stream(Stream, !IO),
read_line(Stream, Result, !IO).
read_line(Stream, Result, !IO) :-
read_line_2(Stream, ResultCode, Chars, Error, !IO),
read_line_2(Stream, ResultCode, Error, Chars, !IO),
(
ResultCode = result_code_ok,
Result = ok(Chars)
@@ -3421,190 +3392,27 @@ read_line(Stream, Result, !IO) :-
Result = error(io_error(Msg))
).
:- pred read_line_2(input_stream::in, result_code::out, list(char)::out,
system_error::out, io::di, io::uo) is det.
read_line_2(Stream, Result, Chars, Error, !IO) :-
read_char_code(Stream, Result0, Char, Error0, !IO),
(
Result0 = result_code_ok,
( if Char = '\n' then
Result = result_code_ok,
Chars = [Char],
Error = Error0
else
read_line_2(Stream, Result, CharsTail, Error, !IO),
Chars = [Char | CharsTail] % lcmc
)
;
( Result0 = result_code_eof
; Result0 = result_code_error
),
Result = Result0,
Chars = [],
Error = Error0
).
read_line_as_string(Result, !IO) :-
input_stream(Stream, !IO),
read_line_as_string(Stream, Result, !IO).
read_line_as_string(input_stream(Stream), Result, !IO) :-
read_line_as_string_2(Stream, yes, Res, String, Error, !IO),
read_line_as_string_2(Stream, yes, Res, Error, String, !IO),
(
Res = ok,
Res = rlas_ok,
Result = ok(String)
;
Res = eof,
Res = rlas_eof,
Result = eof
;
Res = null_char,
Res = rlas_null_char,
Result = error(io_error("null character in input"))
;
Res = error,
Res = rlas_error,
make_err_msg(Error, "read failed: ", Msg, !IO),
Result = error(io_error(Msg))
).
:- type read_line_as_string_result
---> ok
; eof
; null_char
; error.
:- pragma foreign_export_enum("C", read_line_as_string_result/0,
[prefix("ML_READ_LINE_AS_STRING_"), uppercase]).
:- pragma foreign_export_enum("Java", read_line_as_string_result/0,
[prefix("ML_READ_LINE_AS_STRING_"), uppercase]).
:- pred read_line_as_string_2(io.stream::in, bool::in,
read_line_as_string_result::out, string::out, system_error::out,
io::di, io::uo) is det.
:- pragma foreign_proc("C",
read_line_as_string_2(Stream::in, _FirstCall::in, Res::out,
RetString::out, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness, no_sharing],
"
#define ML_IO_READ_LINE_GROW(n) ((n) * 3 / 2)
#define ML_IO_BYTES_TO_WORDS(n) (((n) + sizeof(MR_Word) - 1) / sizeof(MR_Word))
#define ML_IO_READ_LINE_START 1024
char initial_read_buffer[ML_IO_READ_LINE_START];
char *read_buffer = initial_read_buffer;
size_t read_buf_size = ML_IO_READ_LINE_START;
size_t i;
int char_code = '\\0';
Res = ML_READ_LINE_AS_STRING_OK;
Error = 0;
for (i = 0; char_code != '\\n'; ) {
char_code = mercury_get_byte(Stream);
if (char_code == EOF) {
if (i == 0) {
if (MR_FERROR(*Stream)) {
Res = ML_READ_LINE_AS_STRING_ERROR;
Error = errno;
} else {
Res = ML_READ_LINE_AS_STRING_EOF;
}
}
break;
}
if (char_code == 0) {
Res = ML_READ_LINE_AS_STRING_NULL_CHAR;
break;
}
read_buffer[i++] = (char) char_code;
MR_assert(i <= read_buf_size);
if (i == read_buf_size) {
// Grow the read buffer.
read_buf_size = ML_IO_READ_LINE_GROW(read_buf_size);
if (read_buffer == initial_read_buffer) {
read_buffer = MR_NEW_ARRAY(char, read_buf_size);
MR_memcpy(read_buffer, initial_read_buffer,
ML_IO_READ_LINE_START);
} else {
read_buffer = MR_RESIZE_ARRAY(read_buffer, char,
read_buf_size);
}
}
}
if (Res == ML_READ_LINE_AS_STRING_OK) {
MR_Word ret_string_word;
MR_offset_incr_hp_atomic_msg(ret_string_word,
0, ML_IO_BYTES_TO_WORDS((i + 1) * sizeof(char)),
MR_ALLOC_ID, ""string.string/0"");
RetString = (MR_String) ret_string_word;
MR_memcpy(RetString, read_buffer, i * sizeof(char));
RetString[i] = '\\0';
} else {
RetString = MR_make_string_const("""");
}
if (read_buffer != initial_read_buffer) {
MR_free(read_buffer);
}
").
:- pragma foreign_proc("Java",
read_line_as_string_2(Stream::in, _FirstCall::in, Res::out,
RetString::out, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness, may_not_duplicate],
"
try {
RetString =
((jmercury.io__stream_ops.MR_TextInputFile) Stream).read_line();
if (RetString != null) {
Res = ML_READ_LINE_AS_STRING_OK;
} else {
Res = ML_READ_LINE_AS_STRING_EOF;
}
Error = null;
} catch (java.io.IOException e) {
Res = ML_READ_LINE_AS_STRING_ERROR;
RetString = """";
Error = e;
}
").
read_line_as_string_2(Stream, FirstCall, Res, String, Error, !IO) :-
% XXX This is terribly inefficient, a better approach would be
% to use a buffer like what is done for io.read_file_as_string.
read_char_code(input_stream(Stream), ResultCode, Char, Error0, !IO),
(
ResultCode = result_code_ok,
( if Char = '\n' then
Res = ok,
String = "\n",
Error = Error0
else if char.to_int(Char, 0) then
Res = null_char,
String = "",
Error = Error0
else
read_line_as_string_2(Stream, no, Res, String0, Error, !IO),
string.first_char(String, Char, String0)
)
;
ResultCode = result_code_eof,
(
FirstCall = yes,
Res = eof
;
FirstCall = no,
Res = ok
),
String = "",
Error = Error0
;
ResultCode = result_code_error,
Res = error,
String = "",
Error = Error0
).
ignore_whitespace(Result, !IO) :-
input_stream(Stream, !IO),
ignore_whitespace(Stream, Result, !IO).
@@ -4011,47 +3819,9 @@ flush_output(!IO) :-
flush_output(Stream, !IO).
flush_output(output_stream(Stream), !IO) :-
flush_output_2(Stream, Error, !IO),
flush_text_output_2(Stream, Error, !IO),
throw_on_output_error(Error, !IO).
:- pred flush_output_2(stream::in, system_error::out, io::di, io::uo) is det.
:- pragma foreign_proc("C",
flush_output_2(Stream::in, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness, no_sharing],
"
if (MR_FLUSH(*Stream) < 0) {
Error = errno;
} else {
Error = 0;
}
").
:- pragma foreign_proc("C#",
flush_output_2(Stream::in, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
try {
Stream.stream.Flush();
Error = null;
} catch (System.SystemException e) {
Error = e;
}
").
:- pragma foreign_proc("Java",
flush_output_2(Stream::in, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
try {
((jmercury.io__stream_ops.MR_TextOutputFile) Stream).flush();
Error = null;
} catch (java.io.IOException e) {
Error = e;
}
").
%---------------------%
flush_binary_output(!IO) :-
@@ -4062,45 +3832,6 @@ flush_binary_output(binary_output_stream(Stream), !IO) :-
flush_binary_output_2(Stream, Error, !IO),
throw_on_output_error(Error, !IO).
:- pred flush_binary_output_2(stream::in, system_error::out,
io::di, io::uo) is det.
:- pragma foreign_proc("C",
flush_binary_output_2(Stream::in, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness, no_sharing],
"
if (MR_FLUSH(*Stream) < 0) {
Error = errno;
} else {
Error = 0;
}
").
:- pragma foreign_proc("C#",
flush_binary_output_2(Stream::in, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
try {
Stream.stream.Flush();
Error = null;
} catch (System.SystemException e) {
Error = e;
}
").
:- pragma foreign_proc("Java",
flush_binary_output_2(Stream::in, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
try {
((jmercury.io__stream_ops.MR_BinaryOutputFile) Stream).flush();
Error = null;
} catch (java.io.IOException e) {
Error = e;
}
").
%---------------------------------------------------------------------------%
%
% Whole file input predicates.
@@ -4212,93 +3943,6 @@ read_file_as_string_and_num_code_units(input_stream(Stream), Result, !IO) :-
)
).
:- pred read_file_as_string_2(stream::in, string::out, int::out,
system_error::out, bool::out, io::di, io::uo) is det.
:- pragma foreign_proc("Java",
read_file_as_string_2(Stream::in, String::out, NumCUs::out,
Error::out, NullCharError::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
StringBuilder sb = new StringBuilder();
try {
((jmercury.io__stream_ops.MR_TextInputFile) Stream).read_file(sb);
Error = null;
} catch (java.io.IOException e) {
Error = e;
}
String = sb.toString();
NumCUs = String.length();
NullCharError = bool.NO;
").
read_file_as_string_2(Stream, Str, NumCUs, Error, NullCharError, !IO) :-
% Check if the stream is a regular file; if so, allocate a buffer
% according to the size of the file. Otherwise, just use a default buffer
% size of 4k minus a bit (to give malloc some room).
input_stream_file_size(input_stream(Stream), FileSize, !IO),
( if FileSize >= 0 then
% When targeting C, this reserves just enough space for all the bytes
% in the file, plus the final NUL character.
%
% When targeting C#, this reserves one slot in an array of code points
% for each byte in the file, plus the NUL. This means that the buffer
% we reserve may be bigger than needed. How much bigger depends on
% the number of code points in the file that take more than one
% UTF-16 code units.
BufferSize0 = FileSize + 1
else
BufferSize0 = 4000
),
alloc_buffer(BufferSize0, Buffer0),
% Read the file into the buffer (resizing it as we go if necessary),
% convert the buffer into a string, and see if anything went wrong.
%
% When targeting C, Pos counts UTF-8 code *units* (in the usual case
% where the input is valid UTF-8; otherwise, it counts bytes).
% When targeting C#, Pos counts code *points*.
% When targeting Java, the foreign_proc above replaces this clause.
Pos0 = 0,
read_file_as_string_loop(input_stream(Stream), Buffer0, BufferSize0, Pos0,
Str, NumCUs, Error, NullCharError, !IO).
:- pred read_file_as_string_loop(input_stream::in, buffer::buffer_di,
int::in, int::in, string::out, int::out, system_error::out, bool::out,
io::di, io::uo) is det.
% This predicate is not used when compiling to Java; this pragma avoids
% a warning even in that case.
:- pragma consider_used(pred(read_file_as_string_loop/10)).
read_file_as_string_loop(Stream, !.Buffer, BufferSize0, !.Pos,
Str, NumCUs, Error, NullCharError, !IO) :-
Stream = input_stream(RealStream),
read_into_buffer(RealStream, !Buffer, BufferSize0, !Pos, Error0, !IO),
( if !.Pos < BufferSize0 then
% Buffer is not full: end-of-file or error.
( if
buffer_and_pos_to_string_and_length(!.Buffer, !.Pos,
StrPrime, NumCUsPrime)
then
Str = StrPrime,
NumCUs = NumCUsPrime,
NullCharError = no
else
Str = "",
NumCUs = 0,
NullCharError = yes
),
Error = Error0
else if !.Pos = BufferSize0 then
% Buffer is full; make room for more of the file.
% Doubling its size should catch up to its actual size quickly.
BufferSize1 = BufferSize0 * 2,
resize_buffer(BufferSize0, BufferSize1, !Buffer),
read_file_as_string_loop(Stream, !.Buffer, BufferSize1, !.Pos,
Str, NumCUs, Error, NullCharError, !IO)
else
error("io.read_file_as_string: buffer overflow")
).
%---------------------%
read_binary_file(Result, !IO) :-
@@ -4332,349 +3976,7 @@ read_binary_file_as_bitmap(Result, !IO) :-
read_binary_file_as_bitmap(Stream, Result, !IO).
read_binary_file_as_bitmap(Stream, Result, !IO) :-
% Check if the stream is a regular file; if so, allocate a buffer
% according to the size of the file. Otherwise, just use a default buffer
% size of 4k minus a bit (to give malloc some room).
binary_input_stream_file_size(Stream, FileSize, !IO),
( if FileSize >= 0i64 then
binary_input_stream_offset64(Stream, CurrentOffset, !IO),
RemainingSizeInt64 = FileSize - CurrentOffset,
( if
int.bits_per_int = 32,
RemainingSizeInt64 > int64.from_int(int.max_int)
then
Result = error(io_error("io.read_binary_file_as_bitmap: " ++
"file size exceeds maximum buffer size"))
else
RemainingSize = int64.cast_to_int(RemainingSizeInt64),
some [!BM] (
!:BM = bitmap.init(RemainingSize * bits_per_byte),
( if RemainingSize = 0 then
Result = ok(!.BM)
else
bitmap.read_bitmap_range(Stream, 0, RemainingSize, !BM,
BytesRead, ReadResult, !IO),
(
ReadResult = ok,
( if BytesRead = RemainingSize then
Result = ok(!.BM)
else
Result = error(io_error(
"io.read_binary_file_as_bitmap: " ++
"incorrect file size"))
)
;
ReadResult = error(Msg),
Result = error(Msg)
)
)
)
)
else
BufferSize = 4000,
read_binary_file_as_bitmap_from_stream(Stream, BufferSize,
Res, [], RevBitmaps, !IO),
(
Res = ok,
Result = ok(bitmap.append_list(reverse(RevBitmaps)))
;
Res = error(Msg),
Result = error(Msg)
)
).
:- pred read_binary_file_as_bitmap_from_stream(io.binary_input_stream::in,
num_bytes::in, io.res::out, list(bitmap)::in, list(bitmap)::out,
io::di, io::uo) is det.
read_binary_file_as_bitmap_from_stream(Stream, BufferSize, Res, !BMs, !IO) :-
some [!BM] (
!:BM = bitmap.init(BufferSize * bits_per_byte),
bitmap.read_bitmap_range(Stream, 0, BufferSize, !BM, NumBytesRead,
ReadRes, !IO),
(
ReadRes = ok,
( if NumBytesRead < BufferSize then
!:BM = bitmap.shrink_without_copying(!.BM,
NumBytesRead * bits_per_byte),
!:BMs = [!.BM | !.BMs],
Res = ok
else
!:BMs = [!.BM | !.BMs],
% Double the buffer size each time.
read_binary_file_as_bitmap_from_stream(Stream, BufferSize * 2,
Res, !BMs, !IO)
)
;
ReadRes = error(Err),
Res = error(Err)
)
).
%---------------------%
% XXX FIXME this should return an int64.
:- pred input_stream_file_size(io.input_stream::in, int::out,
io::di, io::uo) is det.
:- pragma consider_used(pred(input_stream_file_size/4)).
input_stream_file_size(input_stream(Stream), Size, !IO) :-
stream_file_size(Stream, Size64, !IO),
Size = int64.cast_to_int(Size64).
:- pred binary_input_stream_file_size(io.binary_input_stream::in, int64::out,
io::di, io::uo) is det.
binary_input_stream_file_size(binary_input_stream(Stream), Size, !IO) :-
stream_file_size(Stream, Size, !IO).
% stream_file_size(Stream, Size):
%
% If Stream is a regular file, then Size is its size (in bytes),
% otherwise Size is -1.
%
:- pred stream_file_size(stream::in, int64::out, io::di, io::uo) is det.
:- pragma foreign_proc("C",
stream_file_size(Stream::in, Size::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness, no_sharing],
"
#if defined(MR_HAVE_FSTAT) && \
(defined(MR_HAVE_FILENO) || defined(fileno)) && defined(S_ISREG)
struct stat s;
if (MR_IS_FILE_STREAM(*Stream)) {
if (fstat(fileno(MR_file(*Stream)), &s) == 0 && S_ISREG(s.st_mode)) {
Size = s.st_size;
} else {
Size = -1;
}
} else {
Size = -1;
}
#else
Size = -1;
#endif
").
:- pragma foreign_proc("C#",
stream_file_size(Stream::in, Size::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe],
"{
if (Stream.stream.CanSeek) {
Size = Stream.stream.Length;
} else {
Size = -1;
}
}").
:- pragma foreign_proc("Java",
stream_file_size(Stream::in, Size::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
try {
Size = ((jmercury.io__stream_ops.MR_BinaryFile) Stream).size();
} catch (java.io.IOException e) {
Size = -1;
}
").
%---------------------%
% A buffer is an array of chars.
% For C backends, it is a C array of C chars.
% For other backends, it is a Mercury array of Mercury chars.
% XXX It would be better to use a char_array type rather than array(char).
% This is because on the Java and IL backends indexing into an array whose
% element type is known statically requires less overhead.
%
% It may be possible to merge with string.string_buffer.
%
:- type buffer
---> buffer(array(char)).
:- pragma foreign_type(c, buffer, "char *", [can_pass_as_mercury_type]).
% XXX Extend the workaround for no `ui' modes in array.m.
:- inst uniq_buffer for buffer/0
---> buffer(uniq_array).
:- mode buffer_di == di(uniq_buffer).
:- mode buffer_uo == out(uniq_buffer).
:- pred alloc_buffer(int::in, buffer::buffer_uo) is det.
:- pragma consider_used(pred(alloc_buffer/2)).
:- pragma foreign_proc("C",
alloc_buffer(Size::in, Buffer::buffer_uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness, no_sharing],
"{
MR_Word buf;
MR_offset_incr_hp_atomic_msg(buf, 0,
(Size * sizeof(char) + sizeof(MR_Word) - 1) / sizeof(MR_Word),
MR_ALLOC_ID, ""io.buffer/0"");
Buffer = (char *) buf;
}").
alloc_buffer(Size, buffer(Array)) :-
char.det_from_int(0, NullChar),
array.init(Size, NullChar, Array).
:- pred resize_buffer(int::in, int::in,
buffer::buffer_di, buffer::buffer_uo) is det.
:- pragma foreign_proc("C",
resize_buffer(OldSize::in, NewSize::in,
Buffer0::buffer_di, Buffer::buffer_uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness],
"{
MR_CHECK_EXPR_TYPE(Buffer0, char *);
MR_CHECK_EXPR_TYPE(Buffer, char *);
#ifdef MR_CONSERVATIVE_GC
Buffer = MR_GC_realloc(Buffer0, NewSize * sizeof(char));
#else
if (Buffer0 + OldSize == (char *) MR_hp) {
MR_Word next;
MR_offset_incr_hp_atomic_msg(next, 0,
(NewSize * sizeof(char) + sizeof(MR_Word) - 1)
/ sizeof(MR_Word),
MR_ALLOC_ID, ""io.buffer/0"");
assert(Buffer0 + OldSize == (char *) next);
Buffer = Buffer0;
} else {
// Just have to alloc and copy.
MR_Word buf;
MR_offset_incr_hp_atomic_msg(buf, 0,
(NewSize * sizeof(char) + sizeof(MR_Word) - 1)
/ sizeof(MR_Word),
MR_ALLOC_ID, ""io.buffer/0"");
Buffer = (char *) buf;
if (OldSize > NewSize) {
MR_memcpy(Buffer, Buffer0, NewSize);
} else {
MR_memcpy(Buffer, Buffer0, OldSize);
}
}
#endif
}").
resize_buffer(_OldSize, NewSize, buffer(Array0), buffer(Array)) :-
char.det_from_int(0, Char),
array.resize(NewSize, Char, Array0, Array).
:- pred buffer_and_pos_to_string_and_length(buffer::buffer_di, int::in,
string::out, int::out) is semidet.
% This predicate is used when compiling to C and C#; this pragma avoids
% a warning when compiling to Java.
:- pragma consider_used(pred(buffer_and_pos_to_string_and_length/4)).
:- pragma foreign_proc("C",
buffer_and_pos_to_string_and_length(Buffer::buffer_di, Pos::in,
Str::out, NumCUs::out),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness],
"{
Str = Buffer;
Str[Pos] = '\\0';
// Check that the string does not contain null characters.
if (strlen(Str) != Pos) {
SUCCESS_INDICATOR = MR_FALSE;
} else {
SUCCESS_INDICATOR = MR_TRUE;
}
// In C, Pos counts bytes, which are the same size as UTF-8 code units.
// NumCUs is expected to be in the code units native to the target
// language, and this is UTF-8, so no conversion needs to be done.
// (Compare to the C# case below.)
NumCUs = Pos;
}").
buffer_and_pos_to_string_and_length(buffer(Array), Pos, Str, NumCUs) :-
% This predicate is used only when compiling to C and C#, and when
% targeting C, we use the foreign_proc above, so this clause is used
% only when targeting C#.
%
% In C#, Pos counts chars, i.e. code points. Most code points occupy
% just one UTF-16 code unit, but some occupy two. The call below to
% semidet_from_char_list will do this expansion as necessary.
% We can't know how many code units the final string contains
% until we count them. (Compare to the C case above.)
%
% XXX The current implementation of read_file_as_string_2
% reads in code units one by one, converts them to code points
% to store them in array slots, then converts the array to a string,
% which converts each code point back into one or two UTF-16 code units.
% A fully C#-specific implementation of read_file_as_string_2,
% one not shared with C, should be able to dispense with all the
% redundant conversions.
array.fetch_items(Array, min(Array), min(Array) + Pos - 1, List),
string.semidet_from_char_list(List, Str),
string.length(Str, NumCUs).
:- pred read_into_buffer(stream::in, buffer::buffer_di, buffer::buffer_uo,
int::in, int::in, int::out, system_error::out, io::di, io::uo) is det.
:- pragma foreign_proc("C",
read_into_buffer(Stream::in, Buffer0::buffer_di, Buffer::buffer_uo,
BufferSize::in, Pos0::in, Pos::out, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness],
"
size_t bytes_to_read;
size_t bytes_read;
MR_CHECK_EXPR_TYPE(Buffer0, char *);
MR_CHECK_EXPR_TYPE(Buffer, char *);
bytes_to_read = BufferSize - Pos0;
bytes_read = MR_READ(*Stream, Buffer0 + Pos0, bytes_to_read);
Buffer = Buffer0;
Pos = Pos0 + bytes_read;
if (bytes_read < bytes_to_read && MR_FERROR(*Stream)) {
Error = errno;
} else {
Error = 0;
}
").
read_into_buffer(Stream, buffer(Array0), buffer(Array), BufferSize,
Pos0, Pos, Error, !IO) :-
read_into_array(input_stream(Stream), Array0, Array, BufferSize,
Pos0, Pos, Error, !IO).
:- pred read_into_array(input_stream::in,
array(char)::array_di, array(char)::array_uo, int::in, int::in, int::out,
system_error::out, io::di, io::uo) is det.
% This predicate is not used when compiling to C or Java; this pragma avoids
% a warning even in those cases.
:- pragma consider_used(pred(read_into_array/9)).
read_into_array(Stream, !Array, ArraySize, !Pos, Error, !IO) :-
( if !.Pos >= ArraySize then
Error = no_error
else
read_char_code(Stream, ResultCode, Char, Error0, !IO),
(
ResultCode = result_code_ok,
array.set(!.Pos, Char, !Array),
!:Pos = !.Pos + 1,
read_into_array(Stream, !Array, ArraySize, !Pos, Error, !IO)
;
( ResultCode = result_code_eof
; ResultCode = result_code_error
),
Error = Error0
)
).
read_binary_file_as_bitmap_2(Stream, Result, !IO).
%---------------------------------------------------------------------------%
%

View File

@@ -51,6 +51,13 @@
%---------------------%
:- pred flush_text_output_2(stream::in, system_error::out,
io::di, io::uo) is det.
:- pred flush_binary_output_2(stream::in, system_error::out,
io::di, io::uo) is det.
%---------------------%
:- pred get_input_line_number_2(stream::in, int::out, io::di, io::uo) is det.
:- pred set_input_line_number_2(stream::in, int::in, io::di, io::uo) is det.
:- pred get_output_line_number_2(stream::in, int::out, io::di, io::uo) is det.
@@ -79,13 +86,10 @@
:- pred set_input_stream_2(stream::in, stream::out,
io::di, io::uo) is det.
:- pred set_binary_input_stream_2(stream::in, stream::out,
io::di, io::uo) is det.
:- pred set_output_stream_2(stream::in, stream::out,
io::di, io::uo) is det.
:- pred set_binary_output_stream_2(stream::in, stream::out,
io::di, io::uo) is det.
@@ -127,7 +131,6 @@
Error = e;
}
").
:- pragma foreign_proc("Java",
do_open_text(FileName::in, Mode::in, StreamId::out, Stream::out,
Error::out, _IO0::di, _IO::uo),
@@ -178,7 +181,6 @@
Error = errno;
}
").
:- pragma foreign_proc("C#",
do_open_binary(FileName::in, Mode::in, StreamId::out, Stream::out,
Error::out, _IO0::di, _IO::uo),
@@ -195,7 +197,6 @@
Error = e;
}
").
:- pragma foreign_proc("Java",
do_open_binary(FileName::in, Mode::in, StreamId::out, Stream::out,
Error::out, _IO0::di, _IO::uo),
@@ -242,7 +243,6 @@
Error = 0;
}
").
:- pragma foreign_proc("C#",
close_stream(Stream::in, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe],
@@ -254,7 +254,6 @@
Error = e;
}
").
:- pragma foreign_proc("Java",
close_stream(Stream::in, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
@@ -295,9 +294,7 @@ whence_to_int(end, 2).
Error = EINVAL;
}
").
% MISSING C# seek_binary_2
:- pragma foreign_proc("Java",
seek_binary_2(Stream::in, Flag::in, Off::in, Error::out,
_IO0::di, _IO::uo),
@@ -332,9 +329,7 @@ whence_to_int(end, 2).
Error = EINVAL;
}
").
% MISSING C# binary_stream_offset_2
:- pragma foreign_proc("Java",
binary_stream_offset_2(Stream::in, Offset::out, Error::out,
_IO0::di, _IO::uo),
@@ -349,12 +344,87 @@ whence_to_int(end, 2).
}
").
%---------------------%
%---------------------------------------------------------------------------%
%
% The implementations of flush_text_output_2 and flush_binary_output_2
% are identical for C and C#, but they differ in Java: one casts the stream
% to MR_TextOutputFile, the other to MR_BinaryOutputFile.
%
:- pragma foreign_proc("C",
flush_text_output_2(Stream::in, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness, no_sharing],
"
if (MR_FLUSH(*Stream) < 0) {
Error = errno;
} else {
Error = 0;
}
").
:- pragma foreign_proc("C#",
flush_text_output_2(Stream::in, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
try {
Stream.stream.Flush();
Error = null;
} catch (System.SystemException e) {
Error = e;
}
").
:- pragma foreign_proc("Java",
flush_text_output_2(Stream::in, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
try {
((jmercury.io__stream_ops.MR_TextOutputFile) Stream).flush();
Error = null;
} catch (java.io.IOException e) {
Error = e;
}
").
:- pragma foreign_proc("C",
flush_binary_output_2(Stream::in, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness, no_sharing],
"
if (MR_FLUSH(*Stream) < 0) {
Error = errno;
} else {
Error = 0;
}
").
:- pragma foreign_proc("C#",
flush_binary_output_2(Stream::in, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
try {
Stream.stream.Flush();
Error = null;
} catch (System.SystemException e) {
Error = e;
}
").
:- pragma foreign_proc("Java",
flush_binary_output_2(Stream::in, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
try {
((jmercury.io__stream_ops.MR_BinaryOutputFile) Stream).flush();
Error = null;
} catch (java.io.IOException e) {
Error = e;
}
").
%---------------------------------------------------------------------------%
%
% The implementations of get_input_line_number_2 and get_output_line_number_2
% are identical for C and C#, but they differ in Java: one casts the stream
% to MR_TextInputFile, the other to MR_TextOutputFile. Likewise for the
% predicate that sets the line number.
% predicates that set the line number.
%
:- pragma foreign_proc("C",
@@ -397,6 +467,8 @@ whence_to_int(end, 2).
((jmercury.io__stream_ops.MR_TextInputFile) Stream).line_number = LineNum;
").
%---------------------%
:- pragma foreign_proc("C",
get_output_line_number_2(Stream::in, LineNum::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io,
@@ -456,7 +528,6 @@ whence_to_int(end, 2).
"
Stream = &mercury_stdin;
").
:- pragma foreign_proc("C#",
stdin_stream_2 = (Stream::out),
[will_not_call_mercury, promise_pure, thread_safe,
@@ -464,7 +535,6 @@ whence_to_int(end, 2).
"
Stream = mercury.io__stream_ops.mercury_stdin;
").
:- pragma foreign_proc("Java",
stdin_stream_2 = (Stream::out),
[will_not_call_mercury, promise_pure, thread_safe,
@@ -488,14 +558,12 @@ stdin_stream_2(Stream, !IO) :-
"
Stream = &mercury_stdin_binary;
").
:- pragma foreign_proc("C#",
stdin_binary_stream_2(Stream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
Stream = mercury.io__stream_ops.mercury_stdin_binary;
").
:- pragma foreign_proc("Java",
stdin_binary_stream_2(Stream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io,
@@ -515,7 +583,6 @@ stdin_stream_2(Stream, !IO) :-
"
Stream = &mercury_stdout;
").
:- pragma foreign_proc("C#",
stdout_stream_2 = (Stream::out),
[will_not_call_mercury, promise_pure, thread_safe,
@@ -523,7 +590,6 @@ stdin_stream_2(Stream, !IO) :-
"
Stream = mercury.io__stream_ops.mercury_stdout;
").
:- pragma foreign_proc("Java",
stdout_stream_2 = (Stream::out),
[will_not_call_mercury, promise_pure, thread_safe,
@@ -547,14 +613,12 @@ stdout_stream_2(Stream, !IO) :-
"
Stream = &mercury_stdout_binary;
").
:- pragma foreign_proc("C#",
stdout_binary_stream_2(Stream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
Stream = mercury.io__stream_ops.mercury_stdout_binary;
").
:- pragma foreign_proc("Java",
stdout_binary_stream_2(Stream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io,
@@ -574,7 +638,6 @@ stdout_stream_2(Stream, !IO) :-
"
Stream = &mercury_stderr;
").
:- pragma foreign_proc("C#",
stderr_stream_2 = (Stream::out),
[will_not_call_mercury, promise_pure, thread_safe,
@@ -582,7 +645,6 @@ stdout_stream_2(Stream, !IO) :-
"
Stream = mercury.io__stream_ops.mercury_stderr;
").
:- pragma foreign_proc("Java",
stderr_stream_2 = (Stream::out),
[will_not_call_mercury, promise_pure, thread_safe,
@@ -606,14 +668,12 @@ stderr_stream_2(Stream, !IO) :-
"
Stream = mercury_current_text_input();
").
:- pragma foreign_proc("C#",
input_stream_2(Stream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io],
"
Stream = mercury.io__stream_ops.mercury_current_text_input;
").
:- pragma foreign_proc("Java",
input_stream_2(Stream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io,
@@ -632,14 +692,12 @@ stderr_stream_2(Stream, !IO) :-
"
Stream = mercury_current_binary_input();
").
:- pragma foreign_proc("C#",
binary_input_stream_2(Stream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io],
"
Stream = mercury.io__stream_ops.mercury_current_binary_input;
").
:- pragma foreign_proc("Java",
binary_input_stream_2(Stream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io,
@@ -658,14 +716,12 @@ stderr_stream_2(Stream, !IO) :-
"
Stream = mercury_current_text_output();
").
:- pragma foreign_proc("C#",
output_stream_2(Stream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io],
"
Stream = mercury.io__stream_ops.mercury_current_text_output;
").
:- pragma foreign_proc("Java",
output_stream_2(Stream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io,
@@ -684,14 +740,12 @@ stderr_stream_2(Stream, !IO) :-
"
Stream = mercury_current_binary_output();
").
:- pragma foreign_proc("C#",
binary_output_stream_2(Stream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io],
"
Stream = mercury.io__stream_ops.mercury_current_binary_output;
").
:- pragma foreign_proc("Java",
binary_output_stream_2(Stream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io,
@@ -712,7 +766,6 @@ stderr_stream_2(Stream, !IO) :-
MR_set_thread_local_mutable(MercuryFilePtr, NewStream,
mercury_current_text_input_index);
").
:- pragma foreign_proc("C#",
set_input_stream_2(NewStream::in, OutStream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io],
@@ -720,7 +773,6 @@ stderr_stream_2(Stream, !IO) :-
OutStream = mercury.io__stream_ops.mercury_current_text_input;
mercury.io__stream_ops.mercury_current_text_input = NewStream;
").
:- pragma foreign_proc("Java",
set_input_stream_2(NewStream::in, OutStream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io],
@@ -743,7 +795,6 @@ stderr_stream_2(Stream, !IO) :-
MR_set_thread_local_mutable(MercuryFilePtr, NewStream,
mercury_current_binary_input_index);
").
:- pragma foreign_proc("C#",
set_binary_input_stream_2(NewStream::in, OutStream::out,
_IO0::di, _IO::uo),
@@ -752,7 +803,6 @@ stderr_stream_2(Stream, !IO) :-
OutStream = mercury.io__stream_ops.mercury_current_binary_input;
mercury.io__stream_ops.mercury_current_binary_input = NewStream;
").
:- pragma foreign_proc("Java",
set_binary_input_stream_2(NewStream::in, OutStream::out,
_IO0::di, _IO::uo),
@@ -775,7 +825,6 @@ stderr_stream_2(Stream, !IO) :-
MR_set_thread_local_mutable(MercuryFilePtr, NewStream,
mercury_current_text_output_index);
").
:- pragma foreign_proc("C#",
set_output_stream_2(NewStream::in, OutStream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io],
@@ -783,7 +832,6 @@ stderr_stream_2(Stream, !IO) :-
OutStream = mercury.io__stream_ops.mercury_current_text_output;
mercury.io__stream_ops.mercury_current_text_output = NewStream;
").
:- pragma foreign_proc("Java",
set_output_stream_2(NewStream::in, OutStream::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io],
@@ -806,7 +854,6 @@ stderr_stream_2(Stream, !IO) :-
MR_set_thread_local_mutable(MercuryFilePtr, NewStream,
mercury_current_binary_output_index);
").
:- pragma foreign_proc("C#",
set_binary_output_stream_2(NewStream::in, OutStream::out,
_IO0::di, _IO::uo),
@@ -815,7 +862,6 @@ stderr_stream_2(Stream, !IO) :-
OutStream = mercury.io__stream_ops.mercury_current_binary_output;
mercury.io__stream_ops.mercury_current_binary_output = NewStream;
").
:- pragma foreign_proc("Java",
set_binary_output_stream_2(NewStream::in, OutStream::out,
_IO0::di, _IO::uo),

685
library/io.text_read.m Normal file
View File

@@ -0,0 +1,685 @@
%---------------------------------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%---------------------------------------------------------------------------%
% Copyright (C) 1993-2012 The University of Melbourne.
% Copyright (C) 2013-2022 The Mercury team.
% This file is distributed under the terms specified in COPYING.LIB.
%---------------------------------------------------------------------------%
%
% File: io.text_read.m.
%
% This module implements the predicates in io.m that read
% words, lines and files.
%
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
:- module io.text_read.
:- interface.
:- import_module bitmap.
:- import_module list.
%---------------------------------------------------------------------------%
:- pred read_word_2(io.input_stream::in, io.result(list(char))::out,
io::di, io::uo) is det.
%---------------------%
:- pred read_line_2(io.input_stream::in, result_code::out, system_error::out,
list(char)::out, io::di, io::uo) is det.
%---------------------%
:- type read_line_as_string_result
---> rlas_ok
; rlas_eof
; rlas_null_char
; rlas_error.
:- pred read_line_as_string_2(io.stream::in, bool::in,
read_line_as_string_result::out, system_error::out, string::out,
io::di, io::uo) is det.
%---------------------%
:- pred read_file_as_string_2(stream::in, string::out, int::out,
system_error::out, bool::out, io::di, io::uo) is det.
%---------------------%
:- pred read_binary_file_as_bitmap_2(io.binary_input_stream::in,
io.res(bitmap)::out, io::di, io::uo) is det.
%---------------------------------------------------------------------------%
:- implementation.
:- import_module array.
:- import_module char.
:- import_module int.
:- import_module int64.
:- import_module io.primitives_read.
:- import_module string.
%---------------------------------------------------------------------------%
read_word_2(Stream, Result, !IO) :-
read_char(Stream, CharResult, !IO),
(
CharResult = error(Error),
Result = error(Error)
;
CharResult = eof,
Result = eof
;
CharResult = ok(Char),
( if char.is_whitespace(Char) then
putback_char(Stream, Char, !IO),
Result = ok([])
else
read_word_2(Stream, Result0, !IO),
(
Result0 = ok(Chars),
Result = ok([Char | Chars])
;
Result0 = error(_),
Result = Result0
;
Result0 = eof,
Result = ok([Char])
)
)
).
%---------------------------------------------------------------------------%
read_line_2(Stream, Result, Error, Chars, !IO) :-
read_char_code(Stream, Result0, Char, Error0, !IO),
(
Result0 = result_code_ok,
( if Char = '\n' then
Result = result_code_ok,
Chars = [Char],
Error = Error0
else
read_line_2(Stream, Result, Error, CharsTail, !IO),
Chars = [Char | CharsTail] % lcmc
)
;
( Result0 = result_code_eof
; Result0 = result_code_error
),
Result = Result0,
Chars = [],
Error = Error0
).
%---------------------------------------------------------------------------%
:- pragma foreign_export_enum("C", read_line_as_string_result/0,
[prefix("ML_"), uppercase]).
:- pragma foreign_export_enum("Java", read_line_as_string_result/0,
[prefix("ML_"), uppercase]).
:- pragma foreign_proc("C",
read_line_as_string_2(Stream::in, _FirstCall::in, Res::out, Error::out,
RetString::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness, no_sharing],
"
#define ML_IO_READ_LINE_GROW(n) ((n) * 3 / 2)
#define ML_IO_BYTES_TO_WORDS(n) (((n) + sizeof(MR_Word) - 1) / sizeof(MR_Word))
#define ML_IO_READ_LINE_START 1024
char initial_read_buffer[ML_IO_READ_LINE_START];
char *read_buffer = initial_read_buffer;
size_t read_buf_size = ML_IO_READ_LINE_START;
size_t i;
int char_code = '\\0';
Res = ML_RLAS_OK;
Error = 0;
for (i = 0; char_code != '\\n'; ) {
char_code = mercury_get_byte(Stream);
if (char_code == EOF) {
if (i == 0) {
if (MR_FERROR(*Stream)) {
Res = ML_RLAS_ERROR;
Error = errno;
} else {
Res = ML_RLAS_EOF;
}
}
break;
}
if (char_code == 0) {
Res = ML_RLAS_NULL_CHAR;
break;
}
read_buffer[i++] = (char) char_code;
MR_assert(i <= read_buf_size);
if (i == read_buf_size) {
// Grow the read buffer.
read_buf_size = ML_IO_READ_LINE_GROW(read_buf_size);
if (read_buffer == initial_read_buffer) {
read_buffer = MR_NEW_ARRAY(char, read_buf_size);
MR_memcpy(read_buffer, initial_read_buffer,
ML_IO_READ_LINE_START);
} else {
read_buffer = MR_RESIZE_ARRAY(read_buffer, char,
read_buf_size);
}
}
}
if (Res == ML_RLAS_OK) {
MR_Word ret_string_word;
MR_offset_incr_hp_atomic_msg(ret_string_word,
0, ML_IO_BYTES_TO_WORDS((i + 1) * sizeof(char)),
MR_ALLOC_ID, ""string.string/0"");
RetString = (MR_String) ret_string_word;
MR_memcpy(RetString, read_buffer, i * sizeof(char));
RetString[i] = '\\0';
} else {
RetString = MR_make_string_const("""");
}
if (read_buffer != initial_read_buffer) {
MR_free(read_buffer);
}
").
:- pragma foreign_proc("Java",
read_line_as_string_2(Stream::in, _FirstCall::in, Res::out, Error::out,
RetString::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness, may_not_duplicate],
"
try {
RetString =
((jmercury.io__stream_ops.MR_TextInputFile) Stream).read_line();
if (RetString != null) {
Res = ML_RLAS_OK;
} else {
Res = ML_RLAS_EOF;
}
Error = null;
} catch (java.io.IOException e) {
Res = ML_RLAS_ERROR;
RetString = """";
Error = e;
}
").
read_line_as_string_2(Stream, FirstCall, Res, Error, String, !IO) :-
% XXX This is terribly inefficient, a better approach would be
% to use a buffer like what is done for io.read_file_as_string.
read_char_code(input_stream(Stream), ResultCode, Char, Error0, !IO),
(
ResultCode = result_code_ok,
( if Char = '\n' then
Res = rlas_ok,
String = "\n",
Error = Error0
else if char.to_int(Char, 0) then
Res = rlas_null_char,
String = "",
Error = Error0
else
read_line_as_string_2(Stream, no, Res, Error, String0, !IO),
string.first_char(String, Char, String0)
)
;
ResultCode = result_code_eof,
(
FirstCall = yes,
Res = rlas_eof
;
FirstCall = no,
Res = rlas_ok
),
String = "",
Error = Error0
;
ResultCode = result_code_error,
Res = rlas_error,
String = "",
Error = Error0
).
%---------------------------------------------------------------------------%
:- pragma foreign_proc("Java",
read_file_as_string_2(Stream::in, String::out, NumCUs::out,
Error::out, NullCharError::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
StringBuilder sb = new StringBuilder();
try {
((jmercury.io__stream_ops.MR_TextInputFile) Stream).read_file(sb);
Error = null;
} catch (java.io.IOException e) {
Error = e;
}
String = sb.toString();
NumCUs = String.length();
NullCharError = bool.NO;
").
read_file_as_string_2(Stream, Str, NumCUs, Error, NullCharError, !IO) :-
% Check if the stream is a regular file; if so, allocate a buffer
% according to the size of the file. Otherwise, just use a default buffer
% size of 4k minus a bit (to give malloc some room).
input_stream_file_size(input_stream(Stream), FileSize, !IO),
( if FileSize >= 0 then
% When targeting C, this reserves just enough space for all the bytes
% in the file, plus the final NUL character.
%
% When targeting C#, this reserves one slot in an array of code points
% for each byte in the file, plus the NUL. This means that the buffer
% we reserve may be bigger than needed. How much bigger depends on
% the number of code points in the file that take more than one
% UTF-16 code units.
BufferSize0 = FileSize + 1
else
BufferSize0 = 4000
),
alloc_buffer(BufferSize0, Buffer0),
% Read the file into the buffer (resizing it as we go if necessary),
% convert the buffer into a string, and see if anything went wrong.
%
% When targeting C, Pos counts UTF-8 code *units* (in the usual case
% where the input is valid UTF-8; otherwise, it counts bytes).
% When targeting C#, Pos counts code *points*.
% When targeting Java, the foreign_proc above replaces this clause.
Pos0 = 0,
read_file_as_string_loop(input_stream(Stream), Buffer0, BufferSize0, Pos0,
Str, NumCUs, Error, NullCharError, !IO).
:- pred read_file_as_string_loop(input_stream::in, buffer::buffer_di,
int::in, int::in, string::out, int::out, system_error::out, bool::out,
io::di, io::uo) is det.
% This predicate is not used when compiling to Java; this pragma avoids
% a warning even in that case.
:- pragma consider_used(pred(read_file_as_string_loop/10)).
read_file_as_string_loop(Stream, !.Buffer, BufferSize0, !.Pos,
Str, NumCUs, Error, NullCharError, !IO) :-
Stream = input_stream(RealStream),
read_into_buffer(RealStream, !Buffer, BufferSize0, !Pos, Error0, !IO),
( if !.Pos < BufferSize0 then
% Buffer is not full: end-of-file or error.
( if
buffer_and_pos_to_string_and_length(!.Buffer, !.Pos,
StrPrime, NumCUsPrime)
then
Str = StrPrime,
NumCUs = NumCUsPrime,
NullCharError = no
else
Str = "",
NumCUs = 0,
NullCharError = yes
),
Error = Error0
else if !.Pos = BufferSize0 then
% Buffer is full; make room for more of the file.
% Doubling its size should catch up to its actual size quickly.
BufferSize1 = BufferSize0 * 2,
resize_buffer(BufferSize0, BufferSize1, !Buffer),
read_file_as_string_loop(Stream, !.Buffer, BufferSize1, !.Pos,
Str, NumCUs, Error, NullCharError, !IO)
else
error("io.read_file_as_string: buffer overflow")
).
%---------------------%
read_binary_file_as_bitmap_2(Stream, Result, !IO) :-
% Check if the stream is a regular file; if so, allocate a buffer
% according to the size of the file. Otherwise, just use a default buffer
% size of 4k minus a bit (to give malloc some room).
binary_input_stream_file_size(Stream, FileSize, !IO),
( if FileSize >= 0i64 then
binary_input_stream_offset64(Stream, CurrentOffset, !IO),
RemainingSizeInt64 = FileSize - CurrentOffset,
( if
int.bits_per_int = 32,
RemainingSizeInt64 > int64.from_int(int.max_int)
then
Result = error(io_error("io.read_binary_file_as_bitmap: " ++
"file size exceeds maximum buffer size"))
else
RemainingSize = int64.cast_to_int(RemainingSizeInt64),
some [!BM] (
!:BM = bitmap.init(RemainingSize * bits_per_byte),
( if RemainingSize = 0 then
Result = ok(!.BM)
else
bitmap.read_bitmap_range(Stream, 0, RemainingSize, !BM,
BytesRead, ReadResult, !IO),
(
ReadResult = ok,
( if BytesRead = RemainingSize then
Result = ok(!.BM)
else
Result = error(io_error(
"io.read_binary_file_as_bitmap: " ++
"incorrect file size"))
)
;
ReadResult = error(Msg),
Result = error(Msg)
)
)
)
)
else
BufferSize = 4000,
read_binary_file_as_bitmap_from_stream(Stream, BufferSize,
Res, [], RevBitmaps, !IO),
(
Res = ok,
Result = ok(bitmap.append_list(reverse(RevBitmaps)))
;
Res = error(Msg),
Result = error(Msg)
)
).
:- pred read_binary_file_as_bitmap_from_stream(io.binary_input_stream::in,
num_bytes::in, io.res::out, list(bitmap)::in, list(bitmap)::out,
io::di, io::uo) is det.
read_binary_file_as_bitmap_from_stream(Stream, BufferSize, Res, !BMs, !IO) :-
some [!BM] (
!:BM = bitmap.init(BufferSize * bits_per_byte),
bitmap.read_bitmap_range(Stream, 0, BufferSize, !BM, NumBytesRead,
ReadRes, !IO),
(
ReadRes = ok,
( if NumBytesRead < BufferSize then
!:BM = bitmap.shrink_without_copying(!.BM,
NumBytesRead * bits_per_byte),
!:BMs = [!.BM | !.BMs],
Res = ok
else
!:BMs = [!.BM | !.BMs],
% Double the buffer size each time.
read_binary_file_as_bitmap_from_stream(Stream, BufferSize * 2,
Res, !BMs, !IO)
)
;
ReadRes = error(Err),
Res = error(Err)
)
).
%---------------------%
% XXX FIXME this should return an int64.
:- pred input_stream_file_size(io.input_stream::in, int::out,
io::di, io::uo) is det.
:- pragma consider_used(pred(input_stream_file_size/4)).
input_stream_file_size(input_stream(Stream), Size, !IO) :-
stream_file_size(Stream, Size64, !IO),
Size = int64.cast_to_int(Size64).
:- pred binary_input_stream_file_size(io.binary_input_stream::in, int64::out,
io::di, io::uo) is det.
binary_input_stream_file_size(binary_input_stream(Stream), Size, !IO) :-
stream_file_size(Stream, Size, !IO).
% stream_file_size(Stream, Size):
%
% If Stream is a regular file, then Size is its size (in bytes),
% otherwise Size is -1.
%
:- pred stream_file_size(stream::in, int64::out, io::di, io::uo) is det.
:- pragma foreign_proc("C",
stream_file_size(Stream::in, Size::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness, no_sharing],
"
#if defined(MR_HAVE_FSTAT) && \
(defined(MR_HAVE_FILENO) || defined(fileno)) && defined(S_ISREG)
struct stat s;
if (MR_IS_FILE_STREAM(*Stream)) {
if (fstat(fileno(MR_file(*Stream)), &s) == 0 && S_ISREG(s.st_mode)) {
Size = s.st_size;
} else {
Size = -1;
}
} else {
Size = -1;
}
#else
Size = -1;
#endif
").
:- pragma foreign_proc("C#",
stream_file_size(Stream::in, Size::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe],
"{
if (Stream.stream.CanSeek) {
Size = Stream.stream.Length;
} else {
Size = -1;
}
}").
:- pragma foreign_proc("Java",
stream_file_size(Stream::in, Size::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
try {
Size = ((jmercury.io__stream_ops.MR_BinaryFile) Stream).size();
} catch (java.io.IOException e) {
Size = -1;
}
").
%---------------------%
% A buffer is an array of chars.
% For C backends, it is a C array of C chars.
% For other backends, it is a Mercury array of Mercury chars.
% XXX It would be better to use a char_array type rather than array(char).
% This is because on the Java (and maybe the C#) backend, indexing into
% an array whose element type is known statically requires less overhead.
%
% It may be possible to merge with string.string_buffer.
%
:- type buffer
---> buffer(array(char)).
:- pragma foreign_type(c, buffer, "char *", [can_pass_as_mercury_type]).
% XXX Extend the workaround for no `ui' modes in array.m.
:- inst uniq_buffer for buffer/0
---> buffer(uniq_array).
:- mode buffer_di == di(uniq_buffer).
:- mode buffer_uo == out(uniq_buffer).
:- pred alloc_buffer(int::in, buffer::buffer_uo) is det.
:- pragma consider_used(pred(alloc_buffer/2)).
:- pragma foreign_proc("C",
alloc_buffer(Size::in, Buffer::buffer_uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness, no_sharing],
"{
MR_Word buf;
MR_offset_incr_hp_atomic_msg(buf, 0,
(Size * sizeof(char) + sizeof(MR_Word) - 1) / sizeof(MR_Word),
MR_ALLOC_ID, ""io.buffer/0"");
Buffer = (char *) buf;
}").
alloc_buffer(Size, buffer(Array)) :-
char.det_from_int(0, NullChar),
array.init(Size, NullChar, Array).
:- pred resize_buffer(int::in, int::in,
buffer::buffer_di, buffer::buffer_uo) is det.
:- pragma foreign_proc("C",
resize_buffer(OldSize::in, NewSize::in,
Buffer0::buffer_di, Buffer::buffer_uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness],
"{
MR_CHECK_EXPR_TYPE(Buffer0, char *);
MR_CHECK_EXPR_TYPE(Buffer, char *);
#ifdef MR_CONSERVATIVE_GC
Buffer = MR_GC_realloc(Buffer0, NewSize * sizeof(char));
#else
if (Buffer0 + OldSize == (char *) MR_hp) {
MR_Word next;
MR_offset_incr_hp_atomic_msg(next, 0,
(NewSize * sizeof(char) + sizeof(MR_Word) - 1)
/ sizeof(MR_Word),
MR_ALLOC_ID, ""io.buffer/0"");
assert(Buffer0 + OldSize == (char *) next);
Buffer = Buffer0;
} else {
// Just have to alloc and copy.
MR_Word buf;
MR_offset_incr_hp_atomic_msg(buf, 0,
(NewSize * sizeof(char) + sizeof(MR_Word) - 1)
/ sizeof(MR_Word),
MR_ALLOC_ID, ""io.buffer/0"");
Buffer = (char *) buf;
if (OldSize > NewSize) {
MR_memcpy(Buffer, Buffer0, NewSize);
} else {
MR_memcpy(Buffer, Buffer0, OldSize);
}
}
#endif
}").
resize_buffer(_OldSize, NewSize, buffer(Array0), buffer(Array)) :-
char.det_from_int(0, Char),
array.resize(NewSize, Char, Array0, Array).
:- pred buffer_and_pos_to_string_and_length(buffer::buffer_di, int::in,
string::out, int::out) is semidet.
% This predicate is used when compiling to C and C#; this pragma avoids
% a warning when compiling to Java.
:- pragma consider_used(pred(buffer_and_pos_to_string_and_length/4)).
:- pragma foreign_proc("C",
buffer_and_pos_to_string_and_length(Buffer::buffer_di, Pos::in,
Str::out, NumCUs::out),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness],
"{
Str = Buffer;
Str[Pos] = '\\0';
// Check that the string does not contain null characters.
if (strlen(Str) != Pos) {
SUCCESS_INDICATOR = MR_FALSE;
} else {
SUCCESS_INDICATOR = MR_TRUE;
}
// In C, Pos counts bytes, which are the same size as UTF-8 code units.
// NumCUs is expected to be in the code units native to the target
// language, and this is UTF-8, so no conversion needs to be done.
// (Compare to the C# case below.)
NumCUs = Pos;
}").
buffer_and_pos_to_string_and_length(buffer(Array), Pos, Str, NumCUs) :-
% This predicate is used only when compiling to C and C#, and when
% targeting C, we use the foreign_proc above, so this clause is used
% only when targeting C#.
%
% In C#, Pos counts chars, i.e. code points. Most code points occupy
% just one UTF-16 code unit, but some occupy two. The call below to
% semidet_from_char_list will do this expansion as necessary.
% We can't know how many code units the final string contains
% until we count them. (Compare to the C case above.)
%
% XXX The current implementation of read_file_as_string_2
% reads in code units one by one, converts them to code points
% to store them in array slots, then converts the array to a string,
% which converts each code point back into one or two UTF-16 code units.
% A fully C#-specific implementation of read_file_as_string_2,
% one not shared with C, should be able to dispense with all the
% redundant conversions.
array.fetch_items(Array, min(Array), min(Array) + Pos - 1, List),
string.semidet_from_char_list(List, Str),
string.length(Str, NumCUs).
:- pred read_into_buffer(stream::in, buffer::buffer_di, buffer::buffer_uo,
int::in, int::in, int::out, system_error::out, io::di, io::uo) is det.
:- pragma foreign_proc("C",
read_into_buffer(Stream::in, Buffer0::buffer_di, Buffer::buffer_uo,
BufferSize::in, Pos0::in, Pos::out, Error::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
does_not_affect_liveness],
"
size_t bytes_to_read;
size_t bytes_read;
MR_CHECK_EXPR_TYPE(Buffer0, char *);
MR_CHECK_EXPR_TYPE(Buffer, char *);
bytes_to_read = BufferSize - Pos0;
bytes_read = MR_READ(*Stream, Buffer0 + Pos0, bytes_to_read);
Buffer = Buffer0;
Pos = Pos0 + bytes_read;
if (bytes_read < bytes_to_read && MR_FERROR(*Stream)) {
Error = errno;
} else {
Error = 0;
}
").
read_into_buffer(Stream, buffer(Array0), buffer(Array), BufferSize,
Pos0, Pos, Error, !IO) :-
read_into_array(input_stream(Stream), Array0, Array, BufferSize,
Pos0, Pos, Error, !IO).
:- pred read_into_array(input_stream::in,
array(char)::array_di, array(char)::array_uo, int::in, int::in, int::out,
system_error::out, io::di, io::uo) is det.
% This predicate is not used when compiling to C or Java; this pragma avoids
% a warning even in those cases.
:- pragma consider_used(pred(read_into_array/9)).
read_into_array(Stream, !Array, ArraySize, !Pos, Error, !IO) :-
( if !.Pos >= ArraySize then
Error = no_error
else
read_char_code(Stream, ResultCode, Char, Error0, !IO),
(
ResultCode = result_code_ok,
array.set(!.Pos, Char, !Array),
!:Pos = !.Pos + 1,
read_into_array(Stream, !Array, ArraySize, !Pos, Error, !IO)
;
( ResultCode = result_code_eof
; ResultCode = result_code_error
),
Error = Error0
)
).
%---------------------------------------------------------------------------%
:- end_module io.text_read.
%---------------------------------------------------------------------------%

View File

@@ -304,6 +304,7 @@ stdlib_module_doc_undoc("io.primitives_read", undoc).
stdlib_module_doc_undoc("io.primitives_write", undoc).
stdlib_module_doc_undoc("io.stream_db", undoc).
stdlib_module_doc_undoc("io.stream_ops", undoc).
stdlib_module_doc_undoc("io.text_read", undoc).
stdlib_module_doc_undoc("kv_list", doc).
stdlib_module_doc_undoc("lazy", doc).
stdlib_module_doc_undoc("library", doc).