mirror of
https://github.com/Mercury-Language/mercury.git
synced 2026-04-19 03:13:40 +00:00
... and make {input,output}_stream synonyms for them, rather than vice versa.
library/io.m:
As above.
library/bitmap.m:
library/dir.m:
library/io.primitives_read.m:
library/io.stream_db.m:
library/io.text_read.m:
library/mercury_term_lexer.m:
library/stream.string_writer.m:
Conform to the change above.
tests/hard_coded/stream_string_writer_types.exp:
Expect the new type_ctor for text streams.
688 lines
23 KiB
Mathematica
688 lines
23 KiB
Mathematica
%---------------------------------------------------------------------------%
|
|
% vim: ft=mercury ts=4 sw=4 et
|
|
%---------------------------------------------------------------------------%
|
|
% Copyright (C) 1993-2012 The University of Melbourne.
|
|
% Copyright (C) 2013-2022 The Mercury team.
|
|
% This file is distributed under the terms specified in COPYING.LIB.
|
|
%---------------------------------------------------------------------------%
|
|
%
|
|
% File: io.text_read.m.
|
|
%
|
|
% This module implements the predicates in io.m that read
|
|
% words, lines and files.
|
|
%
|
|
%---------------------------------------------------------------------------%
|
|
%---------------------------------------------------------------------------%
|
|
|
|
:- module io.text_read.
|
|
:- interface.
|
|
|
|
:- import_module bitmap.
|
|
:- import_module list.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% read_word_2(Stream, Result, !IO):
%
% Read characters from Stream up to, but not including, the next
% whitespace character; that whitespace character is put back on Stream.
% Result is
%   - ok(Chars) with the characters read (the empty list if the very
%     next character is whitespace),
%   - eof if end-of-file is seen before any character is read, or
%   - error(Error) if reading a character fails.
%
:- pred read_word_2(io.text_input_stream::in, io.result(list(char))::out,
    io::di, io::uo) is det.
|
|
|
|
%---------------------%
|
|
|
|
% read_line_2(Stream, ResultCode, Error, Chars, !IO):
%
% Read characters from Stream up to and including the next newline.
% Chars holds the characters read before reading stopped. ResultCode and
% Error are those of the read that stopped the line: result_code_ok if a
% newline was found, otherwise the end-of-file or error code returned by
% the failing read.
%
:- pred read_line_2(io.text_input_stream::in, result_code::out,
    system_error::out, list(char)::out, io::di, io::uo) is det.
|
|
|
|
%---------------------%
|
|
|
|
% The possible outcomes of read_line_as_string_2.
% This type is exported to C and Java with the prefix ML_ and in upper case
% (e.g. ML_RLAS_OK) by the foreign_export_enum pragmas in the implementation.
%
:- type read_line_as_string_result
    --->    rlas_ok         % A line was read.
    ;       rlas_eof        % End-of-file was reached before any character.
    ;       rlas_null_char  % The line contains a null character.
    ;       rlas_error.     % Reading from the stream failed.
|
|
|
|
% read_line_as_string_2(Stream, FirstCall, Res, Error, String, !IO):
%
% Read one line from Stream as a string. Res says how the read ended;
% Error is the associated system error, if any.
%
% FirstCall is consulted only by the Mercury clause, which calls itself
% recursively with FirstCall = no: hitting end-of-file yields rlas_eof
% when FirstCall = yes and rlas_ok otherwise. The C and Java foreign_procs
% ignore FirstCall.
%
:- pred read_line_as_string_2(io.stream::in, bool::in,
    read_line_as_string_result::out, system_error::out, string::out,
    io::di, io::uo) is det.
|
|
|
|
%---------------------%
|
|
|
|
% read_file_as_string_2(Stream, String, NumCUs, Error, NullCharError, !IO):
%
% Read the rest of Stream into String. NumCUs is the length of String
% in code units. Error is the system error reported by the last read.
% NullCharError is yes if the data read could not be converted to a string
% (the Java implementation always returns no).
%
:- pred read_file_as_string_2(stream::in, string::out, int::out,
    system_error::out, bool::out, io::di, io::uo) is det.
|
|
|
|
%---------------------%
|
|
|
|
% read_binary_file_as_bitmap_2(Stream, Result, !IO):
%
% Read the bytes remaining on Stream into a bitmap.
% Result is ok(Bitmap) on success, and error(Error) if a read fails or
% the amount of data does not match what the file size promised.
%
:- pred read_binary_file_as_bitmap_2(io.binary_input_stream::in,
    io.res(bitmap)::out, io::di, io::uo) is det.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
:- implementation.
|
|
|
|
:- import_module array.
|
|
:- import_module char.
|
|
:- import_module int.
|
|
:- import_module int64.
|
|
:- import_module io.primitives_read.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% Read one character; if it is not whitespace, read the rest of the word
% recursively and put this character in front of it.
read_word_2(Stream, Result, !IO) :-
    read_char(Stream, ReadResult, !IO),
    (
        ReadResult = ok(Char),
        ( if char.is_whitespace(Char) then
            % The whitespace character ends the word, but it is not
            % part of it, so give it back to the stream.
            putback_char(Stream, Char, !IO),
            Result = ok([])
        else
            read_word_2(Stream, RestResult, !IO),
            (
                RestResult = ok(RestChars),
                Result = ok([Char | RestChars])
            ;
                % End-of-file after at least one character just ends
                % the word.
                RestResult = eof,
                Result = ok([Char])
            ;
                RestResult = error(_),
                Result = RestResult
            )
        )
    ;
        ReadResult = eof,
        Result = eof
    ;
        ReadResult = error(Error),
        Result = error(Error)
    ).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% Read one character; unless it is a newline, read the rest of the line
% recursively and put this character in front of it.
read_line_2(Stream, Result, Error, Chars, !IO) :-
    read_char_code(Stream, CharResult, CharError, Char, !IO),
    (
        % End-of-file and errors both stop the line here; the code and
        % the system error are passed to the caller unchanged.
        ( CharResult = result_code_eof
        ; CharResult = result_code_error
        ),
        Result = CharResult,
        Error = CharError,
        Chars = []
    ;
        CharResult = result_code_ok,
        ( if Char = '\n' then
            % The newline is included in the returned line.
            Result = result_code_ok,
            Error = CharError,
            Chars = [Char]
        else
            read_line_2(Stream, Result, Error, TailChars, !IO),
            % lcmc: presumably this relies on the last-call-modulo-
            % constructor optimisation to make the call above a tail call.
            Chars = [Char | TailChars]
        )
    ).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% Make the constructors of read_line_as_string_result available to the
% C and Java foreign code below under the names ML_RLAS_OK, ML_RLAS_EOF,
% ML_RLAS_NULL_CHAR and ML_RLAS_ERROR.
:- pragma foreign_export_enum("C", read_line_as_string_result/0,
    [prefix("ML_"), uppercase]).
:- pragma foreign_export_enum("Java", read_line_as_string_result/0,
    [prefix("ML_"), uppercase]).
|
|
|
|
% C implementation: read bytes one at a time into a buffer that starts on
% the C stack and moves to the C heap once it fills up, then copy the line
% into a string allocated on the Mercury heap.
:- pragma foreign_proc("C",
    read_line_as_string_2(Stream::in, _FirstCall::in, Res::out, Error::out,
        RetString::out, _IO0::di, _IO::uo),
    [will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
        does_not_affect_liveness, no_sharing],
"
#define ML_IO_READ_LINE_GROW(n) ((n) * 3 / 2)
#define ML_IO_BYTES_TO_WORDS(n) (((n) + sizeof(MR_Word) - 1) / sizeof(MR_Word))
#define ML_IO_READ_LINE_START   1024

    char    stack_buf[ML_IO_READ_LINE_START];
    char    *line_buf = stack_buf;
    size_t  line_buf_size = ML_IO_READ_LINE_START;
    size_t  len = 0;
    int     byte = '\\0';

    Res = ML_RLAS_OK;
    Error = 0;
    // Stop after storing a newline; the other exits are the breaks below.
    while (byte != '\\n') {
        byte = mercury_get_byte(Stream);
        if (byte == EOF) {
            // EOF or error is reported only if nothing has been read yet;
            // otherwise the bytes read so far are returned as the line.
            if (len == 0) {
                if (MR_FERROR(*Stream)) {
                    Res = ML_RLAS_ERROR;
                    Error = errno;
                } else {
                    Res = ML_RLAS_EOF;
                }
            }
            break;
        }
        if (byte == 0) {
            Res = ML_RLAS_NULL_CHAR;
            break;
        }
        line_buf[len++] = (char) byte;
        MR_assert(len <= line_buf_size);
        if (len == line_buf_size) {
            // The buffer is full: grow it by half.
            line_buf_size = ML_IO_READ_LINE_GROW(line_buf_size);
            if (line_buf == stack_buf) {
                // First growth: move from the stack buffer to the heap.
                line_buf = MR_NEW_ARRAY(char, line_buf_size);
                MR_memcpy(line_buf, stack_buf, ML_IO_READ_LINE_START);
            } else {
                line_buf = MR_RESIZE_ARRAY(line_buf, char, line_buf_size);
            }
        }
    }

    if (Res == ML_RLAS_OK) {
        // Copy the line, plus a terminating NUL, onto the Mercury heap.
        MR_Word ret_string_word;
        MR_offset_incr_hp_atomic_msg(ret_string_word,
            0, ML_IO_BYTES_TO_WORDS((len + 1) * sizeof(char)),
            MR_ALLOC_ID, ""string.string/0"");
        RetString = (MR_String) ret_string_word;
        MR_memcpy(RetString, line_buf, len * sizeof(char));
        RetString[len] = '\\0';
    } else {
        RetString = MR_make_string_const("""");
    }

    // Release the heap buffer, if we ever switched to one.
    if (line_buf != stack_buf) {
        MR_free(line_buf);
    }
").
|
|
|
|
% Java implementation: delegate to MR_TextInputFile.read_line(), whose
% null return value is taken to mean end-of-file.
%
% RetString is always bound to a non-null Java string: on end-of-file and
% on error it is the empty string, matching what the C implementation
% returns in those cases.
:- pragma foreign_proc("Java",
    read_line_as_string_2(Stream::in, _FirstCall::in, Res::out, Error::out,
        RetString::out, _IO0::di, _IO::uo),
    [will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
        does_not_affect_liveness, may_not_duplicate],
"
    try {
        RetString =
            ((jmercury.io__stream_ops.MR_TextInputFile) Stream).read_line();
        if (RetString != null) {
            Res = ML_RLAS_OK;
        } else {
            Res = ML_RLAS_EOF;
            // Do not hand a Java null back to Mercury as a string.
            RetString = """";
        }
        Error = null;
    } catch (java.io.IOException e) {
        Res = ML_RLAS_ERROR;
        RetString = """";
        Error = e;
    }
").
|
|
|
|
% Mercury implementation, used where no foreign_proc above applies.
% It reads one character, reads the rest of the line recursively
% (with FirstCall = no), and then puts the character in front of the rest.
read_line_as_string_2(Stream, FirstCall, Res, Error, String, !IO) :-
    % XXX This is terribly inefficient; a better approach would be
    % to use a buffer, as is done for io.read_file_as_string.
    read_char_code(text_input_stream(Stream), ReadCode, ReadError, Char, !IO),
    (
        ReadCode = result_code_error,
        Res = rlas_error,
        Error = ReadError,
        String = ""
    ;
        ReadCode = result_code_eof,
        % End-of-file counts as such only if it is the first thing we
        % see; after at least one character it merely ends the line.
        (
            FirstCall = no,
            Res = rlas_ok
        ;
            FirstCall = yes,
            Res = rlas_eof
        ),
        Error = ReadError,
        String = ""
    ;
        ReadCode = result_code_ok,
        ( if Char = '\n' then
            Res = rlas_ok,
            Error = ReadError,
            String = "\n"
        else if char.to_int(Char, 0) then
            Res = rlas_null_char,
            Error = ReadError,
            String = ""
        else
            read_line_as_string_2(Stream, no, Res, Error, RestOfLine, !IO),
            % Construct String from its first character and its tail.
            string.first_char(String, Char, RestOfLine)
        )
    ).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% Java implementation: let MR_TextInputFile.read_file() append the contents
% of the stream to a StringBuilder. Whatever was appended before an
% IOException is still returned, together with the exception as Error.
% This implementation never reports a null character error.
:- pragma foreign_proc("Java",
    read_file_as_string_2(Stream::in, String::out, NumCUs::out,
        Error::out, NullCharError::out, _IO0::di, _IO::uo),
    [will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
    StringBuilder contents = new StringBuilder();
    NullCharError = bool.NO;
    try {
        ((jmercury.io__stream_ops.MR_TextInputFile) Stream)
            .read_file(contents);
        Error = null;
    } catch (java.io.IOException e) {
        Error = e;
    }
    String = contents.toString();
    NumCUs = String.length();
").
|
|
|
|
% Mercury implementation (when targeting Java, the foreign_proc above
% replaces this clause): choose an initial buffer size, allocate the buffer,
% and let read_file_as_string_loop fill it and turn it into a string.
read_file_as_string_2(Stream, Str, NumCUs, Error, NullCharError, !IO) :-
    % If the stream is a regular file, size the buffer according to
    % the size of the file. Otherwise, use a default buffer size of
    % 4k minus a bit (to give malloc some room).
    input_stream_file_size(text_input_stream(Stream), FileSize, !IO),
    ( if FileSize >= 0 then
        % When targeting C, this reserves just enough space for all
        % the bytes in the file, plus the final NUL character.
        %
        % When targeting C#, this reserves one slot in an array of code
        % points for each byte in the file, plus the NUL. The buffer may
        % therefore be bigger than needed; how much bigger depends on how
        % many code points in the file occupy more than one code unit.
        InitBufferSize = FileSize + 1
    else
        InitBufferSize = 4000
    ),
    alloc_buffer(InitBufferSize, InitBuffer),
    % Read the file into the buffer (resizing it as we go if necessary),
    % convert the buffer into a string, and see if anything went wrong.
    %
    % The position argument starts at zero.
    % When targeting C, it counts UTF-8 code *units* (in the usual case
    % where the input is valid UTF-8; otherwise, it counts bytes).
    % When targeting C#, it counts code *points*.
    read_file_as_string_loop(text_input_stream(Stream), InitBuffer,
        InitBufferSize, 0, Str, NumCUs, Error, NullCharError, !IO).
|
|
|
|
% read_file_as_string_loop(Stream, Buffer, BufferSize, Pos,
%   Str, NumCUs, Error, NullCharError, !IO):
%
% Fill Buffer (whose capacity is BufferSize) from position Pos onwards
% with data read from Stream. If that fills the buffer completely, double
% its size and continue; otherwise convert what was read into Str.
%
:- pred read_file_as_string_loop(text_input_stream::in, buffer::buffer_di,
    int::in, int::in, string::out, int::out, system_error::out, bool::out,
    io::di, io::uo) is det.
% This predicate is not used when compiling to Java; this pragma avoids
% a warning even in that case.
:- pragma consider_used(pred(read_file_as_string_loop/10)).

read_file_as_string_loop(Stream, Buffer0, BufferSize0, Pos0,
        Str, NumCUs, Error, NullCharError, !IO) :-
    Stream = text_input_stream(RealStream),
    read_into_buffer(RealStream, Buffer0, Buffer1, BufferSize0, Pos0, Pos1,
        ReadError, !IO),
    ( if Pos1 = BufferSize0 then
        % The buffer is full; make room for more of the file.
        % Doubling its size should catch up to its actual size quickly.
        BufferSize1 = BufferSize0 * 2,
        resize_buffer(BufferSize0, BufferSize1, Buffer1, Buffer2),
        read_file_as_string_loop(Stream, Buffer2, BufferSize1, Pos1,
            Str, NumCUs, Error, NullCharError, !IO)
    else if Pos1 < BufferSize0 then
        % The buffer is not full: we hit end-of-file or an error.
        ( if
            buffer_and_pos_to_string_and_length(Buffer1, Pos1,
                ConvertedStr, ConvertedNumCUs)
        then
            Str = ConvertedStr,
            NumCUs = ConvertedNumCUs,
            NullCharError = no
        else
            % The conversion failed, so report a null character error.
            Str = "",
            NumCUs = 0,
            NullCharError = yes
        ),
        Error = ReadError
    else
        % The position must never move past the end of the buffer.
        error("io.read_file_as_string: buffer overflow")
    ).
|
|
|
|
%---------------------%
|
|
|
|
% If the size of the file is known, read everything between the current
% offset and the end of the file into a single bitmap of exactly that size.
% Otherwise, read the stream in chunks of doubling size and append them.
read_binary_file_as_bitmap_2(Stream, Result, !IO) :-
    % Check if the stream is a regular file; if so, allocate a buffer
    % according to the size of the file. Otherwise, just use a default buffer
    % size of 4k minus a bit (to give malloc some room).
    binary_input_stream_file_size(Stream, FileSize, !IO),
    ( if FileSize >= 0i64 then
        binary_input_stream_offset64(Stream, CurrentOffset, !IO),
        BytesAfterOffset = FileSize - CurrentOffset,
        % If the stream is positioned beyond the end of the file, there is
        % nothing left to read. Clamp the size to zero instead of passing
        % a negative size on to bitmap.init.
        ( if BytesAfterOffset < 0i64 then
            RemainingSizeInt64 = 0i64
        else
            RemainingSizeInt64 = BytesAfterOffset
        ),
        ( if
            % On a 32 bit platform, the remaining data may not fit
            % into a buffer whose size is given by an int.
            int.bits_per_int = 32,
            RemainingSizeInt64 > int64.from_int(int.max_int)
        then
            Result = error(io_error_string("io.read_binary_file_as_bitmap: " ++
                "file size exceeds maximum buffer size"))
        else
            RemainingSize = int64.cast_to_int(RemainingSizeInt64),
            some [!BM] (
                !:BM = bitmap.init(RemainingSize * bits_per_byte),
                ( if RemainingSize = 0 then
                    Result = ok(!.BM)
                else
                    bitmap.read_bitmap_range(Stream, 0, RemainingSize, !BM,
                        BytesRead, ReadResult, !IO),
                    (
                        ReadResult = ok,
                        % Reading fewer bytes than the file size promised
                        % is reported as an error.
                        ( if BytesRead = RemainingSize then
                            Result = ok(!.BM)
                        else
                            Result = error(io_error_string(
                                "io.read_binary_file_as_bitmap: " ++
                                "incorrect file size"))
                        )
                    ;
                        ReadResult = error(Msg),
                        Result = error(Msg)
                    )
                )
            )
        )
    else
        BufferSize = 4000,
        read_binary_file_as_bitmap_from_stream(Stream, BufferSize,
            Res, [], RevBitmaps, !IO),
        (
            Res = ok,
            % The chunks were accumulated in reverse order.
            Result = ok(bitmap.append_list(reverse(RevBitmaps)))
        ;
            Res = error(Msg),
            Result = error(Msg)
        )
    ).
|
|
|
|
% read_binary_file_as_bitmap_from_stream(Stream, BufferSize, Res,
%   !RevBitmaps, !IO):
%
% Read up to BufferSize bytes from Stream into a new bitmap and push it
% onto the front of the accumulator (which therefore holds the chunks in
% reverse order). A chunk that is filled completely is followed by another
% read with twice the buffer size; a short chunk is shrunk to the number
% of bytes actually read, and ends the loop.
%
:- pred read_binary_file_as_bitmap_from_stream(io.binary_input_stream::in,
    num_bytes::in, io.res::out, list(bitmap)::in, list(bitmap)::out,
    io::di, io::uo) is det.

read_binary_file_as_bitmap_from_stream(Stream, BufferSize, Res, !BMs, !IO) :-
    Chunk0 = bitmap.init(BufferSize * bits_per_byte),
    bitmap.read_bitmap_range(Stream, 0, BufferSize, Chunk0, Chunk1,
        NumBytesRead, ReadRes, !IO),
    (
        ReadRes = error(Err),
        Res = error(Err)
    ;
        ReadRes = ok,
        ( if NumBytesRead < BufferSize then
            % A short read: this is the last chunk.
            Chunk = bitmap.shrink_without_copying(Chunk1,
                NumBytesRead * bits_per_byte),
            !:BMs = [Chunk | !.BMs],
            Res = ok
        else
            !:BMs = [Chunk1 | !.BMs],
            % Double the buffer size each time.
            read_binary_file_as_bitmap_from_stream(Stream, BufferSize * 2,
                Res, !BMs, !IO)
        )
    ).
|
|
|
|
%---------------------%
|
|
|
|
% input_stream_file_size(Stream, Size, !IO):
%
% Return the result of stream_file_size for the stream underlying
% the given text input stream, cast from int64 to int.
%
% XXX FIXME this should return an int64.
%
:- pred input_stream_file_size(io.text_input_stream::in, int::out,
    io::di, io::uo) is det.
:- pragma consider_used(pred(input_stream_file_size/4)).

input_stream_file_size(TextStream, Size, !IO) :-
    TextStream = text_input_stream(Stream),
    stream_file_size(Stream, FileSize64, !IO),
    Size = int64.cast_to_int(FileSize64).
|
|
|
|
% binary_input_stream_file_size(Stream, Size, !IO):
%
% Return the result of stream_file_size for the stream underlying
% the given binary input stream.
%
:- pred binary_input_stream_file_size(io.binary_input_stream::in, int64::out,
    io::di, io::uo) is det.

binary_input_stream_file_size(BinaryStream, Size, !IO) :-
    BinaryStream = binary_input_stream(Stream),
    stream_file_size(Stream, Size, !IO).
|
|
|
|
% stream_file_size(Stream, Size, !IO):
%
% If Stream is a regular file, then set Size to its size (in bytes),
% otherwise set Size to -1.
%
:- pred stream_file_size(stream::in, int64::out, io::di, io::uo) is det.

% C: ask fstat() about the file descriptor of the stream. The answer is -1
% if fstat(), fileno() or S_ISREG are not available, if Stream is not
% a file stream, if fstat() fails, or if the file is not a regular file.
:- pragma foreign_proc("C",
    stream_file_size(Stream::in, Size::out, _IO0::di, _IO::uo),
    [will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
        does_not_affect_liveness, no_sharing],
"
    // Assume that the size is unknown until proven otherwise.
    Size = -1;
#if defined(MR_HAVE_FSTAT) && \
        (defined(MR_HAVE_FILENO) || defined(fileno)) && defined(S_ISREG)
    if (MR_IS_FILE_STREAM(*Stream)) {
        struct stat s;
        if (fstat(fileno(MR_file(*Stream)), &s) == 0 && S_ISREG(s.st_mode)) {
            Size = s.st_size;
        }
    }
#endif
").

% C#: the length is reported only for streams that support seeking.
:- pragma foreign_proc("C#",
    stream_file_size(Stream::in, Size::out, _IO0::di, _IO::uo),
    [will_not_call_mercury, promise_pure, thread_safe],
"{
    Size = Stream.stream.CanSeek ? Stream.stream.Length : -1;
}").

% Java: ask the MR_BinaryFile for its size; an IOException yields -1.
:- pragma foreign_proc("Java",
    stream_file_size(Stream::in, Size::out, _IO0::di, _IO::uo),
    [will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
"
    try {
        Size = ((jmercury.io__stream_ops.MR_BinaryFile) Stream).size();
    } catch (java.io.IOException e) {
        // The size cannot be determined.
        Size = -1;
    }
").
|
|
|
|
%---------------------%
|
|
|
|
% A buffer is an array of chars.
% For C backends, it is a C array of C chars (see the foreign_type below).
% For other backends, it is a Mercury array of Mercury chars.
%
% XXX It would be better to use a char_array type rather than array(char).
% This is because on the Java (and maybe the C#) backend, indexing into
% an array whose element type is known statically requires less overhead.
%
% It may be possible to merge this type with string.string_buffer.
%
:- type buffer
    --->    buffer(array(char)).

:- pragma foreign_type(c, buffer, "char *", [can_pass_as_mercury_type]).

% XXX Extend the workaround for no `ui' modes in array.m.
:- inst uniq_buffer for buffer/0
    --->    buffer(uniq_array).

% The modes in which buffers are passed to and returned from the buffer
% handling predicates below.
:- mode buffer_di == di(uniq_buffer).
:- mode buffer_uo == out(uniq_buffer).
|
|
|
|
% alloc_buffer(Size, Buffer):
%
% Allocate a buffer with room for Size chars.
%
:- pred alloc_buffer(int::in, buffer::buffer_uo) is det.
:- pragma consider_used(pred(alloc_buffer/2)).

% C: allocate enough words on the Mercury heap to hold Size chars.
:- pragma foreign_proc("C",
    alloc_buffer(Size::in, Buffer::buffer_uo),
    [will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
        does_not_affect_liveness, no_sharing],
"{
    MR_Word storage;

    // Round the size in bytes up to a whole number of words.
    MR_offset_incr_hp_atomic_msg(storage, 0,
        (Size * sizeof(char) + sizeof(MR_Word) - 1) / sizeof(MR_Word),
        MR_ALLOC_ID, ""io.buffer/0"");
    Buffer = (char *) storage;
}").

% Other backends: an array of Size chars, each initialised to the null
% character.
alloc_buffer(Size, buffer(Array)) :-
    array.init(Size, char.det_from_int(0), Array).
|
|
|
|
% resize_buffer(OldSize, NewSize, Buffer0, Buffer):
%
% Buffer is a buffer with room for NewSize chars, whose contents are
% carried over from Buffer0, which had room for OldSize chars.
%
:- pred resize_buffer(int::in, int::in,
    buffer::buffer_di, buffer::buffer_uo) is det.

:- pragma foreign_proc("C",
    resize_buffer(OldSize::in, NewSize::in,
        Buffer0::buffer_di, Buffer::buffer_uo),
    [will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
        does_not_affect_liveness],
"{
    MR_CHECK_EXPR_TYPE(Buffer0, char *);
    MR_CHECK_EXPR_TYPE(Buffer, char *);

#ifdef MR_CONSERVATIVE_GC
    Buffer = MR_GC_realloc(Buffer0, NewSize * sizeof(char));
#else
    if (Buffer0 + OldSize == (char *) MR_hp) {
        // The old buffer ends exactly at the heap pointer, so the memory
        // allocated next is contiguous with it (the assert checks this),
        // and the old buffer can simply be kept.
        MR_Word extension;

        MR_offset_incr_hp_atomic_msg(extension, 0,
            (NewSize * sizeof(char) + sizeof(MR_Word) - 1)
                / sizeof(MR_Word),
            MR_ALLOC_ID, ""io.buffer/0"");
        assert(Buffer0 + OldSize == (char *) extension);
        Buffer = Buffer0;
    } else {
        // Just have to alloc and copy.
        MR_Word     fresh;
        MR_Integer  bytes_to_copy;

        MR_offset_incr_hp_atomic_msg(fresh, 0,
            (NewSize * sizeof(char) + sizeof(MR_Word) - 1)
                / sizeof(MR_Word),
            MR_ALLOC_ID, ""io.buffer/0"");
        Buffer = (char *) fresh;
        // Copy only as much as fits into the new buffer.
        bytes_to_copy = (OldSize > NewSize) ? NewSize : OldSize;
        MR_memcpy(Buffer, Buffer0, bytes_to_copy);
    }
#endif
}").

% Other backends: resize the array, filling any new slots with
% the null character.
resize_buffer(_OldSize, NewSize, buffer(Array0), buffer(Array)) :-
    array.resize(NewSize, char.det_from_int(0), Array0, Array).
|
|
|
|
% buffer_and_pos_to_string_and_length(Buffer, Pos, Str, NumCUs):
%
% Convert the first Pos elements of Buffer into the string Str, whose
% length in code units is NumCUs. Fails if the conversion is not possible;
% in the C version, that is when the data contains a null character.
%
:- pred buffer_and_pos_to_string_and_length(buffer::buffer_di, int::in,
    string::out, int::out) is semidet.
% This predicate is used when compiling to C and C#; this pragma avoids
% a warning when compiling to Java.
:- pragma consider_used(pred(buffer_and_pos_to_string_and_length/4)).

:- pragma foreign_proc("C",
    buffer_and_pos_to_string_and_length(Buffer::buffer_di, Pos::in,
        Str::out, NumCUs::out),
    [will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
        does_not_affect_liveness],
"{
    // The buffer itself becomes the string; it only needs a terminator.
    Str = Buffer;
    Str[Pos] = '\\0';

    // Succeed only if the string does not contain null characters,
    // i.e. if the first NUL in it is the terminator we have just added.
    SUCCESS_INDICATOR = (strlen(Str) != Pos) ? MR_FALSE : MR_TRUE;

    // In C, Pos counts bytes, which are the same size as UTF-8 code units.
    // NumCUs is expected to be in the code units native to the target
    // language, and this is UTF-8, so no conversion needs to be done.
    // (Compare to the C# case below.)
    NumCUs = Pos;
}").

buffer_and_pos_to_string_and_length(buffer(Array), Pos, Str, NumCUs) :-
    % This predicate is used only when compiling to C and C#, and when
    % targeting C, we use the foreign_proc above, so this clause is used
    % only when targeting C#.
    %
    % In C#, Pos counts chars, i.e. code points. Most code points occupy
    % just one UTF-16 code unit, but some occupy two. The call below to
    % semidet_from_char_list will do this expansion as necessary.
    % We cannot know how many code units the final string contains
    % until we count them. (Compare to the C case above.)
    %
    % XXX The current implementation of read_file_as_string_2
    % reads in code units one by one, converts them to code points
    % to store them in array slots, then converts the array to a string,
    % which converts each code point back into one or two UTF-16 code units.
    % A fully C#-specific implementation of read_file_as_string_2,
    % one not shared with C, should be able to dispense with all the
    % redundant conversions.
    FirstIndex = array.min(Array),
    LastIndex = FirstIndex + Pos - 1,
    array.fetch_items(Array, FirstIndex, LastIndex, Chars),
    string.semidet_from_char_list(Chars, Str),
    string.length(Str, NumCUs).
|
|
|
|
% read_into_buffer(Stream, Buffer0, Buffer, BufferSize, Pos0, Pos,
%   Error, !IO):
%
% Read data from Stream into the buffer, starting at position Pos0 and
% going no further than BufferSize. Pos is the position just after the
% last element stored. Error is the system error of the read, if any.
%
:- pred read_into_buffer(stream::in, buffer::buffer_di, buffer::buffer_uo,
    int::in, int::in, int::out, system_error::out, io::di, io::uo) is det.

:- pragma foreign_proc("C",
    read_into_buffer(Stream::in, Buffer0::buffer_di, Buffer::buffer_uo,
        BufferSize::in, Pos0::in, Pos::out, Error::out, _IO0::di, _IO::uo),
    [will_not_call_mercury, promise_pure, tabled_for_io, thread_safe,
        does_not_affect_liveness],
"
    size_t  wanted;
    size_t  got;

    MR_CHECK_EXPR_TYPE(Buffer0, char *);
    MR_CHECK_EXPR_TYPE(Buffer, char *);

    // Try to fill all the space that is left in the buffer.
    wanted = BufferSize - Pos0;
    got = MR_READ(*Stream, Buffer0 + Pos0, wanted);

    // The data is read in place, so the buffer stays where it is.
    Buffer = Buffer0;
    Pos = Pos0 + got;

    // A short read is an error only if the stream says so.
    Error = (got < wanted && MR_FERROR(*Stream)) ? errno : 0;
").

read_into_buffer(Stream, buffer(Array0), buffer(Array), BufferSize,
        !Pos, Error, !IO) :-
    % When targeting C, the foreign_proc above is used instead of this
    % clause, and when targeting Java, this predicate is not used at all,
    % so this clause is used only when targeting C#.
    read_into_array(text_input_stream(Stream), Array0, Array, BufferSize,
        !Pos, Error, !IO).
|
|
|
|
% read_into_array(Stream, !Array, ArraySize, !Pos, Error, !IO):
%
% Read characters from Stream one at a time, storing them in !Array from
% position !.Pos onwards, until either position ArraySize is reached
% (Error is then no_error), or a read does not return a character
% (Error is then the system error returned by that read).
%
:- pred read_into_array(io.text_input_stream::in,
    array(char)::array_di, array(char)::array_uo, int::in, int::in, int::out,
    system_error::out, io::di, io::uo) is det.
% This predicate is not used when compiling to C or Java; this pragma avoids
% a warning even in those cases.
:- pragma consider_used(pred(read_into_array/9)).

read_into_array(Stream, !Array, ArraySize, !Pos, Error, !IO) :-
    ( if !.Pos < ArraySize then
        read_char_code(Stream, ReadCode, ReadError, Char, !IO),
        (
            % End-of-file and errors both stop the loop; the caller tells
            % them apart by looking at Error.
            ( ReadCode = result_code_eof
            ; ReadCode = result_code_error
            ),
            Error = ReadError
        ;
            ReadCode = result_code_ok,
            array.set(!.Pos, Char, !Array),
            !:Pos = !.Pos + 1,
            read_into_array(Stream, !Array, ArraySize, !Pos, Error, !IO)
        )
    else
        % The array is full.
        Error = no_error
    ).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
:- end_module io.text_read.
|
|
%---------------------------------------------------------------------------%
|