mirror of
https://github.com/Mercury-Language/mercury.git
synced 2026-04-15 09:23:44 +00:00
138 lines
4.6 KiB
Mathematica
138 lines
4.6 KiB
Mathematica
%----------------------------------------------------------------------------%
|
|
% vim: ft=mercury ts=4 sw=4 et
|
|
%----------------------------------------------------------------------------%
|
|
% Copyright (C) 2015, 2024-2025 The Mercury team.
|
|
% This file may only be copied under the terms of the GNU General
|
|
% Public License - see the file COPYING in the Mercury distribution.
|
|
%----------------------------------------------------------------------------%
|
|
|
|
:- module backend_libs.string_encoding.
|
|
:- interface.
|
|
|
|
:- import_module libs.
|
|
:- import_module libs.globals.
|
|
|
|
:- import_module list.
|
|
:- import_module string.
|
|
|
|
% target_char_range(Target, Min, Max):
%
% Min and Max are respectively the smallest and the largest integers
% that represent valid code points in the encoding we use on the
% target platform Target.
%
:- pred target_char_range(compilation_target::in,
    int::out, int::out) is det.
|
|
|
|
% target_string_encoding(Target) = Encoding:
%
% Encoding is the string_encoding we use on the target platform Target.
%
:- func target_string_encoding(compilation_target) = string_encoding.
|
|
|
|
% to_code_unit_list_in_encoding(Encoding, String, CodeUnits):
%
% CodeUnits is the list of the code units that make up String
% when String is represented in Encoding.
%
:- pred to_code_unit_list_in_encoding(string_encoding::in,
    string::in, list(int)::out) is det.
|
|
|
|
% from_code_unit_list_in_encoding(Encoding, CodeUnits, String):
% det_from_code_unit_list_in_encoding(Encoding, CodeUnits, String):
%
% String is the string represented by CodeUnits, a list of code units
% in Encoding.
%
% The first version fails if CodeUnits does not follow the rules
% of Encoding. The det_ version throws an exception instead of failing.
%
:- pred from_code_unit_list_in_encoding(string_encoding::in,
    list(int)::in, string::out) is semidet.
:- pred det_from_code_unit_list_in_encoding(string_encoding::in,
    list(int)::in, string::out) is det.
|
|
|
|
% from_code_unit_list_in_encoding_allow_ill_formed(Encoding,
%   CodeUnits, String):
% det_from_code_unit_list_in_encoding_allow_ill_formed(Encoding,
%   CodeUnits, String):
%
% String is the string represented by CodeUnits, a list of code units
% in Encoding. Ill-formed sequences are allowed: the conversion succeeds
% *unless* CodeUnits includes a zero, which would signify a null character.
% In that case, the first version fails, while the det_ version throws
% an exception.
%
% At the moment, this works only when Encoding is utf8 *and* the compiler's
% own encoding is utf8 as well. If either encoding is utf16, both versions
% throw an exception.
%
:- pred from_code_unit_list_in_encoding_allow_ill_formed(
    string_encoding::in, list(int)::in, string::out) is semidet.
:- pred det_from_code_unit_list_in_encoding_allow_ill_formed(
    string_encoding::in, list(int)::in, string::out) is det.
|
|
|
|
%----------------------------------------------------------------------------%
|
|
%----------------------------------------------------------------------------%
|
|
|
|
:- implementation.
|
|
|
|
:- import_module require.
|
|
|
|
% All existing targets have the same range for `char', so the target
% argument does not influence the result: the range is always
% 0 .. 0x10ffff.
target_char_range(_Target, 0, 0x10ffff).
|
|
|
|
% One fact per compilation target: C uses utf8, while Java and C# both
% use utf16. Since functions are det by default, the compiler reports
% an error if a target is left without a clause.
target_string_encoding(target_c) = utf8.
target_string_encoding(target_java) = utf16.
target_string_encoding(target_csharp) = utf16.
|
|
|
|
% One clause per encoding, each delegating to the matching conversion
% predicate in the string module. The det declaration of this predicate
% makes the compiler check that every string_encoding has a clause.
to_code_unit_list_in_encoding(utf8, Str, Units) :-
    string.to_utf8_code_unit_list(Str, Units).
to_code_unit_list_in_encoding(utf16, Str, Units) :-
    string.to_utf16_code_unit_list(Str, Units).
|
|
|
|
from_code_unit_list_in_encoding(Enc, Units, Str) :-
    % This predicate is semidet, so an incomplete switch would not cause
    % a determinism error; the explicit scope keeps the compiler checking
    % that every string_encoding is handled.
    require_complete_switch [Enc]
    (
        Enc = utf16,
        string.from_utf16_code_unit_list(Units, Str)
    ;
        Enc = utf8,
        string.from_utf8_code_unit_list(Units, Str)
    ).
|
|
|
|
% Det wrapper around from_code_unit_list_in_encoding: a failure of the
% semidet conversion is turned into an exception via unexpected/2.
det_from_code_unit_list_in_encoding(Enc, Units, Str) :-
    ( if
        from_code_unit_list_in_encoding(Enc, Units, ConvertedStr)
    then
        Str = ConvertedStr
    else
        unexpected($pred, "from_code_unit_list_in_encoding failed")
    ).
|
|
|
|
from_code_unit_list_in_encoding_allow_ill_formed(Encoding, CodeUnits,
        String) :-
    % Only one combination is implemented: the requested encoding is utf8
    % and the compiler's own (internal) string encoding is utf8 as well.
    % Every other combination throws an exception via unexpected/2.
    require_complete_switch [Encoding]
    (
        Encoding = utf8,
        InternalEncoding = internal_string_encoding,
        % This predicate is semidet, so without this scope an incomplete
        % switch on InternalEncoding would be accepted by the compiler
        % and would silently fail at runtime for the missing encoding.
        require_complete_switch [InternalEncoding]
        (
            InternalEncoding = utf8,
            % This call is the only place where this predicate can fail.
            string.from_code_unit_list_allow_ill_formed(CodeUnits, String)
        ;
            InternalEncoding = utf16,
            unexpected($pred, "implementing on utf16 is nyi")
        )
    ;
        Encoding = utf16,
        unexpected($pred, "utf16 is nyi")
    ).
|
|
|
|
% Det wrapper around from_code_unit_list_in_encoding_allow_ill_formed:
% a failure of the semidet conversion is turned into an exception
% via unexpected/2.
det_from_code_unit_list_in_encoding_allow_ill_formed(Enc, Units, Str) :-
    ( if from_code_unit_list_in_encoding_allow_ill_formed(Enc, Units, Str0) then
        Str = Str0
    else
        unexpected($pred,
            "from_code_unit_list_in_encoding_allow_ill_formed failed")
    ).
|
|
|
|
%----------------------------------------------------------------------------%
|
|
:- end_module backend_libs.string_encoding.
|
|
%----------------------------------------------------------------------------%
|