Files
mercury/compiler/string_encoding.m
2025-09-23 15:17:49 +10:00

138 lines
4.6 KiB
Mathematica

%----------------------------------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%----------------------------------------------------------------------------%
% Copyright (C) 2015, 2024-2025 The Mercury team.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%----------------------------------------------------------------------------%
:- module backend_libs.string_encoding.
:- interface.
:- import_module libs.
:- import_module libs.globals.
:- import_module list.
:- import_module string.
% target_char_range(Target, Min, Max):
%
% Return the smallest and largest integers that represent
% valid code points in the encoding we use on the given target platform.
% At present the answer does not depend on Target: Min is 0 and
% Max is 0x10ffff for every target.
%
:- pred target_char_range(compilation_target::in, int::out, int::out) is det.
% target_string_encoding(Target) = Encoding:
%
% Return the string_encoding we use on the given target platform:
% utf8 when targeting C, utf16 when targeting Java or C#.
%
:- func target_string_encoding(compilation_target) = string_encoding.
% to_code_unit_list_in_encoding(Encoding, String, CodeUnits):
%
% Convert a string to the list of its code units in the given encoding.
% Each element of CodeUnits is one code unit, represented as an int.
%
:- pred to_code_unit_list_in_encoding(string_encoding::in, string::in,
list(int)::out) is det.
% from_code_unit_list_in_encoding(Encoding, CodeUnits, String):
% det_from_code_unit_list_in_encoding(Encoding, CodeUnits, String):
%
% Convert a list of code units in the given encoding to a string.
% The semidet version fails if the list does not follow the rules
% of the encoding. The det_ version calls unexpected/2 in that case
% instead of failing.
%
:- pred from_code_unit_list_in_encoding(string_encoding::in, list(int)::in,
string::out) is semidet.
:- pred det_from_code_unit_list_in_encoding(string_encoding::in, list(int)::in,
string::out) is det.
% from_code_unit_list_in_encoding_allow_ill_formed(Encoding, CodeUnits,
%   String):
% det_from_code_unit_list_in_encoding_allow_ill_formed(Encoding, CodeUnits,
%   String):
%
% Convert a list of code units in the given encoding to a string.
% Allows ill-formed sequences, and will succeed *unless* the given list
% includes a zero, signifying a null character.
%
% At the moment, it works only when the encoding specified by the first
% argument is utf8, *and* the compiler's own encoding is utf8.
% If either encoding is utf16, it will throw an exception.
%
% The det_ version calls unexpected/2 in the cases where the semidet
% version would fail.
%
:- pred from_code_unit_list_in_encoding_allow_ill_formed(string_encoding::in,
list(int)::in, string::out) is semidet.
:- pred det_from_code_unit_list_in_encoding_allow_ill_formed(
string_encoding::in, list(int)::in, string::out) is det.
%----------------------------------------------------------------------------%
%----------------------------------------------------------------------------%
:- implementation.
:- import_module require.
% Every existing target shares the same `char' range, so the target
% argument is ignored and the bounds are given directly in the clause head.
target_char_range(_Target, 0, 0x10ffff).
% One clause per compilation target: C uses UTF-8 strings, while
% Java and C# both use UTF-16 strings.
target_string_encoding(target_c) = utf8.
target_string_encoding(target_java) = utf16.
target_string_encoding(target_csharp) = utf16.
% Dispatch on the encoding in the clause head. The `det' declaration
% makes the compiler check that every string_encoding is covered.
to_code_unit_list_in_encoding(utf8, Str, Units) :-
    string.to_utf8_code_unit_list(Str, Units).
to_code_unit_list_in_encoding(utf16, Str, Units) :-
    string.to_utf16_code_unit_list(Str, Units).
from_code_unit_list_in_encoding(Enc, Units, Str) :-
    % This predicate is semidet, so without the require_complete_switch
    % scope a missing encoding would silently become a failure.
    require_complete_switch [Enc]
    (
        Enc = utf16,
        string.from_utf16_code_unit_list(Units, Str)
    ;
        Enc = utf8,
        string.from_utf8_code_unit_list(Units, Str)
    ).
det_from_code_unit_list_in_encoding(Encoding, CodeUnits, String) :-
    % Wrap the semidet conversion: a failure there is reported as an
    % internal error rather than propagated as failure.
    ( from_code_unit_list_in_encoding(Encoding, CodeUnits, Decoded) ->
        String = Decoded
    ;
        unexpected($pred, "from_code_unit_list_in_encoding failed")
    ).
from_code_unit_list_in_encoding_allow_ill_formed(TargetEncoding, Units,
        Str) :-
    % Only the utf8-on-utf8 combination is implemented. Both unsupported
    % combinations abort with an "nyi" (not yet implemented) message.
    require_complete_switch [TargetEncoding]
    (
        TargetEncoding = utf16,
        unexpected($pred, "utf16 is nyi")
    ;
        TargetEncoding = utf8,
        % The encoding used by the compiler's own strings.
        CompilerEncoding = internal_string_encoding,
        (
            CompilerEncoding = utf16,
            unexpected($pred, "implementing on utf16 is nyi")
        ;
            CompilerEncoding = utf8,
            string.from_code_unit_list_allow_ill_formed(Units, Str)
        )
    ).
det_from_code_unit_list_in_encoding_allow_ill_formed(Encoding,
        CodeUnits, String) :-
    % Wrap the semidet conversion: a failure there is reported as an
    % internal error rather than propagated as failure.
    (
        from_code_unit_list_in_encoding_allow_ill_formed(Encoding,
            CodeUnits, Decoded)
    ->
        String = Decoded
    ;
        unexpected($pred,
            "from_code_unit_list_in_encoding_allow_ill_formed failed")
    ).
%----------------------------------------------------------------------------%
:- end_module backend_libs.string_encoding.
%----------------------------------------------------------------------------%