mirror of
https://github.com/Mercury-Language/mercury.git
synced 2026-04-15 01:13:30 +00:00
library/char.m:
Adjust the description of binary digits to be consistent with
elsewhere in this module.
Fix a double-up word.
1275 lines
36 KiB
Mathematica
1275 lines
36 KiB
Mathematica
%---------------------------------------------------------------------------%
|
|
% vim: ft=mercury ts=4 sw=4 et
|
|
%---------------------------------------------------------------------------%
|
|
% Copyright (C) 1994-2008, 2011 The University of Melbourne.
|
|
% Copyright (C) 2013-2015, 2017-2022, 2024-2026 The Mercury team.
|
|
% This file is distributed under the terms specified in COPYING.LIB.
|
|
%---------------------------------------------------------------------------%
|
|
%
|
|
% File: char.m.
|
|
% Main author: fjh.
|
|
% Stability: high.
|
|
%
|
|
% This module defines some predicates that manipulate characters.
|
|
%
|
|
% Originally we used `character' rather than `char' for the type name
|
|
% because `char' was used by NU-Prolog to mean something different.
|
|
% But now we use `char' and the use of `character' is discouraged.
|
|
%
|
|
% All predicates and functions exported by this module that deal with
|
|
% Unicode conform to version 13 of the Unicode standard.
|
|
%
|
|
%---------------------------------------------------------------------------%
|
|
%---------------------------------------------------------------------------%
|
|
|
|
:- module char.
|
|
:- interface.
|
|
|
|
:- import_module enum.
|
|
:- import_module list.
|
|
:- import_module pretty_printer.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% A Unicode code point.
|
|
%
|
|
:- type char == character.
|
|
|
|
:- instance enum(character).
|
|
:- instance uenum(character).
|
|
|
|
% `to_int'/1 and `to_int(in, out)' convert a character to its
|
|
% corresponding numerical code (integer value).
|
|
%
|
|
% `to_int(out, in)' converts an integer value to a character value.
|
|
% It fails for integer values outside of the Unicode range.
|
|
%
|
|
% Be aware that there is no guarantee that characters can be written to
|
|
% files or to the standard output or standard error streams. Files using an
|
|
% 8-bit national character set would only be able to represent a subset of
|
|
% all possible code points. Currently, the Mercury standard library can
|
|
% only read and write UTF-8 text files, so the entire range is supported
|
|
% (excluding surrogate and noncharacter code points).
|
|
%
|
|
% Note that '\0' is not accepted as a Mercury null character literal.
|
|
% Instead, a null character can be created using `det_from_int(0)'.
|
|
% Null characters are not allowed in Mercury strings in C grades.
|
|
%
|
|
:- func to_int(char) = int.
|
|
:- pred to_int(char, int).
|
|
:- mode to_int(in, out) is det.
|
|
:- mode to_int(in, in) is semidet. % implied
|
|
:- mode to_int(out, in) is semidet.
|
|
|
|
% Converts an integer to its corresponding character, if any.
|
|
% A more expressive name for the reverse mode of to_int.
|
|
%
|
|
:- pred from_int(int::in, char::out) is semidet.
|
|
|
|
% Converts an integer to its corresponding character.
|
|
% Throws an exception if there isn't one.
|
|
%
|
|
:- func det_from_int(int) = char.
|
|
:- pred det_from_int(int::in, char::out) is det.
|
|
|
|
% Converts a character to its numerical character code (unsigned integer).
|
|
%
|
|
:- func to_uint(char) = uint.
|
|
|
|
% Converts an unsigned integer to its corresponding character, if any.
|
|
%
|
|
:- pred from_uint(uint::in, char::out) is semidet.
|
|
|
|
% Converts an unsigned integer to its corresponding character.
|
|
% Throws an exception if there isn't one.
|
|
%
|
|
:- func det_from_uint(uint) = char.
|
|
|
|
% Returns the minimum numerical character code.
|
|
%
|
|
:- func min_char_value = int.
|
|
:- pred min_char_value(int::out) is det.
|
|
|
|
% Returns the maximum numerical character code.
|
|
%
|
|
:- func max_char_value = int.
|
|
:- pred max_char_value(int::out) is det.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% True if-and-only-if the character is a lowercase letter (a-z)
|
|
% in the ASCII range.
|
|
%
|
|
:- pred is_lower(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is an uppercase letter (A-Z)
|
|
% in the ASCII range.
|
|
%
|
|
:- pred is_upper(char::in) is semidet.
|
|
|
|
% Convert a character to lowercase.
|
|
% Note that this only converts letters (A-Z) in the ASCII range.
|
|
%
|
|
:- func to_lower(char) = char.
|
|
:- pred to_lower(char::in, char::out) is det.
|
|
|
|
% Convert a character to uppercase.
|
|
% Note that this only converts letters (a-z) in the ASCII range.
|
|
%
|
|
:- func to_upper(char) = char.
|
|
:- pred to_upper(char::in, char::out) is det.
|
|
|
|
% lower_upper(Lower, Upper) is true if-and-only-if
|
|
% Lower is a lowercase letter (a-z) and Upper is the corresponding
|
|
% uppercase letter (A-Z) in the ASCII range.
|
|
%
|
|
:- pred lower_upper(char, char).
|
|
:- mode lower_upper(in, out) is semidet.
|
|
:- mode lower_upper(out, in) is semidet.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% True if-and-only-if the character is in the ASCII range (0-127).
|
|
%
|
|
:- pred is_ascii(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a whitespace character
|
|
% in the ASCII range:
|
|
%
|
|
% U+0020 space
|
|
% U+0009 character tabulation (horizontal tab)
|
|
% U+000A line feed
|
|
% U+000B line tabulation (vertical tab)
|
|
% U+000C form feed
|
|
% U+000D carriage return
|
|
%
|
|
:- pred is_whitespace(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a letter (A-Z, a-z)
|
|
% in the ASCII range.
|
|
%
|
|
:- pred is_alpha(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a letter (A-Z, a-z) or digit (0-9)
|
|
% in the ASCII range.
|
|
%
|
|
:- pred is_alnum(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a letter (A-Z, a-z)
|
|
% or an underscore (_) in the ASCII range.
|
|
%
|
|
:- pred is_alpha_or_underscore(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a letter (A-Z, a-z),
|
|
% a digit (0-9) or an underscore (_) in the ASCII range.
|
|
%
|
|
:- pred is_alnum_or_underscore(char::in) is semidet.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% True if-and-only-if the character is a decimal digit (0-9)
|
|
% in the ASCII range.
|
|
%
|
|
:- pred is_digit(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a binary digit (0 or 1)
|
|
% in the ASCII range.
|
|
%
|
|
:- pred is_binary_digit(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is an octal digit (0-7)
|
|
% in the ASCII range.
|
|
%
|
|
:- pred is_octal_digit(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a decimal digit (0-9)
|
|
% in the ASCII range. Synonym for is_digit/1.
|
|
%
|
|
:- pred is_decimal_digit(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a hexadecimal digit (0-9, a-f, A-F)
|
|
% in the ASCII range.
|
|
%
|
|
:- pred is_hex_digit(char::in) is semidet.
|
|
|
|
% is_base_digit(Base, Digit):
|
|
% True if-and-only-if Digit is a digit in the given Base (0-9, a-z, A-Z).
|
|
% Throws an exception if Base < 2 or Base > 36.
|
|
%
|
|
:- pred is_base_digit(int::in, char::in) is semidet.
|
|
|
|
%---------------------%
|
|
|
|
% binary_digit_to_int(Char, Int):
|
|
% True if-and-only-if Char is a binary digit (0 or 1) representing
|
|
% the value Int.
|
|
%
|
|
:- pred binary_digit_to_int(char::in, int::out) is semidet.
|
|
|
|
% As above, but throws an exception instead of failing.
|
|
%
|
|
:- func det_binary_digit_to_int(char) = int.
|
|
|
|
% octal_digit_to_int(Char, Int):
|
|
% True if-and-only-if Char is an octal digit (0-7) representing
|
|
% the value Int.
|
|
%
|
|
:- pred octal_digit_to_int(char::in, int::out) is semidet.
|
|
|
|
% As above, but throws an exception instead of failing.
|
|
%
|
|
:- func det_octal_digit_to_int(char) = int.
|
|
|
|
% decimal_digit_to_int(Char, Int):
|
|
% True if-and-only-if Char is a decimal digit (0-9) representing
|
|
% the value Int.
|
|
%
|
|
:- pred decimal_digit_to_int(char::in, int::out) is semidet.
|
|
|
|
% As above, but throws an exception instead of failing.
|
|
%
|
|
:- func det_decimal_digit_to_int(char) = int.
|
|
|
|
% hex_digit_to_int(Char, Int):
|
|
% True if-and-only-if Char is a hexadecimal digit (0-9, a-f or A-F)
|
|
% representing the value Int.
|
|
%
|
|
:- pred hex_digit_to_int(char::in, int::out) is semidet.
|
|
|
|
% As above, but throws an exception instead of failing.
|
|
%
|
|
:- func det_hex_digit_to_int(char) = int.
|
|
|
|
% base_digit_to_int(Base, Char, Int):
|
|
% True if-and-only-if Char is a decimal digit (0-9) or a letter (a-z, A-Z)
|
|
% representing the value Int (0-35) in the given base.
|
|
% Throws an exception if Base < 2 or Base > 36.
|
|
%
|
|
:- pred base_digit_to_int(int::in, char::in, int::out) is semidet.
|
|
|
|
% As above, but throws an exception instead of failing.
|
|
%
|
|
:- func det_base_digit_to_int(int, char) = int.
|
|
|
|
% A version of base_digit_to_int that does not check whether
|
|
% Base is in the range 2 to 36. If it is not, the behavior is undefined.
|
|
%
|
|
:- pred unsafe_base_digit_to_int(int::in, char::in, int::out) is semidet.
|
|
|
|
%---------------------%
|
|
|
|
% Convert an integer in the range 0-1 to a binary digit (0 or 1) in the
|
|
% ASCII range.
|
|
%
|
|
:- pred int_to_binary_digit(int::in, char::out) is semidet.
|
|
|
|
% As above, but throw an exception instead of failing.
|
|
%
|
|
:- func det_int_to_binary_digit(int) = char.
|
|
|
|
% Convert an integer 0-7 to an octal digit (0-7) in the ASCII range.
|
|
%
|
|
:- pred int_to_octal_digit(int::in, char::out) is semidet.
|
|
|
|
% As above, but throw an exception instead of failing.
|
|
%
|
|
:- func det_int_to_octal_digit(int) = char.
|
|
|
|
% Convert an integer 0-9 to a decimal digit (0-9) in the ASCII range.
|
|
%
|
|
:- pred int_to_decimal_digit(int::in, char::out) is semidet.
|
|
|
|
% As above, but throw an exception instead of failing.
|
|
%
|
|
:- func det_int_to_decimal_digit(int) = char.
|
|
|
|
% Convert an integer 0-15 to an uppercase hexadecimal digit (0-9, A-F) in
|
|
% the ASCII range.
|
|
%
|
|
:- pred int_to_hex_digit(int::in, char::out) is semidet.
|
|
|
|
% As above, but throw an exception instead of failing.
|
|
%
|
|
:- func det_int_to_hex_digit(int) = char.
|
|
|
|
% base_int_to_digit(Base, Int, Char):
|
|
% True if-and-only-if Char is a decimal digit (0-9) or an uppercase letter
|
|
% (A-Z) representing the value Int (0-35) in the given base.
|
|
% Throws an exception if Base < 2 or Base > 36.
|
|
%
|
|
:- pred base_int_to_digit(int::in, int::in, char::out) is semidet.
|
|
|
|
% As above, but throw an exception instead of failing.
|
|
%
|
|
:- func det_base_int_to_digit(int, int) = char.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% Encode a Unicode code point in UTF-8.
|
|
% Fails for surrogate code points.
|
|
%
|
|
:- pred to_utf8(char::in, list(int)::out) is semidet.
|
|
|
|
% As above, but represent UTF-8 code units using uint8s.
|
|
%
|
|
:- pred to_utf8_uint8(char::in, list(uint8)::out) is semidet.
|
|
|
|
% Encode a Unicode code point in UTF-16 (native endianness).
|
|
% Fails for surrogate code points.
|
|
%
|
|
:- pred to_utf16(char::in, list(int)::out) is semidet.
|
|
|
|
% As above, but represent UTF-16 code units using uint16s.
|
|
%
|
|
:- pred to_utf16_uint16(char::in, list(uint16)::out) is semidet.
|
|
|
|
% True if-and-only-if the character is a Unicode Surrogate code point,
|
|
% that is a code point in General Category `Other,surrogate' (`Cs').
|
|
% In UTF-16, a code point with a scalar value greater than 0xffff is
|
|
% encoded with a pair of surrogate code points.
|
|
%
|
|
:- pred is_surrogate(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a Unicode leading surrogate
|
|
% code point. A leading surrogate code point is in the inclusive range
|
|
% from 0xd800 to 0xdbff.
|
|
%
|
|
:- pred is_leading_surrogate(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a Unicode trailing surrogate
|
|
% code point. A trailing surrogate code point is in the inclusive range
|
|
% from 0xdc00 to 0xdfff.
|
|
%
|
|
:- pred is_trailing_surrogate(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a Unicode Noncharacter code point.
|
|
% Sixty-six code points are not used to encode characters.
|
|
% These code points should not be used for interchange, but may be used
|
|
% internally.
|
|
%
|
|
:- pred is_noncharacter(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a Unicode Control code point,
|
|
% that is a code point in General Category `Other,control' (`Cc').
|
|
%
|
|
:- pred is_control(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a Unicode Space Separator
|
|
% code point, that is a code point in General Category
|
|
% `Separator,space' (`Zs').
|
|
%
|
|
:- pred is_space_separator(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a Unicode Line Separator code point,
|
|
% that is a code point in General Category `Separator,line' (`Zl').
|
|
%
|
|
:- pred is_line_separator(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a Unicode Paragraph Separator
|
|
% code point, that is a code point in General Category
|
|
% `Separator,paragraph' (`Zp').
|
|
%
|
|
:- pred is_paragraph_separator(char::in) is semidet.
|
|
|
|
% True if-and-only-if the character is a Unicode Private-use code point,
|
|
% that is a code point in General Category `Other,private use' (`Co').
|
|
%
|
|
:- pred is_private_use(char::in) is semidet.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% Convert a char to a pretty_printer.doc for formatting.
|
|
%
|
|
:- func char_to_doc(char) = pretty_printer.doc.
|
|
:- pragma obsolete(func(char_to_doc/1), [pretty_printer.char_to_doc/1]).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% The following have all been deprecated.
|
|
|
|
% Use hex_digit_to_int/2 instead.
|
|
%
|
|
:- pred is_hex_digit(char, int).
|
|
:- mode is_hex_digit(in, out) is semidet.
|
|
|
|
% Convert an integer 0-15 to a hexadecimal digit (0-9, A-F) in the ASCII
|
|
% range.
|
|
%
|
|
% Use int_to_hex_digit/2 instead.
|
|
%
|
|
:- pred int_to_hex_char(int, char).
|
|
:- mode int_to_hex_char(in, out) is semidet.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
%
|
|
% Computing hashes of chars.
|
|
%
|
|
|
|
% Compute a hash value for a char.
|
|
%
|
|
:- func hash(char) = int.
|
|
:- pred hash(char::in, int::out) is det.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
%---------------------------------------------------------------------------%
|
|
|
|
:- implementation.
|
|
|
|
:- interface.
|
|
|
|
% A version of is_surrogate that takes the character in its integer form.
|
|
% Exported for use by string.m.
|
|
%
|
|
:- pred char_int_is_surrogate(int::in) is semidet.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
:- implementation.
|
|
|
|
:- import_module int.
|
|
:- import_module require.
|
|
:- import_module uint.
|
|
:- import_module uint16.
|
|
:- import_module uint8.
|
|
|
|
:- instance enum(character) where [
|
|
func(to_int/1) is char.to_int,
|
|
pred(from_int/2) is char.from_int
|
|
].
|
|
|
|
:- instance uenum(character) where [
|
|
func(to_uint/1) is char.to_uint,
|
|
pred(from_uint/2) is char.from_uint
|
|
].
|
|
|
|
:- pragma foreign_decl("C", "#include <limits.h>").
|
|
|
|
%---------------------------------------------------------------------------%
|
|
%
|
|
% All of
|
|
%
|
|
% - func to_int/1
|
|
% - pred to_int/2
|
|
% - pred from_int/2
|
|
% - func det_from_int/1
|
|
% - pred det_from_int/2
|
|
%
|
|
% are implemented in terms of pred to_int/2. For the *from_int operations,
|
|
% this is possible *only* because that predicate has a reverse mode
|
|
% as well as its usual forward mode.
|
|
%
|
|
|
|
% Function form of to_int/2: yields the numerical code of Char.
to_int(Char) = Code :-
    char.to_int(Char, Code).
|
|
|
|
%---------------------%
|
|
%
|
|
% The <in, out> mode of to_int.
|
|
%
|
|
|
|
% Forward mode: the C version converts the character through
% MR_UnsignedChar, while the C# and Java versions assign it directly.

:- pragma inline(pred(to_int/2)).

:- pragma foreign_proc("C",
    to_int(Ch::in, Code::out),
    [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
        does_not_affect_liveness],
"
    Code = (MR_UnsignedChar) Ch;
").
:- pragma foreign_proc("C#",
    to_int(Ch::in, Code::out),
    [will_not_call_mercury, promise_pure, thread_safe],
"
    Code = Ch;
").
:- pragma foreign_proc("Java",
    to_int(Ch::in, Code::out),
    [will_not_call_mercury, promise_pure, thread_safe],
"
    Code = Ch;
").
|
|
|
|
%---------------------%
|
|
%
|
|
% The <in, in> mode of to_int.
|
|
%
|
|
|
|
% Test mode: succeeds exactly when Code is the numerical code of Ch.
% The C version compares through MR_UnsignedChar, mirroring the forward mode.

:- pragma foreign_proc("C",
    to_int(Ch::in, Code::in),
    [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
        does_not_affect_liveness],
"
    SUCCESS_INDICATOR = ((MR_UnsignedChar) Ch == Code);
").
:- pragma foreign_proc("C#",
    to_int(Ch::in, Code::in),
    [will_not_call_mercury, promise_pure, thread_safe],
"
    SUCCESS_INDICATOR = (Ch == Code);
").
:- pragma foreign_proc("Java",
    to_int(Ch::in, Code::in),
    [will_not_call_mercury, promise_pure, thread_safe],
"
    SUCCESS_INDICATOR = (Ch == Code);
").
|
|
|
|
%---------------------%
|
|
%
|
|
% The <out, in> mode of to_int.
|
|
%
|
|
|
|
% Reverse mode: convert the integer Int to the character with that code.
% Succeeds only if Int lies in the Unicode range 0 .. 0x10ffff.
%
% In every backend the range test is applied to the *input* integer,
% never to the converted character.
% NOTE(review): if the C representation of a char is narrower than that of
% an int, testing the converted value would accept out-of-range integers
% whose low-order bits happen to form a valid code point. Testing Int
% avoids depending on the relative widths of the two types.

:- pragma foreign_proc("C",
    to_int(Character::out, Int::in),
    [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
        does_not_affect_liveness],
"
    Character = Int;
    SUCCESS_INDICATOR = (Int >= 0 && Int <= 0x10ffff);
").
:- pragma foreign_proc("C#",
    to_int(Character::out, Int::in),
    [will_not_call_mercury, promise_pure, thread_safe],
"
    Character = Int;
    SUCCESS_INDICATOR = (Int >= 0 && Int <= 0x10ffff);
").

:- pragma foreign_proc("Java",
    to_int(Character::out, Int::in),
    [will_not_call_mercury, promise_pure, thread_safe],
"
    Character = Int;
    SUCCESS_INDICATOR = (Int >= 0 && Int <= 0x10ffff);
").
|
|
|
|
%---------------------%
|
|
|
|
% from_int is the reverse mode of to_int/2 under a more expressive name.
from_int(Code, Char) :-
    char.to_int(Char, Code).
|
|
|
|
%---------------------%
|
|
|
|
% Function form: delegates to the predicate form below.
det_from_int(Code) = Char :-
    char.det_from_int(Code, Char).

% Predicate form: runs to_int/2 in its reverse mode and throws if the
% integer has no corresponding character.
det_from_int(Code, Char) :-
    ( if char.to_int(Char0, Code) then
        Char = Char0
    else
        unexpected($pred, "conversion failed")
    ).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
%
|
|
% The to_uint/from_uint operations are implemented quite differently from
|
|
% their int versions. The reason for this is that while to_int has both
|
|
% a forward mode and a reverse mode, to_uint has only the forward mode.
|
|
% (By the time we added unsigned integers to the language, experience had
|
|
% taught us that more modes are not necessarily better.)
|
|
%
|
|
|
|
% The unsigned code is the signed code reinterpreted via a cast.
to_uint(Char) = uint.cast_from_int(Code) :-
    char.to_int(Char, Code).
|
|
|
|
% Convert the unsigned integer Code to the character with that code.
% Each backend succeeds only if Code is at most 0x10ffff; the Java version
% masks both sides to 32 bits before comparing.

:- pragma inline(pred(from_uint/2)).

:- pragma foreign_proc("C",
    from_uint(Code::in, Ch::out),
    [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
        does_not_affect_liveness],
"
    Ch = (MR_UnsignedChar) Code;
    SUCCESS_INDICATOR = (Code <= 0x10ffff);
").
:- pragma foreign_proc("C#",
    from_uint(Code::in, Ch::out),
    [will_not_call_mercury, promise_pure, thread_safe],
"
    Ch = (int) Code;
    SUCCESS_INDICATOR = (Code <= 0x10ffff);
").
:- pragma foreign_proc("Java",
    from_uint(Code::in, Ch::out),
    [will_not_call_mercury, promise_pure, thread_safe],
"
    Ch = Code;
    SUCCESS_INDICATOR = ((Code & 0xffffffffL) <= (0x10ffff & 0xffffffffL));
").
|
|
|
|
% As from_uint/2, but throws instead of failing.
det_from_uint(Code) = Char :-
    ( if char.from_uint(Code, Char0) then
        Char = Char0
    else
        unexpected($pred, "conversion failed")
    ).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% Function form: delegates to the predicate form below.
min_char_value = Min :-
    char.min_char_value(Min).

% Character codes are unsigned, so the smallest one is always zero.
min_char_value(Min) :-
    Min = 0.
|
|
|
|
% Function form of max_char_value/1.
max_char_value = Max :-
    char.max_char_value(Max).
|
|
|
|
% The largest character code is 0x10ffff in every backend.

:- pragma foreign_proc("C",
    max_char_value(MaxCode::out),
    [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
        does_not_affect_liveness],
"
    MaxCode = 0x10ffff;
").
:- pragma foreign_proc("C#",
    max_char_value(MaxCode::out),
    [will_not_call_mercury, promise_pure, thread_safe],
"
    MaxCode = 0x10ffff;
").
:- pragma foreign_proc("Java",
    max_char_value(MaxCode::out),
    [will_not_call_mercury, promise_pure, thread_safe],
"
    MaxCode = 0x10ffff;
").
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% A character is lowercase iff it is the first component of some
% lower_upper/2 pair.
is_lower(Char) :-
    lower_upper(Char, _Upper).

% A character is uppercase iff it is the second component of some
% lower_upper/2 pair.
is_upper(Char) :-
    lower_upper(_Lower, Char).
|
|
|
|
% Function form: delegates to the predicate form below.
to_lower(Char) = Lower :-
    char.to_lower(Char, Lower).

% Map an uppercase letter listed in lower_upper/2 to its lowercase partner;
% every other character is returned unchanged.
to_lower(Char, Lower) :-
    Lower = ( if lower_upper(LowerChar, Char) then LowerChar else Char ).
|
|
|
|
% Function form: delegates to the predicate form below.
to_upper(Char) = Upper :-
    char.to_upper(Char, Upper).

% Map a lowercase letter listed in lower_upper/2 to its uppercase partner;
% every other character is returned unchanged.
to_upper(Char, Upper) :-
    Upper = ( if lower_upper(Char, UpperChar) then UpperChar else Char ).
|
|
|
|
% The table of corresponding lowercase/uppercase letter pairs.
% Written as one explicit disjunction, which is a switch on whichever
% argument is input.
lower_upper(Lower, Upper) :-
    ( Lower = 'a', Upper = 'A'
    ; Lower = 'b', Upper = 'B'
    ; Lower = 'c', Upper = 'C'
    ; Lower = 'd', Upper = 'D'
    ; Lower = 'e', Upper = 'E'
    ; Lower = 'f', Upper = 'F'
    ; Lower = 'g', Upper = 'G'
    ; Lower = 'h', Upper = 'H'
    ; Lower = 'i', Upper = 'I'
    ; Lower = 'j', Upper = 'J'
    ; Lower = 'k', Upper = 'K'
    ; Lower = 'l', Upper = 'L'
    ; Lower = 'm', Upper = 'M'
    ; Lower = 'n', Upper = 'N'
    ; Lower = 'o', Upper = 'O'
    ; Lower = 'p', Upper = 'P'
    ; Lower = 'q', Upper = 'Q'
    ; Lower = 'r', Upper = 'R'
    ; Lower = 's', Upper = 'S'
    ; Lower = 't', Upper = 'T'
    ; Lower = 'u', Upper = 'U'
    ; Lower = 'v', Upper = 'V'
    ; Lower = 'w', Upper = 'W'
    ; Lower = 'x', Upper = 'X'
    ; Lower = 'y', Upper = 'Y'
    ; Lower = 'z', Upper = 'Z'
    ).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% A character is ASCII iff its code lies in the range 0x00 .. 0x7f.
is_ascii(Char) :-
    char.to_int(Char, Code),
    0x00 =< Code,
    0x7f >= Code.
|
|
|
|
% The information here is duplicated in lookup_token_action in
|
|
% mercury_term_lexer.m. If you update this, you will also need to update that.
|
|
% The six ASCII whitespace characters, as one switch on Char.
is_whitespace(Char) :-
    ( Char = ' '
    ; Char = '\t'
    ; Char = '\n'
    ; Char = '\r'
    ; Char = '\f'
    ; Char = '\v'
    ).
|
|
|
|
% A letter is either a lowercase or an uppercase ASCII letter.
is_alpha(Char) :-
    ( if is_lower(Char) then
        true
    else
        is_upper(Char)
    ).
|
|
|
|
% An alphanumeric character is either an ASCII letter or an ASCII digit.
is_alnum(Char) :-
    ( if is_alpha(Char) then
        true
    else
        is_digit(Char)
    ).
|
|
|
|
% Accept the underscore directly; anything else must be an ASCII letter.
is_alpha_or_underscore(Char) :-
    ( if Char \= '_' then
        is_alpha(Char)
    else
        true
    ).
|
|
|
|
is_alnum_or_underscore(Char) :-
    % Every accepted character is listed explicitly for efficiency.
    % The same information (as with some of the following predicates,
    % e.g. lower_upper) is duplicated in lookup_token_action
    % in mercury_term_lexer.m.
    %
    % The shorter but slower equivalent would be:
    %
    %   ( if is_digit(Char) then
    %       true
    %   else
    %       is_alpha_or_underscore(Char)
    %   ).

    % Digits.
    ( Char = '0' ; Char = '1' ; Char = '2' ; Char = '3' ; Char = '4'
    ; Char = '5' ; Char = '6' ; Char = '7' ; Char = '8' ; Char = '9'
    % Lowercase letters.
    ; Char = 'a' ; Char = 'b' ; Char = 'c' ; Char = 'd' ; Char = 'e'
    ; Char = 'f' ; Char = 'g' ; Char = 'h' ; Char = 'i' ; Char = 'j'
    ; Char = 'k' ; Char = 'l' ; Char = 'm' ; Char = 'n' ; Char = 'o'
    ; Char = 'p' ; Char = 'q' ; Char = 'r' ; Char = 's' ; Char = 't'
    ; Char = 'u' ; Char = 'v' ; Char = 'w' ; Char = 'x' ; Char = 'y'
    ; Char = 'z'
    % Uppercase letters.
    ; Char = 'A' ; Char = 'B' ; Char = 'C' ; Char = 'D' ; Char = 'E'
    ; Char = 'F' ; Char = 'G' ; Char = 'H' ; Char = 'I' ; Char = 'J'
    ; Char = 'K' ; Char = 'L' ; Char = 'M' ; Char = 'N' ; Char = 'O'
    ; Char = 'P' ; Char = 'Q' ; Char = 'R' ; Char = 'S' ; Char = 'T'
    ; Char = 'U' ; Char = 'V' ; Char = 'W' ; Char = 'X' ; Char = 'Y'
    ; Char = 'Z'
    % Underscore.
    ; Char = '_'
    ).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% Lots of big tables.
|
|
%
|
|
% It is conceivable that there are more efficient implementations,
|
|
% but these versions are very portable.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
%
|
|
% Digit classification.
|
|
%
|
|
|
|
% is_digit/1 is a synonym for is_decimal_digit/1.
is_digit(Char) :-
    char.is_decimal_digit(Char).
|
|
|
|
% The binary digits are 0 and 1.
is_binary_digit(Char) :-
    ( Char = '0'
    ; Char = '1'
    ).
|
|
|
|
% The octal digits are 0 through 7.
is_octal_digit(Char) :-
    ( Char = '0' ; Char = '1' ; Char = '2' ; Char = '3'
    ; Char = '4' ; Char = '5' ; Char = '6' ; Char = '7'
    ).
|
|
|
|
% The decimal digits are 0 through 9.
is_decimal_digit(Char) :-
    ( Char = '0' ; Char = '1' ; Char = '2' ; Char = '3' ; Char = '4'
    ; Char = '5' ; Char = '6' ; Char = '7' ; Char = '8' ; Char = '9'
    ).
|
|
|
|
% The hexadecimal digits are 0 through 9, a through f and A through F.
is_hex_digit(Char) :-
    ( Char = '0' ; Char = '1' ; Char = '2' ; Char = '3' ; Char = '4'
    ; Char = '5' ; Char = '6' ; Char = '7' ; Char = '8' ; Char = '9'
    ; Char = 'a' ; Char = 'b' ; Char = 'c'
    ; Char = 'd' ; Char = 'e' ; Char = 'f'
    ; Char = 'A' ; Char = 'B' ; Char = 'C'
    ; Char = 'D' ; Char = 'E' ; Char = 'F'
    ).
|
|
|
|
% Reject bases outside 2 .. 36 with an exception, then check that Digit
% has a value in the given base.
is_base_digit(Base, Digit) :-
    ( if Base < 2 then
        error($pred, "invalid base")
    else if Base > 36 then
        error($pred, "invalid base")
    else
        base_digit_to_int(Base, Digit, _Value)
    ).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
%
|
|
% Digit to integer conversion.
|
|
%
|
|
|
|
% The value of each binary digit.
binary_digit_to_int(Char, Value) :-
    ( Char = '0', Value = 0
    ; Char = '1', Value = 1
    ).

% As binary_digit_to_int/2, but throws instead of failing.
det_binary_digit_to_int(Char) = Value :-
    ( if char.binary_digit_to_int(Char, Value0) then
        Value = Value0
    else
        error($pred, "char.binary_digit_to_int failed")
    ).
|
|
|
|
% The value of each octal digit.
octal_digit_to_int(Char, Value) :-
    ( Char = '0', Value = 0
    ; Char = '1', Value = 1
    ; Char = '2', Value = 2
    ; Char = '3', Value = 3
    ; Char = '4', Value = 4
    ; Char = '5', Value = 5
    ; Char = '6', Value = 6
    ; Char = '7', Value = 7
    ).

% As octal_digit_to_int/2, but throws instead of failing.
det_octal_digit_to_int(Char) = Value :-
    ( if char.octal_digit_to_int(Char, Value0) then
        Value = Value0
    else
        error($pred, "char.octal_digit_to_int failed")
    ).
|
|
|
|
% The value of each decimal digit.
decimal_digit_to_int(Char, Value) :-
    ( Char = '0', Value = 0
    ; Char = '1', Value = 1
    ; Char = '2', Value = 2
    ; Char = '3', Value = 3
    ; Char = '4', Value = 4
    ; Char = '5', Value = 5
    ; Char = '6', Value = 6
    ; Char = '7', Value = 7
    ; Char = '8', Value = 8
    ; Char = '9', Value = 9
    ).

% As decimal_digit_to_int/2, but throws instead of failing.
det_decimal_digit_to_int(Char) = Value :-
    ( if char.decimal_digit_to_int(Char, Value0) then
        Value = Value0
    else
        error($pred, "char.decimal_digit_to_int failed")
    ).
|
|
|
|
% The value of each hexadecimal digit; the letters are accepted
% in either case.
hex_digit_to_int(Char, Value) :-
    ( Char = '0', Value = 0
    ; Char = '1', Value = 1
    ; Char = '2', Value = 2
    ; Char = '3', Value = 3
    ; Char = '4', Value = 4
    ; Char = '5', Value = 5
    ; Char = '6', Value = 6
    ; Char = '7', Value = 7
    ; Char = '8', Value = 8
    ; Char = '9', Value = 9
    ; Char = 'a', Value = 10
    ; Char = 'b', Value = 11
    ; Char = 'c', Value = 12
    ; Char = 'd', Value = 13
    ; Char = 'e', Value = 14
    ; Char = 'f', Value = 15
    ; Char = 'A', Value = 10
    ; Char = 'B', Value = 11
    ; Char = 'C', Value = 12
    ; Char = 'D', Value = 13
    ; Char = 'E', Value = 14
    ; Char = 'F', Value = 15
    ).

% As hex_digit_to_int/2, but throws instead of failing.
det_hex_digit_to_int(Char) = Value :-
    ( if char.hex_digit_to_int(Char, Value0) then
        Value = Value0
    else
        error($pred, "char.hex_digit_to_int failed")
    ).
|
|
|
|
% Validate the base, then defer to the unchecked version.
base_digit_to_int(Base, Digit, Value) :-
    ( if Base >= 2, Base =< 36 then
        unsafe_base_digit_to_int(Base, Digit, Value)
    else
        error($pred, "base is not in the range 2 .. 36")
    ).
|
|
|
|
% As base_digit_to_int/3, but throws instead of failing.
det_base_digit_to_int(Base, Digit) = Value :-
    ( if char.base_digit_to_int(Base, Digit, Value0) then
        Value = Value0
    else
        error($pred, "char.base_digit_to_int failed")
    ).
|
|
|
|
% Fold a lowercase letter to uppercase (other characters pass through
% unchanged), look the result up in int_to_extended_digit/2, and accept
% the value only if it is below Base. The base itself is not checked.
unsafe_base_digit_to_int(Base, Digit0, Value) :-
    Digit = ( if lower_upper(Digit0, UpperDigit) then UpperDigit else Digit0 ),
    int_to_extended_digit(Value, Digit),
    Value < Base.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
%
|
|
% Integer to digit conversion.
|
|
%
|
|
|
|
% The binary digit for each of the values 0 and 1.
int_to_binary_digit(Value, Digit) :-
    ( Value = 0, Digit = '0'
    ; Value = 1, Digit = '1'
    ).

% As int_to_binary_digit/2, but throws instead of failing.
det_int_to_binary_digit(Value) = Digit :-
    ( if char.int_to_binary_digit(Value, Digit0) then
        Digit = Digit0
    else
        error($pred, "char.int_to_binary_digit failed")
    ).
|
|
|
|
% The octal digit for each of the values 0 through 7.
int_to_octal_digit(Value, Digit) :-
    ( Value = 0, Digit = '0'
    ; Value = 1, Digit = '1'
    ; Value = 2, Digit = '2'
    ; Value = 3, Digit = '3'
    ; Value = 4, Digit = '4'
    ; Value = 5, Digit = '5'
    ; Value = 6, Digit = '6'
    ; Value = 7, Digit = '7'
    ).

% As int_to_octal_digit/2, but throws instead of failing.
det_int_to_octal_digit(Value) = Digit :-
    ( if char.int_to_octal_digit(Value, Digit0) then
        Digit = Digit0
    else
        error($pred, "char.int_to_octal_digit failed")
    ).
|
|
|
|
% The decimal digit for each of the values 0 through 9.
int_to_decimal_digit(Value, Digit) :-
    ( Value = 0, Digit = '0'
    ; Value = 1, Digit = '1'
    ; Value = 2, Digit = '2'
    ; Value = 3, Digit = '3'
    ; Value = 4, Digit = '4'
    ; Value = 5, Digit = '5'
    ; Value = 6, Digit = '6'
    ; Value = 7, Digit = '7'
    ; Value = 8, Digit = '8'
    ; Value = 9, Digit = '9'
    ).

% As int_to_decimal_digit/2, but throws instead of failing.
det_int_to_decimal_digit(Value) = Digit :-
    ( if char.int_to_decimal_digit(Value, Digit0) then
        Digit = Digit0
    else
        error($pred, "char.int_to_decimal_digit failed")
    ).
|
|
|
|
% The hexadecimal digit for each of the values 0 through 15; the letters
% are produced in uppercase.
int_to_hex_digit(Value, Digit) :-
    ( Value = 0, Digit = '0'
    ; Value = 1, Digit = '1'
    ; Value = 2, Digit = '2'
    ; Value = 3, Digit = '3'
    ; Value = 4, Digit = '4'
    ; Value = 5, Digit = '5'
    ; Value = 6, Digit = '6'
    ; Value = 7, Digit = '7'
    ; Value = 8, Digit = '8'
    ; Value = 9, Digit = '9'
    ; Value = 10, Digit = 'A'
    ; Value = 11, Digit = 'B'
    ; Value = 12, Digit = 'C'
    ; Value = 13, Digit = 'D'
    ; Value = 14, Digit = 'E'
    ; Value = 15, Digit = 'F'
    ).

% As int_to_hex_digit/2, but throws instead of failing.
det_int_to_hex_digit(Value) = Digit :-
    ( if char.int_to_hex_digit(Value, Digit0) then
        Digit = Digit0
    else
        error($pred, "char.int_to_hex_digit failed")
    ).
|
|
|
|
% Throw for a base outside 2 .. 36. Otherwise succeed only if Value is
% below Base and int_to_extended_digit/2 has a digit for it.
base_int_to_digit(Base, Value, Digit) :-
    ( if Base >= 2, Base =< 36 then
        Value < Base,
        int_to_extended_digit(Value, Digit)
    else
        error($pred, "invalid base")
    ).
|
|
|
|
% As base_int_to_digit/3, but throws instead of failing.
det_base_int_to_digit(Base, Value) = Digit :-
    ( if char.base_int_to_digit(Base, Value, Digit0) then
        Digit = Digit0
    else
        error($pred, "char.base_int_to_digit failed")
    ).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
%
|
|
% Conversion to UTF-8 code units.
|
|
%
|
|
|
|
% Encode Char as a list of UTF-8 code units represented as ints.
% This is the uint8 encoding with each code unit converted to an int,
% so it fails in exactly the cases where to_utf8_uint8/2 fails.
to_utf8(Char, CodeUnits) :-
    char.to_utf8_uint8(Char, UInt8CodeUnits),
    CodeUnits = list.map(uint8.to_int, UInt8CodeUnits).
|
|
|
|
% Returns the UTF-8 encoding of Char as a list of uint8 code units.
% Fails whenever to_utf8_code_units/6 fails.
%
to_utf8_uint8(Char, CodeUnits) :-
    to_utf8_code_units(Char, Len, First, Second, Third, Fourth),
    % Select only the code units that are part of the encoding;
    % the remaining outputs of to_utf8_code_units are padding.
    (
        Len = 1,
        Rest = []
    ;
        Len = 2,
        Rest = [Second]
    ;
        Len = 3,
        Rest = [Second, Third]
    ;
        Len = 4,
        Rest = [Second, Third, Fourth]
    ),
    CodeUnits = [First | Rest].
|
|
|
|
% to_utf8_code_units(Char, NumCodeUnits, A, B, C, D):
%
% Computes the UTF-8 encoding of Char. NumCodeUnits is the number of code
% units in the encoding. The first NumCodeUnits of A, B, C and D hold the
% code units in order; the remaining ones are set to zero.
% Fails if Char is a surrogate code point.
% Throws an exception if the code point of Char is above 0x10ffff.
%
:- pred to_utf8_code_units(char::in, int::out(bound(1 ; 2 ; 3 ; 4)),
    uint8::out, uint8::out, uint8::out, uint8::out) is semidet.

to_utf8_code_units(Char, NumCodeUnits, A, B, C, D) :-
    CodePoint = char.to_int(Char),
    ( if CodePoint =< 0x7f then
        % One code unit: 0xxxxxxx.
        NumCodeUnits = 1,
        A = uint8.cast_from_int(CodePoint),
        B = 0u8,
        C = 0u8,
        D = 0u8
    else if CodePoint =< 0x7ff then
        % Two code units: 110xxxxx 10xxxxxx.
        NumCodeUnits = 2,
        A = uint8.cast_from_int(0xc0 \/ ((CodePoint >> 6) /\ 0x1f)),
        B = uint8.cast_from_int(0x80 \/ (CodePoint /\ 0x3f)),
        C = 0u8,
        D = 0u8
    else if CodePoint =< 0xffff then
        % Three code units: 1110xxxx 10xxxxxx 10xxxxxx.
        % Surrogate code points fall in this range and are rejected.
        not char_int_is_surrogate(CodePoint),
        NumCodeUnits = 3,
        A = uint8.cast_from_int(0xe0 \/ ((CodePoint >> 12) /\ 0x0f)),
        B = uint8.cast_from_int(0x80 \/ ((CodePoint >> 6) /\ 0x3f)),
        C = uint8.cast_from_int(0x80 \/ (CodePoint /\ 0x3f)),
        D = 0u8
    else if CodePoint =< 0x10ffff then
        % Four code units: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
        NumCodeUnits = 4,
        A = uint8.cast_from_int(0xf0 \/ ((CodePoint >> 18) /\ 0x07)),
        B = uint8.cast_from_int(0x80 \/ ((CodePoint >> 12) /\ 0x3f)),
        C = uint8.cast_from_int(0x80 \/ ((CodePoint >> 6) /\ 0x3f)),
        D = uint8.cast_from_int(0x80 \/ (CodePoint /\ 0x3f))
    else
        error($pred, "illegal code point")
    ).
|
|
|
|
%---------------------------------------------------------------------------%
%
% Conversion to UTF-16 code units.
%
|
|
|
|
% Returns the UTF-16 encoding of Char as a list of ints, one per code unit.
% Fails whenever to_utf16_code_units/4 fails.
%
to_utf16(Char, CodeUnits) :-
    to_utf16_code_units(Char, Len, Unit0, Unit1),
    % Every encoding has a first code unit; the second is used only
    % when Len is 2.
    Int0 = uint16.to_int(Unit0),
    (
        Len = 1,
        CodeUnits = [Int0]
    ;
        Len = 2,
        CodeUnits = [Int0, uint16.to_int(Unit1)]
    ).
|
|
|
|
% Returns the UTF-16 encoding of Char as a list of uint16 code units.
% Fails whenever to_utf16_code_units/4 fails.
%
to_utf16_uint16(Char, CodeUnits) :-
    to_utf16_code_units(Char, Len, First, Second),
    % The second output of to_utf16_code_units is padding when Len is 1.
    (
        Len = 1,
        Rest = []
    ;
        Len = 2,
        Rest = [Second]
    ),
    CodeUnits = [First | Rest].
|
|
|
|
% to_utf16_code_units(Char, NumCodeUnits, A, B):
%
% Computes the UTF-16 encoding of Char. NumCodeUnits is the number of code
% units in the encoding. If it is 1, then A is the only code unit and B is
% set to zero; if it is 2, then A and B are the two code units in order.
% Fails if the code point of Char is in the range 0xd800..0xdfff.
% Throws an exception if the code point of Char is above 0x10ffff.
%
:- pred to_utf16_code_units(char::in, int::out(bound(1 ; 2)),
    uint16::out, uint16::out) is semidet.

to_utf16_code_units(Char, NumCodeUnits, A, B) :-
    CodePoint = char.to_int(Char),
    ( if CodePoint < 0xd800 then
        % Common case: below the surrogate range, a single code unit.
        NumCodeUnits = 1,
        A = uint16.cast_from_int(CodePoint),
        B = 0u16
    else if CodePoint =< 0xdfff then
        % Surrogate code points have no encoding.
        fail
    else if CodePoint =< 0xffff then
        % Above the surrogate range but still a single code unit.
        NumCodeUnits = 1,
        A = uint16.cast_from_int(CodePoint),
        B = 0u16
    else if CodePoint =< 0x10ffff then
        % Two code units: the offset from 0x10000 is split into its
        % upper bits (added to 0xd800) and lower ten bits (added to 0xdc00).
        NumCodeUnits = 2,
        Offset = CodePoint - 0x10000,
        A = uint16.cast_from_int(0xd800 \/ (Offset >> 10)),
        B = uint16.cast_from_int(0xdc00 \/ (Offset /\ 0x3ff))
    else
        error($pred, "illegal code point")
    ).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% Succeeds iff the code point of Char satisfies char_int_is_surrogate/1.
%
is_surrogate(Char) :-
    char_int_is_surrogate(char.to_int(Char)).
|
|
|
|
% Succeeds iff the code point of Char is in the range 0xd800..0xdbff.
%
is_leading_surrogate(Char) :-
    CodePoint = char.to_int(Char),
    0xd800 =< CodePoint,
    CodePoint =< 0xdbff.
|
|
|
|
% Succeeds iff the code point of Char is in the range 0xdc00..0xdfff.
%
is_trailing_surrogate(Char) :-
    CodePoint = char.to_int(Char),
    0xdc00 =< CodePoint,
    CodePoint =< 0xdfff.
|
|
|
|
% Succeeds iff the code point of Char is in the range 0xfdd0..0xfdef,
% or has all of the bits in the mask 0xfffe set (that is, its low 16 bits
% are 0xfffe or 0xffff).
%
is_noncharacter(Char) :-
    CodePoint = char.to_int(Char),
    (
        CodePoint >= 0xfdd0,
        CodePoint =< 0xfdef
    ;
        (CodePoint /\ 0xfffe) = 0xfffe
    ).
|
|
|
|
% Succeeds iff the code point of Char is in one of the ranges
% 0x0000..0x001f or 0x007f..0x009f.
%
is_control(Char) :-
    CodePoint = char.to_int(Char),
    (
        CodePoint >= 0x0000,
        CodePoint =< 0x001f
    ;
        CodePoint >= 0x007f,
        CodePoint =< 0x009f
    ).
|
|
|
|
% Succeeds iff the code point of Char is one of 0x0020, 0x00a0, 0x1680,
% 0x2000..0x200a, 0x202f, 0x205f or 0x3000.
%
is_space_separator(Char) :-
    CodePoint = char.to_int(Char),
    (
        CodePoint = 0x0020
    ;
        CodePoint = 0x00a0
    ;
        CodePoint = 0x1680
    ;
        CodePoint >= 0x2000,
        CodePoint =< 0x200a
    ;
        CodePoint = 0x202f
    ;
        CodePoint = 0x205f
    ;
        CodePoint = 0x3000
    ).
|
|
|
|
% Succeeds iff the code point of Char is 0x2028.
%
is_line_separator(Char) :-
    CodePoint = char.to_int(Char),
    CodePoint = 0x2028.
|
|
|
|
% Succeeds iff the code point of Char is 0x2029.
%
is_paragraph_separator(Char) :-
    CodePoint = char.to_int(Char),
    CodePoint = 0x2029.
|
|
|
|
% Succeeds iff the code point of Char lies in one of the three
% private use ranges tested below.
%
is_private_use(Char) :-
    CodePoint = char.to_int(Char),
    (
        % Private Use Area.
        CodePoint >= 0xe000,
        CodePoint =< 0xf8ff
    ;
        % Supplemental Private Use Area-A.
        CodePoint >= 0xf0000,
        CodePoint =< 0xffffd
    ;
        % Supplemental Private Use Area-B.
        CodePoint >= 0x100000,
        CodePoint =< 0x10fffd
    ).
|
|
|
|
% Forwards to pretty_printer.char_to_doc/1.
%
char_to_doc(Char) = Doc :-
    Doc = pretty_printer.char_to_doc(Char).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% Alternative name for hex_digit_to_int/2; simply forwards to it.
%
is_hex_digit(Char, Value) :-
    hex_digit_to_int(Char, Value).
|
|
|
|
% Alternative name for int_to_hex_digit/2; simply forwards to it.
%
int_to_hex_char(Value, Digit) :-
    int_to_hex_digit(Value, Digit).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% Relates each integer in the range 0..35 to its digit character:
% 0..9 map to '0'..'9' and 10..35 map to the uppercase letters 'A'..'Z'.
% Fails for any integer or character outside the table.
%
:- pred int_to_extended_digit(int, char).
:- mode int_to_extended_digit(in, out) is semidet.
:- mode int_to_extended_digit(out, in) is semidet.

int_to_extended_digit(Int, Digit) :-
    ( Int = 0, Digit = '0'
    ; Int = 1, Digit = '1'
    ; Int = 2, Digit = '2'
    ; Int = 3, Digit = '3'
    ; Int = 4, Digit = '4'
    ; Int = 5, Digit = '5'
    ; Int = 6, Digit = '6'
    ; Int = 7, Digit = '7'
    ; Int = 8, Digit = '8'
    ; Int = 9, Digit = '9'
    ; Int = 10, Digit = 'A'
    ; Int = 11, Digit = 'B'
    ; Int = 12, Digit = 'C'
    ; Int = 13, Digit = 'D'
    ; Int = 14, Digit = 'E'
    ; Int = 15, Digit = 'F'
    ; Int = 16, Digit = 'G'
    ; Int = 17, Digit = 'H'
    ; Int = 18, Digit = 'I'
    ; Int = 19, Digit = 'J'
    ; Int = 20, Digit = 'K'
    ; Int = 21, Digit = 'L'
    ; Int = 22, Digit = 'M'
    ; Int = 23, Digit = 'N'
    ; Int = 24, Digit = 'O'
    ; Int = 25, Digit = 'P'
    ; Int = 26, Digit = 'Q'
    ; Int = 27, Digit = 'R'
    ; Int = 28, Digit = 'S'
    ; Int = 29, Digit = 'T'
    ; Int = 30, Digit = 'U'
    ; Int = 31, Digit = 'V'
    ; Int = 32, Digit = 'W'
    ; Int = 33, Digit = 'X'
    ; Int = 34, Digit = 'Y'
    ; Int = 35, Digit = 'Z'
    ).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% Computes a hash value for C by passing its code point, cast to a uint,
% to uint.hash/2.
%
hash(C) = H :-
    CodePoint = char.to_int(C),
    UCodePoint = uint.cast_from_int(CodePoint),
    uint.hash(UCodePoint, H).
|
|
|
|
% Predicate version of the function hash/1.
%
hash(Char, hash(Char)).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% Succeeds iff Int is in the range 0xd800..0xdfff.
%
char_int_is_surrogate(Int) :-
    % This code is sort-of duplicated, in C, in runtime/mercury_string.h,
    % in the macro MR_is_surrogate.
    0xd800 =< Int,
    Int =< 0xdfff.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
:- end_module char.
|
|
%---------------------------------------------------------------------------%
|