mirror of
https://github.com/Mercury-Language/mercury.git
synced 2026-04-28 15:54:18 +00:00
Branches: main Deprecate string.substring, string.foldl_substring, etc. in favour of new procedures named string.between, string.foldl_between, etc. The "between" procedures take a pair of [Start, End) endpoints instead of Start, Count arguments. The reasons for this change are: - the "between" procedures are more convenient - "between" should be unambiguous. You can guess that it takes an End argument instead of a Count argument without looking up the manual. - Count arguments necessarily counted code units, but when working with non-ASCII strings, almost the only way that the values would be arrived at is by subtracting one end point from another. - it paves the way for a potential change to replace string offsets with an abstract type. We cannot do that if users regularly have to perform a subtraction between two offsets. library/string.m: Add string.foldl_between, string.foldl2_between, string.foldr_between, string.between. Deprecate the old substring names. Replace string.substring_by_codepoint by string.between_codepoints. compiler/elds_to_erlang.m: compiler/error_util.m: compiler/rbmm.live_variable_analysis.m: compiler/timestamp.m: extras/posix/samples/mdprof_cgid.m: library/bitmap.m: library/integer.m: library/lexer.m: library/parsing_utils.m: mdbcomp/trace_counts.m: profiler/demangle.m: Conform to changes. tests/general/Mercury.options: tests/general/string_foldl_substring.exp: tests/general/string_foldl_substring.m: tests/general/string_foldr_substring.exp: tests/general/string_foldr_substring.m: tests/hard_coded/Mercury.options: tests/hard_coded/string_substring.m: Test both between and substring procedures. tests/hard_coded/string_codepoint.exp: tests/hard_coded/string_codepoint.exp2: tests/hard_coded/string_codepoint.m: Update names in test outputs. NEWS: Announce the change.
91 lines
2.0 KiB
Plaintext
91 lines
2.0 KiB
Plaintext
count_code_units:
|
|
7
|
|
|
|
count_codepoints:
|
|
6
|
|
|
|
codepoint_offset:
|
|
string.codepoint_offset(Str, 0, 0)
|
|
string.codepoint_offset(Str, 0, 1, 1)
|
|
string.index(Str, 0, 'a')
|
|
string.codepoint_offset(Str, 1, 1)
|
|
string.codepoint_offset(Str, 1, 1, 2)
|
|
string.index(Str, 1, 'ß')
|
|
string.codepoint_offset(Str, 2, 2)
|
|
string.codepoint_offset(Str, 2, 1, 3)
|
|
string.index(Str, 2, 'ξ')
|
|
string.codepoint_offset(Str, 3, 3)
|
|
string.codepoint_offset(Str, 3, 1, 4)
|
|
string.index(Str, 3, '啕')
|
|
string.codepoint_offset(Str, 4, 4)
|
|
string.codepoint_offset(Str, 4, 1, 6)
|
|
string.index(Str, 4, '𐀀')
|
|
string.codepoint_offset(Str, 5, 6)
|
|
string.codepoint_offset(Str, 6, 1, _) failed
|
|
string.index(Str, 6, '.')
|
|
string.codepoint_offset(Str, 6, _) failed
|
|
|
|
to_char_list:
|
|
['a', 'ß', 'ξ', '啕', '𐀀', '.']
|
|
|
|
from_char_list:
|
|
aßξ啕𐀀.
|
|
|
|
from_rev_char_list:
|
|
.𐀀啕ξßa
|
|
|
|
to_code_unit_list:
|
|
[97, 223, 958, 21845, 55296, 56320, 46]
|
|
|
|
from_code_unit_list:
|
|
aßξ啕𐀀.
|
|
|
|
index_next:
|
|
index_next(Str, 0, 1, 'a')
|
|
index_next(Str, 1, 2, 'ß')
|
|
index_next(Str, 2, 3, 'ξ')
|
|
index_next(Str, 3, 4, '啕')
|
|
index_next(Str, 4, 6, '𐀀')
|
|
index_next(Str, 6, 7, '.')
|
|
end
|
|
|
|
index_next(-1):
|
|
end
|
|
|
|
unsafe_index_next:
|
|
unsafe_index_next(Str, 0, 1, 'a')
|
|
unsafe_index_next(Str, 1, 2, 'ß')
|
|
unsafe_index_next(Str, 2, 3, 'ξ')
|
|
unsafe_index_next(Str, 3, 4, '啕')
|
|
unsafe_index_next(Str, 4, 6, '𐀀')
|
|
unsafe_index_next(Str, 6, 7, '.')
|
|
end
|
|
|
|
unsafe_prev_index:
|
|
unsafe_prev_index(Str, 7, 6, '.')
|
|
unsafe_prev_index(Str, 6, 4, '𐀀')
|
|
unsafe_prev_index(Str, 4, 3, '啕')
|
|
unsafe_prev_index(Str, 3, 2, 'ξ')
|
|
unsafe_prev_index(Str, 2, 1, 'ß')
|
|
unsafe_prev_index(Str, 1, 0, 'a')
|
|
end
|
|
|
|
split_by_codepoint:
|
|
split_by_codepoint(Str, -1, "", "aßξ啕𐀀.")
|
|
split_by_codepoint(Str, 0, "", "aßξ啕𐀀.")
|
|
split_by_codepoint(Str, 1, "a", "ßξ啕𐀀.")
|
|
split_by_codepoint(Str, 2, "aß", "ξ啕𐀀.")
|
|
split_by_codepoint(Str, 3, "aßξ", "啕𐀀.")
|
|
split_by_codepoint(Str, 4, "aßξ啕", "𐀀.")
|
|
split_by_codepoint(Str, 5, "aßξ啕𐀀", ".")
|
|
split_by_codepoint(Str, 6, "aßξ啕𐀀.", "")
|
|
|
|
left_by_codepoint:
|
|
aßξ
|
|
|
|
right_by_codepoint:
|
|
啕𐀀.
|
|
|
|
between_codepoints:
|
|
ξ啕
|