Files
mercury/compiler/builtin_ops.m
2025-11-17 09:19:13 +11:00

628 lines
24 KiB
Mathematica

%-----------------------------------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%-----------------------------------------------------------------------------%
% Copyright (C) 1999-2001, 2003-2006, 2009-2011 The University of Melbourne.
% Copyright (C) 2014-2018, 2020-2025 The Mercury team.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%-----------------------------------------------------------------------------%
%
% File: builtin_ops.m -- defines the builtin operator types.
% Main author: fjh.
%
% This module defines various types which enumerate the different builtin
% operators. The different back-ends -- the LLDS and MLDS backends,
% and others when we had them -- all use the same set of builtin operators.
% These operators are defined here.
%
%-----------------------------------------------------------------------------%
:- module backend_libs.builtin_ops.
:- interface.
:- import_module hlds.
:- import_module hlds.hlds_pred.
:- import_module mdbcomp.
:- import_module mdbcomp.sym_name.
:- import_module parse_tree.
:- import_module parse_tree.prog_data.
:- import_module list.
%-----------------------------------------------------------------------------%
% The builtin operators that take a single operand. A unary_op is the
% first argument of the assign_unary function symbol of simple_assigned_expr.
%
% Of these operators, the builtin translation code in this module generates
% only bitwise_complement (for the "\\" builtin of the integer modules).
%
% NOTE(review): this module does not document the meaning of the other
% operators. The names suggest that the dword_*_get_word{0,1} operators
% extract one word of a double-word value, mirroring the *_from_dword
% binary operators -- confirm against the code generators.
:- type unary_op
---> tag
; strip_tag
; mkbody
; unmkbody
; bitwise_complement(int_type)
; logical_not
; hash_string
; hash_string2
; hash_string3
; hash_string4
; hash_string5
; hash_string6
; dword_float_get_word0
; dword_float_get_word1
; dword_int64_get_word0
; dword_int64_get_word1
; dword_uint64_get_word0
; dword_uint64_get_word1.
% The type of the shift amount in the unchecked_left_shift and
% unchecked_right_shift binary operators. In this module, shift_by_int is
% used for the "unchecked_{left,right}_shift" builtins, and shift_by_uint
% for the "unchecked_{left,right}_ushift" builtins.
:- type shift_by_type
---> shift_by_int
; shift_by_uint.
% The optional size argument of the offset_str_eq binary operator.
% With no_size, the comparison covers the whole rest of the strings
% (as with strcmp); with size(Size), it covers only the next Size code units
% (as with strncmp). See the comment on offset_str_eq for details.
:- type maybe_size
---> no_size
; size(int).
% The comparison operators. A cmp_op is an argument of the int_cmp, str_cmp
% and float_cmp function symbols of binary_op.
%
% The int_cmp operations eq and ne are used not just on integers,
% but also on characters and enumerations.
% XXX These two uses are not covered by int_type, but for now we use
% the convention that they should use `int_type_int'.
% XXX Which is a historical artifact; we *should* be using int_type_uint
% for them instead.
:- type cmp_op
---> eq
; ne
; lt
; le
; gt
; ge.
% The subtype of cmp_op that contains only the comparisons allowed as
% the argument of the int_as_uint_cmp binary operator, namely lt and le.
:- type int_as_uint_cmp_op =< cmp_op
---> lt
; le.
% The arithmetic operators. An arith_op is the second argument of the
% int_arith function symbol of binary_op.
%
% NOTE The ao_ prefix on these function symbols is there because
% "div" and "rem" are operators.
:- type arith_op
---> ao_add
; ao_sub
; ao_mul
; ao_div % Assumed to truncate toward zero.
; ao_rem. % Remainder with respect to truncating integer division.
% The subtype of arith_op that contains the arithmetic operators applicable
% to floats. A float_arith_op is the argument of the float_arith function
% symbol of binary_op.
%
% The remainder operation does not make any sense for floats.
:- type float_arith_op =< arith_op
---> ao_add
; ao_sub
; ao_mul
; ao_div.
% The builtin operators that take two operands. A binary_op is the first
% argument of the assign_binary and assign_binary_lc function symbols
% of simple_assigned_expr, and of the binary_test function symbol
% of simple_test_expr.
:- type binary_op
---> int_arith(int_type, arith_op)
% For shifts, the first argument specifies the type of
% the value being shifted, while the second specifies
% the type of the shift amount.
; unchecked_left_shift(int_type, shift_by_type)
; unchecked_right_shift(int_type, shift_by_type)
; bitwise_and(int_type)
; bitwise_or(int_type)
; bitwise_xor(int_type)
; logical_and
; logical_or
; int_cmp(int_type, cmp_op)
; body
; array_index(array_elem_type)
% The element type does not seem to be used. It could probably
% be deleted, but it seems wise not to delete the code
% that currently fills in this slot in case some backend ever
% *does* start needing to know the element type.
; string_unsafe_index_code_unit
; str_cmp(cmp_op)
; str_nzp % returns negative, zero or positive
; offset_str_eq(int, maybe_size)
% offset_str_eq(Offset, MaybeSize)
%
% This op will do "strcmp(StrA+Offset, StrB+Offset) == 0" or
% "strncmp(StrA+Offset, StrB+Offset, Size) == 0" depending
% on whether MaybeSize is "no_size" or "size(Size)".
%
% This op is not recognized in user-written code; it is only
% generated by the compiler when implementing string switches
% via tries.
%
% We use binop(offset_str_eq(Offset, no_size), StrA, StrB) at
% leaf trie nodes. One string is the string-so-far represented
% by the trie node, the other is the string being switched on.
% We use this version of this binary_op to test whether the
% rest of the switched-on string matches what we expect.
%
% We use binop(offset_str_eq(Offset, size(Size)), StrA, StrB)
% at non-leaf trie nodes where all the alternatives, besides
% starting with the same Offset code units that identify
% the trie node, also have the next Size code units in common
% as well, though they do have differences at the next code unit
% after *that*.
%
% For both our use cases, the initial Offset code units of StrA
% and StrB will be identical. This means that on backends which
% cannot start the comparison at a specified code unit offset,
% we *could* redundantly compare these as well, comparing either
% "strcmp(StrA, StrB)" or "strncmp(StrA, StrB, Offset+Size)"
% against zero. However, we don't do that; instead, we simply
% avoid generating this operation.
; int_as_uint_cmp(int_as_uint_cmp_op)
% The arguments to these ops are just ordinary (signed)
% Mercury ints, but the comparison is done *after* casting both
% arguments to the uint type. This means that e.g. the expression
% binary(int_as_uint_cmp(le), int_const(1), int_const(-1))
% returns true, since (MR_Unsigned) 1 <= (MR_Unsigned) -1.
; in_range
% Tests for "0 =< Index, Index < Range". On Java, that is its
% implementation, while for C and C#, it is implemented the same
% as int_as_uint_cmp(lt). The reason for the difference is
% that in C and C#, the int->uint cast is free, while in Java
% it requires masking both operands. That makes it unclear
% which approach to range tests is faster in Java. The only way
% to decide is to benchmark both approaches, which requires
% both to be implemented.
; float_arith(float_arith_op)
; float_cmp(cmp_op)
% Note that we do not have primitive operations in library/float.m
% for comparing floats for equality and inequality, since the
% approximate nature of floats makes such operations "iffy".
% However, it is possible to unify float variables, and it is
% of course possible to negate such goals, so we need both
% the eq and ne cmp_ops for floats as well as for other types.
; float_from_dword
; int64_from_dword
; uint64_from_dword
; pointer_equal_conservative.
% The inst of those values of type binary_op whose top-level function symbol
% is one of the integer shift operators or one of the bitwise and/or/xor
% operators. The arguments of these function symbols may be any ground terms.
:- inst int_misc_binary_op for binary_op/0
---> unchecked_left_shift(ground, ground)
; unchecked_right_shift(ground, ground)
; bitwise_and(ground)
; bitwise_or(ground)
; bitwise_xor(ground).
% The element type of the array accessed by an array_index operation.
%
% For the MLDS back-end, we need to know the element type for each
% array_index operation.
%
% Currently array index operations are only generated in limited
% circumstances. Using a simple representation for them here,
% rather than just putting the MLDS type here, avoids the need
% for this module to depend on back-end specific stuff like MLDS types.
%
% NOTE(review): array_elem_struct carries a list of scalar element types,
% presumably one for each field of the struct -- confirm against the code
% that constructs these terms.
:- type array_elem_type
---> array_elem_scalar(scalar_array_elem_type)
; array_elem_struct(list(scalar_array_elem_type)).
% The possible types of scalar array elements. The comment next to each
% function symbol names the MLDS type that it stands for.
:- type scalar_array_elem_type
---> scalar_elem_string % ml_string_type
; scalar_elem_int % mlds_native_int_type
; scalar_elem_generic. % mlds_generic_type
% Return the opposite of the given comparison operator:
% eq and ne are each other's opposites, as are lt and ge, and le and gt.
:- func negate_cmp_op(cmp_op) = cmp_op.
% Return the name of the given arithmetic operator, without the ao_ prefix:
% "add", "sub", "mul", "div" or "rem".
:- func dump_arith_op(arith_op) = string.
% Return the name of the given comparison operator as a string:
% "eq", "ne", "lt", "le", "gt" or "ge".
:- func dump_cmp_op(cmp_op) = string.
% Return the C operator for the given arithmetic operator:
% "+", "-", "*", "/" or "%".
:- func arith_op_c_operator(arith_op) = string.
% Return the C operator for the given comparison operator:
% "==", "!=", "<", "<=", ">" or ">=".
:- func cmp_op_c_operator(cmp_op) = string.
% test_if_builtin(ModuleName, PredName, PredFormArity):
%
% Given the identity of a predicate, or a function, in the form of
%
% - the module in which it is defined,
% - its name, and
% - its pred form arity, i.e. the number of its arguments including
% any function result argument,
%
% succeed iff that predicate or function is an inline builtin.
%
% Note that we don't have to know whether the entity being asked about
% is a predicate or a function. This is because all of our inline
% builtin operations are defined in a few modules of the standard library,
% and we maintain an invariant in these modules. This states that
%
% - given a builtin predicate Module.Name/Arity, either
% there is no corresponding function Module.Name/Arity-1,
% or there is, but its semantics is exactly the same as the predicate's,
% and
%
% - given a builtin function Module.Name/Arity, either
% there is no corresponding predicate Module.Name/Arity+1,
% or there is, but its semantics is exactly the same as the function's.
%
:- pred test_if_builtin(module_name::in, string::in, int::in) is semidet.
% translate_builtin(ModuleName, PredName, ProcId, Args, Code):
%
% This predicate should be invoked only on predicates and functions
% for which test_if_builtin has succeeded.
%
% In such cases, it returns an abstract representation of the code
% that can be used to evaluate a call to the predicate or function
% with the given arguments, which will be either an assignment or a noop
% (if the builtin is det) or a test (if the builtin is semidet).
%
% If the given module, name, mode (ProcId) and number of arguments
% do not identify a builtin, this predicate calls unexpected/2
% with the message "unknown builtin <PredName>/<Arity>".
%
:- pred translate_builtin(module_name::in, string::in, proc_id::in,
list(T)::in, simple_code(T)::out) is det.
% The abstract representation of the code of a builtin, as returned by
% translate_builtin. T is the type of the arguments of the builtin.
%
% - assign(Target, Expr) assigns the value of Expr to Target.
% - ref_assign(X, Y) is generated for store_at_ref_impure(X, Y).
% NOTE(review): presumably this stores Y at the location that X refers to
% -- confirm against the code generators.
% - test(TestExpr) is the code of a semidet builtin.
% - noop(Args) is generated for the builtins that get or set the I/O state.
% The list is [X] for the "get" operations and [] for the "set" operations.
% NOTE(review): presumably the list names the variables that the builtin
% nominally defines -- confirm against the code generators.
:- type simple_code(T)
---> assign(T, simple_assigned_expr(T))
; ref_assign(T, T)
; test(simple_test_expr(T))
; noop(list(T)).
% The expressions that can be on the right hand side of an assign.
%
% - assign_copy(X) is the value of X itself.
% - assign_const(Const) is the value of the given constant.
% - assign_binary(Op, X, Y) is the result of applying Op to X and Y.
% - assign_binary_lc(Op, Const, Y) is the result of applying Op
% to the given constant and Y.
% - assign_unary(Op, X) is the result of applying Op to X.
%
% Note that assign_const is not used for any builtins, but it *is* used
% in call_gen.m to implement casts involving dummy types. (The code that
% implements casts reuses the machinery for implementing builtins.)
%
% Note: _lc means "left arg is a constant".
:- type simple_assigned_expr(T)
---> assign_copy(T)
; assign_const(simple_const)
; assign_binary(binary_op, T, T)
; assign_binary_lc(binary_op, simple_const, T)
; assign_unary(unary_op, T).
% The constants that can appear in assign_const and assign_binary_lc
% expressions. There is one function symbol for each of the signed and
% unsigned integer types, and one for floats.
:- type simple_const
---> int_const(int)
; uint_const(uint)
; int8_const(int8)
; uint8_const(uint8)
; int16_const(int16)
; uint16_const(uint16)
; int32_const(int32)
; uint32_const(uint32)
; int64_const(int64)
; uint64_const(uint64)
; float_const(float).
% The expressions that can be the argument of a test.
% binary_test(Op, X, Y) applies Op to X and Y; this module generates it only
% with the comparison operators, in_range and pointer_equal_conservative.
:- type simple_test_expr(T)
---> binary_test(binary_op, T, T).
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
:- implementation.
:- import_module mdbcomp.builtin_modules.
:- import_module require.
:- import_module string.
%-----------------------------------------------------------------------------%
% Each comparison operator is paired with its opposite:
% eq with ne, lt with ge, and le with gt.
negate_cmp_op(CmpOp) = NegatedCmpOp :-
    ( CmpOp = eq, NegatedCmpOp = ne
    ; CmpOp = ne, NegatedCmpOp = eq
    ; CmpOp = lt, NegatedCmpOp = ge
    ; CmpOp = le, NegatedCmpOp = gt
    ; CmpOp = gt, NegatedCmpOp = le
    ; CmpOp = ge, NegatedCmpOp = lt
    ).
% The dumped name of an arithmetic operator is the name of its function
% symbol without the ao_ prefix.
dump_arith_op(ArithOp) = OpName :-
    ( ArithOp = ao_add, OpName = "add"
    ; ArithOp = ao_sub, OpName = "sub"
    ; ArithOp = ao_mul, OpName = "mul"
    ; ArithOp = ao_div, OpName = "div"
    ; ArithOp = ao_rem, OpName = "rem"
    ).
% The dumped name of a comparison operator is the name of its function symbol.
dump_cmp_op(CmpOp) = OpName :-
    ( CmpOp = eq, OpName = "eq"
    ; CmpOp = ne, OpName = "ne"
    ; CmpOp = lt, OpName = "lt"
    ; CmpOp = le, OpName = "le"
    ; CmpOp = gt, OpName = "gt"
    ; CmpOp = ge, OpName = "ge"
    ).
% Map each arithmetic operator to the C operator that implements it.
arith_op_c_operator(ArithOp) = COperator :-
    ( ArithOp = ao_add, COperator = "+"
    ; ArithOp = ao_sub, COperator = "-"
    ; ArithOp = ao_mul, COperator = "*"
    ; ArithOp = ao_div, COperator = "/"
    ; ArithOp = ao_rem, COperator = "%"
    ).
% Map each comparison operator to the C operator that implements it.
cmp_op_c_operator(CmpOp) = COperator :-
    ( CmpOp = eq, COperator = "=="
    ; CmpOp = ne, COperator = "!="
    ; CmpOp = lt, COperator = "<"
    ; CmpOp = le, COperator = "<="
    ; CmpOp = gt, COperator = ">"
    ; CmpOp = ge, COperator = ">="
    ).
%-----------------------------------------------------------------------------%
test_if_builtin(FullyQualifiedModule, PredName, Arity) :-
    is_std_lib_module_name(FullyQualifiedModule, StdLibModuleName),
    % We want to know only whether builtin_translation succeeds; we ignore
    % the code it generates. That code is the only thing that depends on
    % the *values* of the arguments (as opposed to the *number* of arguments,
    % which does influence success/failure), so we pass Arity dummy arguments.
    % Likewise, we pass 0 as a dummy mode number, since every builtin
    % that builtin_translation recognizes has a mode number 0.
    DummyProcNum = 0,
    DummyArgs = list.duplicate(Arity, 0),
    builtin_translation(StdLibModuleName, PredName, DummyProcNum, DummyArgs,
        _Code).
translate_builtin(FullyQualifiedModule, PredName, ProcId, Args, Code) :-
    ( if
        is_std_lib_module_name(FullyQualifiedModule, StdLibModuleName),
        proc_id_to_int(ProcId, ProcNum),
        builtin_translation(StdLibModuleName, PredName, ProcNum, Args,
            BuiltinCode)
    then
        Code = BuiltinCode
    else
        % Our caller should have checked with test_if_builtin that
        % this is a builtin, so getting here means a compiler bug.
        Arity = list.length(Args),
        Msg = string.format("unknown builtin %s/%d",
            [s(PredName), i(Arity)]),
        unexpected($pred, Msg)
    ).
% builtin_translation(ModuleName, PredName, ProcNum, Args, Code):
%
% ModuleName is the name of a standard library module, in the string form
% returned by is_std_lib_module_name. Succeed iff the predicate or function
% named PredName in that module, with mode number ProcNum and with as many
% arguments as there are elements in Args, is an inline builtin. If it is,
% return as Code the abstract representation of the code that implements it.
%
% This is the common implementation of test_if_builtin and translate_builtin.
:- pred builtin_translation(string::in, string::in, int::in, list(T)::in,
simple_code(T)::out) is semidet.
:- pragma inline(pred(builtin_translation/5)).
builtin_translation(ModuleName, PredName, ProcNum, Args, Code) :-
% Each arm handles the builtins of one module, or of one group of modules.
(
ModuleName = "builtin",
PredName = "unsafe_promise_unique", ProcNum = 0, Args = [X, Y],
Code = assign(Y, assign_copy(X))
;
ModuleName = "io",
(
PredName = "unsafe_get_io_state", ProcNum = 0, Args = [X],
Code = noop([X])
;
PredName = "unsafe_set_io_state", ProcNum = 0, Args = [_X],
Code = noop([])
)
;
ModuleName = "private_builtin",
builtin_translation_private_builtin(PredName, ProcNum, Args, Code)
;
ModuleName = "term_size_prof_builtin",
PredName = "term_size_plus", ProcNum = 0, Args = [X, Y, Z],
Code = assign(Z, assign_binary(int_arith(int_type_int, ao_add), X, Y))
;
% Map the name of each integer library module to the corresponding
% int_type; the same code then handles all ten modules.
( ModuleName = "int", IT = int_type_int
; ModuleName = "int8", IT = int_type_int8
; ModuleName = "int16", IT = int_type_int16
; ModuleName = "int32", IT = int_type_int32
; ModuleName = "int64", IT = int_type_int64
; ModuleName = "uint", IT = int_type_uint
; ModuleName = "uint8", IT = int_type_uint8
; ModuleName = "uint16", IT = int_type_uint16
; ModuleName = "uint32", IT = int_type_uint32
; ModuleName = "uint64", IT = int_type_uint64
),
builtin_translation_int(IT, PredName, ProcNum, Args, Code)
;
ModuleName = "float",
builtin_translation_float(PredName, ProcNum, Args, Code)
).
% builtin_translation_private_builtin(PredName, ProcNum, Args, Code):
%
% Does the job of builtin_translation for the private_builtin module.
% Succeeds iff PredName, with mode number ProcNum and with the number
% of arguments in Args, is one of the builtins listed below; if it is,
% Code is the code that implements it.
:- pred builtin_translation_private_builtin(string::in, int::in, list(T)::in,
simple_code(T)::out) is semidet.
builtin_translation_private_builtin(PredName, ProcNum, Args, Code) :-
(
PredName = "trace_get_io_state", ProcNum = 0, Args = [X],
Code = noop([X])
;
PredName = "trace_set_io_state", ProcNum = 0, Args = [_X],
Code = noop([])
;
PredName = "store_at_ref_impure",
ProcNum = 0, Args = [X, Y],
Code = ref_assign(X, Y)
;
PredName = "unsafe_type_cast", ProcNum = 0, Args = [X, Y],
% Note that the code we generate for unsafe_type_cast
% is not type-correct. Back-ends that require type-correct
% intermediate code (e.g. the MLDS back-end) must handle
% unsafe_type_cast separately, rather than by calling
% builtin_translation.
Code = assign(Y, assign_copy(X))
;
% The greater-than and less-than tests on each of the integer types.
( PredName = "builtin_int_gt", Type = int_type_int, Cmp = gt
; PredName = "builtin_int_lt", Type = int_type_int, Cmp = lt
; PredName = "builtin_int8_gt", Type = int_type_int8, Cmp = gt
; PredName = "builtin_int8_lt", Type = int_type_int8, Cmp = lt
; PredName = "builtin_int16_gt", Type = int_type_int16, Cmp = gt
; PredName = "builtin_int16_lt", Type = int_type_int16, Cmp = lt
; PredName = "builtin_int32_gt", Type = int_type_int32, Cmp = gt
; PredName = "builtin_int32_lt", Type = int_type_int32, Cmp = lt
; PredName = "builtin_int64_gt", Type = int_type_int64, Cmp = gt
; PredName = "builtin_int64_lt", Type = int_type_int64, Cmp = lt
; PredName = "builtin_uint_gt", Type = int_type_uint, Cmp = gt
; PredName = "builtin_uint_lt", Type = int_type_uint, Cmp = lt
; PredName = "builtin_uint8_gt", Type = int_type_uint8, Cmp = gt
; PredName = "builtin_uint8_lt", Type = int_type_uint8, Cmp = lt
; PredName = "builtin_uint16_gt", Type = int_type_uint16, Cmp = gt
; PredName = "builtin_uint16_lt", Type = int_type_uint16, Cmp = lt
; PredName = "builtin_uint32_gt", Type = int_type_uint32, Cmp = gt
; PredName = "builtin_uint32_lt", Type = int_type_uint32, Cmp = lt
; PredName = "builtin_uint64_gt", Type = int_type_uint64, Cmp = gt
; PredName = "builtin_uint64_lt", Type = int_type_uint64, Cmp = lt
),
CmpOp = int_cmp(Type, Cmp),
ProcNum = 0, Args = [X, Y],
Code = test(binary_test(CmpOp, X, Y))
;
% The tests that compare ints as if they were uints, and the range test.
% See the comments on int_as_uint_cmp and in_range in binary_op.
( PredName = "unsigned_lt", CmpOp = int_as_uint_cmp(lt)
; PredName = "unsigned_le", CmpOp = int_as_uint_cmp(le)
; PredName = "in_range", CmpOp = in_range
),
ProcNum = 0, Args = [X, Y],
Code = test(binary_test(CmpOp, X, Y))
;
PredName = "pointer_equal", ProcNum = 0,
% The arity of this predicate is two during parsing,
% and three after the polymorphism pass.
( Args = [X, Y]
; Args = [_TypeInfo, X, Y]
),
Code = test(binary_test(pointer_equal_conservative, X, Y))
;
PredName = "partial_inst_copy", ProcNum = 0, Args = [X, Y],
Code = assign(Y, assign_copy(X))
).
% builtin_translation_int(IT, PredName, ProcNum, Args, Code):
%
% Does the job of builtin_translation for the integer library modules.
% IT is the int_type that builtin_translation selected based on
% the module name. Succeeds iff PredName, with mode number ProcNum and
% with the number of arguments in Args, is one of the operations listed
% below; if it is, Code is the assignment or test that implements it.
:- pred builtin_translation_int(int_type::in, string::in,
int::in, list(T)::in, simple_code(T)::out) is semidet.
builtin_translation_int(IT, PredName, ProcNum, Args, Code) :-
(
PredName = "+",
(
% Binary plus. Mode 0 computes Z as X + Y. Modes 1 and 2 run
% the addition backwards: they compute X as Z - Y, and Y as Z - X.
Args = [X, Y, Z],
(
ProcNum = 0,
Code = assign(Z,
assign_binary(int_arith(IT, ao_add), X, Y))
;
ProcNum = 1,
Code = assign(X,
assign_binary(int_arith(IT, ao_sub), Z, Y))
;
ProcNum = 2,
Code = assign(Y,
assign_binary(int_arith(IT, ao_sub), Z, X))
)
;
% Unary plus just copies its input.
Args = [X, Y],
ProcNum = 0,
Code = assign(Y, assign_copy(X))
)
;
PredName = "-",
(
% Binary minus. Mode 0 computes Z as X - Y. Modes 1 and 2 run
% the subtraction backwards: they compute X as Y + Z, and Y as X - Z.
Args = [X, Y, Z],
(
ProcNum = 0,
Code = assign(Z,
assign_binary(int_arith(IT, ao_sub), X, Y))
;
ProcNum = 1,
Code = assign(X,
assign_binary(int_arith(IT, ao_add), Y, Z))
;
ProcNum = 2,
Code = assign(Y,
assign_binary(int_arith(IT, ao_sub), X, Z))
)
;
% Unary minus is implemented as the subtraction of X from
% the zero constant of the right integer type.
Args = [X, Y],
ProcNum = 0,
IntZeroConst = make_int_zero_const(IT),
Code = assign(Y,
assign_binary_lc(int_arith(IT, ao_sub), IntZeroConst, X))
)
;
% Each mode of xor computes one of the three arguments as the xor
% of the other two.
PredName = "xor", Args = [X, Y, Z],
(
ProcNum = 0,
Code = assign(Z, assign_binary(bitwise_xor(IT), X, Y))
;
ProcNum = 1,
Code = assign(Y, assign_binary(bitwise_xor(IT), X, Z))
;
ProcNum = 2,
Code = assign(X, assign_binary(bitwise_xor(IT), Y, Z))
)
;
% The arithmetic operations that have only a forward mode.
( PredName = "plus", ArithOp = ao_add
; PredName = "minus", ArithOp = ao_sub
; PredName = "*", ArithOp = ao_mul
; PredName = "times", ArithOp = ao_mul
; PredName = "unchecked_quotient", ArithOp = ao_div
; PredName = "unchecked_rem", ArithOp = ao_rem
),
ProcNum = 0, Args = [X, Y, Z],
Code = assign(Z, assign_binary(int_arith(IT, ArithOp), X, Y))
;
% The shifts, and bitwise and/or. Despite its name, ArithOp here
% is a complete binary_op, not an arith_op.
( PredName = "unchecked_left_shift",
ArithOp = unchecked_left_shift(IT, shift_by_int)
; PredName = "unchecked_left_ushift",
ArithOp = unchecked_left_shift(IT, shift_by_uint)
; PredName = "unchecked_right_shift",
ArithOp = unchecked_right_shift(IT, shift_by_int)
; PredName = "unchecked_right_ushift",
ArithOp = unchecked_right_shift(IT, shift_by_uint)
; PredName = "/\\", ArithOp = bitwise_and(IT)
; PredName = "\\/", ArithOp = bitwise_or(IT)
),
ProcNum = 0, Args = [X, Y, Z],
Code = assign(Z, assign_binary(ArithOp, X, Y))
;
% Bitwise complement.
PredName = "\\", ProcNum = 0, Args = [X, Y],
Code = assign(Y, assign_unary(bitwise_complement(IT), X))
;
% The ordering comparisons, which are tests.
( PredName = ">", CmpOp = gt
; PredName = "<", CmpOp = lt
; PredName = ">=", CmpOp = ge
; PredName = "=<", CmpOp = le
),
ProcNum = 0, Args = [X, Y],
Code = test(binary_test(int_cmp(IT ,CmpOp), X, Y))
).
:- pred builtin_translation_float(string::in, int::in, list(T)::in,
simple_code(T)::out) is semidet.
builtin_translation_float(PredName, ProcNum, Args, Code) :-
(
PredName = "+",
(
Args = [X, Y],
ProcNum = 0,
Code = assign(Y, assign_copy(X))
;
Args = [X, Y, Z],
ProcNum = 0,
Code = assign(Z, assign_binary(float_arith(ao_add), X, Y))
)
;
PredName = "-",
(
Args = [X, Y],
ProcNum = 0,
Code = assign(Y,
assign_binary_lc(float_arith(ao_sub), float_const(0.0), X))
;
Args = [X, Y, Z],
ProcNum = 0,
Code = assign(Z, assign_binary(float_arith(ao_sub), X, Y))
)
;
( PredName = "*", ArithOp = ao_mul
; PredName = "unchecked_quotient", ArithOp = ao_div
),
ProcNum = 0, Args = [X, Y, Z],
Code = assign(Z, assign_binary(float_arith(ArithOp), X, Y))
;
( PredName = ">", CmpOp = gt
; PredName = "<", CmpOp = lt
; PredName = ">=", CmpOp = ge
; PredName = "=<", CmpOp = le
),
ProcNum = 0, Args = [X, Y],
Code = test(binary_test(float_cmp(CmpOp), X, Y))
).
%-----------------------------------------------------------------------------%
:- func make_int_zero_const(int_type) = simple_const.
make_int_zero_const(int_type_int) = int_const(0).
make_int_zero_const(int_type_int8) = int8_const(0i8).
make_int_zero_const(int_type_int16) = int16_const(0i16).
make_int_zero_const(int_type_int32) = int32_const(0i32).
make_int_zero_const(int_type_int64) = int64_const(0i64).
make_int_zero_const(int_type_uint) = uint_const(0u).
make_int_zero_const(int_type_uint8) = uint8_const(0u8).
make_int_zero_const(int_type_uint16) = uint16_const(0u16).
make_int_zero_const(int_type_uint32) = uint32_const(0u32).
make_int_zero_const(int_type_uint64) = uint64_const(0u64).
%-----------------------------------------------------------------------------%
:- end_module backend_libs.builtin_ops.
%-----------------------------------------------------------------------------%