mercury/compiler/builtin_ops.m

%-----------------------------------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%-----------------------------------------------------------------------------%
% Copyright (C) 1999-2001, 2003-2006, 2009-2011 The University of Melbourne.
% Copyright (C) 2014-2018, 2020-2025 The Mercury team.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%-----------------------------------------------------------------------------%
%
% File: builtin_ops.m -- defines the builtin operator types.
% Main author: fjh.
%
% This module defines various types which enumerate the different builtin
% operators. The different back-ends -- the LLDS and MLDS backends,
% and others when we had them -- all use the same set of builtin operators.
% These operators are defined here.
%
%-----------------------------------------------------------------------------%

:- module backend_libs.builtin_ops.
:- interface.

:- import_module hlds.
:- import_module hlds.hlds_pred.
:- import_module mdbcomp.
:- import_module mdbcomp.sym_name.
:- import_module parse_tree.
:- import_module parse_tree.prog_data.

:- import_module list.

%-----------------------------------------------------------------------------%

:- type unary_op
    % The builtin operators that take exactly one operand. Builtins use
    % them through the assign_unary alternative of simple_assigned_expr.
    --->    tag
    ;       strip_tag
    ;       mkbody
    ;       unmkbody
    ;       bitwise_complement(int_type)
            % The argument gives the integer type of the operand.
            % This is the op used for the "\\" builtin of each of the
            % integer modules.
    ;       logical_not
    ;       hash_string
    ;       hash_string2
    ;       hash_string3
    ;       hash_string4
    ;       hash_string5
    ;       hash_string6
            % NOTE(review): the six hash_string ops are presumably distinct
            % string hash functions; this module does not say how they
            % differ -- confirm against their users.
    ;       dword_float_get_word0
    ;       dword_float_get_word1
    ;       dword_int64_get_word0
    ;       dword_int64_get_word1
    ;       dword_uint64_get_word0
    ;       dword_uint64_get_word1.
            % NOTE(review): the dword_*_get_word{0,1} ops presumably select
            % one of the two words of a double-word float, int64 or uint64,
            % as the inverse of the *_from_dword binary_ops -- confirm.

:- type shift_by_type
    % The type of the shift amount operand of a shift operation:
    % shift_by_int is used for the unchecked_{left,right}_shift builtins,
    % shift_by_uint for the unchecked_{left,right}_ushift builtins.
    --->    shift_by_int
    ;       shift_by_uint.

:- type maybe_size
    % The second argument of the offset_str_eq binary_op: no_size asks for
    % a strcmp-style comparison of everything after the offset, while
    % size(N) asks for a strncmp-style comparison of the next N code units.
    --->    no_size
    ;       size(int).

    % The comparison operators, shared by the int_cmp, str_cmp and
    % float_cmp binary_ops.
    %
    % The int_cmp operations eq and ne are used not just on integers,
    % but also on characters and enumerations.
    % XXX These two uses are not covered by int_type, but for now we use
    % the convention that they should use `int_type_int'.
    % XXX Which is a historical artifact; we *should* be using int_type_uint
    % for them instead.
:- type cmp_op
    --->    eq      % equal
    ;       ne      % not equal
    ;       lt      % less than
    ;       le      % less than or equal
    ;       gt      % greater than
    ;       ge.     % greater than or equal

:- type int_as_uint_cmp_op =< cmp_op
    % The subtype of cmp_op containing just the two comparisons that
    % the int_as_uint_cmp binary_op supports.
    --->    lt
    ;       le.

    % The arithmetic operators. All five can be applied to integers
    % (see int_arith); floats use the float_arith_op subtype instead.
    %
    % NOTE The ao_ prefix on these function symbols is there because
    % "div" and "rem" are operators.
:- type arith_op
    --->    ao_add
    ;       ao_sub
    ;       ao_mul
    ;       ao_div     % Assumed to truncate toward zero.
    ;       ao_rem.    % Remainder with respect to truncating integer division.

    % The remainder operation does not make any sense for floats,
    % so this subtype of arith_op, which is the argument type of
    % the float_arith binary_op, contains every arith_op except ao_rem.
:- type float_arith_op =< arith_op
    --->    ao_add
    ;       ao_sub
    ;       ao_mul
    ;       ao_div.

:- type binary_op
    % The builtin operators that take exactly two operands.
    --->    int_arith(int_type, arith_op)
            % For shifts, the first argument specifies the type of
            % the value being shifted, while the second specifies
            % the type of the shift amount.
    ;       unchecked_left_shift(int_type, shift_by_type)
    ;       unchecked_right_shift(int_type, shift_by_type)
    ;       bitwise_and(int_type)
    ;       bitwise_or(int_type)
    ;       bitwise_xor(int_type)
    ;       logical_and
    ;       logical_or
    ;       int_cmp(int_type, cmp_op)
    ;       body
    ;       array_index(array_elem_type)
            % The element type does not seem to be used. It could probably
            % be deleted, but it seems wise not to delete the code
            % that currently fills in this slot in case some backend ever
            % *does* start needing to know the element type.
    ;       string_unsafe_index_code_unit
    ;       str_cmp(cmp_op)
    ;       str_nzp     % returns negative, zero or positive

    ;       offset_str_eq(int, maybe_size)
            % offset_str_eq(Offset, MaybeSize)
            %
            % This op will do "strcmp(StrA+Offset, StrB+Offset) == 0" or
            % "strncmp(StrA+Offset, StrB+Offset, Size) == 0" depending
            % on whether MaybeSize is "no_size" or "size(Size)".
            %
            % This op is not recognized in user-written code; it is only
            % generated by the compiler when implementing string switches
            % via tries.
            %
            % We use binop(offset_str_eq(Offset, no_size), StrA, StrB) at
            % leaf trie nodes. One string is the string-so-far represented
            % by the trie node, the other is the string being switched on.
            % We use this version of this binary_op to test whether the
            % rest of the switched-on string matches what we expect.
            %
            % We use binop(offset_str_eq(Offset, size(Size)), StrA, StrB)
            % at non-leaf trie nodes where all the alternatives, besides
            % starting with the same Offset code units that identify
            % the trie node, also have the next Size code units in common
            % as well, though they do have differences at the next code unit
            % after *that*.
            %
            % For both our use cases, the initial Offset code units of StrA
            % and StrB will be identical. This means that on backends which
            % cannot start the comparison at a specified code unit offset,
            % we *could* redundantly compare these as well, comparing either
            % "strcmp(StrA, StrB)" or "strncmp(StrA, StrB, Offset+Size)"
            % against zero. However, we don't do that; instead, we simply
            % avoid generating this operation.

    ;       int_as_uint_cmp(int_as_uint_cmp_op)
            % The arguments to these ops are just ordinary (signed)
            % Mercury ints, but the comparison is done *after* casting both
            % arguments to the uint type. This means that e.g. the expression
            % binary(int_as_uint_cmp(le), int_const(1), int_const(-1))
            % returns true, since (MR_Unsigned) 1 <= (MR_Unsigned) -1.

    ;       in_range
            % Tests for "0 =< Index, Index < Range". On Java, that is its
            % implementation, while for C and C#, it is implemented the same
            % as int_as_uint_cmp(lt). The reason for the difference is
            % that in C and C#, the int->uint cast is free, while in Java
            % it requires masking both operands. That makes it unclear
            % which approach to range tests is faster in Java. The only way
            % to decide is to benchmark both approaches, which requires
            % both to be implemented.

    ;       float_arith(float_arith_op)
    ;       float_cmp(cmp_op)
            % Note that we do not have primitive operations in library/float.m
            % for comparing floats for equality and inequality, since the
            % approximate nature of floats makes such operations "iffy".
            % However, it is possible to unify float variables, and it is
            % of course possible to negate such goals, so we need both
            % the eq and ne cmp_ops for floats as well as for other types.
    ;       float_from_dword
    ;       int64_from_dword
    ;       uint64_from_dword

    ;       pointer_equal_conservative.

:- inst int_misc_binary_op for binary_op/0
    % The binary_ops that are either shifts or bitwise and/or/xor
    % operations on integers, with arbitrary ground arguments.
    --->    unchecked_left_shift(ground, ground)
    ;       unchecked_right_shift(ground, ground)
    ;       bitwise_and(ground)
    ;       bitwise_or(ground)
    ;       bitwise_xor(ground).

    % For the MLDS back-end, we need to know the element type for each
    % array_index operation.
    %
    % Currently array index operations are only generated in limited
    % circumstances. Using a simple representation for them here,
    % rather than just putting the MLDS type here, avoids the need
    % for this module to depend on back-end specific stuff like MLDS types.
    %
    % NOTE(review): array_elem_scalar presumably describes arrays whose
    % elements are single scalars, and array_elem_struct arrays whose
    % elements are structures with fields of the listed scalar types --
    % confirm against the code that constructs these values.
:- type array_elem_type
    --->    array_elem_scalar(scalar_array_elem_type)
    ;       array_elem_struct(list(scalar_array_elem_type)).

:- type scalar_array_elem_type
    % The kinds of scalar that an array_elem_type can mention.
    % The comment next to each alternative names an MLDS type;
    % NOTE(review): presumably the MLDS type it is translated to -- confirm.
    --->    scalar_elem_string    % ml_string_type
    ;       scalar_elem_int       % mlds_native_int_type
    ;       scalar_elem_generic.  % mlds_generic_type

:- func negate_cmp_op(cmp_op) = cmp_op.     % e.g. lt -> ge, eq -> ne

:- func dump_arith_op(arith_op) = string.   % e.g. ao_add -> "add"
:- func dump_cmp_op(cmp_op) = string.       % e.g. lt -> "lt"

:- func arith_op_c_operator(arith_op) = string.     % e.g. ao_rem -> "%"
:- func cmp_op_c_operator(cmp_op) = string.         % e.g. le -> "<="

    % test_if_builtin(ModuleName, PredName, PredFormArity):
    %
    % Given the identity of a predicate, or a function, in the form of
    %
    % - the module in which it is defined,
    % - its name, and
    % - its pred form arity, i.e. the number of its arguments including
    %   any function result argument,
    %
    % succeed iff that predicate or function is an inline builtin.
    %
    % Note that we don't have to know whether the entity being asked about
    % is a predicate or a function. This is because all of our inline
    % builtin operations are defined in a few modules of the standard library,
    % and we maintain an invariant in these modules. This states that
    %
    % - given a builtin predicate Module.Name/Arity, either
    %   there is no corresponding function Module.Name/Arity-1,
    %   or there is, but its semantics is exactly the same as the predicate's,
    %   and
    %
    % - given a builtin function Module.Name/Arity, either
    %   there is no corresponding predicate Module.Name/Arity+1,
    %   or there is, but its semantics is exactly the same as the function's.
    %
:- pred test_if_builtin(module_name::in, string::in, int::in) is semidet.

    % translate_builtin(ModuleName, PredName, ProcId, Args, Code):
    %
    % This predicate should be invoked only on predicates and functions
    % for which test_if_builtin has succeeded.
    %
    % In such cases, it returns an abstract representation of the code
    % that can be used to evaluate a call to the predicate or function
    % with the given arguments, which will be either an assignment or a noop
    % (if the builtin is det) or a test (if the builtin is semidet).
    %
    % If there is no translation for the given module name, predicate name,
    % procedure and number of arguments, this predicate does not fail;
    % it reports an "unknown builtin" error by calling unexpected/2.
    %
:- pred translate_builtin(module_name::in, string::in, proc_id::in,
    list(T)::in, simple_code(T)::out) is det.

:- type simple_code(T)
    % The abstract representation of the code of a builtin, as returned by
    % translate_builtin. T is the type of the arguments of the builtin,
    % which is chosen by the caller.
    --->    assign(T, simple_assigned_expr(T))
            % Assign the value of the expression to the first argument.
    ;       ref_assign(T, T)
            % Generated only for private_builtin.store_at_ref_impure.
            % NOTE(review): presumably this stores the second argument
            % at the location referred to by the first -- confirm.
    ;       test(simple_test_expr(T))
            % Succeed iff the test holds. Used for semidet builtins.
    ;       noop(list(T)).
            % No code is needed.
            % NOTE(review): the list appears to contain the output
            % arguments of the builtin, if any (e.g. the I/O state returned
            % by unsafe_get_io_state) -- confirm against the users.

    % The expressions that can appear on the right hand side of
    % an assign in a simple_code.
    %
    % Note that assign_const is not used for any builtins, but it *is* used
    % in call_gen.m to implement casts involving dummy types. (The code that
    % implements casts reuses the machinery for implementing builtins.)
    %
    % Note: _lc means "left arg is a constant".
:- type simple_assigned_expr(T)
    --->    assign_copy(T)
            % The value of the given argument, unchanged.
    ;       assign_const(simple_const)
            % The given constant.
    ;       assign_binary(binary_op, T, T)
            % The binary operator applied to the two arguments, in order.
    ;       assign_binary_lc(binary_op, simple_const, T)
            % The binary operator applied to a constant left operand
            % and the given argument. Used for unary minus, which is
            % translated as "0 - X".
    ;       assign_unary(unary_op, T).
            % The unary operator applied to the given argument.

:- type simple_const
    % The constants that can appear in a simple_assigned_expr, either
    % on their own (assign_const) or as the left operand of a binary
    % operator (assign_binary_lc). There is one alternative for each
    % builtin integer type, and one for floats.
    --->    int_const(int)
    ;       uint_const(uint)
    ;       int8_const(int8)
    ;       uint8_const(uint8)
    ;       int16_const(int16)
    ;       uint16_const(uint16)
    ;       int32_const(int32)
    ;       uint32_const(uint32)
    ;       int64_const(int64)
    ;       uint64_const(uint64)
    ;       float_const(float).

:- type simple_test_expr(T)
    % The tests that can appear in a simple_code. The only kind of test
    % is the application of a binary operator to two arguments, in order.
    --->    binary_test(binary_op, T, T).

%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%

:- implementation.

:- import_module mdbcomp.builtin_modules.

:- import_module require.
:- import_module string.

%-----------------------------------------------------------------------------%

negate_cmp_op(CmpOp) = NegatedCmpOp :-
    % Map each comparison operator to its complement.
    ( CmpOp = eq, NegatedCmpOp = ne
    ; CmpOp = ne, NegatedCmpOp = eq
    ; CmpOp = lt, NegatedCmpOp = ge
    ; CmpOp = le, NegatedCmpOp = gt
    ; CmpOp = gt, NegatedCmpOp = le
    ; CmpOp = ge, NegatedCmpOp = lt
    ).

dump_arith_op(ArithOp) = Str :-
    % The dumped name is the function symbol minus its "ao_" prefix.
    ( ArithOp = ao_add, Str = "add"
    ; ArithOp = ao_sub, Str = "sub"
    ; ArithOp = ao_mul, Str = "mul"
    ; ArithOp = ao_div, Str = "div"
    ; ArithOp = ao_rem, Str = "rem"
    ).

dump_cmp_op(CmpOp) = Str :-
    % The dumped name is the same as the name of the function symbol.
    ( CmpOp = eq, Str = "eq"
    ; CmpOp = ne, Str = "ne"
    ; CmpOp = lt, Str = "lt"
    ; CmpOp = le, Str = "le"
    ; CmpOp = gt, Str = "gt"
    ; CmpOp = ge, Str = "ge"
    ).

arith_op_c_operator(ArithOp) = COperator :-
    % Return the text of the C operator for the given arithmetic op.
    ( ArithOp = ao_add, COperator = "+"
    ; ArithOp = ao_sub, COperator = "-"
    ; ArithOp = ao_mul, COperator = "*"
    ; ArithOp = ao_div, COperator = "/"
    ; ArithOp = ao_rem, COperator = "%"
    ).

cmp_op_c_operator(CmpOp) = COperator :-
    % Return the text of the C operator for the given comparison op.
    ( CmpOp = eq, COperator = "=="
    ; CmpOp = ne, COperator = "!="
    ; CmpOp = lt, COperator = "<"
    ; CmpOp = le, COperator = "<="
    ; CmpOp = gt, COperator = ">"
    ; CmpOp = ge, COperator = ">="
    ).

%-----------------------------------------------------------------------------%

test_if_builtin(FullyQualifiedModule, PredName, Arity) :-
    is_std_lib_module_name(FullyQualifiedModule, ModuleName),
    % We use the translation table itself as the test: a predicate or
    % function is a builtin iff the table has an entry for it. We are
    % interested only in whether the lookup succeeds; we ignore the code
    % it returns.
    %
    % The lookup inspects the *number* of arguments but never their values,
    % so any list of the right length will do. Likewise, every entry in
    % the table includes a translation for procedure number 0, so looking
    % up that procedure succeeds for every builtin.
    list.duplicate(Arity, 0, DummyArgs),
    DummyProcNum = 0,
    builtin_translation(ModuleName, PredName, DummyProcNum, DummyArgs,
        _IgnoredCode).

translate_builtin(FullyQualifiedModule, PredName, ProcId, Args, Code) :-
    ( if
        is_std_lib_module_name(FullyQualifiedModule, ModuleName),
        proc_id_to_int(ProcId, ProcNum),
        builtin_translation(ModuleName, PredName, ProcNum, Args, FoundCode)
    then
        Code = FoundCode
    else
        % The translation table has no entry for this procedure, which
        % means that our caller has not respected our precondition.
        Arity = list.length(Args),
        Msg = string.format("unknown builtin %s/%d",
            [s(PredName), i(Arity)]),
        unexpected($pred, Msg)
    ).

:- pred builtin_translation(string::in, string::in, int::in, list(T)::in,
    simple_code(T)::out) is semidet.
:- pragma inline(pred(builtin_translation/5)).

builtin_translation(ModuleName, PredName, ProcNum, Args, Code) :-
    (
        ModuleName = "builtin",
        PredName = "unsafe_promise_unique", ProcNum = 0, Args = [X, Y],
        Code = assign(Y, assign_copy(X))
    ;
        ModuleName = "io",
        (
            PredName = "unsafe_get_io_state", ProcNum = 0, Args = [X],
            Code = noop([X])
        ;
            PredName = "unsafe_set_io_state", ProcNum = 0, Args = [_X],
            Code = noop([])
        )
    ;
        ModuleName = "private_builtin",
        builtin_translation_private_builtin(PredName, ProcNum, Args, Code)
    ;
        ModuleName = "term_size_prof_builtin",
        PredName = "term_size_plus", ProcNum = 0, Args = [X, Y, Z],
        Code = assign(Z, assign_binary(int_arith(int_type_int, ao_add), X, Y))
    ;
        ( ModuleName = "int",    IT = int_type_int
        ; ModuleName = "int8",   IT = int_type_int8
        ; ModuleName = "int16",  IT = int_type_int16
        ; ModuleName = "int32",  IT = int_type_int32
        ; ModuleName = "int64",  IT = int_type_int64
        ; ModuleName = "uint",   IT = int_type_uint
        ; ModuleName = "uint8",  IT = int_type_uint8
        ; ModuleName = "uint16", IT = int_type_uint16
        ; ModuleName = "uint32", IT = int_type_uint32
        ; ModuleName = "uint64", IT = int_type_uint64
        ),
        builtin_translation_int(IT, PredName, ProcNum, Args, Code)
    ;
        ModuleName = "float",
        builtin_translation_float(PredName, ProcNum, Args, Code)
    ).

:- pred builtin_translation_private_builtin(string::in, int::in, list(T)::in,
    simple_code(T)::out) is semidet.

builtin_translation_private_builtin(PredName, ProcNum, Args, Code) :-
    (
        PredName = "trace_get_io_state", ProcNum = 0, Args = [X],
        Code = noop([X])
    ;
        PredName = "trace_set_io_state", ProcNum = 0, Args = [_X],
        Code = noop([])
    ;
        PredName = "store_at_ref_impure",
        ProcNum = 0, Args = [X, Y],
        Code = ref_assign(X, Y)
    ;
        PredName = "unsafe_type_cast", ProcNum = 0, Args = [X, Y],
        % Note that the code we generate for unsafe_type_cast
        % is not type-correct. Back-ends that require type-correct
        % intermediate code (e.g. the MLDS back-end) must handle
        % unsafe_type_cast separately, rather than by calling
        % builtin_translation.
        Code = assign(Y, assign_copy(X))
    ;
        ( PredName = "builtin_int_gt",    Type = int_type_int,    Cmp = gt
        ; PredName = "builtin_int_lt",    Type = int_type_int,    Cmp = lt
        ; PredName = "builtin_int8_gt",   Type = int_type_int8,   Cmp = gt
        ; PredName = "builtin_int8_lt",   Type = int_type_int8,   Cmp = lt
        ; PredName = "builtin_int16_gt",  Type = int_type_int16,  Cmp = gt
        ; PredName = "builtin_int16_lt",  Type = int_type_int16,  Cmp = lt
        ; PredName = "builtin_int32_gt",  Type = int_type_int32,  Cmp = gt
        ; PredName = "builtin_int32_lt",  Type = int_type_int32,  Cmp = lt
        ; PredName = "builtin_int64_gt",  Type = int_type_int64,  Cmp = gt
        ; PredName = "builtin_int64_lt",  Type = int_type_int64,  Cmp = lt
        ; PredName = "builtin_uint_gt",   Type = int_type_uint,   Cmp = gt
        ; PredName = "builtin_uint_lt",   Type = int_type_uint,   Cmp = lt
        ; PredName = "builtin_uint8_gt",  Type = int_type_uint8,  Cmp = gt
        ; PredName = "builtin_uint8_lt",  Type = int_type_uint8,  Cmp = lt
        ; PredName = "builtin_uint16_gt", Type = int_type_uint16, Cmp = gt
        ; PredName = "builtin_uint16_lt", Type = int_type_uint16, Cmp = lt
        ; PredName = "builtin_uint32_gt", Type = int_type_uint32, Cmp = gt
        ; PredName = "builtin_uint32_lt", Type = int_type_uint32, Cmp = lt
        ; PredName = "builtin_uint64_gt", Type = int_type_uint64, Cmp = gt
        ; PredName = "builtin_uint64_lt", Type = int_type_uint64, Cmp = lt
        ),
        CmpOp = int_cmp(Type, Cmp),
        ProcNum = 0, Args = [X, Y],
        Code = test(binary_test(CmpOp, X, Y))
    ;
        ( PredName = "unsigned_lt",       CmpOp = int_as_uint_cmp(lt)
        ; PredName = "unsigned_le",       CmpOp = int_as_uint_cmp(le)
        ; PredName = "in_range",          CmpOp = in_range
        ),
        ProcNum = 0, Args = [X, Y],
        Code = test(binary_test(CmpOp, X, Y))
    ;
        PredName = "pointer_equal", ProcNum = 0,
        % The arity of this predicate is two during parsing,
        % and three after the polymorphism pass.
        ( Args = [X, Y]
        ; Args = [_TypeInfo, X, Y]
        ),
        Code = test(binary_test(pointer_equal_conservative, X, Y))
    ;
        PredName = "partial_inst_copy", ProcNum = 0, Args = [X, Y],
        Code = assign(Y, assign_copy(X))
    ).

:- pred builtin_translation_int(int_type::in, string::in,
    int::in, list(T)::in, simple_code(T)::out) is semidet.

builtin_translation_int(IT, PredName, ProcNum, Args, Code) :-
    (
        PredName = "+",
        (
            Args = [X, Y, Z],
            (
                ProcNum = 0,
                Code = assign(Z,
                    assign_binary(int_arith(IT, ao_add), X, Y))
            ;
                ProcNum = 1,
                Code = assign(X,
                    assign_binary(int_arith(IT, ao_sub), Z, Y))
            ;
                ProcNum = 2,
                Code = assign(Y,
                    assign_binary(int_arith(IT, ao_sub), Z, X))
            )
        ;
            Args = [X, Y],
            ProcNum = 0,
            Code = assign(Y, assign_copy(X))
        )
    ;
        PredName = "-",
        (
            Args = [X, Y, Z],
            (
                ProcNum = 0,
                Code = assign(Z,
                    assign_binary(int_arith(IT, ao_sub), X, Y))
            ;
                ProcNum = 1,
                Code = assign(X,
                    assign_binary(int_arith(IT, ao_add), Y, Z))
            ;
                ProcNum = 2,
                Code = assign(Y,
                    assign_binary(int_arith(IT, ao_sub), X, Z))
            )
        ;
            Args = [X, Y],
            ProcNum = 0,
            IntZeroConst = make_int_zero_const(IT),
            Code = assign(Y,
                assign_binary_lc(int_arith(IT, ao_sub), IntZeroConst, X))
        )
    ;
        PredName = "xor", Args = [X, Y, Z],
        (
            ProcNum = 0,
            Code = assign(Z, assign_binary(bitwise_xor(IT), X, Y))
        ;
            ProcNum = 1,
            Code = assign(Y, assign_binary(bitwise_xor(IT), X, Z))
        ;
            ProcNum = 2,
            Code = assign(X, assign_binary(bitwise_xor(IT), Y, Z))
        )
    ;
        ( PredName = "plus",                ArithOp = ao_add
        ; PredName = "minus",               ArithOp = ao_sub
        ; PredName = "*",                   ArithOp = ao_mul
        ; PredName = "times",               ArithOp = ao_mul
        ; PredName = "unchecked_quotient",  ArithOp = ao_div
        ; PredName = "unchecked_rem",       ArithOp = ao_rem
        ),
        ProcNum = 0, Args = [X, Y, Z],
        Code = assign(Z, assign_binary(int_arith(IT, ArithOp), X, Y))
    ;
        ( PredName = "unchecked_left_shift",
            ArithOp = unchecked_left_shift(IT, shift_by_int)
        ; PredName = "unchecked_left_ushift",
            ArithOp = unchecked_left_shift(IT, shift_by_uint)
        ; PredName = "unchecked_right_shift",
            ArithOp = unchecked_right_shift(IT, shift_by_int)
        ; PredName = "unchecked_right_ushift",
            ArithOp = unchecked_right_shift(IT, shift_by_uint)
        ; PredName = "/\\", ArithOp = bitwise_and(IT)
        ; PredName = "\\/", ArithOp = bitwise_or(IT)
        ),
        ProcNum = 0, Args = [X, Y, Z],
        Code = assign(Z, assign_binary(ArithOp, X, Y))
    ;
        PredName = "\\", ProcNum = 0, Args = [X, Y],
        Code = assign(Y, assign_unary(bitwise_complement(IT), X))
    ;
        ( PredName = ">",  CmpOp = gt
        ; PredName = "<",  CmpOp = lt
        ; PredName = ">=", CmpOp = ge
        ; PredName = "=<", CmpOp = le
        ),
        ProcNum = 0, Args = [X, Y],
        Code = test(binary_test(int_cmp(IT ,CmpOp), X, Y))
    ).

:- pred builtin_translation_float(string::in, int::in, list(T)::in,
    simple_code(T)::out) is semidet.

builtin_translation_float(PredName, ProcNum, Args, Code) :-
    (
        PredName = "+",
        (
            Args = [X, Y],
            ProcNum = 0,
            Code = assign(Y, assign_copy(X))
        ;
            Args = [X, Y, Z],
            ProcNum = 0,
            Code = assign(Z, assign_binary(float_arith(ao_add), X, Y))
        )
    ;
        PredName = "-",
        (
            Args = [X, Y],
            ProcNum = 0,
            Code = assign(Y,
                assign_binary_lc(float_arith(ao_sub), float_const(0.0), X))
        ;
            Args = [X, Y, Z],
            ProcNum = 0,
            Code = assign(Z, assign_binary(float_arith(ao_sub), X, Y))
        )
    ;
        ( PredName = "*",                   ArithOp = ao_mul
        ; PredName = "unchecked_quotient",  ArithOp = ao_div
        ),
        ProcNum = 0, Args = [X, Y, Z],
        Code = assign(Z, assign_binary(float_arith(ArithOp), X, Y))
    ;
        ( PredName = ">",  CmpOp = gt
        ; PredName = "<",  CmpOp = lt
        ; PredName = ">=", CmpOp = ge
        ; PredName = "=<", CmpOp = le
        ),
        ProcNum = 0, Args = [X, Y],
        Code = test(binary_test(float_cmp(CmpOp), X, Y))
    ).

%-----------------------------------------------------------------------------%

:- func make_int_zero_const(int_type) = simple_const.

make_int_zero_const(int_type_int)    = int_const(0).
make_int_zero_const(int_type_int8)   = int8_const(0i8).
make_int_zero_const(int_type_int16)  = int16_const(0i16).
make_int_zero_const(int_type_int32)  = int32_const(0i32).
make_int_zero_const(int_type_int64)  = int64_const(0i64).
make_int_zero_const(int_type_uint)   = uint_const(0u).
make_int_zero_const(int_type_uint8)  = uint8_const(0u8).
make_int_zero_const(int_type_uint16) = uint16_const(0u16).
make_int_zero_const(int_type_uint32) = uint32_const(0u32).
make_int_zero_const(int_type_uint64) = uint64_const(0u64).

%-----------------------------------------------------------------------------%
:- end_module backend_libs.builtin_ops.
%-----------------------------------------------------------------------------%