Fix deconstructing direct args.

The crash that this diff fixes occurred when giving a command such as
"print Var^1" to mdb, where the first argument of Var is a direct arg.

runtime/mercury_ml_expand_body.h:
    When deconstructing a term with a direct arg, return NULL
    as the value of expand_info->chosen_arg_word_sized_ptr.
    The crash occurred when we returned a non-null pointer,
    which violated the expectations of trace/mercury_trace_vars.c
    and its callers. (Not surprising, since those that function and
    its callers were written long before the direct_arg optimization
    was added to the system.)

runtime/mercury_deconstruct.h:
    Document the rationale behind the above changes. (The contents of
    mercury_ml_expand_body.h are #included in mercury_deconstruct.c.)

trace/mercury_trace_vars.c:
    Add the debugging code I used to track down this issue, in disabled form.

    Fix missing copyright year.

trace/mercury_trace_browse.c:
    Delete obsolete comment.

    Fix missing copyright years.

tests/debugger/direct_arg_test.{m,inp,exp}:
    A test case for this bug.

tests/debugger/Mmakefile:
    Enable the new test case.

compiler/hlds_out_type_table.m:
    When dumping out the data constructors in the type table,
    if a constructor has names for some of its fields,
    put the name and the type of each field on different lines.
    In the original test case for this bug, of which direct_arg_test.m
    is an extreme simplification, pretty much every line overflows
    without this.

    Also, factor out some duplicated code, and replace bools with values
    of a bespoke type.
This commit is contained in:
Zoltan Somogyi
2026-03-13 14:49:56 +11:00
parent a19bb893e4
commit 953ea7667f
9 changed files with 234 additions and 77 deletions

View File

@@ -1,7 +1,7 @@
%---------------------------------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%---------------------------------------------------------------------------%
% Copyright (C) 2021-2025 The Mercury team.
% Copyright (C) 2021-2026 The Mercury team.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%---------------------------------------------------------------------------%
@@ -494,8 +494,9 @@ format_ctor(TVarSet, Ctor, !State) :-
Args = [HeadArg | TailArgs],
string.builder.format("%s%s(\n", [s(BracePrefix), s(NameStr)], !State),
AnyFieldName = does_any_arg_have_a_field_name(Args),
BaseASIndent1Str = indent2_string(BaseIndent + ASIndent + 1u),
format_ctor_args(TVarSet, BaseASIndent1Str, AnyFieldName,
decide_arg_name_type_indents(AnyFieldName,
ArgTypeIndentStr, AnyFieldNameIndent),
format_ctor_args(TVarSet, ArgTypeIndentStr, AnyFieldNameIndent,
HeadArg, TailArgs, !State),
string.builder.format("%s)%s\n",
[s(BaseASIndentStr), s(BraceSuffix)], !State)
@@ -538,12 +539,13 @@ format_ctor_repn(TVarSet, CtorRepn, !State) :-
!State)
;
ArgRepns = [HeadArgRepn | TailArgRepns],
BaseASIndent1Str = indent2_string(BaseIndent + ASIndent + 1u),
string.builder.format("%s%s(\n%s",
[s(BracePrefix), s(NameStr), s(ConsTagString)], !State),
AnyFieldName = does_any_arg_repn_have_a_field_name(ArgRepns),
format_ctor_arg_repns(TVarSet, BaseASIndent1Str,
AnyFieldName, 1, HeadArgRepn, TailArgRepns, !State),
decide_arg_name_type_indents(AnyFieldName,
ArgTypeIndentStr, AnyFieldNameIndent),
format_ctor_arg_repns(TVarSet, ArgTypeIndentStr, AnyFieldNameIndent,
1, HeadArgRepn, TailArgRepns, !State),
string.builder.format("%s)%s\n",
[s(BaseASIndentStr), s(BraceSuffix)], !State)
),
@@ -551,27 +553,15 @@ format_ctor_repn(TVarSet, CtorRepn, !State) :-
%---------------------%
:- pred format_ctor_args(tvarset::in, string::in, bool::in,
:- pred format_ctor_args(tvarset::in, string::in, maybe_field_name_indent::in,
constructor_arg::in, list(constructor_arg)::in,
string.builder.state::di, string.builder.state::uo) is det.
format_ctor_args(TVarSet, IndentStr, AnyFieldName,
format_ctor_args(TVarSet, ArgTypeIndentStr, AnyFieldName,
HeadArg, TailArgs, !State) :-
HeadArg = ctor_arg(MaybeFieldName, Type, _Context),
string.builder.append_string(IndentStr, !State),
(
AnyFieldName = no
;
AnyFieldName = yes,
(
MaybeFieldName = no,
string.builder.format("%24s", [s("")], !State)
;
MaybeFieldName = yes(ctor_field_name(FieldName, _Ctxt)),
string.builder.format("%-20s :: ",
[s(unqualify_name(FieldName))], !State)
)
),
format_any_ctor_arg_field_name(AnyFieldName, MaybeFieldName, !State),
string.builder.append_string(ArgTypeIndentStr, !State),
mercury_format_type(TVarSet, print_name_only, Type,
string.builder.handle, !State),
(
@@ -580,70 +570,113 @@ format_ctor_args(TVarSet, IndentStr, AnyFieldName,
;
TailArgs = [HeadTailArg | TailTailArgs],
string.builder.append_string(",\n", !State),
format_ctor_args(TVarSet, IndentStr, AnyFieldName,
format_ctor_args(TVarSet, ArgTypeIndentStr, AnyFieldName,
HeadTailArg, TailTailArgs, !State)
).
:- pred format_ctor_arg_repns(tvarset::in, string::in, bool::in,
int::in, constructor_arg_repn::in, list(constructor_arg_repn)::in,
:- pred format_ctor_arg_repns(tvarset::in, string::in,
maybe_field_name_indent::in, int::in,
constructor_arg_repn::in, list(constructor_arg_repn)::in,
string.builder.state::di, string.builder.state::uo) is det.
format_ctor_arg_repns(TVarSet, IndentStr, AnyFieldName,
format_ctor_arg_repns(TVarSet, ArgTypeIndentStr, AnyFieldName,
CurArgNum, HeadArgRepn, TailArgRepns, !State) :-
HeadArgRepn = ctor_arg_repn(MaybeFieldName, _MaybeBaseCtorArg, Type,
ArgPosWidth, _Context),
string.builder.append_string(IndentStr, !State),
(
AnyFieldName = no
;
AnyFieldName = yes,
(
MaybeFieldName = no,
string.builder.format("%24s", [s("")], !State)
;
MaybeFieldName = yes(ctor_field_name(FieldName, _Ctxt)),
string.builder.format("%-20s :: ",
[s(unqualify_name(FieldName))], !State)
)
),
format_any_ctor_arg_field_name(AnyFieldName, MaybeFieldName, !State),
string.builder.append_string(ArgTypeIndentStr, !State),
mercury_format_type(TVarSet, print_name_only, Type,
string.builder.handle, !State),
(
TailArgRepns = [],
string.builder.append_string("\n", !State),
format_arg_pos_width(IndentStr, CurArgNum, ArgPosWidth, !State)
format_arg_pos_width(ArgTypeIndentStr, CurArgNum, ArgPosWidth, !State)
;
TailArgRepns = [HeadTailArgRepn | TailTailArgRepns],
string.builder.append_string(",\n", !State),
format_arg_pos_width(IndentStr, CurArgNum, ArgPosWidth, !State),
format_ctor_arg_repns(TVarSet, IndentStr, AnyFieldName,
format_arg_pos_width(ArgTypeIndentStr, CurArgNum, ArgPosWidth, !State),
format_ctor_arg_repns(TVarSet, ArgTypeIndentStr, AnyFieldName,
CurArgNum + 1, HeadTailArgRepn, TailTailArgRepns, !State)
).
%---------------------%
:- func does_any_arg_have_a_field_name(list(constructor_arg)) = bool.
:- type maybe_field_name_indent
---> no_field_names
; field_name_indent(string).
does_any_arg_have_a_field_name([]) = no.
:- pred decide_arg_name_type_indents(arg_field_names::in,
string::out, maybe_field_name_indent::out) is det.
decide_arg_name_type_indents(AnyFieldName,
ArgTypeIndentStr, AnyFieldNameIndent) :-
% The width of ArrowOrSemi is eight spaces, which is the same as
% four indents. This comes after the original one indent.
BaseIndent = 1u,
ASIndent = 4u,
(
AnyFieldName = no_arg_has_field_name,
AnyFieldNameIndent = no_field_names,
ArgTypeIndentStr = indent2_string(BaseIndent + ASIndent + 1u)
;
AnyFieldName = some_arg_has_field_name,
ArgFieldNameIndentStr = indent2_string(BaseIndent + ASIndent + 1u),
AnyFieldNameIndent = field_name_indent(ArgFieldNameIndentStr),
ArgTypeIndentStr = indent2_string(BaseIndent + ASIndent + 2u)
).
%---------------------%
:- pred format_any_ctor_arg_field_name(maybe_field_name_indent::in,
maybe(ctor_field_name)::in,
string.builder.state::di, string.builder.state::uo) is det.
format_any_ctor_arg_field_name(AnyFieldName, MaybeFieldName, !State) :-
(
AnyFieldName = no_field_names
;
AnyFieldName = field_name_indent(ArgFieldNameIndentStr),
string.builder.append_string(ArgFieldNameIndentStr, !State),
(
MaybeFieldName = no,
string.builder.append_string("<unnamed field>\n", !State)
;
MaybeFieldName = yes(ctor_field_name(FieldSymName, _Ctxt)),
string.builder.format("%s ::\n",
[s(unqualify_name(FieldSymName))], !State)
)
).
%---------------------%
:- type arg_field_names
---> no_arg_has_field_name
; some_arg_has_field_name.
:- func does_any_arg_have_a_field_name(list(constructor_arg))
= arg_field_names.
does_any_arg_have_a_field_name([]) = no_arg_has_field_name.
does_any_arg_have_a_field_name([Arg | Args]) = SomeArgHasFieldName :-
Arg = ctor_arg(MaybeFieldName, _, _),
(
MaybeFieldName = yes(_),
SomeArgHasFieldName = yes
SomeArgHasFieldName = some_arg_has_field_name
;
MaybeFieldName = no,
SomeArgHasFieldName = does_any_arg_have_a_field_name(Args)
).
:- func does_any_arg_repn_have_a_field_name(list(constructor_arg_repn)) = bool.
:- func does_any_arg_repn_have_a_field_name(list(constructor_arg_repn))
= arg_field_names.
does_any_arg_repn_have_a_field_name([]) = no.
does_any_arg_repn_have_a_field_name([]) = no_arg_has_field_name.
does_any_arg_repn_have_a_field_name([ArgRepn | ArgRepns])
= SomeArgHasFieldName :-
ArgRepn = ctor_arg_repn(MaybeFieldName, _, _, _, _),
(
MaybeFieldName = yes(_),
SomeArgHasFieldName = yes
SomeArgHasFieldName = some_arg_has_field_name
;
MaybeFieldName = no,
SomeArgHasFieldName = does_any_arg_repn_have_a_field_name(ArgRepns)

View File

@@ -1,7 +1,7 @@
// vim: ts=4 sw=4 expandtab ft=c
// Copyright (C) 2002, 2005, 2007, 2011 The University of Melbourne.
// Copyright (C) 2015-2016, 2018 The Mercury team.
// Copyright (C) 2015-2016, 2018, 2026 The Mercury team.
// This file is distributed under the terms specified in COPYING.LIB.
// mercury_deconstruct.h
@@ -116,9 +116,15 @@ extern void MR_expand_named_arg_only(MR_TypeInfo type_info,
// If the given term has an argument at the specified position, MR_arg returns
// MR_TRUE, and fills in *arg_type_info_ptr and *arg_term_ptr with the
// type_info and value of that argument at the selected position. It also
// fills in *word_sized_arg_ptr, with the address of the argument
// if the argument's size is exactly one word, or with NULL if the size
// is anything else (double word, subword, or nothing for dummies).
// fills in *word_sized_arg_ptr. It sets *word_sized_arg_ptr
//
// - to the address of the argument if the argument's size in the argument
// vector is exactly one word, or
//
// - to NULL, if the size is anything else (double word, subword, or nothing
// for dummies), or if the argument is not IN the argument vector
// (as with direct_args, where the argument is in what would usually be
// the *pointer* to the argument vector).
//
// If the given term does not have an argument at the specified position,
// MR_arg fails, i.e. it returns MR_FALSE.

View File

@@ -1,7 +1,7 @@
// vim: ts=4 sw=4 expandtab ft=c
// Copyright (C) 2001-2007, 2012 The University of Melbourne.
// Copyright (C) 2013, 2015-2018, 2021, 2024 The Mercury team.
// Copyright (C) 2013, 2015-2018, 2021, 2024, 2026 The Mercury team.
// This file is distributed under the terms specified in COPYING.LIB.
// mercury_ml_expand_body.h
@@ -17,14 +17,14 @@
// a million typeinfos can cause a system to start paging.) Therefore we try to
// make sure that in every circumstance we perform the minimum work possible.
//
// The code including this file must define these macros:
// The code that includes this file *must* define these macros:
//
// EXPAND_FUNCTION_NAME Gives the name of the function being defined.
//
// EXPAND_TYPE_NAME Gives the name of the type of the expand_info
// argument.
//
// The code including this file may define these macros:
// The code including this file *may* define these macros:
//
// EXPAND_FUNCTOR_FIELD If defined, gives the name of the field in the
// expand_info structure that contains the name of the
@@ -210,7 +210,7 @@
#ifdef EXPAND_ONE_ARG
#define handle_zero_arity_one_arg() \
do { \
expand_info->chosen_index_exists = MR_FALSE; \
expand_info->chosen_index_exists = MR_FALSE; \
} while (0)
#else // EXPAND_ONE_ARG
#define handle_zero_arity_one_arg() \
@@ -258,7 +258,7 @@
// If we are implementing the limited arity version of deconstruct
// and the current term is above the limit arity, say so and return.
// We rely on the default initialization of the limit_reached field
// to MR_FALSE If we are below the limit.
// to MR_FALSE if we are below the limit.
#ifdef EXPAND_APPLY_LIMIT
#define maybe_set_limit_reached_and_return(ei, max) \
@@ -368,13 +368,13 @@
#define notag_arg_get_chosen(ei, arg_ti_expr, dw_ptr, chosen) \
do { \
if (chosen == 0) { \
(ei)->chosen_index_exists = MR_TRUE; \
(ei)->chosen_index_exists = MR_TRUE; \
\
(ei)->chosen_arg_type_info = (arg_ti_expr); \
(ei)->chosen_arg_term = (dw_ptr)[0]; \
(ei)->chosen_arg_word_sized_ptr = (dw_ptr); \
(ei)->chosen_arg_type_info = (arg_ti_expr); \
(ei)->chosen_arg_term = (dw_ptr)[0]; \
(ei)->chosen_arg_word_sized_ptr = (dw_ptr); \
} else { \
(ei)->chosen_index_exists = MR_FALSE; \
(ei)->chosen_index_exists = MR_FALSE; \
} \
} while (0)
@@ -445,14 +445,14 @@
#define same_type_args_get_chosen(ei, arg_ti, arg_vector, chosen) \
do { \
if (0 <= chosen && chosen < (ei)->arity) { \
(ei)->chosen_index_exists = MR_TRUE; \
(ei)->chosen_index_exists = MR_TRUE; \
\
(ei)->chosen_arg_type_info = (arg_ti); \
(ei)->chosen_arg_term = (arg_vector)[chosen]; \
(ei)->chosen_arg_word_sized_ptr = \
(ei)->chosen_arg_type_info = (arg_ti); \
(ei)->chosen_arg_term = (arg_vector)[chosen]; \
(ei)->chosen_arg_word_sized_ptr = \
&((arg_vector)[chosen]); \
} else { \
(ei)->chosen_index_exists = MR_FALSE; \
(ei)->chosen_index_exists = MR_FALSE; \
} \
} while (0)
@@ -638,7 +638,7 @@ EXPAND_FUNCTION_NAME(MR_TypeInfo type_info, MR_Word *data_word_ptr,
MR_Word *ti_arg_vector;
MR_Word *ao_arg_vector;
MR_Word *word_size_arg_ptr;
MR_Word direct_arg;
MR_Word direct_arg = 0;
int arg_num;
data = *data_word_ptr;
@@ -665,6 +665,11 @@ EXPAND_FUNCTION_NAME(MR_TypeInfo type_info, MR_Word *data_word_ptr,
functor_desc);
assert_no_exist_info(functor_desc, "MR_SECTAG_NONE_DIRECT_ARG");
direct_arg = MR_body(data, ptag);
#if 0
fprintf(stderr, "DIRECT_ARG data %lx, ptag %d,", data, ptag);
fprintf(stderr, " direct_arg %lx, direct_arg_ptr %p\n",
direct_arg, &direct_arg);
#endif
// The word containing the direct arg in effect forms an argument
// vector with just one element.
ti_arg_vector = &direct_arg;
@@ -682,9 +687,9 @@ EXPAND_FUNCTION_NAME(MR_TypeInfo type_info, MR_Word *data_word_ptr,
return;
case MR_SECTAG_LOCAL_BITS:
sectag = MR_unmkbody(data) &
// XXX ARG_PACK
// Consider storing this mask in the ptag_layout.
// Consider storing the mask in the ptag_layout.
sectag = MR_unmkbody(data) &
((1 << ptag_layout->MR_sectag_numbits) - 1);
MR_index_or_search_sectag_functor(ptag_layout, sectag,
functor_desc);
@@ -857,6 +862,13 @@ EXPAND_FUNCTION_NAME(MR_TypeInfo type_info, MR_Word *data_word_ptr,
} else {
expand_info->chosen_index_exists = MR_FALSE;
}
if (direct_arg != 0) {
// In the case of direct args, the argument is not in the
// argument vector, and therefore may not be on the heap.
// Some of our callers care about that.
expand_info->chosen_arg_word_sized_ptr = NULL;
}
#endif // EXPAND_ONE_ARG
return;
}

View File

@@ -34,6 +34,7 @@ NONRETRY_PROGS = \
cond \
debugger_regs \
dice \
direct_arg_test \
double_print \
exception_cmd \
exception_value \
@@ -328,6 +329,10 @@ deeply_nested_typeinfo.out: deeply_nested_typeinfo deeply_nested_typeinfo.inp
$(MDB_STD) ./deeply_nested_typeinfo < deeply_nested_typeinfo.inp \
> deeply_nested_typeinfo.out 2>&1
direct_arg_test.out: direct_arg_test direct_arg_test.inp
$(MDB_STD) ./direct_arg_test < direct_arg_test.inp \
> direct_arg_test.out 2>&1
double_print.out: double_print double_print.inp
$(MDB_STD) ./double_print < double_print.inp > double_print.out 2>&1

View File

@@ -0,0 +1,18 @@
E1: C1 CALL pred direct_arg_test.main/2-0 (det) direct_arg_test.m:48
mdb> echo on
Command echo enabled.
mdb> context none
Contexts will not be printed.
mdb> register --quiet
mdb> break get_maybe_t
0: + stop interface pred direct_arg_test.get_maybe_t/1-0 (det)
mdb> continue
E2: C2 CALL pred direct_arg_test.get_maybe_t/1-0 (det)
mdb> finish
E3: C2 EXIT pred direct_arg_test.get_maybe_t/1-0 (det)
mdb> print MaybeT
MaybeT (arg 1) yes_t(t(1, 2, 43))
mdb> print MaybeT^1
MaybeT (arg 1)^1 t(1, 2, 43)
mdb> continue
yes_t(t(1, 2, 43))

View File

@@ -0,0 +1,10 @@
echo on
context none
register --quiet
break get_maybe_t
continue
finish
print MaybeT
print MaybeT^1
continue

View File

@@ -0,0 +1,56 @@
%---------------------------------------------------------------------------%
% vim: ts=4 sw=4 et ft=mercury
%---------------------------------------------------------------------------%
%
% This program tests whether the debugger can handle printing
% the value of a direct_arg argument.
%
% This is a regression test. With versions of Mercury before 2026 mar 13,
% executing this program with mdb
%
% - could handle stopping at the EXIT port of get_maybe_t
% and printing "MaybeT",
%
% - but could NOT handle printing "MaybeT^1".
%
% The symptom was a core dump. The cause was that
%
% - the argument of the yes_t function is a direct_arg, which is means that
% this argument is not in an argument vector in the heap, yet
%
% - MR_arg in runtime/mercury_ml_expand_body.h gave MR_select_specified_subterm
% in trace/mercury_trace_vars.c a non-NULL value of word_sized_arg_ptr,
% which MR_select_specified_subterm interpreted to mean that it should be
% treated as a word-sized arg in a heap argument vector.
%
%---------------------------------------------------------------------------%
:- module direct_arg_test.
:- interface.
:- import_module io.
:- pred main(io::di, io::uo) is det.
%---------------------------------------------------------------------------%
:- implementation.
%---------------------------------------------------------------------------%
:- type maybe_t
---> no_t
; yes_t(direct_arg :: t).
:- type t
---> t(int, int, int).
%---------------------------------------------------------------------------%
main(!IO) :-
get_maybe_t(MaybeT),
io.write_line(MaybeT, !IO).
:- pred get_maybe_t(maybe_t::out) is det.
get_maybe_t(MaybeT) :-
T = t(1, 2, 43),
MaybeT = yes_t(T).

View File

@@ -1,7 +1,7 @@
// vim: ts=4 sw=4 expandtab ft=c
// Copyright (C) 1998-2006 The University of Melbourne.
// Copyright (C) 2017-2018 The Mercury team.
// Copyright (C) 2017-2018, 2020-2023, 2026 The Mercury team.
// This file is distributed under the terms specified in COPYING.LIB.
// mercury_trace_browse.c
@@ -13,9 +13,6 @@
// Some header files refer to files automatically generated by the Mercury
// compiler for modules in the browser and library directories.
//
// XXX Figure out how to prevent these names from encroaching on the user's
// name space.
#include "mercury_imp.h"
#include "mercury_deep_copy.h"

View File

@@ -1,7 +1,7 @@
// vim: ts=4 sw=4 expandtab ft=c
// Copyright (C) 1999-2011 The University of Melbourne.
// Copyright (C) 2014, 2016, 2018, 2025 The Mercury team.
// Copyright (C) 2014, 2016, 2018, 2023, 2025-2026 The Mercury team.
// This file is distributed under the terms specified in COPYING.LIB.
// This file contains the code for managing information about the
@@ -1859,6 +1859,10 @@ MR_select_specified_subterm(char *path, MR_TypeInfo type_info, MR_Word *value,
return NULL;
}
#if 0
fprintf(stderr, "ti %p, val %lx, path %s\n", type_info, *value, path);
#endif
while (*path != '\0') {
old_path = path;
@@ -1902,6 +1906,10 @@ MR_select_specified_subterm(char *path, MR_TypeInfo type_info, MR_Word *value,
{
type_info = arg_type_info;
if (word_sized_arg_ptr == NULL) {
#if 0
fprintf(stderr, "arg_ti %p, arg_val %lx, wsap NULL\n",
arg_type_info, arg_value);
#endif
// XXX This is *very* strange code. However, this issue
// was not brought up during review in 2011, from Jun 27
// to Jul 5.
@@ -1911,15 +1919,27 @@ MR_select_specified_subterm(char *path, MR_TypeInfo type_info, MR_Word *value,
((MR_Word *) storage)[0] = arg_value;
value = (MR_Word *) storage;
} else {
#if 0
fprintf(stderr, "arg_ti %p, arg_val %lx, wsap * %lx, wsap %p\n",
arg_type_info, arg_value,
*word_sized_arg_ptr, word_sized_arg_ptr);
#endif
value = word_sized_arg_ptr;
}
#if 0
fprintf(stderr, "new_ti %p, new_val %lx\n", type_info, *value);
#endif
} else {
return old_path;
}
}
*sub_value = value;
*sub_type_info = type_info;
*sub_value = value;
#if 0
fprintf(stderr, "sub_ti %p, sub_val %lx\n", type_info, *value);
#endif
return NULL;
}