Files
mercury/util/mdemangle.c
Zoltan Somogyi af70cb4f84 Use . as the module qualifier.
We stopped using : as the module qualifier decades ago.
2023-03-19 00:26:21 +11:00

1180 lines
35 KiB
C

//---------------------------------------------------------------------------//
// vim: ft=c ts=4 sw=4 et
//---------------------------------------------------------------------------//
//
// Copyright (C) 1995-2006, 2008 The University of Melbourne.
// This file may only be copied under the terms of the GNU General
// Public License - see the file COPYING in the Mercury distribution.
//
// File: mdemangle.c
// Author: fjh
//
// A mercury symbol demangler.
// This is used to convert error messages from the linker back
// into a form that users can understand.
//
// This is implemented in C to minimize startup time and memory usage.
//
// BEWARE:
// This code is duplicated in profiler/demangle.m and profiler/mdemangle.m.
// Any changes here will need to be duplicated there and vice versa.
//
//---------------------------------------------------------------------------//
// mercury_std.h includes mercury_regs.h, and must precede system headers.
#include "mercury_std.h"
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
// We used this for the size of fixed-length buffers in a few places <sigh>.
#define MAX_SYMBOL_LENGTH 1000
// Forward declarations of the file-local functions defined after main().
// demangle() is the entry point called by main(); the remaining functions
// are the string-manipulation helpers that demangle() is built from.
// All of the helpers operate in place on a modifiable copy of the symbol.
static void demangle(const char *name);
static const char *strip_module_name(char **start_ptr, char *end,
const char *special_prefixes[],
const char *special_suffixes[]);
static MR_bool check_for_suffix(char *start, char *position,
const char *suffix, int sizeof_suffix, int *mode_num2);
static char *fix_mangled_ascii(char *str, char **end);
static MR_bool fix_mangled_special_case(char *str, char **end);
static MR_bool find_double_underscore(char **str, char *end);
static MR_bool cut_trailing_integer(char *str, char **end, int *num);
static MR_bool cut_trailing_underscore_integer(char *str,
char **end, int *num);
static MR_bool strip_prefix(char **str, const char *prefix);
static MR_bool strip_suffix(const char *str, char **end,
const char *suffix);
static MR_bool strip_leading_integer(char **start_ptr, int *num);
// SunOS 4.x doesn't have memmove(), so on platforms where the configuration
// does not define MR_HAVE_MEMMOVE we fall back to memcpy().
// The callers in this file copy between overlapping regions of the same
// buffer, which memcpy() does not guarantee to handle, so the fallback
// may not work; but it doesn't really matter if the demangler
// doesn't work 100% correctly on SunOS 4.x.
#ifndef MR_HAVE_MEMMOVE
#define memmove memcpy
#endif
// This option indicates whether we should output verbose
// explanations of linker error messages.
// It is set by main() when `-e' or `--explain-link-errors' is given.
MR_bool explain_link_errors = MR_FALSE;
// This variable gets set (by demangle()) the first time a symbol starting
// with `MR_grade_', or the symbol `MR_runtime_grade', is seen. It then holds
// a malloc'd copy of that symbol, which main() frees after printing
// the message below. It stays NULL if no such symbol was seen
// (or if the allocation failed).
char *found_grade_symbol = NULL;
// printf() format string for the explanation printed by main() when
// explain_link_errors is set and a grade symbol was found.
// It takes exactly one `%s' argument: the grade symbol.
const char probably_grade_error[] =
"Mercury Linker:\n"
"\tNote: the symbol `%s' was mentioned.\n"
"\tAny link errors are most likely due to linking together object\n"
"\tfiles compiled with different compilation model options.\n"
"\tTry doing `mmake clean' and then rebuilding.\n";
// Program entry point.
//
// Usage: mdemangle [-e | --explain-link-errors] [--] [symbol ...]
//
// If any symbols are given on the command line, each one is demangled
// and printed on its own line. Otherwise standard input is copied to
// standard output, with every maximal run of identifier characters
// (letters, digits and underscores) passed through demangle().
//
// Exits with status 1 on an unrecognised option, and returns 0 otherwise.
int
main(int argc, char **argv)
{
    const char  *progname = argv[0];
    int         next_arg = 1;

    // We should use getopt_long(), but for one option, that is overkill.
    while (next_arg < argc && argv[next_arg][0] == '-') {
        const char  *option = argv[next_arg];

        if (strcmp(option, "--") == 0) {
            // Explicit end of options: everything after it is a symbol.
            next_arg++;
            break;
        }
        if (strcmp(option, "-e") != 0 &&
            strcmp(option, "--explain-link-errors") != 0)
        {
            fprintf(stderr, "%s: unknown option `%s'\n", progname, option);
            exit(1);
        }
        explain_link_errors = MR_TRUE;
        next_arg++;
    }

    if (next_arg < argc) {
        // Demangle each remaining command line argument, one per line.
        for (; next_arg < argc; next_arg++) {
            demangle(argv[next_arg]);
            putchar('\n');
        }
    } else {
        // Filter mode: copy stdin to stdout, demangling every run of
        // identifier characters and echoing every other character as is.
        for (;;) {
            char    word[MAX_SYMBOL_LENGTH];
            size_t  used = 0;
            int     ch = getchar();

            // Collect identifier characters until the run ends or the
            // buffer (less one byte for the terminator) is full.
            while (ch != EOF && (isalnum(ch) || ch == '_') &&
                used < sizeof(word) - 1)
            {
                word[used++] = (char) ch;
                ch = getchar();
            }

            if (used > 0) {
                word[used] = '\0';
                demangle(word);
                fflush(stdout);
            }

            if (ch == EOF) {
                break;
            }
            // ch is the character that ended the run; echo it unchanged.
            putchar(ch);
        }
    }

    if (explain_link_errors && found_grade_symbol) {
        printf(probably_grade_error, found_grade_symbol);
        free(found_grade_symbol);
    }

    return 0;
}
// demangle():
//  Convert a mangled Mercury identifier into human-readable form
//  and then print it to stdout.
//
// The symbol is copied into a local buffer and taken apart in place.
// Three formats are tried in turn:
//
//  - procedure symbols (the code up to the `not_plain_mercury' label);
//  - data symbols: shared constants, type_ctor_* structures and
//    base_typeclass_infos (the code after `not_plain_mercury');
//  - anything else (`wrong_format'), which is printed as is, except that
//    a leading `f_' mangling is undone by fix_mangled_ascii().
//
// Symbols that do not fit in the local buffer are printed unchanged.
// As a side effect, the first `MR_grade_*' or `MR_runtime_grade' symbol
// seen is recorded in found_grade_symbol.
static void
demangle(const char *orig_name)
{
    static const char entry[]   = "_entry_";
    static const char mercury[] = "mercury__";
    static const char func_prefix[] = "fn__"; // added for functions
    static const char unify1[] = "__Unify___";
    static const char unify2[] = "__Unify____";
    static const char compare1[] = "__Compare___";
    static const char compare2[] = "__Compare____";
    static const char index1[] = "__Index___";
    static const char index2[] = "__Index____";

    static const char introduced[] = "IntroducedFrom__";
    static const char deforestation[] = "DeforestationIn__";
    static const char accumulator[] = "AccFrom__";
    static const char type_spec[] = "TypeSpecOf__";
    static const char unused_arg[] = "UnusedArgs__";
    static const char pred[] = "pred__";
    static const char func[] = "func__";
    static const char porf[] = "pred_or_func__";

    // XXX This is out-of-date. The compiler now generates names
    // such as UnusedArgs__p__[1].
    static const char ua_suffix[] = "__ua"; // added by unused_args.m
    static const char ua_suffix2[] = "__uab"; // added by unused_args.m
    static const char ho_suffix[] = "__ho"; // added by higher_order.m

    static const char mercury_common[] = "mercury_common_";
    static const char mercury_data[] = "mercury_data_";
    static const char type_ctor_layout[] = "type_ctor_layout_";
    static const char type_ctor_info[] = "type_ctor_info_";
    static const char type_ctor_functors[] = "type_ctor_functors_";
    static const char base_typeclass_info[] = "base_typeclass_info_";
    static const char underscores_base_typeclass_info[] =
        "__base_typeclass_info_";
    static const char common[] = "common";
    static const char arity_string[] = "arity";

    static const char MR_grade[] = "MR_grade_";
    static const char MR_runtime_grade[] = "MR_runtime_grade";

    // Each trailing context lists the prefixes at which
    // strip_module_name() must stop munching module qualifiers.
    static const char *trailing_context_1[] = {
        introduced,
        deforestation,
        accumulator,
        type_spec,
        unused_arg,
        unify1, compare1, index1,
        NULL
    };

    static const char *trailing_context_1_hl_suffixes[] = {
        ua_suffix,
        ua_suffix2,
        ho_suffix,
        NULL
    };

    static const char *trailing_context_2[] = {
        type_ctor_layout,
        type_ctor_info,
        type_ctor_functors,
        common,
        NULL
    };

    static const char *trailing_context_3[] = {
        arity_string,
        NULL
    };

    char name[MAX_SYMBOL_LENGTH];
    char *start = name;
    const char *module = "";    // module name
    char *end;                  // end of the part of name still in use
    char *position;             // current position in string
    int mode_num;
    int mode_num2;
    int arity;
    MR_bool high_level = MR_TRUE;
    MR_bool matched = MR_FALSE;
    const char *pred_or_func;   // either "predicate" or "function"
    MR_bool unused_args = MR_FALSE;
                                // does this proc have any unused arguments
    MR_bool unused_args_extra = MR_FALSE;
                                // __uab suffix rather than __ua
    int unused_args_num = 0;
    MR_bool higher_order = MR_FALSE; // has this proc been specialized?
    int higher_order_num = 0;
    int internal = -1;
    char *name_before_prefixes = NULL;
    int lambda_line = 0;
    int lambda_seq_number = 0;
    char *lambda_pred_name = NULL;
    char *end_of_lambda_pred_name = NULL;
    const char *lambda_kind = NULL;
    enum { ORDINARY, UNIFY, COMPARE, INDEX,
        LAMBDA, DEFORESTATION, ACCUMULATOR, TYPE_SPEC } category;
    enum { COMMON, INFO, LAYOUT, FUNCTORS } data_category;
    const char *class_name;
    int class_arity;
    char class_arg_buf[MAX_SYMBOL_LENGTH];
    int class_arg_num;
    const char *class_arg;
    const char *type_spec_sub;

    // Copy orig_name to a local buffer which we can modify,
    // making sure that we don't overflow the buffer.
    // `end' is computed only after the length check, so that we never
    // form a pointer beyond the end of the buffer.
    if (strlen(orig_name) >= sizeof(name)) {
        goto too_long;
    }
    strcpy(name, orig_name);
    end = name + strlen(name);

    // Skip any leading underscore inserted by the C compiler
    // (but don't skip it if it came from the `_entry_' prefix).
    if (*start == '_' && strncmp(start, entry, strlen(entry)) != 0) {
        start++;
    }

    // Check for `MR_grade_*' and `MR_runtime_grade'.
    if (strncmp(start, MR_grade, strlen(MR_grade)) == 0 ||
        strcmp(start, MR_runtime_grade) == 0)
    {
        if (found_grade_symbol == NULL) {
            found_grade_symbol = malloc(strlen(start) + 1);
            if (found_grade_symbol != NULL) {
                strcpy(found_grade_symbol, start);
            }
        }
        goto wrong_format;
    }

    // Skip the `_entry_' prefix, if any.
    strip_prefix(&start, entry);

    // Strip off the `mercury__' prefix, if any.
    if (strip_prefix(&start, mercury)) {
        matched = MR_TRUE;
    }

    // Code for dealing with predicate symbols.

    // Get integer from end of string (it might be the mode number,
    // it might be the internal label number). We'll assume it is the mode
    // number for the moment.
    if (!cut_trailing_integer(start, &end, &mode_num)) {
        goto not_plain_mercury;
    }

    if (end == start) {
        goto not_plain_mercury;
    }

    // If we got to an `i', that means it is an internal label of the form
    // `mercury__append_3_0_i1'. In that case, save the internal label number
    // and then get the mode number.
    if (*--end == 'i') {
        internal = mode_num;
        if (end == start || *--end != '_') {
            goto not_plain_mercury;
        }
        if (!cut_trailing_underscore_integer(start, &end, &mode_num)) {
            goto not_plain_mercury;
        }
    }

    if (end == start) {
        goto not_plain_mercury;
    }

    // Strip off the `fn__' prefix, if any.
    if (strip_prefix(&start, func_prefix)) {
        high_level = MR_FALSE;
        pred_or_func = "function";
    } else if (strip_suffix(start, &end, "_f")) {
        high_level = MR_TRUE;
        matched = MR_TRUE;
        pred_or_func = "function";
    } else if (strip_suffix(start, &end, "_p")) {
        high_level = MR_TRUE;
        matched = MR_TRUE;
        pred_or_func = "predicate";
    } else {
        // It is not a function. But it could be either an LLDS predicate,
        // or an MLDS compiler-generated predicate.
        high_level = (strstr(start, unify2) ||
            strstr(start, compare2) ||
            strstr(start, index2));
        pred_or_func = "predicate";
    }

    if (end == start) {
        goto not_plain_mercury;
    }

    // Scan back past the arity number and then parse it.
    if (!cut_trailing_underscore_integer(start, &end, &arity)) {
        goto not_plain_mercury;
    }

    if (high_level) {
        module = strip_module_name(&start, end,
            trailing_context_1, trailing_context_1_hl_suffixes);
    }

    // Now start processing from the start of the string again. Check whether
    // the start of the string matches the name of one of the special
    // compiler-generated predicates; if so, set the `category' to the
    // appropriate value and then skip past the prefix.
    if (strip_prefix(&start, unify1)) {
        category = UNIFY;
    } else if (strip_prefix(&start, compare1)) {
        category = COMPARE;
        if (mode_num != 0) goto not_plain_mercury;
    } else if (strip_prefix(&start, index1)) {
        category = INDEX;
        if (mode_num != 0) goto not_plain_mercury;
    } else {
        category = ORDINARY;
        // For ordinary predicates, we should have matched against something
        // by now --
        // either the "mercury__" prefix, for LLDS mangling,
        // or the "_f" or "_p" suffix, for MLDS mangling.
        if (!matched) {
            goto not_plain_mercury;
        }
    }

    // The MLDS forms of the special prefixes (unify2 etc.) have one more
    // underscore than the forms stripped above; skip it if present.
    if (category != ORDINARY && start[0] == '_') {
        start++;
    }

    // Fix any ascii codes mangled in the predicate name.
    start = fix_mangled_ascii(start, &end);

    // Process the mangling introduced by unused_args.m.
    // This involves stripping off the `__ua<m>' or `__uab<m>' added to
    // the end of the predicate/function name, where m is the mode number.

    position = end; // save end of name

    do {
        if (position == start) {
            goto wrong_format;
        }
        position--;
    } while (MR_isdigit(*position));
        // Get the mode number.

    if (check_for_suffix(start, position, ua_suffix,
        sizeof(ua_suffix), &mode_num2))
    {
        unused_args = MR_TRUE;
        unused_args_extra = MR_FALSE;
        unused_args_num = mode_num;
        end = position + 1 - (sizeof(ua_suffix) - 1);
        mode_num = mode_num2 % 10000;
    } else if (check_for_suffix(start, position, ua_suffix2,
        sizeof(ua_suffix2), &mode_num2))
    {
        unused_args = MR_TRUE;
        unused_args_extra = MR_TRUE;
        unused_args_num = mode_num;
        end = position + 1 - (sizeof(ua_suffix2) - 1);
        mode_num = mode_num2 % 10000;
    }

    // Process the mangling introduced by higher_order.m.
    // This involves stripping off the `__ho<n>' where
    // n is a unique identifier for this specialized version

    position = end;

    do {
        if (position == start) {
            goto wrong_format;
        }
        position--;
    } while (MR_isdigit(*position));

    if (check_for_suffix(start, position, ho_suffix,
        sizeof(ho_suffix), &higher_order_num))
    {
        end = position + 1 - (sizeof(ho_suffix) - 1);
        higher_order = MR_TRUE;
    }

    // Cut off the string before the start of the arity number,
    // and the unused_args and specialization information,
    // i.e. at the end of the predicate name or type name.
    *end = '\0';

    // Make sure special predicates with unused_args
    // are reported correctly.
    if (unused_args && category != ORDINARY) {
        if (!cut_trailing_integer(start, &end, &arity)) {
            goto wrong_format;
        }
    }

    if (!high_level) {
        module = strip_module_name(&start, end, trailing_context_1, NULL);
    }

    // Look for "IntroducedFrom" or "DeforestationIn" or "AccFrom"
    // or "TypeSpecOf".
    // XXX This doesn't yet handle multiple prefixes. If we get an error after
    // this point, just treat predicate name as an ordinary predicate.
    name_before_prefixes = start;
    if (category == ORDINARY) {
        if (strip_prefix(&start, introduced)) {
            category = LAMBDA;
        } else if (strip_prefix(&start, deforestation)) {
            category = DEFORESTATION;
        } else if (strip_prefix(&start, accumulator)) {
            category = ACCUMULATOR;
        } else if (strip_prefix(&start, type_spec)) {
            category = TYPE_SPEC;
        }
    }

    if (category == LAMBDA || category == DEFORESTATION ||
        category == ACCUMULATOR || category == TYPE_SPEC)
    {
        if (strip_prefix(&start, pred)) {
            lambda_kind = "pred";
        } else if (strip_prefix(&start, func)) {
            lambda_kind = "func";
        } else if (category == TYPE_SPEC && strip_prefix(&start, porf)) {
            lambda_kind = "";
        } else {
            goto wrong_format;
        }

        lambda_pred_name = start;
        if (!find_double_underscore(&start, end)) {
            category = ORDINARY;
            start = name_before_prefixes;
        } else {
            end_of_lambda_pred_name = start;
            start += 2;
        }

        if (category == TYPE_SPEC) {
            if (start < end && *start == '[') {
                int nest_level;

                nest_level = 1;
                type_spec_sub = start;
                start++;
                // Handle matched brackets in type names.
                while (start < end) {
                    if (*start == '[') {
                        nest_level++;
                    }
                    if (*start == ']') {
                        nest_level--;
                    }
                    if (nest_level == 0) {
                        *(start + 1) = '\0';
                        break;
                    }
                    start++;
                }
                if (nest_level != 0) {
                    category = ORDINARY;
                    start = name_before_prefixes;
                } else {
                    // The compiler adds a redundant mode number to the
                    // predicate name to avoid creating two predicates
                    // with the same name (deep profiling doesn't like that).
                    // It isn't used here, so we just ignore it. The compiler
                    // also adds a version number for the argument order used
                    // for specialized versions, which can also be ignored.
                    *end_of_lambda_pred_name = '\0';
                    start = lambda_pred_name;
                }
            } else {
                category = ORDINARY;
                start = name_before_prefixes;
            }
        } else if (category != ORDINARY) {
            lambda_line = 0;

            if (start >= end || !MR_isdigit(*start)) {
                category = ORDINARY;
                start = name_before_prefixes;
            }

            while (start < end && MR_isdigit(*start)) {
                lambda_line = lambda_line * 10 + (*start - '0');
                start++;
            }
            if (strip_prefix(&start, "__")) {
                if (start < end && MR_isdigit(*start)) {
                    lambda_seq_number = 0;
                    while (start < end && MR_isdigit(*start)) {
                        lambda_seq_number =
                            lambda_seq_number * 10 + (*start - '0');
                        start++;
                    }
                    *end_of_lambda_pred_name = '\0';
                } else {
                    category = ORDINARY;
                    start = name_before_prefixes;
                }
            } else {
                category = ORDINARY;
                start = name_before_prefixes;
            }
        }
    }

    // Now, finally, we can print the demangled symbol name.
    printf("<");
    switch(category) {
        case UNIFY:
            printf("unification predicate for type '%s.%s'/%d mode %d",
                module, start, arity, mode_num);
            break;
        case COMPARE:
            printf("compare/3 predicate for type '%s.%s'/%d",
                module, start, arity);
            break;
        case INDEX:
            printf("index/2 predicate for type '%s.%s'/%d",
                module, start, arity);
            break;
        case LAMBDA:
            printf("%s goal (#%d) from '%s' in module '%s' line %d",
                lambda_kind, lambda_seq_number,
                lambda_pred_name, module, lambda_line);
            break;
        case ACCUMULATOR:
            printf("accumulator procedure from '%s' in module '%s' line %d",
                lambda_pred_name, module, lambda_line);
            break;
        case DEFORESTATION:
            printf("deforestation procedure (#%d) from '%s' "
                "in module '%s' line %d",
                lambda_seq_number, lambda_pred_name,
                module, lambda_line);
            break;
        case TYPE_SPEC:
        default:
            if (*module == '\0') {
                printf("%s '%s'/%d mode %d",
                    pred_or_func, start, arity, mode_num);
            } else {
                printf("%s '%s.%s'/%d mode %d",
                    pred_or_func, module, start, arity, mode_num);
            }
    }

    if (category == TYPE_SPEC) {
        printf(" (type specialized %s)", type_spec_sub);
    }
    if (higher_order) {
        printf(" (specialized [#%d])", higher_order_num);
    }
    if (unused_args) {
        if (unused_args_extra) {
            printf(" (minus extra unused args [#%d])", unused_args_num);
        } else {
            printf(" (minus unused args [#%d])", unused_args_num);
        }
    }
    if (internal != -1) {
        printf(" label %d", internal);
    }
    printf(">");
    return;

    // Code to deal with mercury_data items.

not_plain_mercury:
    // Undo any in-place modifications done while trying to demangle
    // predicate names.
    strcpy(name, orig_name);
    start = name;
    end = name + strlen(name);

    // Skip any leading underscore inserted by the C compiler.
    if (*start == '_') {
        start++;
    }

    if (strip_prefix(&start, mercury_common)) {
        if (!strip_leading_integer(&start, &arity)) {
            goto wrong_format;
        }
        printf("<shared constant number %d>", arity);
        return;
    }

    if (strip_prefix(&start, mercury_data)) {
        // LLDS
        high_level = MR_FALSE;
        if (strip_prefix(&start, base_typeclass_info)) {
            goto typeclass_info;
        }
        // Also try the old format, in case we're demangling old files.
        if (strip_prefix(&start, underscores_base_typeclass_info)) {
            goto typeclass_info;
        }
    } else {
        // MLDS
        high_level = MR_TRUE;
        if (strip_prefix(&start, base_typeclass_info)) {
            goto typeclass_info;
        }
        strip_prefix(&start, mercury);
    }

    module = strip_module_name(&start, end, trailing_context_2, NULL);
    if (high_level && *module != '\0') {
        // For MLDS, the module name gets duplicated (XXX why?)
        // So here we must replace `foo.foo' with just `foo'.
        //
        // An empty module name is left alone: there is nothing to
        // de-duplicate, and stepping over it would move `module' past
        // the end of the empty string.
        size_t half_len;

        half_len = strlen(module) / 2;
        if (strncmp(module, module + half_len + 1, half_len) != 0) {
            goto wrong_format;
        }
        module += half_len + 1;
    }

    if (strip_prefix(&start, type_ctor_info)) {
        data_category = INFO;
        if (!cut_trailing_underscore_integer(start, &end, &arity)) {
            goto wrong_format;
        }
    } else if (strip_prefix(&start, type_ctor_layout)) {
        data_category = LAYOUT;
        if (!cut_trailing_underscore_integer(start, &end, &arity)) {
            goto wrong_format;
        }
    } else if (strip_prefix(&start, type_ctor_functors)) {
        data_category = FUNCTORS;
        if (!cut_trailing_underscore_integer(start, &end, &arity)) {
            goto wrong_format;
        }
    } else if (strip_prefix(&start, common)) {
        data_category = COMMON;
        if (!cut_trailing_underscore_integer(start, &end, &arity)) {
            goto wrong_format;
        }
    } else {
        goto wrong_format;
    }

    start = fix_mangled_ascii(start, &end);

    switch (data_category) {
        case INFO:
            if (*module == '\0') {
                printf("<type_ctor_info for type '%s'/%d>",
                    start, arity);
            } else {
                printf("<type_ctor_info for type '%s.%s'/%d>",
                    module, start, arity);
            }
            break;
        case LAYOUT:
            if (*module == '\0') {
                printf("<type_ctor_layout for type '%s'/%d>",
                    start, arity);
            } else {
                printf("<type_ctor_layout for type '%s.%s'/%d>",
                    module, start, arity);
            }
            break;
        case FUNCTORS:
            if (*module == '\0') {
                printf("<type_ctor_functors for type '%s'/%d>",
                    start, arity);
            } else {
                printf("<type_ctor_functors for type '%s.%s'/%d>",
                    module, start, arity);
            }
            break;
        case COMMON:
            printf("<shared constant number %d for module %s>",
                arity, module);
            break;

        default:
            goto wrong_format;
    }
    return;

typeclass_info:
    // Parse the class name and class arity, which have the following layout:
    //      <module-qualified class name>__arity<arity>__

    class_name = strip_module_name(&start, end, trailing_context_3, NULL);
    // XXX fix_mangled_ascii()
    if (!(strip_prefix(&start, arity_string)
        && strip_leading_integer(&start, &class_arity)
        && strip_prefix(&start, "__")))
    {
        goto wrong_format;
    }

    // Parse the class argument types, which each have the following layout:
    //      <module-qualified type name>__arity<arity>__
    //
    // We store the human-readable formatted output in class_arg_buf as we go.

    // NOTE(review): the return value is ignored here, so if the text
    // starts with an `f__' escape, `start' is not advanced past it.
    fix_mangled_ascii(start, &end);
    class_arg_buf[0] = '\0';
    for (class_arg_num = 0; class_arg_num < class_arity; class_arg_num++) {
        size_t  used;
        int     written;

        class_arg = strip_module_name(&start, end, trailing_context_3, NULL);
        if (!(strip_prefix(&start, arity_string)
            && strip_leading_integer(&start, &arity)
            && strip_prefix(&start, "__")))
        {
            goto wrong_format;
        }

        // Append ", " (except before the first argument) and then
        // `<type>/<arity>', refusing to write beyond the buffer.
        used = strlen(class_arg_buf);
        written = snprintf(class_arg_buf + used, sizeof(class_arg_buf) - used,
            "%s%s/%d", (class_arg_num != 0) ? ", " : "", class_arg, arity);
        if (written < 0 ||
            (size_t) written >= sizeof(class_arg_buf) - used)
        {
            goto wrong_format;
        }
    }

    // Now print the results.
    printf("<instance declaration for %s(%s)>", class_name, class_arg_buf);
    return;

wrong_format:
    // Start again from a pristine copy of the symbol, and just undo
    // any `f_' mangling at its start.
    strcpy(name, orig_name);
    start = name;
    end = name + strlen(name);
    start = fix_mangled_ascii(start, &end);
    // Print from the pointer returned by fix_mangled_ascii(): for the
    // `f__' escape it points one character past the start of `name'.
    fputs(start, stdout);
    return;

too_long:
    fputs(orig_name, stdout);
    return;
} // end demangle()
// Remove a module name prefix.
// Just keep munching up double-underscores until we get to something
// that matches the specified trailing context, at which point we stop,
// or until there are no double-underscores left.
//
// Each `__'-terminated component is consumed in turn, and the components
// are joined with `.' to form the module name, which is built in place at
// the original position of *start_ptr. Munching stops, without consuming
// the current component, when the text at the current position starts with
// one of special_prefixes, or when the next `__' is the start of one of
// special_suffixes.
//
// start_ptr:        in: the start of the text; out: the first character
//                   after the last consumed `__'.
// end:              unused; the text is treated as NUL-terminated.
// special_prefixes: NULL-terminated array of prefixes; must not be NULL.
// special_suffixes: NULL-terminated array of suffixes, or NULL for none.
//
// Returns the module name, or "" if no component was consumed.
// Empty components at the very start of the text (i.e. a leading `__')
// are dropped rather than included in the module name.
static const char *
strip_module_name(char **start_ptr, char *end,
    const char *special_prefixes[], const char *special_suffixes[])
{
    char    *module;        // start of the module name being built
    char    *module_end;    // end of the module name built so far
    char    *next_double_underscore;
    char    *start;

    (void) end;

    start = *start_ptr;
    module = start;
    module_end = start;

    while ((next_double_underscore = strstr(start, "__")) != NULL) {
        size_t  len;
        int     i;

        // Check for special cases: in each loop, leaving early means
        // that entry i matched, which ends the module name.
        for (i = 0; special_prefixes[i] != NULL; i++) {
            if (strncmp(start, special_prefixes[i],
                strlen(special_prefixes[i])) == 0)
            {
                break;
            }
        }
        if (special_prefixes[i] != NULL) {
            break;
        }

        if (special_suffixes != NULL) {
            for (i = 0; special_suffixes[i] != NULL; i++) {
                if (strncmp(next_double_underscore, special_suffixes[i],
                    strlen(special_suffixes[i])) == 0)
                {
                    break;
                }
            }
            if (special_suffixes[i] != NULL) {
                break;
            }
        }

        len = (size_t) (next_double_underscore - start);
        if (module_end != module) {
            // Not the first component: append a module qualifier.
            *module_end = '.';
            module_end++;
        }
        if (module_end != start) {
            // Shift the component into the right place. This is needed
            // for every component after the first, and also for the first
            // one if it was preceded by empty components.
            memmove(module_end, start, len);
        }
        module_end += len;

        start = next_double_underscore + 2;
    }

    *start_ptr = start;
    if (module_end == module) {
        return "";
    }
    *module_end = '\0';
    return module;
}
// Skip over `prefix' at the start of *str, if it is there.
//
// If the text at *str begins with `prefix', advance *str to the first
// character after it and return MR_TRUE. Otherwise leave *str alone
// and return MR_FALSE. The text itself is never modified.
static MR_bool
strip_prefix(char **str, const char *prefix)
{
    const size_t    prefix_len = strlen(prefix);

    if (strncmp(*str, prefix, prefix_len) != 0) {
        return MR_FALSE;
    }

    *str += prefix_len;
    return MR_TRUE;
}
// Remove the suffix from a string, if it has it.
// Returns MR_TRUE if the string between start and *end has the specified
// suffix, and sets *end to point to the beginning of the suffix.
// Otherwise returns MR_FALSE and leaves *end unchanged.
// The text itself is never modified.
static MR_bool
strip_suffix(const char *start, char **end, const char *suffix)
{
    size_t  suffix_len;

    suffix_len = strlen(suffix);

    // Check the available length with matching (unsigned) types, and
    // reject an inverted range outright: a negative pointer difference
    // compared against a size_t would otherwise look like a huge length.
    if (*end < start || (size_t) (*end - start) < suffix_len) {
        return MR_FALSE;
    }

    if (strncmp(*end - suffix_len, suffix, suffix_len) != 0) {
        return MR_FALSE;
    }

    *end -= suffix_len;
    return MR_TRUE;
}
// If the string pointed to by *start_ptr starts with a (non-negative,
// decimal) integer that fits in an int, then advance *start_ptr past the
// leading integer, store the value of the integer in the int pointed to
// by `num', and return true; otherwise leave *start_ptr and *num unchanged
// and return false.
// (The string itself is always left unchanged.)
//
// The digits are accumulated by hand rather than with sscanf("%d"),
// whose behaviour is undefined when the value does not fit in an int.
static MR_bool
strip_leading_integer(char **start_ptr, int *num)
{
    char    *cur;
    int     value;

    value = 0;
    for (cur = *start_ptr; MR_isdigit(*cur); cur++) {
        int digit;

        digit = *cur - '0';
        if (value > (INT_MAX - digit) / 10) {
            // value * 10 + digit would overflow.
            return MR_FALSE;
        }
        value = value * 10 + digit;
    }

    if (cur == *start_ptr) {
        // There were no digits at all.
        return MR_FALSE;
    }

    *num = value;
    *start_ptr = cur;
    return MR_TRUE;
}
// Remove trailing integer (at the supplied `real_end' of the string),
// and return it in the int pointed to by `num'. We return true if there is
// an integer at the end, and false if there is not (or if its value does
// not fit in an int). If we return false, the string will not be cut, and
// neither *real_end nor *num will be changed. If we return true, the string
// is terminated at the first digit of the integer, and `real_end' is updated
// to point there.
//
// Requires *str to contain more than just a number; doesn't work
// if the trailing integer starts at the first character of str.
//
// The digits are accumulated by hand rather than with sscanf("%d"),
// whose behaviour is undefined when the value does not fit in an int.
static MR_bool
cut_trailing_integer(char *str, char **real_end, int *num)
{
    char    *end;
    char    *digits;
    char    *cur;
    int     value;

    // Scan backwards to the last non-digit character before *real_end.
    end = *real_end;
    do {
        if (end == str) {
            return MR_FALSE;
        }
        end--;
    } while (MR_isdigit(*end));

    digits = end + 1;
    if (digits == *real_end) {
        // There are no digits immediately before *real_end.
        return MR_FALSE;
    }

    // Everything in [digits, *real_end) is a digit.
    value = 0;
    for (cur = digits; cur < *real_end; cur++) {
        int digit;

        digit = *cur - '0';
        if (value > (INT_MAX - digit) / 10) {
            // value * 10 + digit would overflow.
            return MR_FALSE;
        }
        value = value * 10 + digit;
    }

    *num = value;
    *digits = '\0';
    *real_end = digits;
    return MR_TRUE;
}
// Like cut_trailing_integer(), but the integer must be preceded by
// an underscore, which is cut off as well.
//
// On success, returns MR_TRUE with the integer stored in *num, the string
// terminated at the underscore, and *real_end pointing at that terminator.
// On failure, returns MR_FALSE with *real_end unchanged. Note that if the
// integer was found but the underscore was not, then cut_trailing_integer()
// has already cut the integer off the string and stored it in *num.
static MR_bool
cut_trailing_underscore_integer(char *str, char **real_end, int *num)
{
    char    *cut = *real_end;

    if (cut_trailing_integer(str, &cut, num) &&
        cut != str && cut[-1] == '_')
    {
        cut--;
        *cut = '\0';
        *real_end = cut;
        return MR_TRUE;
    }

    return MR_FALSE;
}
// Scan the text between *start and `end' for `__'.
// If it is found, set *start to point to the first `_' and return MR_TRUE.
// Otherwise leave *start unchanged and return MR_FALSE.
//
// Both underscores must lie before `end': the character at `end' itself
// is never examined, so a match cannot straddle the end of the text.
static MR_bool
find_double_underscore(char **start, char *end)
{
    char    *str;

    // Stop as soon as fewer than two characters remain before `end'.
    // This also covers the case where *start is already at or past `end'.
    for (str = *start; end - str >= 2; str++) {
        if (str[0] == '_' && str[1] == '_') {
            *start = str;
            return MR_TRUE;
        }
    }

    return MR_FALSE;
}
// Undo the compiler's `f_' mangling at the start of a name.
//
// The compiler changes all names starting with `f_' so that they start with
// `f__' instead, and uses names starting with `f_' for mangled names
// which are either descriptions (such as `f_greater_than' for `>')
// or sequences of decimal representations of ASCII codes separated by
// underscores. So:
//
//  - a name not starting with `f_' is returned untouched;
//  - a name starting with `f__' is changed back to start with `f_';
//    the result starts one character after `str', and *real_end
//    is left unchanged;
//  - a name starting with `f_' followed by one of the mnemonics known to
//    fix_mangled_special_case() has the mnemonic replaced by its symbol;
//  - any other name starting with `f_' has its list of ASCII codes
//    converted back into characters. Decoding stops at the first code
//    that is not followed by `_' or the end of the string, and whatever
//    follows is copied unchanged.
//
// In the last two cases the name is rewritten in place at `str',
// and *real_end is updated to point to its new end.
//
// Returns a pointer to the start of the unmangled name.
static char *
fix_mangled_ascii(char *str, char **real_end)
{
    char    decoded[MAX_SYMBOL_LENGTH];
    char    *limit = *real_end;     // the end of the name on entry
    char    *cursor;
    int     used = 0;

    if (strncmp(str, "f_", 2) != 0) {
        // Not mangled at all.
        return str;
    }

    if (str[2] == '_') {
        // `f__...' stands for a name that really starts with `f_'.
        str[1] = 'f';
        return str + 1;
    }

    if (fix_mangled_special_case(str, real_end)) {
        return str;
    }

    // Decode each `<digits>_' group into one character.
    cursor = str + 2;
    while (cursor < limit) {
        char    *code_end = cursor;

        while (MR_isdigit(*code_end)) {
            code_end++;
        }
        if (*code_end != '_' && *code_end != '\0') {
            break;
        }
        // Terminate the digits so that atoi() sees just this code.
        *code_end = '\0';
        decoded[used++] = atoi(cursor);
        cursor = code_end + 1;
    }

    // Copy anything after the mangled string.
    for (; cursor < limit; cursor++) {
        decoded[used++] = *cursor;
    }
    decoded[used] = '\0';

    strcpy(str, decoded);
    *real_end = str + used;
    return str;
}
// If `str' starts with one of the mnemonic manglings that the compiler
// uses for operator-like names (e.g. `f_greater_than' for `>'), replace
// the mnemonic in place with the name it stands for, move the rest of
// the string down to follow it, set *real_end to the new end of the
// string, and return MR_TRUE. Otherwise leave everything unchanged
// and return MR_FALSE.
static MR_bool
fix_mangled_special_case(char *str, char **real_end)
{
    static const struct {
        const char  *mangled_name;
        const char  *unmangled_name;
    } translations[] = {
        // Beware: we assume that the unmangled name is always shorter
        // than the mangled name.
        { "f_not_equal", "\\=" },
        { "f_greater_or_equal", ">=" },
        { "f_less_or_equal", "=<" },
        { "f_equal", "=" },
        { "f_less_than", "<" },
        { "f_greater_than", ">" },
        { "f_plus", "+" },
        { "f_times", "*" },
        { "f_minus", "-" },
        { "f_slash", "/" },
        { "f_comma", "," },
        { "f_semicolon", ";" },
        { "f_cut", "!" },
        { "f_tuple", "{}" },
        { "f_cons", "[|]" },
        { "f_nil", "[]" }
    };
    const size_t    table_size =
                        sizeof(translations) / sizeof(translations[0]);
    size_t          entry;

    // The first table entry whose mangled name is a prefix of `str' wins.
    for (entry = 0; entry < table_size; entry++) {
        const char  *from = translations[entry].mangled_name;
        const char  *to = translations[entry].unmangled_name;
        size_t      from_len = strlen(from);
        size_t      to_len;
        size_t      tail_len;

        if (strncmp(str, from, from_len) != 0) {
            continue;
        }

        to_len = strlen(to);
        // The length of whatever follows the mnemonic.
        tail_len = strlen(str + from_len);
        assert(to_len <= from_len);

        // Overwrite the mnemonic with its replacement, then close the gap
        // by moving the tail (including its terminating NUL) down.
        memcpy(str, to, to_len);
        memmove(str + to_len, str + from_len, tail_len + 1);

        *real_end = str + to_len + tail_len;
        return MR_TRUE;
    }

    return MR_FALSE;
}
static MR_bool
check_for_suffix(char *start, char *position, const char *suffix,
int sizeof_suffix, int *mode_num2)
{
const int suffix_len = sizeof_suffix - 1;
return (
position - suffix_len >= start
&& sscanf(position + 1, "%d", mode_num2) == 1
&& strncmp(position - suffix_len + 1, suffix, suffix_len) == 0
);
}
//---------------------------------------------------------------------------//