From e6ac077baef62c70821b5130f2de171b9e98bd1e Mon Sep 17 00:00:00 2001 From: Fergus Henderson Date: Fri, 5 Dec 1997 15:58:34 +0000 Subject: [PATCH] Add support for memory profiling. Estimated hours taken: 40 (+ unknown time by Zoltan) Add support for memory profiling. (A significant part of this change is actuallly Zoltan's work. Zoltan did the changes to the compiler and a first go at the changes to the runtime and library. I rewrote much of Zoltan's changes to the runtime and library, added support for the new options/grades, added code to interface with mprof, did the changes to the profiler, and wrote the documentation.) [TODO: add test cases.] NEWS: Mention support for memory profiling. runtime/mercury_heap_profile.h: runtime/mercury_heap_profile.c: New files. These contain code to record heap profiling information. runtime/mercury_heap.h: Add new macros incr_hp_msg(), tag_incr_hp_msg(), incr_hp_atomic_msg(), and tag_incr_hp_atomic_msg(). These are like the non-`msg' versions, except that if PROFILE_MEMORY is defined, they also call MR_record_allocation() from mercury_heap_profile.h to record heap profiling information. Also, fix up the indentation in lots of places. runtime/mercury_prof.h: runtime/mercury_prof.c: Added code to dump out memory profiling information to files `Prof.MemoryWords' and `Prof.MemoryCells' (for use by mprof). Change the format of the `Prof.Counts' file so that the first line says what it is counting, the units, and a scale factor. Prof.MemoryWords and Prof.MemoryCells can thus have exactly the same format as Prof.Counts. Also cleaned up the interface to mercury_prof.c a bit, and did various other minor cleanups -- indentation changes, changes to use MR_ prefixes, additional comments, etc. runtime/mercury_prof_mem.h: runtime/mercury_prof_mem.c: Rename prof_malloc() as MR_prof_malloc(). Rename prof_make() as MR_PROF_NEW() and add MR_PROF_NEW_ARRAY(). runtime/mercury_wrapper.h: Minor modifications to reflect the new interface to mercury_prof.c. runtime/mercury_wrapper.c: runtime/mercury_label.c: Rename the old `-p' (primary cache size) option as `-C'. Add a new `-p' option to disable profiling. runtime/Mmakefile: Add mercury_heap_profile.[ch]. Put the list of files in alphabetical order. Delete some obsolete stuff for supporting `.mod' files. Mention that libmer_dll.h and libmer_globals.h are produced by Makefile.DLLs. runtime/mercury_imp.h: Mention that libmer_dll.h is produced by Makefile.DLLs. runtime/mercury_dummy.c: Change a comment to refer to libmer_dll.h rather than libmer_globals.h. compiler/llds.m: Add a new field to `create' and `incr_hp' instructions holding the name of the type, for heap profiling. compiler/unify_gen.m: Initialize the new field of `create' instructions with the appropriate type name. compiler/llds_out.m: Output incr_hp_msg() / tag_incr_hp_msg() instead of incr_hp() / tag_incr_hp(). compiler/*.m: Minor changes to most files in the compiler back-end to accomodate the new field in `incr_hp' and `create' instructions. library/io.m: Add `io__report_full_memory_stats'. library/benchmarking.m: Add `report_full_memory_stats'. This uses the information saved by runtime/mercury_heap_profile.{c,h} to print out a report of memory usage by procedures and by types. Also modify `report_stats' to print out some of that information. compiler/mercury_compile.m: If `--statistics' is enabled, call io__report_full_memory_stats at the end of main/2. This will print out full memory statistics, if the compiler was compiled with memory profiling enabled. compiler/options.m: compiler/handle_options.m: runtime/mercury_grade.h: scripts/ml.in: scripts/mgnuc.in: scripts/init_grade_options.sh-subr: scripts/parse_grade_options.sh-subr: Add new option `--memory-profiling' and new grade `.memprof'. Add `--time-profiling' as a new synonym for `--profiling'. Also add `--profile-memory' for more fine-grained control: `--memory-profiling' implies both `--profile-memory' and `--profile-calls'. scripts/mprof_merge_runs: Update to handle the new format of Prof.Counts and to also merge Prof.MemoryWords and Prof.MemoryCells. profiler/options.m: profiler/mercury_profile.m: Add new options `--profile memory-words' (`-m'), `--profile memory-cells' (`-M') and `--profile time' (`-t'). Thes options make the profiler select a different count file, Prof.MemoryWords or Prof.MemoryCells instead of Prof.Counts. specific to time profiling. profiler/read.m: profiler/process_file.m: profiler/prof_info.m: profiler/generate_output.m: Update to handle the new format of the counts file. When reading the counts file, look at the first line of the file to determine what is being profiled. profiler/globals.m: Add a new global variable `what_to_profile' that records what is being profiled. profiler/output.m: Change the headings to reflect what is being profiled. doc/user_guide.texi: Document memory profiling. Document new options. doc/user_guide.texi: compiler/options.m: Comment out the documentation for `.proftime'/`--profile-time', since doing time and call profiling seperately doesn't work, because the code addresses change when you recompile with a different grade. Ditto for `.profmem'/`--profile-memory'. Also comment out the documentation for `.profcalls'/`--profile-calls', since it is redundant -- `.memprof' produces the same information and more. configure.in: Build a `.memprof' grade. (Hmm, should we do this only if `--enable-all-grades' is specified?) Don't ever build a `.profcalls' grade. --- library/benchmarking.m | 496 +++++++++++++++++++++++++--- library/io.m | 11 +- profiler/generate_output.m | 39 +-- profiler/globals.m | 34 +- profiler/mercury_profile.m | 3 +- profiler/options.m | 72 +++- profiler/output.m | 178 ++++++---- profiler/process_file.m | 66 ++-- profiler/prof_info.m | 13 +- profiler/read.m | 69 +++- runtime/Mmakefile | 63 ++-- runtime/mercury_dummy.c | 2 +- runtime/mercury_grade.h | 34 +- runtime/mercury_heap.h | 284 +++++++++------- runtime/mercury_heap_profile.c | 123 +++++++ runtime/mercury_heap_profile.h | 181 ++++++++++ runtime/mercury_imp.h | 5 +- runtime/mercury_label.c | 7 +- runtime/mercury_prof.c | 266 ++++++++++----- runtime/mercury_prof.h | 68 +++- runtime/mercury_prof_mem.c | 10 +- runtime/mercury_prof_mem.h | 8 +- runtime/mercury_wrapper.c | 51 +-- runtime/mercury_wrapper.h | 2 + scripts/init_grade_options.sh-subr | 1 + scripts/mgnuc.in | 15 +- scripts/ml.in | 18 +- scripts/mprof_merge_runs | 33 +- scripts/parse_grade_options.sh-subr | 29 +- 29 files changed, 1672 insertions(+), 509 deletions(-) create mode 100644 runtime/mercury_heap_profile.c create mode 100644 runtime/mercury_heap_profile.h diff --git a/library/benchmarking.m b/library/benchmarking.m index f39279c2e..34177184b 100644 --- a/library/benchmarking.m +++ b/library/benchmarking.m @@ -20,10 +20,18 @@ % Declaratively, `report_stats' is the same as `true'. % It has the side-effect of reporting some memory and time usage statistics +% about the time period since the last call to report_stats to stdout. +% (Technically, every Mercury implementation must offer a mode of invocation +% which disables this side-effect.) + +:- pred report_stats is det. + +% Declaratively, `report_full_memory_stats' is the same as `true'. +% It has the side-effect of reporting a full memory profile % to stdout. (Technically, every Mercury implementation must offer % a mode of invocation which disables this side-effect.) -:- pred report_stats is det. +:- pred report_full_memory_stats is det. % benchmark_det(Pred, In, Out, Repeats, Time) is for benchmarking the % det predicate Pred. We call Pred with the input In and the output Out, @@ -51,13 +59,95 @@ :- pragma c_header_code(" -#include #include ""mercury_timing.h"" +extern void ML_report_stats(void); + +extern void ML_report_full_memory_stats(void); + +"). % end pragma c_header_code + +:- pragma c_code(report_stats, will_not_call_mercury, +" + ML_report_stats(); "). -:- pragma c_code(report_stats, will_not_call_mercury, " - int time_at_prev_stat; +:- pragma c_code(report_full_memory_stats, will_not_call_mercury, +" +#ifdef MEMORY_PROFILING + ML_report_full_memory_stats(); +#endif +"). + +%-----------------------------------------------------------------------------% + +:- pragma c_code(" + +#include +#include ""mercury_prof_mem.h"" +#include ""mercury_heap_profile.h"" + +#ifdef MEMORY_PROFILING + + #define MEMORY_PROFILE_SIZE 10 /* Profile the top 10 entries */ + + #define MAX_REPORT_LINES 10 /* Display the top 10 entries */ + + /* local types */ + + typedef struct ML_memprof_float_counter + { + double cells_at_period_end; + double words_at_period_end; + double cells_since_period_start; + double words_since_period_start; + } ML_memprof_float_counter; + + typedef struct ML_memprof_report_entry + { + const char *name; + ML_memprof_float_counter counter; + } ML_memprof_report_entry; + + /* static variables */ + + static ML_memprof_float_counter ML_overall_counter; + + /* local function declarations */ + + static void ML_update_counter(MR_memprof_counter *counter, + ML_memprof_float_counter *float_counter); + + static int ML_insert_into_table(const ML_memprof_report_entry *new_entry, + ML_memprof_report_entry *table, + int table_size, int next_slot) + + static int ML_memory_profile_top_table(MR_memprof_record *node, + ML_memprof_report_entry *table, + int size, int next_slot); + + static int ML_memory_profile_fill_table(MR_memprof_table *node, + ML_memprof_report_entry *table, int next_slot); + + static void ML_memory_profile_report(const ML_memprof_report_entry *, + int num_entries, bool complete); + + static int ML_memory_profile_compare_final(const void *, const void *); + +#endif + +void +ML_report_stats(void) +{ + int time_at_prev_stat; +#ifdef MEMORY_PROFILING + int num_table_entries; + ML_memprof_report_entry table[MEMORY_PROFILE_SIZE]; +#endif + + /* + ** Print timing and stack usage information + */ time_at_prev_stat = time_at_last_stat; time_at_last_stat = MR_get_user_cpu_miliseconds(); @@ -70,22 +160,338 @@ ((char *) maxfr - (char *) nondetstack_zone->min) / 1024.0 ); + /* + ** Print heap usage information. + */ + #ifdef CONSERVATIVE_GC fprintf(stderr, ""#GCs: %lu,\\n"" - ""Heap used since last GC: %.3fk, Total used: %.3fk]\\n"", + ""Heap used since last GC: %.3fk, Total used: %.3fk"", (unsigned long) GC_gc_no, GC_get_bytes_since_gc() / 1024.0, GC_get_heap_size() / 1024.0 ); #else fprintf(stderr, - ""Heap: %.3fk]\\n"", + ""Heap: %.3fk"", ((char *) hp - (char *) heap_zone->min) / 1024.0 ); #endif + +#ifdef MEMORY_PROFILING + + /* + ** Update the overall counter (this needs to be done first, + ** so that the percentages come out right). + */ + ML_update_counter(&MR_memprof_overall, &ML_overall_counter); + + /* + ** Print out the per-procedure memory profile (top N entries) + */ + num_table_entries = ML_memory_profile_top_table(MR_memprof_procs.root, + table, MEMORY_PROFILE_SIZE, 0); + fprintf(stderr, ""\\nMemory profile by procedure\\n""); + ML_memory_profile_report(table, num_table_entries, FALSE); + + /* + ** Print out the per-type memory profile (top N entries) + */ + num_table_entries = ML_memory_profile_top_table(MR_memprof_types.root, + table, MEMORY_PROFILE_SIZE, 0); + fprintf(stderr, ""\\nMemory profile by type\\n""); + ML_memory_profile_report(table, num_table_entries, FALSE); + + /* + ** Print out the overall memory usage. + */ + fprintf(stderr, + ""Overall memory usage:"" + ""+%8.8g %8.8g cells, +%8.8g %8.8g words\\n"", + ML_overall_counter.cells_since_period_start, + ML_overall_counter.cells_at_period_end, + ML_overall_counter.words_since_period_start, + ML_overall_counter.words_at_period_end + ); + +#endif + + fprintf(stderr, ""]\\n""); +} + +#ifdef MEMORY_PROFILING + +void +ML_report_full_memory_stats(void) +{ + int num_table_entries; + int table_size; + ML_memprof_report_entry *table; + + /* + ** Update the overall counter (this needs to be done first, + ** so that the percentages come out right). + */ + ML_update_counter(&MR_memprof_overall, &ML_overall_counter); + + /* + ** Allocate space for the table + */ + if (MR_memprof_proc.num_entries > MR_memprof_type.num_entries) { + table_size = MR_memprof_proc.num_entries; + } else { + table_size = MR_memprof_type.num_entries; + } + table = make_many(ML_memprof_report_entry, table_size); + + /* + ** Print the by-procedure memory profile + */ + num_table_entries = ML_memory_profile_fill_table(MR_memprof_procs.root, + table, 0); + qsort(table, MR_memprof_proc.num_entries, + sizeof(ML_memprof_report_entry), + ML_memory_profile_compare_final); + fprintf(stderr, ""\\nMemory profile by procedure\\n""); + fprintf(stderr, ""%-50s %-14s %s\\n"", + ""procedure label"", ""Cells"", ""Words""); + ML_memory_profile_report(table, num_table_entries, TRUE); + + /* + ** Print the by-type memory profile + */ + num_table_entries = ML_memory_profile_fill_table(MR_memprof_types.root, + table, 0); + qsort(table, MR_memprof_type.num_entries, + sizeof(ML_memprof_report_entry), + ML_memory_profile_compare_final); + fprintf(stderr, ""\\nMemory profile by type\\n""); + fprintf(stderr, ""%-50s %-14s %s\\n"", + ""procedure label"", ""Cells"", ""Words""); + ML_memory_profile_report(table, num_table_entries, TRUE); + + /* + ** Deallocate space for the table + */ + oldmem(table); + + /* + ** Print the overall memory usage + */ + fprintf(stderr, + ""\\nOverall memory usage: %8.8g cells, %8.8g words\\n"", + ML_overall_counter.cells_at_period_end, + ML_overall_counter.words_at_period_end + ); + +/* +** ML_update_counter(counter, float_counter): +** Copy the data for a period from `counter' into +** `float_counter' (changing the format slightly as we go), +** and update `counter' to reflect the start of a new period. +*/ +static void +ML_update_counter(MR_memprof_counter *counter, + ML_memprof_float_counter *float_counter) +{ + MR_add_two_dwords(counter.cells_at_period_start, + counter.cells_since_period_start); + MR_add_two_dwords(counter.words_at_period_start, + counter.words_since_period_start); + + MR_convert_dword_to_double(counter.cells_since_period_start, + flout_counter.cells_since_period_start); + MR_convert_dword_to_double(counter.words_since_period_start, + float_counter.words_since_period_start); + + /* since the 'at start' numbers have already been incremented, */ + /* they now refer to the start of the *next* period */ + MR_convert_dword_to_double(counter.cells_at_period_start, + float_counter.cells_at_period_end); + MR_convert_dword_to_double(counter.words_at_period_start, + float_counter.words_at_period_end); + + MR_zero_dword(counter.cells_since_period_start); + MR_zero_dword(counter.words_since_period_start); +} + +/* +** Insert an entry into the table of the top `table_size' entries. +** Entries are ranked according to their words_at_period_end. +** (This is an arbitrary choice; we might equally well choose +** to order them by cells_at_period_end. I prefer words.) +** Entries that are not in the top `table_size' are discarded. +*/ +static int +ML_insert_into_table(const ML_memprof_report_entry *new_entry, + ML_memprof_report_entry *table, int table_size, int next_slot) +{ + int slot; + + /* ignore entries whose counts are zero */ + if (new_entry->counter.words_at_period_end <= 0.0) { + return next_slot; + } + + /* + ** Find the slot where this entry should be inserted. + ** Start at the end and work backwards until we find + ** the start of the table or until we find a table + ** entry which ranks higher that the new entry. + */ + slot = next_slot; + while (slot > 0 && table[slot - 1].counter.words_at_period_end < + new_entry->counter.words_at_period_end) + { + slot--; + } + + /* + ** If this entry fits in the table, then + ** shuffle the displaced entries to the right, + ** insert the new entry in the table, and increment next_slot + ** (unless it is already at the end of the table). + */ + if (slot < table_size) { + int i; + for (i = table_size - 1; i >= slot; i--) { + table[i + 1] = table[i]; + } + + table[slot] = *new_entry; + + if (next_slot < table_size) { + next_slot++; + } + } + + return next_slot; +} + +/* +** ML_memory_profile_top_table(node, table, table_size, next_slot): +** Insert the entries for `node' and its children into `table', +** which is big enough to hold the top `table_size' entries. +** `next_slot' specifies the number of entries currently +** in the table. Returns the new value of `next_slot'. +*/ +static int +ML_memory_profile_top_table(MR_memprof_record *node, + ML_memprof_report_entry *table, int table_size, int next_slot) +{ + ML_memprof_report_entry new_entry; + + if (node != NULL) { + next_slot = ML_memory_profile_top_table(node->left, + table, table_size, next_slot); + + new_entry.name = node->name; + ML_update_counter(&node->counter, &new_entry.counter); + next_slot = ML_insert_into_table(&new_entry, + table, table_size, next_slot) + + next_slot = ML_memory_profile_top_table(node->right, + table, table_size, next_slot); + } + return next_slot; +} + +/* +** ML_memory_profile_fill_table(node, table, next_slot): +** Insert the entries for `node' and its children into `table', +** which the caller guarantees is big enough to hold them all. +** `next_slot' specifies the number of entries currently +** in the table. Returns the new value of `next_slot'. +*/ +static int +ML_memory_profile_fill_table(MR_memprof_record *node, + ML_memprof_report_entry *table, int next_slot) +{ + if (node != NULL) { + next_slot = ML_memory_profile_fill_table(node->left, + table, next_slot); + + table[next_slot].name = node->name; + ML_update_counter(&node->counter, &table[next_slot].counter); + next_slot++; + + next_slot = ML_memory_profile_fill_table(node->right, + table, next_slot); + } + return next_slot; +} + +/* +** ML_memory_profile_report(table, num_entries, complete): +** Print out a profiling report for the specified table. +*/ +static void +ML_memory_profile_report(const ML_memprof_table *table, int num_entries, + bool complete) +{ + int i; + const char *name; + + if (num_entries > MAX_REPORT_LINES && !complete) { + num_entries = MAX_REPORT_LINES; + } + + for (i = 0; i < num_entries; i++) { + if (complete) { + fprintf(stderr, ""%-50s "" + ""%8.8g/%4.1f%% "" + ""%8.8g/%4.1f%%\\n"", + table[i].name, + table[i].cells_at_period_end, + 100 * table[i].cells_at_period_end / + ML_overall_counter.cells_at_period_end, + table[i].words_at_period_end, + 100 * table[i].words_at_period_end / + ML_overall_counter.words_at_period_end + ); + } else { + fprintf(stderr, ""%-50s "" + ""%8.8g/%4.1f%% "" + ""%8.8g/%4.1f%%\\n"", + table[i].name, + table[i].cells_since_period_start, + 100 * table[i].cells_since_period_start / + ML_overall_counter.cells_since_period_start, + table[i].words_since_period_start, + 100 * table[i].words_since_period_start / + ML_overall_counter.words_since_period_start + ); + } + } +} + +/* +** Comparison routine used for qsort(). +** Compares two ML_memprof_report_entry structures. +*/ +static int +ML_memory_profile_compare_final(const void *i1, const void *i2) +{ + const ML_memprof_report_entry *e1 = + (const ML_memprof_report_entry *) i1; + const ML_memprof_report_entry *e2 = + (const ML_memprof_report_entry *) i2; + + if (e1->words_at_period_end < e2->words_at_period_end) { + return 1; + } else if (e1->words_at_period_end > e2->words_at_period_end) { + return -1; + } else { + return strcmp(e1->name, e2->name); + } +} + +#endif /* MEMORY_PROFILING */ "). +%-----------------------------------------------------------------------------% + :- external(benchmark_det/5). :- external(benchmark_nondet/5). @@ -116,24 +522,28 @@ ENDINIT */ #ifdef COMPACT_ARGS -#define rep_count r5 -#define count_output r1 -#define soln_output r1 -#define time_output r2 + #define rep_count r5 + #define count_output r1 + #define soln_output r1 + #define time_output r2 #else -#define rep_count r6 -#define count_output r5 -#define soln_output r5 -#define time_output r7 + #define rep_count r6 + #define count_output r5 + #define soln_output r5 + #define time_output r7 #endif Define_extern_entry(mercury__benchmarking__benchmark_nondet_5_0); Declare_label(mercury__benchmarking__benchmark_nondet_5_0_i1); Declare_label(mercury__benchmarking__benchmark_nondet_5_0_i2); + MR_MAKE_STACK_LAYOUT_ENTRY(mercury__benchmarking__benchmark_nondet_5_0); MR_MAKE_STACK_LAYOUT_INTERNAL(mercury__benchmarking__benchmark_nondet_5_0, 1); MR_MAKE_STACK_LAYOUT_INTERNAL(mercury__benchmarking__benchmark_nondet_5_0, 2); +Declare_entry(do_call_nondet_closure); +Declare_entry(do_call_det_closure); + BEGIN_MODULE(benchmark_nondet_module) init_entry(mercury__benchmarking__benchmark_nondet_5_0); init_label(mercury__benchmarking__benchmark_nondet_5_0_i1); @@ -162,10 +572,11 @@ Define_entry(mercury__benchmarking__benchmark_nondet_5_0); framevar(0) = r3; framevar(1) = r4; - if (rep_count <= 0) + if (rep_count <= 0) { framevar(2) = 1; - else + } else { framevar(2) = rep_count; + } framevar(3) = 0; mark_hp(framevar(5)); @@ -176,12 +587,9 @@ Define_entry(mercury__benchmarking__benchmark_nondet_5_0); r2 = (Word) 1; /* the higher-order call has 1 extra input argument */ r3 = (Word) 1; /* the higher-order call has 1 extra output argument */ /* r4 already has the extra input argument */ - { - Declare_entry(do_call_nondet_closure); - call(ENTRY(do_call_nondet_closure), - LABEL(mercury__benchmarking__benchmark_nondet_5_0_i1), - LABEL(mercury__benchmarking__benchmark_nondet_5_0)); - } + call(ENTRY(do_call_nondet_closure), + LABEL(mercury__benchmarking__benchmark_nondet_5_0_i1), + LABEL(mercury__benchmarking__benchmark_nondet_5_0)); Define_label(mercury__benchmarking__benchmark_nondet_5_0_i1); /* we found a solution */ @@ -191,11 +599,12 @@ Define_label(mercury__benchmarking__benchmark_nondet_5_0_i1); Define_label(mercury__benchmarking__benchmark_nondet_5_0_i2); /* no more solutions for this iteration, so mark it completed */ framevar(2) = framevar(2) - 1; + /* we can now reclaim memory by resetting the heap pointer */ restore_hp(framevar(5)); + /* are there any other iterations? */ - if (framevar(2) > 0) - { + if (framevar(2) > 0) { /* yes, so reset the solution counter */ /* and then set up the call just like last time */ framevar(3) = 0; @@ -203,12 +612,9 @@ Define_label(mercury__benchmarking__benchmark_nondet_5_0_i2); r2 = (Word) 1; r3 = (Word) 1; r4 = framevar(1); - { - Declare_entry(do_call_nondet_closure); - call(ENTRY(do_call_nondet_closure), - LABEL(mercury__benchmarking__benchmark_nondet_5_0_i1), - LABEL(mercury__benchmarking__benchmark_nondet_5_0)); - } + call(ENTRY(do_call_nondet_closure), + LABEL(mercury__benchmarking__benchmark_nondet_5_0_i1), + LABEL(mercury__benchmarking__benchmark_nondet_5_0)); } /* no more iterations */ @@ -250,10 +656,11 @@ Define_entry(mercury__benchmarking__benchmark_det_5_0); detstackvar(1) = r3; detstackvar(2) = r4; - if (rep_count <= 0) + if (rep_count <= 0) { detstackvar(3) = 1; - else + } else { detstackvar(3) = rep_count; + } detstackvar(4) = MR_get_user_cpu_miliseconds(); @@ -262,31 +669,26 @@ Define_entry(mercury__benchmarking__benchmark_det_5_0); r2 = (Word) 1; /* the higher-order call has 1 extra input argument */ r3 = (Word) 1; /* the higher-order call has 1 extra output argument */ /* r4 already has the extra input argument */ - { - Declare_entry(do_call_det_closure); - call(ENTRY(do_call_det_closure), - LABEL(mercury__benchmarking__benchmark_det_5_0_i1), - LABEL(mercury__benchmarking__benchmark_det_5_0)); - } + call(ENTRY(do_call_det_closure), + LABEL(mercury__benchmarking__benchmark_det_5_0_i1), + LABEL(mercury__benchmarking__benchmark_det_5_0)); Define_label(mercury__benchmarking__benchmark_det_5_0_i1); + /* mark current iteration completed */ detstackvar(3) = detstackvar(3) - 1; + /* are there any other iterations? */ - if (detstackvar(3) > 0) - { + if (detstackvar(3) > 0) { /* yes, so set up the call just like last time */ restore_hp(detstackvar(5)); r1 = detstackvar(1); r2 = (Word) 1; r3 = (Word) 1; r4 = detstackvar(2); - { - Declare_entry(do_call_det_closure); - call(ENTRY(do_call_det_closure), - LABEL(mercury__benchmarking__benchmark_det_5_0_i1), - LABEL(mercury__benchmarking__benchmark_det_5_0)); - } + call(ENTRY(do_call_det_closure), + LABEL(mercury__benchmarking__benchmark_det_5_0_i1), + LABEL(mercury__benchmarking__benchmark_det_5_0)); } /* no more iterations */ @@ -304,3 +706,5 @@ void mercury_benchmarking_init_benchmark(void) { } "). + +%-----------------------------------------------------------------------------% diff --git a/library/io.m b/library/io.m index 6c3d354d4..d19d6c3ba 100644 --- a/library/io.m +++ b/library/io.m @@ -848,11 +848,17 @@ % Memory management predicates. - % Write some memory/time usage statistics to stdout. + % Write memory/time usage statistics to stdout. :- pred io__report_stats(io__state, io__state). :- mode io__report_stats(di, uo) is det. + % Write complete memory usage statistics to stdout, + % including information about all procedures and types. + +:- pred io__report_full_memory_stats(io__state, io__state). +:- mode io__report_full_memory_stats(di, uo) is det. + % Preallocate heap space (to avoid NU-Prolog panic). :- pred io__preallocate_heap_space(int, io__state, io__state). @@ -1903,6 +1909,9 @@ io__set_environment_var(Var, Value) --> io__report_stats --> { report_stats }. +io__report_full_memory_stats --> + { report_full_memory_stats }. + %-----------------------------------------------------------------------------% %-----------------------------------------------------------------------------% diff --git a/profiler/generate_output.m b/profiler/generate_output.m index c82c055c6..16db9dcae 100644 --- a/profiler/generate_output.m +++ b/profiler/generate_output.m @@ -126,7 +126,7 @@ process_prof_node(ProfNode, Prof, OutputProf0, OutputProf) :- :- mode generate_output__cycle(in, in, in, out) is det. generate_output__cycle(ProfNode, Prof, OutputProf0, OutputProf) :- - prof_get_entire(Prof, Hertz, ClockTicks, IntTotalCounts, _, _, + prof_get_entire(Prof, Scale, _Units, IntTotalCounts, _, _, _CycleMap), int__to_float(IntTotalCounts, TotalCounts), @@ -150,17 +150,8 @@ generate_output__cycle(ProfNode, Prof, OutputProf0, OutputProf) :- % Calculate the self time spent in the current predicate. % Calculate the descendant time, which is the time spent in the % current predicate and its descendants - int__to_float(Hertz, HertzFloat), - int__to_float(ClockTicks, ClockTicksFloat), - ( - HertzFloat = 0.0 - -> - SelfTime = 0.0, - DescTime = 0.0 - ; - SelfTime is InitialFloat / HertzFloat * ClockTicksFloat, - DescTime is (InitialFloat+Prop) / HertzFloat * ClockTicksFloat - ), + SelfTime is InitialFloat * Scale, + DescTime is (InitialFloat+Prop) * Scale, OutputProfNode = output_cycle_prof( Name, CycleNum, SelfTime, DescPercentage, @@ -182,7 +173,7 @@ generate_output__cycle(ProfNode, Prof, OutputProf0, OutputProf) :- :- mode generate_output__single_predicate(in, in, in, out) is det. generate_output__single_predicate(ProfNode, Prof, OutputProf0, OutputProf) :- - prof_get_entire(Prof, Hertz, ClockTicks, IntTotalCounts, _, _, + prof_get_entire(Prof, Scale, _Units, IntTotalCounts, _, _, CycleMap), int__to_float(IntTotalCounts, TotalCounts), @@ -222,18 +213,8 @@ generate_output__single_predicate(ProfNode, Prof, OutputProf0, OutputProf) :- % Calculate the self time spent in the current predicate. % Calculate the descendant time, which is the time spent in the % current predicate and its descendants - int__to_float(Hertz, HertzFloat), - int__to_float(ClockTicks, ClockTicksFloat), - ( - HertzFloat = 0.0 - -> - SelfTime = 0.0, - DescTime = 0.0 - ; - SelfTime is InitialFloat / HertzFloat * ClockTicksFloat, - DescTime is (InitialFloat+Prop) / HertzFloat - * ClockTicksFloat - ), + SelfTime is InitialFloat * Scale, + DescTime is (InitialFloat+Prop) * Scale, process_prof_node_parents(ParentList, SelfTime, DescTime, TotalCalls, CycleNum, CycleMap, @@ -440,7 +421,7 @@ remove_child_cycle_members([PN | PNs], CycleNum, CycleMap, List, process_prof_node_children_2([], _, Output, Output). process_prof_node_children_2([PN | PNs], Prof, Output0, Output) :- pred_info_get_entire(PN, LabelName, Calls), - prof_get_entire(Prof, Hertz, ClockTicks, _, AddrMap, ProfNodeMap, + prof_get_entire(Prof, Scale, _Units, _, AddrMap, ProfNodeMap, CycleMap), ( @@ -464,13 +445,11 @@ process_prof_node_children_2([PN | PNs], Prof, Output0, Output) :- checked_float_divide(FloatCalls, FloatTotalCalls, Proportion), % Calculate the self time spent in the current predicate. - int__to_float(Hertz, HertzFloat), - int__to_float(ClockTicks, ClockTicksFloat), - SelfTime is (InitialFloat / HertzFloat) * ClockTicksFloat, + SelfTime is InitialFloat * Scale, % Calculate the descendant time, which is the time spent in the % current predicate and its descendants - DescTime is (CurrentCount / HertzFloat) * ClockTicksFloat, + DescTime is CurrentCount * Scale, % Calculate the amount of the current predicate's self-time spent % due to the parent. diff --git a/profiler/globals.m b/profiler/globals.m index 7cd52af9f..85f9efd39 100644 --- a/profiler/globals.m +++ b/profiler/globals.m @@ -22,10 +22,26 @@ %-----------------------------------------------------------------------------% +:- type what_to_profile + ---> memory_words + ; memory_cells + ; user_plus_system_time + ; user_time + ; real_time. + +:- pred what_to_profile(string, what_to_profile) is semidet. +:- mode what_to_profile(in, out) is semidet. +:- mode what_to_profile(out, in) is det. + % Access predicates for the `globals' structure. :- pred globals__init(option_table::in, globals::out) is det. +:- pred globals__get_what_to_profile(globals::in, what_to_profile::out) is det. + +:- pred globals__set_what_to_profile(globals::in, what_to_profile::in, + globals::out) is det. + :- pred globals__get_options(globals::in, option_table::out) is det. :- pred globals__set_options(globals::in, option_table::in, globals::out) @@ -83,16 +99,28 @@ %-----------------------------------------------------------------------------% +what_to_profile("memory-words", memory_words). +what_to_profile("memory-cells", memory_cells). +what_to_profile("user-plus-system-time", user_plus_system_time). +what_to_profile("user-time", user_time). +what_to_profile("real-time", real_time). + :- type globals ---> globals( + what_to_profile, option_table ). -globals__init(Options, globals(Options)). +globals__init(Options, globals(user_plus_system_time, Options)). -globals__get_options(globals(Options), Options). +globals__get_what_to_profile(globals(WhatToProfile, _), WhatToProfile). -globals__set_options(globals(_), Options, globals(Options)). +globals__set_what_to_profile(globals(_, A), WhatToProfile, + globals(WhatToProfile, A)). + +globals__get_options(globals(_, Options), Options). + +globals__set_options(globals(A, _), Options, globals(A, Options)). globals__lookup_option(Globals, Option, OptionData) :- globals__get_options(Globals, OptionTable), diff --git a/profiler/mercury_profile.m b/profiler/mercury_profile.m index 9447dd991..0302385c9 100644 --- a/profiler/mercury_profile.m +++ b/profiler/mercury_profile.m @@ -45,7 +45,8 @@ main --> io__command_line_arguments(Args0), - { OptionOps = option_ops(short_option, long_option, option_defaults) }, + { OptionOps = option_ops(short_option, long_option, option_defaults, + special_handler) }, { getopt__process_options(OptionOps, Args0, Args, Result0) }, postprocess_options(Result0, Args, Result), main_2(Result, Args). diff --git a/profiler/options.m b/profiler/options.m index 869a60f81..1218bb80f 100644 --- a/profiler/options.m +++ b/profiler/options.m @@ -23,6 +23,10 @@ % Profiler options ; dynamic_cg ; call_graph + ; profile + ; profile_time + ; profile_memory_words + ; profile_memory_cells ; countfile ; pairfile ; declfile @@ -36,6 +40,8 @@ :- pred long_option(string::in, option::out) is semidet. :- pred option_defaults(option::out, option_data::out) is nondet. :- pred option_default(option::out, option_data::out) is multidet. +:- pred special_handler(option::in, special_data::in, + option_table::in, maybe_option_table(option)::out) is semidet. :- pred options_help(io__state::di, io__state::uo) is det. @@ -48,7 +54,7 @@ %-----------------------------------------------------------------------------% :- implementation. -:- import_module std_util. +:- import_module std_util, map. option_defaults(Option, Default) :- semidet_succeed, @@ -59,12 +65,16 @@ option_default(verbose, bool(no)). option_default(very_verbose, bool(no)). % General profiler options -option_default(dynamic_cg, bool(no)). -option_default(call_graph, bool(no)). -option_default(countfile, string("Prof.Counts")). -option_default(pairfile, string("Prof.CallPair")). -option_default(declfile, string("Prof.Decl")). -option_default(libraryfile, string("")). +option_default(dynamic_cg, bool(no)). +option_default(call_graph, bool(no)). +option_default(profile, string_special). +option_default(profile_time, special). +option_default(profile_memory_words, special). +option_default(profile_memory_cells, special). +option_default(countfile, string("Prof.Counts")). +option_default(pairfile, string("Prof.CallPair")). +option_default(declfile, string("Prof.Decl")). +option_default(libraryfile, string("")). % Miscellaneous Options option_default(help, bool(no)). @@ -77,7 +87,11 @@ short_option('d', dynamic_cg). short_option('D', declfile). short_option('h', help). short_option('L', libraryfile). +short_option('m', profile_memory_words). +short_option('M', profile_memory_cells). +short_option('p', profile). short_option('P', pairfile). +short_option('t', profile_time). short_option('v', verbose). short_option('V', very_verbose). @@ -87,10 +101,40 @@ long_option("count-file", countfile). long_option("declaration-file", declfile). long_option("help", help). long_option("library-callgraph", help). +long_option("profile", profile). +long_option("profile-memory-words", profile_memory_words). +long_option("profile-memory-cells", profile_memory_cells). +long_option("profile-time", profile_time). long_option("use-dynamic", dynamic_cg). long_option("verbose", verbose). long_option("very-verbose", very_verbose). +special_handler(profile, string(WhatToProfile), OptionTable0, Result) + :- + ( valid_profile_option(WhatToProfile, CountFile) -> + map__set(OptionTable0, countfile, string(CountFile), + OptionTable), + Result = ok(OptionTable) + ; + Result = error("Invalid argument to `--profile' or `-p' option") + ). +special_handler(profile_memory_words, _, OptionTable0, ok(OptionTable)) :- + map__set(OptionTable0, countfile, string("Prof.MemoryWords"), + OptionTable). +special_handler(profile_memory_cells, _, OptionTable0, ok(OptionTable)) :- + map__set(OptionTable0, countfile, string("Prof.MemoryCells"), + OptionTable). +special_handler(profile_time, _, OptionTable0, ok(OptionTable)) :- + map__set(OptionTable0, countfile, string("Prof.Counts"), + OptionTable). + +:- pred valid_profile_option(string::in, string::out) is semidet. +valid_profile_option("memory-words", "Prof.MemoryWords"). +valid_profile_option("memory-cells", "Prof.MemoryCells"). +valid_profile_option("time", "Prof.Counts"). + +% :- pred special_handler(option::in, special_data::in, +% option_table::in, maybe_option_table::out) is semidet. options_help --> io__write_string("\t-h, --help\n"), @@ -101,8 +145,20 @@ options_help --> io__write_string("\t\tInclude the call graph profile.\n"), io__write_string("\t-d, --use-dynamic\n"), io__write_string("\t\tBuild the call graph dynamically.\n"), + io__write_string("\t-p, --profile {time, memory-words, memory-cells}\n"), + io__write_string("\t\tSelect what to profile: time, amount of memory allocated, or\n"), + io__write_string("\t\tnumber of memory allocations (regardless of size).\n"), + io__write_string("\t-m\n"), + io__write_string("\t\tSame as `--profile memory-words'\n"), + io__write_string("\t-M\n"), + io__write_string("\t\tSame as `--profile memory-cells'.\n"), + io__write_string("\t-t\n"), + io__write_string("\t\tSame as `--profile time'.\n"), + + io__write_string("\nFilename Options:\n"), io__write_string("\t-C , --count-file \n"), - io__write_string("\t\tName of the count file. Usually `Prof.Counts'.\n"), + io__write_string("\t\tName of the count file. Usually `Prof.Counts',\n"), + io__write_string("\t\t`Prof.MemoryWords', or `Prof.MemoryCells'.\n"), io__write_string("\t-D , --declaration-file \n"), io__write_string("\t\tName of the declaration file. Usually `Prof.Decl'.\n"), io__write_string("\t-P , --call-pair-file \n"), diff --git a/profiler/output.m b/profiler/output.m index 8755ade7f..c6bb55521 100644 --- a/profiler/output.m +++ b/profiler/output.m @@ -35,6 +35,11 @@ :- import_module globals, options, generate_output. output__main(Output, IndexMap) --> + globals__io_get_globals(Globals), + { globals__get_what_to_profile(Globals, WhatToProfile) }, + { what_to_profile(WhatToProfileString, WhatToProfile) }, + io__format("*** profiling %s ***\n\n", [s(WhatToProfileString)]), + { Output = output(InfoMap, CallList, FlatList) }, globals__io_lookup_bool_option(call_graph, CallGraphOpt), ( @@ -45,87 +50,126 @@ output__main(Output, IndexMap) --> ; { true } ), + output__flat_headers, output__flat_profile(FlatList, 0.0, InfoMap, IndexMap), + output_alphabet_headers, output_alphabet_listing(IndexMap). +:- type header_category + ---> time_headers + ; memory_words_headers + ; memory_cells_headers. + +:- pred classify_profile(what_to_profile::in, header_category::out) is det. +classify_profile(user_time, time_headers). +classify_profile(user_plus_system_time, time_headers). +classify_profile(real_time, time_headers). +classify_profile(memory_words, memory_words_headers). +classify_profile(memory_cells, memory_cells_headers). + +:- pred units(header_category::in, string::out, string::out, string::out, string::out, + string::out, string::out, string::out, string::out) is det. +units(time_headers, "time", "time", "running time", + "seconds", "seconds", "milliseconds", "ms/call", "spent executing"). +units(memory_words_headers, + "mem", "memory", "allocated memory", + "k-words", "kilowords", "words", "wds/call", "allocated by"). +units(memory_cells_headers, + "cells", "allocations", "number of memory allocations", + "k-cells", "kilocells", "cells", "cls/call", "occurring in"). :- pred output__call_graph_headers(io__state, io__state). :- mode output__call_graph_headers(di, uo) is det. output__call_graph_headers --> + globals__io_get_globals(Globals), + { globals__get_what_to_profile(Globals, WhatToProfile) }, + { classify_profile(WhatToProfile, Category) }, + { units(Category, ShortWhat, What, LongWhat, + _ShortUnits, Units, _MilliUnits, _MilliUnitsPerCall, SpentIn) }, + io__write_string("call graph profile:\n"), - io__write_string("\tSorted on the %time field.\n\n"), + io__format("\tSorted on the %%%s field.\n\n", [s(ShortWhat)]), - io__write_string("\tpredicate entries:\n\n"), + io__write_string("\tprocedure entries:\n\n"), - io__write_string("index\tthe index number of the predicate in the call graph\n"), - io__write_string("\tlisting.\n\n"), + io__write_string("index\t\tthe index number of the procedure in the call graph\n"), + io__write_string("\t\tlisting.\n\n"), - io__write_string("%time\tthe percentage of the total running time of\n"), - io__write_string("\tthe program spent in this predicate and its\n"), - io__write_string("\tdescendents.\n\n"), + io__format("%%%s\t\tthe percentage of the total %s of\n", + [s(ShortWhat), s(LongWhat)]), + io__format("\t\tthe program %s this procedure and its\n", + [s(SpentIn)]), + io__write_string("\t\tdescendents.\n\n"), - io__write_string("self\tthe number of seconds spent actually executing\n"), - io__write_string("\tthe predicate's own code.\n\n"), + io__format("self\t\tthe number of %s actually %s\n", + [s(Units), s(SpentIn)]), + io__write_string("\t\tthe procedure's own code.\n\n"), - io__write_string("descendents\n"), - io__write_string("\tthe number of seconds spent executing the\n"), - io__write_string("\tdescendents of the current predicate.\n\n"), + io__format("descendents\tthe number of %s %s the\n", + [s(Units), s(SpentIn)]), + io__write_string("\t\tdescendents of the current procedure.\n\n"), - io__write_string("called\tthe number of times the current predicate is\n"), - io__write_string("\tcalled (not counting self recursive calls).\n\n"), + io__write_string("called\t\tthe number of times the current procedure is\n"), + io__write_string("\t\tcalled (not counting self recursive calls).\n\n"), - io__write_string("self\tthe number of self recursive calls.\n\n"), + io__write_string("self\t\tthe number of self recursive calls.\n\n"), - io__write_string("name\tthe name of the current predicate.\n\n"), + io__write_string("name\t\tthe name of the current procedure.\n\n"), - io__write_string("index\ta index number to locate the function easily.\n\n\n\n"), + io__write_string("index\t\tan index number to locate the function easily.\n\n\n\n"), io__write_string("\tparent listings:\n\n"), - io__write_string("self*\tthe number of seconds of the current predicates self\n"), - io__write_string("\ttime due to calls from this parent.\n\n"), + io__format("self*\t\tthe number of %s of the current procedure's self\n", + [s(Units)]), + io__format("\t\t%s due to calls from this parent.\n\n", + [s(What)]), - io__write_string("descendents*\n"), - io__write_string("\tthe number of seconds of the current predicate's descendent\n"), - io__write_string("\ttime which is due to calls from this parent.\n\n"), + io__format("descendents*\tthe number of %s of the current procedure's descendent\n", + [s(Units)]), + io__format("\t\t%s which is due to calls from this parent.\n\n", + [s(What)]), - io__write_string("called*\tthe number of times the current predicate is called\n"), - io__write_string("\tby this parent.\n\n"), + io__write_string("called*\t\tthe number of times the current procedure is called\n"), + io__write_string("\t\tby this parent.\n\n"), - io__write_string("total\tthe number of times this predicate is called by its parents.\n\n"), + io__write_string("total\t\tthe number of times this procedure is called by its parents.\n\n"), - io__write_string("parents\tthe name of this parent.\n\n"), + io__write_string("parents\t\tthe name of this parent.\n\n"), - io__write_string("index\tthe index number of the parent predicate\n\n\n\n"), + io__write_string("index\t\tthe index number of the parent procedure\n\n\n\n"), io__write_string("children listings:\n\n"), - io__write_string("self*\tthe number of seconds of this child's self time which is due\n"), - io__write_string("\tto being called by the current predicate.\n\n"), + io__format("self*\t\tthe number of %s of this child's self %s which is\n", + [s(Units), s(What)]), + io__write_string("\t\tdue to being called by the current procedure.\n\n"), - io__write_string("descendent*\n"), - io__write_string("\tthe number of seconds of this child's desdendent time which is due\n"), - io__write_string("\tto the current predicate.\n\n"), + io__format("descendent*\tthe number of %s of this child's descendent %s which\n", + [s(Units), s(What)]), + io__write_string("\t\tis due to the current procedure.\n\n"), - io__write_string("called*\tthe number of times this child is called by the current\n"), - io__write_string("\tpredicate.\n\n"), + io__write_string("called*\t\tthe number of times this child is called by the current\n"), + io__write_string("\t\tprocedure.\n\n"), - io__write_string("total*\tthe number of times this child is called by all predicates.\n\n"), + io__write_string("total*\t\tthe number of times this child is called by all procedures.\n\n"), io__write_string("children\tthe name of this child.\n\n"), - io__write_string("index\tthe index number of the child.\n\n"), + io__write_string("index\t\tthe index number of the child.\n\n\n\n"), io__write_string(" called/total"), io__write_string(" parents\n"), - io__write_string("index %time self descendents called+self"), + { string__append("%", ShortWhat, PercentShortWhat) }, + io__format("index %6s self descendents called+self", + [s(PercentShortWhat)]), io__write_string(" name index\n"), io__write_string(" called/total"), io__write_string(" children\n\n"). @@ -196,7 +240,7 @@ output_formatted_prof_node(ProfNode, Index, IndexMap) --> ), - % Output the info about the current predicate. + % Output the info about the current procedure. io__write_string(InitMiddleStr), ( { SelfCalls = 0 } @@ -225,7 +269,7 @@ output_formatted_prof_node(ProfNode, Index, IndexMap) --> % output_formatted_cycle_parent_list -% outputs the parents of a predicate that are in the same cycle. +% outputs the parents of a procedure that are in the same cycle. % :- pred output_formatted_cycle_parent_list(list(parent), map(string, int), io__state, io__state). @@ -247,7 +291,7 @@ output_formatted_cycle_parent_list([Parent | Parents], IndexMap) --> % output_formatted_parent_list: -% Outputs the parent list of the current predicate +% Outputs the parent list of the current procedure :- pred output_formatted_parent_list(list(parent), map(string, int), int, io__state, io__state). @@ -269,7 +313,7 @@ output_formatted_parent_list([Parent | Parents], IndexMap, TotalCalls) --> % output_formatted_cycle_child_list -% outputs the childs of a predicate that are in the same cycle. +% outputs the children of a procedure that are in the same cycle. % :- pred output_formatted_cycle_child_list(list(child), map(string, int), io__state, io__state). @@ -291,7 +335,7 @@ output_formatted_cycle_child_list([Child | Childs], IndexMap) --> % output_formatted_child_list: -% outputs the child list of the current predicate. +% outputs the child list of the current procedure. % :- pred output_formatted_child_list(list(child), map(string, int), io__state, io__state). @@ -315,31 +359,53 @@ output_formatted_child_list([Child | Children], IndexMap) --> :- mode output__flat_headers(di, uo) is det. output__flat_headers --> + globals__io_get_globals(Globals), + { globals__get_what_to_profile(Globals, WhatToProfile) }, + { classify_profile(WhatToProfile, Category) }, + { units(Category, ShortWhat, _What, LongWhat, + ShortUnits, Units, MilliUnits, MilliUnitsPerCall, SpentIn) }, + io__write_string("\nflat profile:\n\n"), - io__write_string(" %\tthe percentage of total running time of the program\n"), - io__write_string("time\tused by this function.\n\n"), + io__format("%%\t\tthe percentage of total %s of the program\n", + [s(LongWhat)]), + io__format("%s\t\tused by this procedure.\n\n", + [s(ShortWhat)]), - io__write_string(" cum\tthe total time of the current predicate and the ones\n"), - io__write_string("time\tlisted above it.\n\n"), + io__format( + "cumulative\tthe total number of %s for the current procedure and\n", + [s(Units)]), + io__format("%s\t\tthe ones listed above it.\n\n", + [s(ShortUnits)]), - io__write_string(" self\tthe number of seconds accounted for by this predicate alone.\n"), - io__write_string("seconds\tThe listing is sorted on this row.\n\n"), + io__format( + "self\t\tthe number of %s accounted for by this procedure alone.\n", + [s(Units)]), + io__format("%s\t\tThe listing is sorted on this row.\n\n", + [s(ShortUnits)]), - io__write_string("calls\tthe number of times this predicate was called.\n\n"), + io__write_string( + "calls\t\tthe number of times this procedure was called.\n\n"), - io__write_string(" self\tthe average number of milliseconds spent in\n"), - io__write_string("ms/call\tthis predicate per call.\n\n"), + io__format("self\t\tthe average number of %s %s\n", + [s(MilliUnits), s(SpentIn)]), + io__format("%s \tthis procedure per call.\n\n", + [s(MilliUnitsPerCall)]), - io__write_string(" total\tthe average number of milliseconds spent in this predicate and its\n"), - io__write_string("ms/call\tdescendents per call.\n\n"), + io__format( + "total\tthe average number of %s %s this procedure and its\n", + [s(MilliUnits), s(SpentIn)]), + io__format("%s \tdescendents per call.\n\n", + [s(MilliUnitsPerCall)]), - io__write_string("name\tthe name of the predicate followed by its index number.\n\n"), + io__write_string( + "name\t\tthe name of the procedure followed by its index number.\n\n"), io__write_string(" % cumulative self self"), io__write_string(" total\n"), - io__write_string(" time seconds seconds calls ms/call"), - io__write_string(" ms/call name\n"). + io__format(" %4s %7s %7s calls %8s %8s name\n", + [s(ShortWhat), s(ShortUnits), s(ShortUnits), + s(MilliUnitsPerCall), s(MilliUnitsPerCall)]). :- pred output__flat_profile(list(string), float, map(string, output_prof), diff --git a/profiler/process_file.m b/profiler/process_file.m index af98175ea..2edc690e5 100644 --- a/profiler/process_file.m +++ b/profiler/process_file.m @@ -40,7 +40,7 @@ process_file__main(Prof, DynamicCallGraph) --> - globals__io_lookup_bool_option(very_verbose, VVerbose), + globals__io_lookup_bool_option(very_verbose, VVerbose), globals__io_lookup_string_option(declfile, DeclFile), globals__io_lookup_string_option(countfile, CountFile), globals__io_lookup_string_option(pairfile, PairFile), @@ -48,30 +48,33 @@ process_file__main(Prof, DynamicCallGraph) --> globals__io_lookup_bool_option(dynamic_cg, Dynamic), % process the decl file - maybe_write_string(VVerbose, "\n\t% Processing "), + maybe_write_string(VVerbose, "\n\t% Processing "), maybe_write_string(VVerbose, DeclFile), maybe_write_string(VVerbose, "..."), - process_addr_decl(AddrDeclMap, ProfNodeMap0), - maybe_write_string(VVerbose, " done.\n"), + process_addr_decl(AddrDeclMap, ProfNodeMap0), + maybe_write_string(VVerbose, " done.\n"), % process the timing counts file - maybe_write_string(VVerbose, "\t% Processing "), + maybe_write_string(VVerbose, "\t% Processing "), maybe_write_string(VVerbose, CountFile), maybe_write_string(VVerbose, "..."), - process_addr(ProfNodeMap0, ProfNodeMap1, Hertz, ClockTicks, + process_addr(ProfNodeMap0, ProfNodeMap1, WhatToProfile, Scale, Units, TotalCounts), - maybe_write_string(VVerbose, " done.\n"), + maybe_write_string(VVerbose, " done.\n"), % process the call pair counts file - maybe_write_string(VVerbose, "\t% Processing "), + maybe_write_string(VVerbose, "\t% Processing "), maybe_write_string(VVerbose, PairFile), maybe_write_string(VVerbose, "..."), - process_addr_pair(ProfNodeMap1, DynamicCallGraph, ProfNodeMap), - maybe_write_string(VVerbose, " done.\n"), + process_addr_pair(ProfNodeMap1, DynamicCallGraph, ProfNodeMap), + maybe_write_string(VVerbose, " done.\n"), { map__init(CycleMap) }, - { prof_set_entire(Hertz, ClockTicks, TotalCounts, AddrDeclMap, + { prof_set_entire(Scale, Units, TotalCounts, AddrDeclMap, ProfNodeMap, CycleMap, Prof) }, + globals__io_get_globals(Globals0), + { globals__set_what_to_profile(Globals0, WhatToProfile, Globals) }, + globals__io_set_globals(Globals), ( { Dynamic = no } @@ -114,7 +117,7 @@ process_addr_decl(AddrDeclMap, ProfNodeMap) --> io__seen ; { Result = error(Error) }, - { io__error_message(Error, ErrorMsg) }, + { io__error_message(Error, ErrorMsg) }, { string__append("error opening declaration file `", DeclFile, Str0) }, @@ -165,17 +168,19 @@ process_addr_decl_2(AddrDecl0, ProfNodeMap0, AddrDecl, ProfNodeMap) --> % Reads in the Prof.Counts file and stores all the counts in the % prof_node structure. Also sums the total counts at the same time. % -:- pred process_addr(prof_node_map, prof_node_map, int, int, int, - io__state, io__state). -:- mode process_addr(in, out, out, out, out, di, uo) is det. +:- pred process_addr(prof_node_map, prof_node_map, + what_to_profile, float, string, int, io__state, io__state). +:- mode process_addr(in, out, out, out, out, out, di, uo) is det. -process_addr(ProfNodeMap0, ProfNodeMap, Hertz, ClockTicks, TotalCounts) --> +process_addr(ProfNodeMap0, ProfNodeMap, WhatToProfile, Scale, Units, + TotalCounts) --> globals__io_lookup_string_option(countfile, CountFile), io__see(CountFile, Result), ( { Result = ok }, - read_int(Hertz), - read_int(ClockTicks), + read_what_to_profile(WhatToProfile), + read_float(Scale), + read_string(Units), process_addr_2(0, ProfNodeMap0, TotalCounts, ProfNodeMap), io__seen ; @@ -186,15 +191,16 @@ process_addr(ProfNodeMap0, ProfNodeMap, Hertz, ClockTicks, TotalCounts) --> io__write_string("': "), io__write_string(ErrorMsg), io__write_string("\n"), - io__write_string("The generated profile will not include "), - io__write_string("timing information.\n\n"), + io__write_string("The generated profile will only include "), + io__write_string("call counts.\n\n"), { TotalCounts = 0 }, { ProfNodeMap = ProfNodeMap0 }, - % We can use any arbitrary values for Hertz and ClockTicks; - % the values here won't be used, since all the times will be - % zero. - { Hertz = 1 }, - { ClockTicks = 1 } + % We can use any arbitrary values for WhatToProfile and Scale; + % the values specified here won't be used, + % since all the times will be zero. + { WhatToProfile = user_plus_system_time }, + { Scale = 1.0 }, + { Units = "" } ). :- pred process_addr_2(int, prof_node_map, int, prof_node_map, @@ -259,7 +265,7 @@ process_addr_pair(ProfNodeMap0, DynamicCallGraph, ProfNodeMap) --> io__seen ; { Result = error(Error) }, - { io__error_message(Error, ErrorMsg) }, + { io__error_message(Error, ErrorMsg) }, { string__append("error opening pair file `", PairFile, Str0) }, { string__append(Str0, "': ", Str1) }, @@ -353,10 +359,10 @@ process_library_callgraph(LibraryATSort, LibPredMap) --> io__seen ; { Result = error(Error) }, - { io__error_message(Error, ErrorMsg) }, - io__stderr_stream(StdErr), - io__write_strings(StdErr, ["mprof: error opening pair file `", - LibFile, "': ", ErrorMsg, "\n"]), + { io__error_message(Error, ErrorMsg) }, + io__stderr_stream(StdErr), + io__write_strings(StdErr, ["mprof: error opening pair file `", + LibFile, "': ", ErrorMsg, "\n"]), { LibraryATSort = [] }, { LibPredMap = LibPredMap0 } ). diff --git a/profiler/prof_info.m b/profiler/prof_info.m index 553a76221..f0e39b8f9 100644 --- a/profiler/prof_info.m +++ b/profiler/prof_info.m @@ -75,7 +75,8 @@ % *** Access prof predicates *** % -:- pred prof_get_entire(prof, int,int,int, addrdecl, prof_node_map, cycle_map). +:- pred prof_get_entire(prof, + float, string, int, addrdecl, prof_node_map, cycle_map). :- mode prof_get_entire(in, out, out, out, out, out, out) is det. :- pred prof_get_addrdeclmap(prof, addrdecl). @@ -87,7 +88,8 @@ % *** Update prof predicates *** % -:- pred prof_set_entire(int,int,int, addrdecl, prof_node_map, cycle_map, prof). +:- pred prof_set_entire(float, string, int, addrdecl, prof_node_map, cycle_map, + prof). :- mode prof_set_entire(in, in, in, in, in, in, out) is det. :- pred prof_set_profnodemap(prof_node_map, prof, prof). @@ -197,9 +199,10 @@ :- type prof ---> prof( - int, % Hertz of the system clock - int, % No. of clock ticks between - % each prof signal. + float, % Scaling factor + string, % Units + % (Each profiling count is + % equivalent to Scale Units) int, % Total counts of the profile % run addrdecl, % Map between label name and diff --git a/profiler/read.m b/profiler/read.m index 42f9cefa9..caceaed0e 100644 --- a/profiler/read.m +++ b/profiler/read.m @@ -19,6 +19,7 @@ :- interface. :- import_module int, io. +:- import_module globals. :- pred maybe_read_label_addr(maybe(int), io__state, io__state). :- mode maybe_read_label_addr(out, di, uo) is det. @@ -32,9 +33,18 @@ :- pred read_label_name(string, io__state, io__state). :- mode read_label_name(out, di, uo) is det. +:- pred read_string(string, io__state, io__state). +:- mode read_string(out, di, uo) is det. + :- pred read_int(int, io__state, io__state). :- mode read_int(out, di, uo) is det. +:- pred read_float(float, io__state, io__state). +:- mode read_float(out, di, uo) is det. + +:- pred read_what_to_profile(what_to_profile, io__state, io__state). +:- mode read_what_to_profile(out, di, uo) is det. + %-----------------------------------------------------------------------------% %-----------------------------------------------------------------------------% @@ -153,28 +163,61 @@ read_label_name(LabelName) --> %-----------------------------------------------------------------------------% -read_int(Count) --> +read_string(String) --> io__read_word(WordResult), ( { WordResult = ok(CharList) }, - { string__from_char_list(CharList, CountStr) }, - ( - { string__to_int(CountStr, Count0) } - -> - { Count = Count0 } - ; - io__write_string("\nInteger = "), - io__write_string(CountStr), - { error("\nread_int: Not an integer\n") } - ) + { string__from_char_list(CharList, String) } ; { WordResult = eof }, - { error("read_int: EOF reached") } + { error("read_string: EOF reached") } ; { WordResult = error(Error) }, { io__error_message(Error, ErrorStr) }, - { string__append("read_int: ", ErrorStr, Str) }, + { string__append("read_string: ", ErrorStr, Str) }, { error(Str) } ). %-----------------------------------------------------------------------------% + +read_int(Int) --> + read_string(IntStr), + ( + { string__to_int(IntStr, Int0) } + -> + { Int = Int0 } + ; + io__write_string("\nInteger = "), + io__write_string(IntStr), + { error("\nread_int: Not an integer\n") } + ). + +%-----------------------------------------------------------------------------% + +read_float(Float) --> + read_string(FloatStr), + ( + { string__to_float(FloatStr, Float0) } + -> + { Float = Float0 } + ; + io__write_string("\nFloat = "), + io__write_string(FloatStr), + { error("\nread_float: Not an float\n") } + ). + +%-----------------------------------------------------------------------------% + +read_what_to_profile(WhatToProfile) --> + read_string(Str), + ( + { what_to_profile(Str, WhatToProfile0) } + -> + { WhatToProfile = WhatToProfile0 } + ; + io__write_string("\nWhatToProfile = "), + io__write_string(Str), + { error("\nread_what_to_profile: invalid input\n") } + ). + +%-----------------------------------------------------------------------------% diff --git a/runtime/Mmakefile b/runtime/Mmakefile index b9e390f40..6daea4749 100644 --- a/runtime/Mmakefile +++ b/runtime/Mmakefile @@ -23,15 +23,6 @@ MOD2C = $(SCRIPTS_DIR)/mod2c # keep this list in alphabetical order, please HDRS = \ - mercury_engine.h \ - mercury_getopt.h \ - mercury_grade.h \ - mercury_goto.h \ - mercury_heap.h \ - mercury_imp.h \ - mercury_init.h \ - mercury_label.h \ - mercury_memory.h \ mercury_accurate_gc.h \ mercury_calls.h \ mercury_conf.h \ @@ -40,11 +31,17 @@ HDRS = \ mercury_deep_copy.h \ mercury_dummy.h \ mercury_dlist.h \ + mercury_engine.h \ mercury_float.h \ - mercury_string.h \ - mercury_trace.h \ - mercury_trail.h \ - mercury_types.h \ + mercury_getopt.h \ + mercury_goto.h \ + mercury_grade.h \ + mercury_heap.h \ + mercury_heap_profile.h \ + mercury_imp.h \ + mercury_init.h \ + mercury_label.h \ + mercury_memory.h \ mercury_misc.h \ mercury_overflow.h \ mercury_prof.h \ @@ -54,9 +51,13 @@ HDRS = \ mercury_spinlock.h \ mercury_std.h \ mercury_stacks.h \ + mercury_string.h \ mercury_table.h \ mercury_tags.h \ mercury_timing.h \ + mercury_trace.h \ + mercury_trail.h \ + mercury_types.h \ mercury_type_info.h \ mercury_wrapper.h \ $(LIBMER_DLL_H) @@ -75,19 +76,15 @@ MACHHDRS = machdeps/no_regs.h \ machdeps/rs6000_regs.h -# XXX .mod support is being removed. This will soon disappear. -MODS = -MOD_CS = -MOD_OS = $(MOD_CS:.c=.o) - # keep this list in alphabetical order, please -ORIG_CS = mercury_context.c \ +CFILES = mercury_context.c \ mercury_deep_copy.c \ mercury_dlist.c \ mercury_dummy.c \ mercury_engine.c \ mercury_float.c \ mercury_grade.c \ + mercury_heap_profile.c \ mercury_ho_call.c \ mercury_label.c \ mercury_memory.c \ @@ -103,10 +100,8 @@ ORIG_CS = mercury_context.c \ mercury_type_info.c \ mercury_wrapper.c -ORIG_OS = $(ORIG_CS:.c=.o) -ORIG_OS = $(ORIG_CS:.c=.o) -OBJS = $(MOD_OS) $(ORIG_OS) -PIC_OBJS = $(OBJS:.o=.$(EXT_FOR_PIC_OBJECTS)) +OBJS = $(CFILES:.c=.o) +PIC_OBJS = $(CFILES:.c=.$(EXT_FOR_PIC_OBJECTS)) LDFLAGS = -L$(BOEHM_GC_DIR) LDLIBS = \ @@ -124,6 +119,7 @@ ifeq ($(USE_DLLS),yes) DLL_CFLAGS = -Dlibmer_DEFINE_DLL +# the following header files are created automatically by Makefile.DLLs LIBMER_DLL_H = libmer_dll.h LIBMER_GLOBALS_H = libmer_globals.h @@ -152,8 +148,8 @@ libmer.so: $(PIC_OBJS) $(LDFLAGS) $(LDLIBS) \ $(SHARED_LIBS) -runtime.init: $(ORIG_CS) - cat `vpath_find $(ORIG_CS)` | grep '^INIT ' > runtime.init +runtime.init: $(CFILES) + cat `vpath_find $(CFILES)` | grep '^INIT ' > runtime.init mercury_conf.h.date: $(MERCURY_DIR)/config.status mercury_conf.h.in CONFIG_FILES= CONFIG_HEADERS=mercury_conf.h $(MERCURY_DIR)/config.status @@ -163,10 +159,10 @@ mercury_conf.h: mercury_conf.h.date @true .PHONY: cs -cs: $(MOD_CS) +cs: $(CFILES) -tags: $(MODS) $(ORIG_CS) - ctags $(MODS) $(ORIG_CS) $(HDRS) +tags: $(CFILES) $(HDRS) + ctags $(CFILES) $(HDRS) .PHONY: check_headers check_headers: @@ -205,23 +201,16 @@ install_lib: libmer.a libmer.$(EXT_FOR_SHARED_LIB) #-----------------------------------------------------------------------------# -# prevent GNU make from removing these intermediate files -dont_remove: engine.c wrapper.c call.c - # prevent Mmake from removing C files RM_C=: #-----------------------------------------------------------------------------# -clean: clean_o clean_mod_c +clean: clean_o .PHONY: clean_o clean_o: - rm -f $(MOD_OS) $(OBJS) $(PIC_OBJS) - -.PHONY: clean_mod_c -clean_mod_c: - rm -f $(MOD_OS) + rm -f $(OBJS) $(PIC_OBJS) realclean: rm -f libmer.a libmer.so runtime.init diff --git a/runtime/mercury_dummy.c b/runtime/mercury_dummy.c index a8f1b3edf..75160c230 100644 --- a/runtime/mercury_dummy.c +++ b/runtime/mercury_dummy.c @@ -5,7 +5,7 @@ */ #include "mercury_dummy.h" -#include "mercury_imp.h" /* we need libmer_globals.h for Windows DLLs */ +#include "mercury_imp.h" /* we need libmer_dll.h for Windows DLLs */ /* ** This dummy function is in a file of its own to ensure diff --git a/runtime/mercury_grade.h b/runtime/mercury_grade.h index 6eadc2f5c..3b2403d00 100644 --- a/runtime/mercury_grade.h +++ b/runtime/mercury_grade.h @@ -75,15 +75,41 @@ #ifdef PROFILE_TIME #ifdef PROFILE_CALLS - #define MR_GRADE_PART_4 _prof + #ifdef PROFILE_MEMORY + #define MR_GRADE_PART_4 _profall + #else + #define MR_GRADE_PART_4 _prof + #endif #else - #define MR_GRADE_PART_4 _proftime + #ifdef PROFILE_MEMORY + /* + ** Memory profiling interferes with time profiling, + ** so there's no point in allowing this. + */ + #error "Invalid combination of profiling options" + #else + /* Currently useless, but... */ + #define MR_GRADE_PART_4 _proftime + #endif #endif #else #ifdef PROFILE_CALLS - #define MR_GRADE_PART_4 _profcalls + #ifdef PROFILE_MEMORY + #define MR_GRADE_PART_4 _memprof + #else + #define MR_GRADE_PART_4 _profcalls + #endif #else - #define MR_GRADE_PART_4 + #ifdef PROFILE_MEMORY + /* + ** Call-graph memory profiling requires call profiling, + ** and call profiling is reasonably cheap, so there's + ** no point in allowing this. + */ + #error "Invalid combination of profiling options" + #else + #define MR_GRADE_PART_4 + #endif #endif #endif diff --git a/runtime/mercury_heap.h b/runtime/mercury_heap.h index 9654e9dc0..842ecdbbf 100644 --- a/runtime/mercury_heap.h +++ b/runtime/mercury_heap.h @@ -9,165 +9,209 @@ #ifndef MERCURY_HEAP_H #define MERCURY_HEAP_H -#include "mercury_types.h" /* for `Word' */ -#include "mercury_context.h" /* for min_heap_reclamation_point() */ +#include "mercury_types.h" /* for `Word' */ +#include "mercury_context.h" /* for min_heap_reclamation_point() */ +#include "mercury_heap_profile.h" /* for MR_record_allocation() */ #ifdef CONSERVATIVE_GC -#include "gc.h" + #include "gc.h" -#define tag_incr_hp_n(dest,tag,count) \ - ((dest) = (Word)mkword((tag), (Word)GC_MALLOC((count) * sizeof(Word)))) -#define tag_incr_hp_atomic(dest,tag,count) \ - ((dest) = (Word)mkword((tag), \ - (Word)GC_MALLOC_ATOMIC((count) * sizeof(Word)))) + #define tag_incr_hp_n(dest, tag, count) \ + ((dest) = (Word) mkword((tag), \ + (Word) GC_MALLOC((count) * sizeof(Word)))) + #define tag_incr_hp_atomic(dest, tag, count) \ + ((dest) = (Word) mkword((tag), \ + (Word) GC_MALLOC_ATOMIC((count) * sizeof(Word)))) -#ifdef INLINE_ALLOC + #ifdef INLINE_ALLOC -/* -** The following stuff uses the macros in the `gc_inl.h' header file in the -** Boehm garbage collector. They improve performance a little for -** highly allocation-intensive programs (e.g. the `nrev' benchmark). -** You'll probably need to fool around with the `-I' options to get this -** to work. Also, you must make sure that you compile with the same -** setting for -DSILENT that the boehm_gc directory was compiled with. -** -** We only want to inline allocations if the allocation size is a compile-time -** constant. This should be true for almost all the code that we generate, -** but with GCC we can use the `__builtin_constant_p()' extension to find out. -** -** The inline allocation macros are used only for allocating amounts -** of less than 16 words, to avoid fragmenting memory by creating too -** many distinct free lists. The garbage collector also requires that -** if we're allocating more than one word, we round up to an even number -** of words. -*/ + /* + ** The following stuff uses the macros in the `gc_inl.h' header file in the + ** Boehm garbage collector. They improve performance a little for + ** highly allocation-intensive programs (e.g. the `nrev' benchmark). + ** You'll probably need to fool around with the `-I' options to get this + ** to work. Also, you must make sure that you compile with the same + ** setting for -DSILENT that the boehm_gc directory was compiled with. + ** + ** We only want to inline allocations if the allocation size is a + ** compile-time constant. This should be true for almost all the code that + ** we generate, but with GCC we can use the `__builtin_constant_p()' + ** extension to find out. + ** + ** The inline allocation macros are used only for allocating amounts + ** of less than 16 words, to avoid fragmenting memory by creating too + ** many distinct free lists. The garbage collector also requires that + ** if we're allocating more than one word, we round up to an even number + ** of words. + */ -#ifndef __GNUC__ -/* -** Without the gcc extensions __builtin_constant_p() and ({...}), -** INLINE_ALLOC would probably be a performance _loss_. -*/ -#error "INLINE_ALLOC requires the use of GCC" -#endif + #ifndef __GNUC__ + /* + ** Without the gcc extensions __builtin_constant_p() and ({...}), + ** INLINE_ALLOC would probably be a performance _loss_. + */ + #error "INLINE_ALLOC requires the use of GCC" + #endif -#include "gc_inl.h" -#define tag_incr_hp(dest,tag,count) \ + #include "gc_inl.h" + #define tag_incr_hp(dest, tag, count) \ ( __builtin_constant_p(count) && (count) < 16 \ ? ({ void * temp; \ /* if size > 1, round up to an even number of words */ \ - Word num_words = ((count) == 1 ? 1 : 2 * (((count) + 1) / 2)); \ + Word num_words = ((count) == 1 ? 1 : 2 * (((count) + 1) / 2));\ GC_MALLOC_WORDS(temp, num_words); \ (dest) = (Word)mkword((tag), temp); \ }) \ - : tag_incr_hp_n((dest),(tag),(count)) \ + : tag_incr_hp_n((dest), (tag), (count)) \ ) -#else /* not INLINE_ALLOC */ + #else /* not INLINE_ALLOC */ -#define tag_incr_hp(dest,tag,count) \ - tag_incr_hp_n((dest),(tag),(count)) + #define tag_incr_hp(dest, tag, count) \ + tag_incr_hp_n((dest), (tag), (count)) -#endif /* not INLINE_ALLOC */ + #endif /* not INLINE_ALLOC */ -#define mark_hp(dest) ((void)0) -#define restore_hp(src) ((void)0) + #define mark_hp(dest) ((void)0) + #define restore_hp(src) ((void)0) /* we use `hp' as a convenient temporary here */ -#define hp_alloc(count) (incr_hp(MR_hp,(count)), MR_hp += (count), (void)0) -#define hp_alloc_atomic(count) \ - (incr_hp_atomic(MR_hp,(count)), MR_hp += (count), (void)0) + #define hp_alloc(count) \ + (incr_hp(MR_hp, (count)), MR_hp += (count), (void)0) + #define hp_alloc_atomic(count) \ + (incr_hp_atomic(MR_hp, (count)), MR_hp += (count), (void)0) #else /* not CONSERVATIVE_GC */ -#define tag_incr_hp(dest,tag,count) ( \ - (dest) = (Word)mkword(tag, (Word)MR_hp), \ - debugincrhp(count, MR_hp), \ - MR_hp += (count), \ - heap_overflow_check(), \ - (void)0 \ - ) -#define tag_incr_hp_atomic(dest,tag,count) tag_incr_hp((dest),(tag),(count)) + #define tag_incr_hp(dest, tag, count) \ + ( \ + (dest) = (Word) mkword(tag, (Word) MR_hp), \ + debugincrhp(count, MR_hp), \ + MR_hp += (count), \ + heap_overflow_check(), \ + (void)0 \ + ) + #define tag_incr_hp_atomic(dest, tag, count) \ + tag_incr_hp((dest), (tag), (count)) -#define mark_hp(dest) ( \ - (dest) = (Word)MR_hp, \ - (void)0 \ - ) + #define mark_hp(dest) \ + ( \ + (dest) = (Word) MR_hp, \ + (void)0 \ + ) -/* -** When restoring hp, we must make sure that we don't truncate the heap -** further than it is safe to. We can only truncate it as far as -** min_heap_reclamation_point. See the comments in mercury_context.h next to -** the set_min_heap_reclamation_point() macro. -*/ -#define restore_hp(src) ( \ - LVALUE_CAST(Word,MR_hp) = \ - ( (Word) MR_min_hp_rec < (src) ? \ - (src) : (Word) MR_min_hp_rec ), \ - (void)0 \ - ) + /* + ** When restoring hp, we must make sure that we don't truncate the heap + ** further than it is safe to. We can only truncate it as far as + ** min_heap_reclamation_point. See the comments in mercury_context.h next + ** to the set_min_heap_reclamation_point() macro. + */ + #define restore_hp(src) \ + ( \ + LVALUE_CAST(Word, MR_hp) = \ + ( (Word) MR_min_hp_rec < (src) \ + ? (src) \ + : (Word) MR_min_hp_rec \ + ), \ + (void)0 \ + ) -#define hp_alloc(count) incr_hp(hp,count) -#define hp_alloc_atomic(count) incr_hp_atomic(count) + #define hp_alloc(count) incr_hp(hp, count) + #define hp_alloc_atomic(count) incr_hp_atomic(count) #endif /* not CONSERVATIVE_GC */ -#define incr_hp(dest,count) tag_incr_hp((dest),mktag(0),(count)) -#define incr_hp_atomic(dest,count) \ - tag_incr_hp_atomic((dest),mktag(0),(count)) +#ifdef PROFILE_MEMORY + #define tag_incr_hp_msg(dest, tag, count, proclabel, type) \ + ( \ + MR_record_allocation((count), LABEL(proclabel), \ + MR_STRINGIFY(proclabel), (type)), \ + tag_incr_hp((dest), (tag), (count)) \ + ) + #define tag_incr_hp_atomic_msg(dest, tag, count, proclabel, type) \ + ( \ + MR_record_allocation((count), LABEL(proclabel), \ + MR_STRINGIFY(proclabel), (type)), \ + tag_incr_hp_atomic((dest), (tag), (count)) \ + ) +#else /* not PROFILE_MEMORY */ + #define tag_incr_hp_msg(dest, tag, count, proclabel, type) \ + tag_incr_hp((dest), (tag), (count)) + #define tag_incr_hp_atomic_msg(dest, tag, count, proclabel, type) \ + tag_incr_hp_atomic((dest), (tag), (count)) +#endif /* not PROFILE_MEMORY */ + +/* +** The incr_hp*() macros are defined in terms of the tag_incr_hp*() macros. +*/ +#define incr_hp(dest, count) \ + tag_incr_hp((dest), mktag(0), (count)) +#define incr_hp_msg(dest, count, proclabel, type) \ + tag_incr_hp_msg((dest), mktag(0), (count), (proclabel), (type)) +#define incr_hp_atomic(dest, count) \ + tag_incr_hp_atomic((dest), mktag(0), (count)) +#define incr_hp_atomic_msg(dest, count, proclabel, type) \ + tag_incr_hp_atomic_msg((dest), mktag(0), (count), \ + (proclabel), (type)) /* ** Note that gcc optimizes `hp += 2; return hp - 2;' ** to `tmp = hp; hp += 2; return tmp;', so we don't need to use -** gcc's expression statements here +** gcc's expression statements in the code below. */ /* used only by the hand-written example programs */ /* not by the automatically generated code */ -#define create1(w1) ( \ - hp_alloc(1), \ - MR_hp[-1] = (Word) (w1), \ - debugcr1(MR_hp[-1], MR_hp), \ - /* return */ (Word) (MR_hp - 1) \ - ) +#define create1(w1) \ + ( \ + hp_alloc(1), \ + MR_hp[-1] = (Word) (w1), \ + debugcr1(MR_hp[-1], MR_hp), \ + /* return */ (Word) (MR_hp - 1) \ + ) /* used only by the hand-written example programs */ /* not by the automatically generated code */ -#define create2(w1, w2) ( \ - hp_alloc(2), \ - MR_hp[-2] = (Word) (w1), \ - MR_hp[-1] = (Word) (w2), \ - debugcr2(MR_hp[-2], MR_hp[-1], MR_hp), \ - /* return */ (Word) (MR_hp - 2) \ - ) +#define create2(w1, w2) \ + ( \ + hp_alloc(2), \ + MR_hp[-2] = (Word) (w1), \ + MR_hp[-1] = (Word) (w2), \ + debugcr2(MR_hp[-2], MR_hp[-1], MR_hp), \ + /* return */ (Word) (MR_hp - 2) \ + ) /* used only by the hand-written example programs */ /* not by the automatically generated code */ -#define create3(w1, w2, w3) ( \ - hp_alloc(3), \ - MR_hp[-3] = (Word) (w1), \ - MR_hp[-2] = (Word) (w2), \ - MR_hp[-1] = (Word) (w3), \ - /* return */ (Word) (MR_hp - 3) \ - ) +#define create3(w1, w2, w3) \ + ( \ + hp_alloc(3), \ + MR_hp[-3] = (Word) (w1), \ + MR_hp[-2] = (Word) (w2), \ + MR_hp[-1] = (Word) (w3), \ + /* return */ (Word) (MR_hp - 3) \ + ) /* used only by the hand-written example programs */ /* not by the automatically generated code */ -#define create2_bf(w1) ( \ - MR_hp = MR_hp + 2, \ - MR_hp[-2] = (Word) (w1), \ - heap_overflow_check(), \ - /* return */ (Word) (MR_hp - 2) \ - ) +#define create2_bf(w1) \ + ( \ + MR_hp = MR_hp + 2, \ + MR_hp[-2] = (Word) (w1), \ + heap_overflow_check(), \ + /* return */ (Word) (MR_hp - 2) \ + ) /* used only by the hand-written example programs */ /* not by the automatically generated code */ -#define create2_fb(w2) ( \ - MR_hp = MR_hp + 2, \ - MR_hp[-1] = (Word) (w2), \ - heap_overflow_check(), \ - /* return */ (Word) (MR_hp - 2) \ - ) +#define create2_fb(w2) \ + ( \ + MR_hp = MR_hp + 2, \ + MR_hp[-1] = (Word) (w2), \ + heap_overflow_check(), \ + /* return */ (Word) (MR_hp - 2) \ + ) /* ** Indended for use in handwritten C code where the Mercury registers @@ -177,16 +221,18 @@ ** restore_transient_registers() after. */ -#define incr_saved_hp(A,B) do { \ - restore_transient_registers(); \ - incr_hp((A), (B)); \ - save_transient_registers(); \ - } while (0) +#define incr_saved_hp(A, B) \ + do { \ + restore_transient_registers(); \ + incr_hp((A), (B)); \ + save_transient_registers(); \ + } while (0) -#define incr_saved_hp_atomic(A,B) do { \ - restore_transient_registers(); \ - incr_hp_atomic((A), (B)); \ - save_transient_registers(); \ - } while (0) +#define incr_saved_hp_atomic(A, B) \ + do { \ + restore_transient_registers(); \ + incr_hp_atomic((A), (B)); \ + save_transient_registers(); \ + } while (0) #endif /* not MERCURY_HEAP_H */ diff --git a/runtime/mercury_heap_profile.c b/runtime/mercury_heap_profile.c new file mode 100644 index 000000000..a9a502331 --- /dev/null +++ b/runtime/mercury_heap_profile.c @@ -0,0 +1,123 @@ +/* +** Copyright (C) 1997 The University of Melbourne. +** This file may only be copied under the terms of the GNU Library General +** Public License - see the file COPYING.LIB in the Mercury distribution. +*/ + +/* +** File: mercury_heap_profile.c. +** Main authors: zs, fjh +** +** This module records information about the allocations of cells on the heap. +** +** The information recorded by this module is used by code in +** library/benchmarking.m. +*/ + +/*---------------------------------------------------------------------------*/ + +#include "mercury_imp.h" + +#include +#include + +#include "mercury_prof_mem.h" +#include "mercury_heap_profile.h" + +/* all fields of these variables are initialized to 0 */ +MR_memprof_counter MR_memprof_overall; +MR_memprof_table MR_memprof_procs; +MR_memprof_table MR_memprof_types; + +/* +** Initialize a heap profiling counter. +*/ +static void +MR_init_counter(MR_memprof_counter *counter) +{ + MR_zero_dword(counter->cells_at_period_start); + MR_zero_dword(counter->words_at_period_start); + MR_zero_dword(counter->cells_since_period_start); + MR_zero_dword(counter->words_since_period_start); +} + +/* +** Increment the fields in a heap profiling counter to record the allocation +** of a single cell of `size' words. +*/ +static void +MR_increment_counter(MR_memprof_counter *counter, int size) +{ + MR_increment_dword(counter->cells_since_period_start, 1); + MR_increment_dword(counter->words_since_period_start, size); +} + +/* +** Search the specified `table' to find the entry for the given `name' +** allocating one if there isn't one already, and then increment +** the counters for that entry for an allocation of the specified `size'. +*/ +static void +MR_increment_table_entry(MR_memprof_table *table, const char *name, + Code *addr, int size) +{ + bool found; + int diff; + MR_memprof_record **node_addr; + MR_memprof_record *node; + + /* + ** Search the tree to find the node with this name. + */ + found = FALSE; + node_addr = &table->root; + while ((node = *node_addr) != NULL) { + diff = strcmp(name, node->name); + if (diff < 0) { + node_addr = &node->left; + } else if (diff > 0) { + node_addr = &node->right; + } else { + found = TRUE; + break; + } + } + + /* + ** If the tree didn't already contain a node with this name, + ** create a new node for it. + */ + if (!found) { + node = MR_PROF_NEW(MR_memprof_record); + node->name = name; + node->addr = addr; + node->left = NULL; + node->right = NULL; + MR_init_counter(&node->counter); + + *node_addr = node; + + table->num_entries++; + } + + /* Now record the counts in this node */ + MR_increment_counter(&node->counter, size); +} + +/* +** Record heap profiling information for an allocation of size `size' +** in procedure `proc' for an object of type `type'. +*/ +void +MR_record_allocation(int size, Code *proc_addr, + const char *proc_name, const char *type) +{ + /* + ** Increment the overall totals, + ** record the allocation in the per-procedure table, and + ** record the allocation in the per-type table. + */ + MR_increment_counter(&MR_memprof_overall, size); + MR_increment_table_entry(&MR_memprof_procs, proc_name, proc_addr, size); + MR_increment_table_entry(&MR_memprof_types, type, NULL, size); +} diff --git a/runtime/mercury_heap_profile.h b/runtime/mercury_heap_profile.h new file mode 100644 index 000000000..a411766ca --- /dev/null +++ b/runtime/mercury_heap_profile.h @@ -0,0 +1,181 @@ +/* +** Copyright (C) 1997 The University of Melbourne. +** This file may only be copied under the terms of the GNU Library General +** Public License - see the file COPYING.LIB in the Mercury distribution. +*/ + +/* +** File: mercury_heap_profile.h +** Main authors: zs, fjh +** +** This module records information about the allocations of cells on the heap. +** The information recorded here is used by mercury_prof.c and +** by library/benchmarking.m. +*/ + +/*---------------------------------------------------------------------------*/ + +#ifndef MERCURY_HEAP_PROFILE_H +#define MERCURY_HEAP_PROFILE_H + +/*---------------------------------------------------------------------------*/ + +/* +** Due to garbage collection, the total amount of memory allocated can +** exceed the amount of real or even virtual memory available. Hence +** the total amount of memory allocated by a long-running Mercury program +** might not fit into a single 32-bit `unsigned long'. +** Hence we record memory usage counts using either `unsigned long long', +** if available, or otherwise using a pair of unsigned longs (ugh). +*/ + +#ifdef MR_HAVE_LONG_LONG + + /* nice and simple */ + + typedef unsigned long long MR_dword; + + #define MR_convert_dword_to_double(dword_form, double_form) \ + ((double_form) = (double) (dword_form)) + + #define MR_zero_dword(dword) \ + ((dword) = 0) + + #define MR_increment_dword(dword, inc) \ + ((dword) += (inc)) + + #define MR_add_two_dwords(src_dest_dword, src_dword) \ + ((src_dest_dword) += (src_dword)) + +#else /* not MR_HAVE_LONG_LONG */ + + /* oh well, guess we have to do it the hard way :-( */ + + typedef struct MR_dword + { + unsigned long low_word; + unsigned long high_word; + } MR_dword; + + #include + + #define MR_HIGHWORD_TO_DOUBLE (((double) ULONG_MAX) + 1.0) + + #define MR_convert_dword_to_double(dword_form, double_form) \ + do { \ + double_form = (MR_HIGHWORD_TO_DOUBLE \ + * (double) (dword_form).high_word) \ + + (double) (dword_form).low_word; \ + } while (0) + + #define MR_zero_dword(dword) \ + do { \ + (dword).low_word = 0; \ + (dword).high_word = 0; \ + } while (0) + + #define MR_increment_dword(dword, inc) \ + do { \ + unsigned long old_low_word = (dword).low_word; \ + (dword).low_word += (inc); \ + if ((dword).low_word < old_low_word) { \ + (dword).high_word += 1; \ + } \ + } while (0) + + #define MR_add_two_dwords(src_dest_dword, src_dword) \ + do { \ + unsigned long old_low_word = (src_dest_dword).low_word; \ + (src_dest_dword).low_word += (src_dword).low_word; \ + if ((src_dest_dword).low_word < old_low_word) { \ + (src_dest_dword).high_word += 1; \ + } \ + (src_dest_dword).high_word += (src_dword).high_word; \ + } while (0) + +#endif /* not MR_HAVE_LONG_LONG */ + +/*---------------------------------------------------------------------------*/ + +/* type declarations */ + +/* +** We count memory allocation in units of +** - cells (i.e. individual allocations), and +** - words +** +** We keep track of how much allocation occurs in each "period". +** A period ends (and a new period begins) at each call to +** report_stats or report_full_memory_stats in library/benchmarking.m. +** We also keep track of the total allocation over all periods. +** +** We keep track of how much memory was allocated +** - by each procedure, +** - for objects of each type, +** - and an overall total +** +** The tables of counters for each procedure is represented +** as a binary search tree. Similarly for the table of counters +** for each type. +*/ + +typedef struct MR_memprof_counter +{ + MR_dword cells_at_period_start; + MR_dword words_at_period_start; + MR_dword cells_since_period_start; + MR_dword words_since_period_start; +} MR_memprof_counter; + +/* type representing a binary tree node */ +typedef struct MR_memprof_record +{ + const char *name; /* of the type or procedure */ + Code *addr; /* for procedures only */ + MR_memprof_counter counter; + struct MR_memprof_record *left; /* left sub-tree */ + struct MR_memprof_record *right; /* right sub-tree */ +} MR_memprof_record; + +/* type representing a binary tree */ +typedef struct MR_memprof_table +{ + MR_memprof_record *root; + int num_entries; +} MR_memprof_table; + +/*---------------------------------------------------------------------------*/ + +/* global variables */ + +extern MR_memprof_counter MR_memprof_overall; +extern MR_memprof_table MR_memprof_procs; +extern MR_memprof_table MR_memprof_types; + +/*---------------------------------------------------------------------------*/ + +/* function declarations */ + +/* +** MR_record_allocation(size, proc_addr, proc_name, type): +** Record heap profiling information for an allocation of one cell +** of `size' words by procedure `proc_name' with address `proc_addr' +** for an object of type `type'. +** The heap profiling information is recorded in the three global +** variables above. +*/ +extern void MR_record_allocation(int size, Code *proc_addr, + const char *proc_name, const char *type); + +/* +** MR_prof_output_mem_tables(): +** Write out the information recorded by MR_record_allocation() +** to a pair of files `Prof.MemoryWords' and `Prof.MemoryCells'. +*/ +extern void MR_prof_output_mem_tables(void); + +/*---------------------------------------------------------------------------*/ + +#endif /* MERCURY_HEAP_PROFILE_H */ + +/*---------------------------------------------------------------------------*/ diff --git a/runtime/mercury_imp.h b/runtime/mercury_imp.h index bf87d539e..b1015089b 100644 --- a/runtime/mercury_imp.h +++ b/runtime/mercury_imp.h @@ -26,8 +26,9 @@ #include "mercury_conf.h" /* -** The following must come before any definitions of global variables. -** This is necessary to support DLLs on Windows. +** The following must come before any declarations of or use of +** global variables. This is necessary to support DLLs on Windows. +** Note: `libmer_dll.h' is automatically generated by `Makefile.DLLs'. */ #ifdef USE_DLLS #include "libmer_dll.h" diff --git a/runtime/mercury_label.c b/runtime/mercury_label.c index 9548fdaad..c5463533d 100644 --- a/runtime/mercury_label.c +++ b/runtime/mercury_label.c @@ -9,6 +9,8 @@ ** that map from procedure names to addresses and vice versa. */ +#include "mercury_imp.h" /* we need libmer_dll.h for Windows DLLs */ + #include #include @@ -16,9 +18,8 @@ #include "mercury_label.h" -#include "mercury_imp.h" /* we need libmer_globals.h for Windows DLLs */ #include "mercury_table.h" /* for `Table' */ -#include "mercury_prof.h" /* for prof_output_addr_decls() */ +#include "mercury_prof.h" /* for prof_output_addr_decl() */ #include "mercury_engine.h" /* for `progdebug' */ #include "mercury_wrapper.h" /* for do_init_modules() */ @@ -76,7 +77,7 @@ insert_entry(const char *name, Code *addr, Word *entry_layout_info) entry->e_layout = entry_layout_info; #ifdef PROFILE_CALLS - prof_output_addr_decls(name, addr); + if (MR_profiling) MR_prof_output_addr_decl(name, addr); #endif #ifndef SPEED diff --git a/runtime/mercury_prof.c b/runtime/mercury_prof.c index 17b877fdb..7cb17e7b7 100644 --- a/runtime/mercury_prof.c +++ b/runtime/mercury_prof.c @@ -18,9 +18,10 @@ #include #include "mercury_prof.h" +#include "mercury_heap_profile.h" /* for MR_prof_output_mem_tables() */ +#include "mercury_prof_mem.h" /* for prof_malloc() */ #include "mercury_std.h" -#include "mercury_prof_mem.h" #include "mercury_timing.h" #if defined(PROFILE_TIME) @@ -38,11 +39,12 @@ static int MR_itimer_sig; static int MR_itimer_type; +static const char * MR_time_method; #endif /* PROFILE_TIME */ /* -** Need to make these command line options +** XXX Ought to make these command line options */ #define CALL_TABLE_SIZE 4096 #define TIME_TABLE_SIZE 4096 @@ -85,27 +87,47 @@ typedef struct s_prof_time_node { ** Global Variables */ -Code * volatile prof_current_proc; -static volatile int in_profiling_code = FALSE; +Code * volatile MR_prof_current_proc; /* ** Private global variables */ +static volatile int in_profiling_code = FALSE; + + #ifdef PROFILE_CALLS -static FILE *declfptr = NULL; -static prof_call_node *addr_pair_table[CALL_TABLE_SIZE] = {NULL}; -static bool profiling_on = FALSE; + static FILE *decl_fptr = NULL; + static prof_call_node *addr_pair_table[CALL_TABLE_SIZE] = {NULL}; #endif #ifdef PROFILE_TIME -static prof_time_node *addr_table[TIME_TABLE_SIZE] = {NULL}; + static bool time_profiling_on = FALSE; + static prof_time_node *addr_table[TIME_TABLE_SIZE] = {NULL}; #endif /* ** Local function declarations */ -static void prof_finish(void); + +#ifdef PROFILE_TIME + static void prof_init_time_profile_method(void); + static void prof_time_profile(int); + static void prof_output_addr_table(void); + static void print_time_node(FILE *fptr, prof_time_node *node); +#endif + +#ifdef PROFILE_CALLS + static void print_addr_pair_node(FILE *fptr, prof_call_node *node); + static void prof_output_addr_pair_table(void); +#endif + +#ifdef PROFILE_MEMORY + static void prof_output_mem_tables(void); + static void print_memory_node(FILE *words_fptr, FILE *cells_fptr, + MR_memprof_record *node); +#endif + /* ======================================================================== */ @@ -139,7 +161,7 @@ strerror(int errnum) /* utility routines for opening and closing files */ -#if defined(PROFILE_TIME) || defined(PROFILE_CALLS) +#if defined(PROFILE_TIME) || defined(PROFILE_CALLS) || defined(PROFILE_MEMORY) static FILE * checked_fopen(const char *filename, const char *message, const char *mode) @@ -180,7 +202,7 @@ checked_atexit(void (*func)(void)) } } -#endif /* defined(PROFILE_TIME) || defined(PROFILE_CALLS) */ +#endif /* PROFILE_TIME or PROFILE_CALLS or PROFILE_MEMORY */ #ifdef PROFILE_TIME @@ -234,15 +256,9 @@ checked_signal(int sig, void (*handler)(int)) #ifdef PROFILE_TIME -static void prof_init_time_profile_method(void); -static void prof_time_profile(int); - /* -** prof_init_time_profile: -** Writes the value of MR_CLOCK_TICKS_PER_SECOND and -** MR_CLOCK_TICKS_PER_PROF_SIG at the start of the file -** 'Prof.Counts'. -** Then sets up the profiling timer and starts it up. +** prof_turn_on_time_profiling: +** Sets up the profiling timer and starts it up. ** At the moment it is after every MR_CLOCK_TICKS_PER_PROF_SIG ** ticks of the clock. ** @@ -252,30 +268,20 @@ static void prof_time_profile(int); */ void -prof_init_time_profile(void) +MR_prof_turn_on_time_profiling(void) { FILE *fptr; struct itimerval itime; const long prof_sig_interval_in_usecs = MR_CLOCK_TICKS_PER_PROF_SIG * (MR_USEC_PER_SEC / MR_CLOCK_TICKS_PER_SECOND); - /* output the value of MR_CLOCK_TICKS_PER_SECOND */ - fptr = checked_fopen("Prof.Counts", "create", "w"); - fprintf(fptr, "%d %d\n", - MR_CLOCK_TICKS_PER_SECOND, MR_CLOCK_TICKS_PER_PROF_SIG); - checked_fclose(fptr, "Prof.Counts"); - - checked_atexit(prof_finish); - - profiling_on = TRUE; + time_profiling_on = TRUE; itime.it_value.tv_sec = 0; itime.it_value.tv_usec = prof_sig_interval_in_usecs; itime.it_interval.tv_sec = 0; itime.it_interval.tv_usec = prof_sig_interval_in_usecs; - prof_init_time_profile_method(); - checked_signal(MR_itimer_sig, prof_time_profile); checked_setitimer(MR_itimer_type, &itime); } @@ -293,18 +299,21 @@ prof_init_time_profile_method(void) case MR_profile_real_time: MR_itimer_type = ITIMER_REAL; MR_itimer_sig = SIGALRM; + MR_time_method = "real-time"; break; #endif #if defined(ITIMER_VIRTUAL) && defined(SIGVTALRM) case MR_profile_user_time: MR_itimer_type = ITIMER_VIRTUAL; MR_itimer_sig = SIGVTALRM; + MR_time_method = "user-time"; break; #endif #if defined(ITIMER_VIRTUAL) && defined(SIGVTALRM) case MR_profile_user_plus_system_time: MR_itimer_type = ITIMER_PROF; MR_itimer_sig = SIGPROF; + MR_time_method = "user-plus-system-time"; break; #endif default: @@ -324,7 +333,7 @@ prof_init_time_profile_method(void) */ void -prof_call_profile(Code *Callee, Code *Caller) +MR_prof_call_profile(Code *Callee, Code *Caller) { prof_call_node *node, **node_addr, *new_node; int hash_value; @@ -350,7 +359,7 @@ prof_call_profile(Code *Callee, Code *Caller) } } - new_node = prof_make(prof_call_node); + new_node = MR_PROF_NEW(prof_call_node); new_node->Callee = Callee; new_node->Caller = Caller; new_node->count = 1; @@ -380,6 +389,7 @@ prof_time_profile(int signum) { prof_time_node *node, **node_addr, *new_node; int hash_value; + Code *current_proc; /* Ignore any signals we get in this function or in prof_call_profile */ if (in_profiling_code) { @@ -388,13 +398,14 @@ prof_time_profile(int signum) in_profiling_code = TRUE; - hash_value = hash_prof_addr(prof_current_proc); + current_proc = MR_prof_current_proc; + hash_value = hash_prof_addr(current_proc); node_addr = &addr_table[hash_value]; while ((node = *node_addr) != NULL) { - if (node->Addr < prof_current_proc) { + if (node->Addr < current_proc) { node_addr = &node->left; - } else if (node->Addr > prof_current_proc) { + } else if (node->Addr > current_proc) { node_addr = &node->right; } else { node->count++; @@ -403,8 +414,8 @@ prof_time_profile(int signum) } } - new_node = prof_make(prof_time_node); - new_node->Addr = prof_current_proc; + new_node = MR_PROF_NEW(prof_time_node); + new_node->Addr = current_proc; new_node->count = 1; new_node->left = NULL; new_node->right = NULL; @@ -422,11 +433,11 @@ prof_time_profile(int signum) */ void -prof_turn_off_time_profiling(void) +MR_prof_turn_off_time_profiling(void) { struct itimerval itime; - if (profiling_on == FALSE) + if (time_profiling_on == FALSE) return; itime.it_value.tv_sec = 0; @@ -449,35 +460,27 @@ prof_turn_off_time_profiling(void) ** Caller then callee followed by count. */ -static void print_addr_pair_node(FILE *fptr, prof_call_node *node); - -void +static void prof_output_addr_pair_table(void) { - static bool addr_pair_table_written = FALSE; - FILE *fptr; - int i; - if (addr_pair_table_written == TRUE) - return; + FILE *fptr; + int i; fptr = checked_fopen("Prof.CallPair", "create", "w"); + for (i = 0; i < CALL_TABLE_SIZE ; i++) { print_addr_pair_node(fptr, addr_pair_table[i]); } checked_fclose(fptr, "Prof.CallPair"); - - addr_pair_table_written = TRUE; } static void print_addr_pair_node(FILE *fptr, prof_call_node *node) { - if (node != (prof_call_node *) NULL) { + if (node != NULL) { fprintf(fptr, "%ld %ld %lu\n", - (long) node->Caller, - (long) node->Callee, - node->count); + (long) node->Caller, (long) node->Callee, node->count); print_addr_pair_node(fptr, node->left); print_addr_pair_node(fptr, node->right); } @@ -487,22 +490,22 @@ print_addr_pair_node(FILE *fptr, prof_call_node *node) /* ======================================================================== */ -#ifdef PROFILE_CALLS +#if defined(PROFILE_CALLS) /* -** prof_output_addr_decls: -** Ouputs the main predicate labels as well as their machine -** addresses to a file called "Prof.Decl". -** This is called from insert_entry() in label.c. +** prof_output_addr_decl: +** Ouputs an entry label name and its corresponding machine +** address to a file called "Prof.Decl". +** This is called from insert_entry() in mercury_label.c. */ void -prof_output_addr_decls(const char *name, const Code *address) +MR_prof_output_addr_decl(const char *name, const Code *address) { - if (!declfptr) { - declfptr = checked_fopen("Prof.Decl", "create", "w"); + if (!decl_fptr) { + decl_fptr = checked_fopen("Prof.Decl", "create", "w"); } - fprintf(declfptr, "%ld\t%s\n", (long) address, name); + fprintf(decl_fptr, "%ld\t%s\n", (long) address, name); } #endif /* PROFILE_CALLS */ @@ -513,39 +516,45 @@ prof_output_addr_decls(const char *name, const Code *address) /* ** prof_output_addr_table: -** Outputs the addresses saved whenever PROF_SIG is received to -** the file "Prof.Counts" +** Writes out the time profiling counts recorded +** by the profile signal handler to the file `Prof.Counts'. */ -static void print_time_node(FILE *fptr, prof_time_node *node); - -void +static void prof_output_addr_table(void) { - static bool addr_table_written = FALSE; FILE *fptr; int i; + double scale; + double rate; - if (addr_table_written == TRUE) - return; + fptr = checked_fopen("Prof.Counts", "create", "w"); - fptr = checked_fopen("Prof.Counts", "append to", "a"); + /* + ** Write out header line indicating what we are profiling, + ** the scale factor, and the units. + ** The scale factor is the time per profiling interrupt. + ** The units are seconds. + */ + scale = (double) MR_CLOCK_TICKS_PER_PROF_SIG / + (double) MR_CLOCK_TICKS_PER_SECOND; + fprintf(fptr, "%s %f %s\n", MR_time_method, scale, "seconds"); + + /* + ** Write out the time profiling data: one one-line entry per node. + */ for (i = 0; i < TIME_TABLE_SIZE ; i++) { print_time_node(fptr, addr_table[i]); } checked_fclose(fptr, "Prof.Counts"); - - addr_table_written = TRUE; } static void print_time_node(FILE *fptr, prof_time_node *node) { if (node != (prof_time_node *) NULL) { - fprintf(fptr, "%ld %lu\n", - (long) node->Addr, - node->count); + fprintf(fptr, "%ld %lu\n", (long) node->Addr, node->count); print_time_node(fptr, node->left); print_time_node(fptr, node->right); } @@ -553,16 +562,107 @@ print_time_node(FILE *fptr, prof_time_node *node) #endif /* PROFILE_TIME */ -void prof_finish(void) -{ +/* ======================================================================== */ +#ifdef PROFILE_MEMORY + +/* +** prof_output_addr_table: +** Writes the value of MR_CLOCK_TICKS_PER_SECOND and +** MR_CLOCK_TICKS_PER_PROF_SIG at the start of the file +** `Prof.Counts'. Then outputs the addresses saved +** whenever PROF_SIG is received (also to `Prof.Counts'). +*/ + +static void +prof_output_mem_tables(void) +{ + FILE *words_fptr; + FILE *cells_fptr; + int i; + + words_fptr = checked_fopen("Prof.MemoryWords", "create", "w"); + cells_fptr = checked_fopen("Prof.MemoryCells", "create", "w"); + + fprintf(words_fptr, "%s %f %s\n", + "memory-words", 0.001, "kilowords"); + fprintf(cells_fptr, "%s %f %s\n", + "memory-cells", 0.001, "kilocells"); + + print_memory_node(words_fptr, cells_fptr, MR_memprof_procs.root); + + checked_fclose(words_fptr, "Prof.MemoryWords"); + checked_fclose(cells_fptr, "Prof.MemoryCells"); +} + +static void +print_memory_node(FILE *words_fptr, FILE *cells_fptr, MR_memprof_record *node) +{ + if (node != NULL) { + MR_dword cells, words; + + cells = node->counter.cells_at_period_start; + words = node->counter.words_at_period_start; + + MR_add_two_dwords(cells, + node->counter.cells_since_period_start); + MR_add_two_dwords(words, + node->counter.words_since_period_start); + + if (cells.high_word || words.high_word) { + fprintf(stderr, "Mercury runtime: memory profile " + "counter for `%s' overflowed\n", + node->name); + } + fprintf(words_fptr, "%ld %lu\n", + (long) node->addr, words.low_word); + fprintf(cells_fptr, "%ld %lu\n", + (long) node->addr, cells.low_word); + + print_memory_node(words_fptr, cells_fptr, node->left); + print_memory_node(words_fptr, cells_fptr, node->right); + } +} + +#endif /* PROFILE_MEMORY */ + +/* ======================================================================== */ + +void +MR_prof_init(void) +{ #ifdef PROFILE_TIME - prof_turn_off_time_profiling(); - prof_output_addr_table(); + prof_init_time_profile_method(); #endif -#ifdef PROFILE_CALLS - prof_output_addr_pair_table(); +#if defined(PROFILE_TIME) || defined(PROFILE_CALLS) || defined(PROFILE_MEMORY) + checked_atexit(MR_prof_finish); #endif } +void +MR_prof_finish(void) +{ + /* ensure this routine only gets run once, even if called twice */ + static bool done = FALSE; + if (done) return; + done = TRUE; + +#ifdef PROFILE_TIME + MR_prof_turn_off_time_profiling(); + prof_output_addr_table(); +#endif + +#if defined(PROFILE_CALLS) + if (decl_fptr) { + checked_fclose(decl_fptr, "Prof.Decl"); + } + prof_output_addr_pair_table(); +#endif + +#ifdef PROFILE_MEMORY + prof_output_mem_tables(); +#endif +} + +/* ======================================================================== */ diff --git a/runtime/mercury_prof.h b/runtime/mercury_prof.h index b6fdd875a..c76bd6e57 100644 --- a/runtime/mercury_prof.h +++ b/runtime/mercury_prof.h @@ -4,7 +4,10 @@ ** Public License - see the file COPYING.LIB in the Mercury distribution. */ -/* mercury_prof.h -- definitions for profiling */ +/* +** mercury_prof.h -- definitions for profiling. +** (See also mercury_heap_profiling.h.) +*/ #ifndef MERCURY_PROF_H #define MERCURY_PROF_H @@ -12,38 +15,69 @@ #include "mercury_types.h" /* for `Code *' */ /* -** this variable holds the address of the "current" procedure so that +** This variable holds the address of the "current" procedure so that ** when a profiling interrupt occurs, the profiler knows where we are, ** so that it can credit the time to the appropriate procedure. */ -extern Code * volatile prof_current_proc; +extern Code * volatile MR_prof_current_proc; /* -** the following two macros are used to ensure that the profiler can +** The following two macros are used to ensure that the profiler can ** use `prof_current_proc' to determine what procedure is currently -** being executed when a profiling interrupt occurs +** being executed when a profiling interrupt occurs. */ #ifdef PROFILE_TIME -#define set_prof_current_proc(target) (prof_current_proc = (target)) -#define update_prof_current_proc(target) (prof_current_proc = (target)) + #define set_prof_current_proc(target) \ + (MR_prof_current_proc = (target)) + #define update_prof_current_proc(target) \ + (MR_prof_current_proc = (target)) #else -#define set_prof_current_proc(target) ((void)0) -#define update_prof_current_proc(target) ((void)0) + #define set_prof_current_proc(target) ((void)0) + #define update_prof_current_proc(target) ((void)0) #endif +/* +** The PROFILE() macro is used (by mercury_calls.h) to record each call. +*/ + #ifdef PROFILE_CALLS -#define PROFILE(callee, caller) prof_call_profile((callee), (caller)) + #define PROFILE(callee, caller) MR_prof_call_profile((callee), (caller)) #else -#define PROFILE(callee, caller) ((void)0) + #define PROFILE(callee, caller) ((void)0) #endif -extern void prof_init_time_profile(void); -extern void prof_call_profile(Code *, Code *); -extern void prof_turn_off_time_profiling(void); -extern void prof_output_addr_pair_table(void); -extern void prof_output_addr_decls(const char *, const Code *); -extern void prof_output_addr_table(void); +#ifdef PROFILE_CALLS + extern void MR_prof_call_profile(Code *, Code *); +#endif + + +/* +** The prof_output_addr_decl() function is used by insert_entry() in +** mercury_label.c to record the address of each entry label. +*/ + +extern void MR_prof_output_addr_decl(const char *name, const Code *address); + + +/* +** The following functions are used by mercury_wrapper.c to +** initiate profiling, at the start of the the program, +** and to finish up profiling (writing the profiling data to files) +** at the end of the program. +** Note that prof_init() calls atexit(prof_finish), so that it can handle +** the case where the program exits by calling exit() rather than just +** returning, so it is actually not necessary to call prof_finish() +** explicitly. +*/ + +extern void MR_prof_init(void); +extern void MR_prof_finish(void); + +#ifdef PROFILE_TIME + extern void MR_prof_turn_on_time_profiling(void); + extern void MR_prof_turn_off_time_profiling(void); +#endif #endif /* not MERCURY_PROF_H */ diff --git a/runtime/mercury_prof_mem.c b/runtime/mercury_prof_mem.c index c114f0648..61c96ee04 100644 --- a/runtime/mercury_prof_mem.c +++ b/runtime/mercury_prof_mem.c @@ -11,10 +11,10 @@ ** ** This module is needed to get around the problem that when a signal occurs ** it may be in a malloc. The handling routine may also do a malloc which -** stuff's up the internal state of malloc and cause a seg fault. -** -** NB. This shouldn't be needed anymore when native gc is implemented -** as mercury will no longer use malloc anymore. +** stuffs up the internal state of malloc and cause a seg fault. +** To avoid this problem, we use our own version of malloc() which +** allocates memory in large chunks, reducing the chance of this +** problem occurring. */ #include @@ -50,7 +50,7 @@ static void *next = NULL; /* Pointer to next data block */ void * -prof_malloc(size_t size) +MR_prof_malloc(size_t size) { register void *p; diff --git a/runtime/mercury_prof_mem.h b/runtime/mercury_prof_mem.h index 019524f31..081d1e5b1 100644 --- a/runtime/mercury_prof_mem.h +++ b/runtime/mercury_prof_mem.h @@ -23,8 +23,12 @@ ** from a profiling interrupt that interrupted another call to newmem(). ** Doing that is bad news, because newmem() is not re-entrant. */ -void *prof_malloc(size_t); +extern void *MR_prof_malloc(size_t); -#define prof_make(t) ((t *) prof_malloc(sizeof(t))) +/* +** Use the following macros rather than calling MR_prof_malloc() directly. +*/ +#define MR_PROF_NEW(t) ((t *) MR_prof_malloc(sizeof(t))) +#define MR_PROF_NEW_ARRAY(t,n) ((t *) MR_prof_malloc(sizeof(t) * (n))) #endif /* not MERCURY_PROF_MEM_H */ diff --git a/runtime/mercury_wrapper.c b/runtime/mercury_wrapper.c index ec63d53aa..7e3aef7b7 100644 --- a/runtime/mercury_wrapper.c +++ b/runtime/mercury_wrapper.c @@ -88,6 +88,8 @@ int mercury_argc; /* not counting progname */ char ** mercury_argv; int mercury_exit_status = 0; +bool MR_profiling = TRUE; + /* ** EXTERNAL DEPENDENCIES ** @@ -221,6 +223,9 @@ mercury_runtime_init(int argc, char **argv) /* start up the Mercury engine */ init_engine(); + /* initialize profiling */ + if (MR_profiling) MR_prof_init(); + /* ** We need to call save_registers(), since we're about to ** call a C->Mercury interface function, and the C->Mercury @@ -427,7 +432,7 @@ process_options(int argc, char **argv) unsigned long size; int c; - while ((c = getopt(argc, argv, "acd:hLlP:p:r:s:tT:w:xz:1:2:3:")) != EOF) + while ((c = getopt(argc, argv, "acC:d:hLlP:pr:s:tT:w:xz:1:2:3:")) != EOF) { switch (c) { @@ -440,6 +445,14 @@ process_options(int argc, char **argv) check_space = TRUE; break; + case 'C': + if (sscanf(optarg, "%lu", &size) != 1) + usage(); + + pcache_size = size * 1024; + + break; + case 'd': if (streq(optarg, "b")) nondstackdebug = TRUE; @@ -513,6 +526,10 @@ process_options(int argc, char **argv) exit(0); } + case 'p': + MR_profiling = FALSE; + break; + #ifdef PARALLEL case 'P': if (sscanf(optarg, "%u", &numprocs) != 1) @@ -524,14 +541,6 @@ process_options(int argc, char **argv) break; #endif - case 'p': - if (sscanf(optarg, "%lu", &size) != 1) - usage(); - - pcache_size = size * 1024; - - break; - case 'r': if (sscanf(optarg, "%d", &repeats) != 1) usage(); @@ -603,7 +612,7 @@ process_options(int argc, char **argv) } case 'x': #ifdef CONSERVATIVE_GC - GC_dont_gc = 1; + GC_dont_gc = TRUE; #endif break; @@ -656,17 +665,18 @@ static void usage(void) { printf("Mercury runtime usage:\n" - "MERCURY_OPTIONS=\"[-hclt] [-d[abcdghs]] [-[sz][hdn]#]\n" - " [-p#] [-r#] [-1#] [-2#] [-3#] [-w name]\"\n" + "MERCURY_OPTIONS=\"[-hclLtxp] [-T[rvp]] [-d[abcdghs]]\n" + " [-[szt][hdn]#] [-C#] [-r#] [-w name] [-[123]#]\"\n" "-h \t\tprint this usage message\n" "-c \t\tcheck cross-function stack usage\n" "-l \t\tprint all labels\n" "-L \t\tcheck for duplicate labels\n" "-t \t\ttime program execution\n" + "-x \t\tdisable garbage collection\n" + "-p \t\tdisable profiling\n" "-Tr \t\tprofile real time (using ITIMER_REAL)\n" "-Tv \t\tprofile user time (using ITIMER_VIRTUAL)\n" "-Tp \t\tprofile user + system time (using ITIMER_PROF)\n" - "-x \t\tdisable garbage collection\n" "-dg \t\tdebug gotos\n" "-dc \t\tdebug calls\n" "-db \t\tdebug backtracking\n" @@ -690,9 +700,11 @@ usage(void) #ifdef MR_USE_TRAIL "-zt \t\tallocate n kb for the trail redzone\n" #endif + "-C \t\tprimary cache size in kbytes\n" +#ifdef PARALLEL "-P \t\tnumber of processes to use for parallel execution\n" "\t\tapplies only if Mercury is configured with --enable-parallel\n" - "-p \t\tprimary cache size in kbytes\n" +#endif "-r \t\trepeat n times\n" "-w \tcall predicate with given name (default: main/2)\n" "-1 \t\tinitialize register r1 with value x\n" @@ -872,7 +884,7 @@ Define_entry(do_interpreter); } #ifdef PROFILE_TIME - prof_init_time_profile(); + if (MR_profiling) MR_prof_turn_on_time_profiling(); #endif noprof_call(program_entry_point, LABEL(global_success)); @@ -904,12 +916,9 @@ Define_label(global_fail); #endif Define_label(all_done); + #ifdef PROFILE_TIME - prof_turn_off_time_profiling(); - prof_output_addr_table(); -#endif -#ifdef PROFILE_CALLS - prof_output_addr_pair_table(); + if (MR_profiling) MR_prof_turn_off_time_profiling(); #endif MR_maxfr = (Word *) pop(); @@ -947,6 +956,8 @@ mercury_runtime_terminate(void) (*MR_library_finalizer)(); + if (MR_profiling) MR_prof_finish(); + terminate_engine(); /* diff --git a/runtime/mercury_wrapper.h b/runtime/mercury_wrapper.h index 09e81b003..150de12ce 100644 --- a/runtime/mercury_wrapper.h +++ b/runtime/mercury_wrapper.h @@ -95,4 +95,6 @@ enum MR_TimeProfileMethod { extern enum MR_TimeProfileMethod MR_time_profile_method; +extern bool MR_profiling; + #endif /* not MERCURY_WRAPPER_H */ diff --git a/scripts/init_grade_options.sh-subr b/scripts/init_grade_options.sh-subr index 2281dc593..209fef5e3 100644 --- a/scripts/init_grade_options.sh-subr +++ b/scripts/init_grade_options.sh-subr @@ -19,6 +19,7 @@ global_regs=true gc_method=conservative profile_time=false profile_calls=false +profile_memory=false use_trail=false args_method=compact debug=false diff --git a/scripts/mgnuc.in b/scripts/mgnuc.in index 90369dc34..185b6e05e 100644 --- a/scripts/mgnuc.in +++ b/scripts/mgnuc.in @@ -26,6 +26,7 @@ Options: -p, --profiling --profile-calls --profile-time + --profile-memory --debug --use-trail --args {simple, compact} @@ -206,6 +207,11 @@ case $profile_calls in false) PROF_CALLS_OPTS="" ;; esac +case $profile_memory in + true) PROF_MEMORY_OPTS="-DPROFILE_MEMORY" ;; + false) PROF_MEMORY_OPTS="" ;; +esac + case $use_trail in true) TRAIL_OPTS="-DMR_USE_TRAIL" ;; false) TRAIL_OPTS="" ;; @@ -338,17 +344,20 @@ esac case $verbose in true) echo $CC -I$C_INCL_DIR $ANSI_OPTS $CHECK_OPTS \ - $GRADE_OPTS $GC_OPTS $PROF_TIME_OPTS $PROF_CALLS_OPTS \ + $GRADE_OPTS $GC_OPTS \ + $PROF_TIME_OPTS $PROF_CALLS_OPTS $PROF_MEMORY_OPTS \ $INLINE_ALLOC_OPTS $TRAIL_OPTS $SPLIT_OPTS \ $PICREG_OPTS $ARCH_OPTS $ARG_OPTS "$@" $OVERRIDE_OPTS ;; esac case $# in 0) exec $CC -I$C_INCL_DIR $ANSI_OPTS $CHECK_OPTS \ - $GRADE_OPTS $GC_OPTS $PROF_TIME_OPTS $PROF_CALLS_OPTS \ + $GRADE_OPTS $GC_OPTS \ + $PROF_TIME_OPTS $PROF_CALLS_OPTS $PROF_MEMORY_OPTS \ $INLINE_ALLOC_OPTS $TRAIL_OPTS $SPLIT_OPTS \ $PICREG_OPTS $ARCH_OPTS $OVERRIDE_OPTS ;; *) exec $CC -I$C_INCL_DIR $ANSI_OPTS $CHECK_OPTS \ - $GRADE_OPTS $GC_OPTS $PROF_TIME_OPTS $PROF_CALLS_OPTS \ + $GRADE_OPTS $GC_OPTS \ + $PROF_TIME_OPTS $PROF_CALLS_OPTS $PROF_MEMORY_OPTS \ $INLINE_ALLOC_OPTS $TRAIL_OPTS $SPLIT_OPTS \ $PICREG_OPTS $ARCH_OPTS $ARG_OPTS "$@" $OVERRIDE_OPTS ;; esac diff --git a/scripts/ml.in b/scripts/ml.in index d93e71062..03c296e43 100644 --- a/scripts/ml.in +++ b/scripts/ml.in @@ -55,6 +55,7 @@ Options: -p, --profiling --profile-calls --profile-time + --profile-memory --debug --use-trail --args {simple, compact} @@ -229,11 +230,18 @@ case $gc_method in conservative) GRADE="$GRADE.gc" ;; accurate) GRADE="$GRADE.agc" ;; esac -case $profile_time,$profile_calls in - true,true) GRADE="$GRADE.prof" ;; - true,false) GRADE="$GRADE.proftime" ;; - false,true) GRADE="$GRADE.profcalls" ;; - false,false) ;; +case $profile_time,$profile_calls,$profile_memory in + true,true,false) GRADE="$GRADE.prof" ;; + true,false,false) GRADE="$GRADE.proftime" ;; + false,true,false) GRADE="$GRADE.profcalls" ;; + true,true,true) GRADE="$GRADE.profall" ;; + false,true,true) GRADE="$GRADE.memprof" ;; + false,false,false) ;; + *) progname=`basename $0` + echo \ + "$progname: Error: invalid combination of profiling options." 1>&2 + exit 1 + ;; esac case $use_trail in true) GRADE="$GRADE.tr" ;; diff --git a/scripts/mprof_merge_runs b/scripts/mprof_merge_runs index b281f48b1..3630ecd06 100755 --- a/scripts/mprof_merge_runs +++ b/scripts/mprof_merge_runs @@ -6,7 +6,7 @@ # Public License - see the file COPYING in the Mercury distribution. #---------------------------------------------------------------------------# # -# mprof_merge_counts - +# mprof_merge_runs - # # Merges the profiling counts from different runs of a profiled program. # @@ -70,8 +70,9 @@ case $# in esac # -# The Prof.Counts file format is as follows: -# clock-frequency tick-rate +# The Prof.Counts, Prof.MemoryWords, and Prof.MemoryCells file format +# is as follows: +# what_to_profile scale units # address1 count1 # address2 count2 # ... @@ -80,17 +81,21 @@ esac # To merge two different count files, we just need to # add up the counts for each address. # -touch Prof.Counts.total && -awk ' - FNR == 1 { clock = $1; rate = $2; } - FNR != 1 { counts[$1] += $2; } - END { - printf("%d %d\n", clock, rate); - for (addr in counts) { - printf("%d %d\n", addr, counts[addr]); - } - } -' Prof.Counts.total Prof.Counts > Prof.Counts.newtotal && +for prof_file in Prof.Counts Prof.MemoryWords Prof.MemoryCells; do + if [ -f $prof_file ]; then + touch $prof_file.total && + awk ' + FNR == 1 { what_to_profile = $1; scale = $2; units = $3; } + FNR != 1 { counts[$1] += $2; } + END { + printf("%s %f %s\n", what_to_profile, scale, units); + for (addr in counts) { + printf("%d %d\n", addr, counts[addr]); + } + } + ' $prof_file.total $prof_file > $prof_file.newtotal + fi +done && # # The Prof.CallPair file format is as follows: # caller1 callee1 count1 diff --git a/scripts/parse_grade_options.sh-subr b/scripts/parse_grade_options.sh-subr index 1f3bfd94f..b0ace6bce 100644 --- a/scripts/parse_grade_options.sh-subr +++ b/scripts/parse_grade_options.sh-subr @@ -45,13 +45,20 @@ esac ;; - -p|--profiling) + -p|--profiling|--time-profiling) profile_time=true profile_calls=true + profile_memory=false + ;; + --memory-profiling) + profile_time=false + profile_calls=true + profile_memory=true ;; -p-|--no-profiling) profile_time=false profile_calls=false + profile_memory=false ;; --profile-time) profile_time=true ;; @@ -61,6 +68,10 @@ profile_calls=true ;; --no-profile-calls) profile_calls=false ;; + --profile-memory) + profile_memory=true ;; + --no-profile-memory) + profile_memory=false ;; --use-trail) use_trail=true ;; @@ -123,24 +134,40 @@ esac case "$grade" in + *.memprof) + profile_time=false + profile_calls=true + profile_memory=true + grade="` expr $grade : '\(.*\).memprof' `" + ;; *.prof) profile_time=true profile_calls=true + profile_memory=false grade="` expr $grade : '\(.*\).prof' `" ;; *.proftime) profile_time=true profile_calls=false + profile_memory=false grade="` expr $grade : '\(.*\).proftime' `" ;; *.profcalls) profile_time=false profile_calls=true + profile_memory=false grade="` expr $grade : '\(.*\).profcalls' `" ;; + *.profall) + profile_time=true + profile_calls=true + profile_memory=true + grade="` expr $grade : '\(.*\).profall' `" + ;; *) profile_time=false profile_calls=false + profile_memory=false ;; esac