diff --git a/Makefile b/Makefile index 4d4d2ab..81f7911 100644 --- a/Makefile +++ b/Makefile @@ -10,12 +10,18 @@ CFLAGS = -Wall -O3 -Werror -m32 -pthread -std=gnu11 SHARED_OBJS = mdriver.o memlib.o fsecs.o fcyc.o clock.o ftimer.o list.o OBJS = $(SHARED_OBJS) mm.o +MTOBJS = $(SHARED_OBJS) mmts.o BOOK_IMPL_OBJS = $(SHARED_OBJS) mm-book-implicit.o GBACK_IMPL_OBJS = $(SHARED_OBJS) mm-gback-implicit.o +all: mdriver mdriver-ts + mdriver: $(OBJS) $(CC) $(CFLAGS) -o mdriver $(OBJS) +mdriver-ts: $(MTOBJS) + $(CC) $(CFLAGS) -o mdriver-ts $(MTOBJS) + mdriver-book: $(BOOK_IMPL_OBJS) $(CC) $(CFLAGS) -o $@ $(BOOK_IMPL_OBJS) @@ -25,6 +31,9 @@ mdriver-gback: $(GBACK_IMPL_OBJS) mdriver.o: mdriver.c fsecs.h fcyc.h clock.h memlib.h config.h mm.h memlib.o: memlib.c memlib.h config.h mm.o: mm.c mm.h memlib.h +mmts.o: mm.c mm.h memlib.h + $(CC) $(CFLAGS) -DTHREAD_SAFE=1 -c mm.c -o mmts.o + fsecs.o: fsecs.c fsecs.h config.h fcyc.o: fcyc.c fcyc.h ftimer.o: ftimer.c ftimer.h config.h diff --git a/config.h b/config.h index 11ad08c..4a25eba 100644 --- a/config.h +++ b/config.h @@ -46,8 +46,13 @@ * gback@cs.vt.edu/spruett3@vt.edu: I set this to a value that is achieved by a r/b * tree-based implementation on our rlogin cluster as of Fall 2015; * regardless of the speed of the actual libc. Updated in Fall 2015. + * + * gback: Again updated in Spring 2016 after switching to clock. + * This is a base line for a single-threaded implementation, without + * locking overhead, when clock() is used to time it. 
+ * This is not meaningful for the multi-threaded implementation */ -#define AVG_LIBC_THRUPUT 25.6E6 /* 14600 Kops/sec */ +#define AVG_LIBC_THRUPUT 21.5E6 /* 21,500 Kops/sec */ /* * This constant determines the contributions of space utilization @@ -69,8 +74,9 @@ /***************************************************************************** * Set exactly one of these USE_xxx constants to "1" to select a timing method *****************************************************************************/ -//#define USE_FCYC 1 /* cycle counter w/K-best scheme (x86 & Alpha only) */ +#define USE_FCYC 0 /* cycle counter w/K-best scheme (x86 & Alpha only) */ #define USE_ITIMER 0 /* interval timer (any Unix box) */ -#define USE_GETTOD 1 /* gettimeofday (any Unix box) */ +#define USE_GETTOD 0 /* gettimeofday (any Unix box) */ +#define USE_CLOCK 1 /* clock_gettime (Linux only) */ #endif /* __CONFIG_H */ diff --git a/fsecs.c b/fsecs.c index ae2346d..5347605 100644 --- a/fsecs.c +++ b/fsecs.c @@ -2,6 +2,7 @@ * High-level timing wrappers ****************************/ #include +#include #include "fsecs.h" #include "fcyc.h" #include "clock.h" @@ -36,6 +37,11 @@ void init_fsecs(void) #elif USE_GETTOD if (verbose) printf("Measuring performance with gettimeofday().\n"); +#elif USE_CLOCK + struct timespec res; + clock_getres(CLOCK_MONOTONIC_RAW, &res); + if (verbose) + printf("Measuring performance with clock_gettime(), advertised resolution %ldns.\n", res.tv_nsec); #endif } @@ -51,6 +57,8 @@ double fsecs(fsecs_test_funct f, void *argp) return ftimer_itimer(f, argp, 10); #elif USE_GETTOD return ftimer_gettod(f, argp, 10); +#elif USE_CLOCK + return ftimer_clock(f, argp, 10); #endif } diff --git a/ftimer.c b/ftimer.c index ea08560..392613b 100644 --- a/ftimer.c +++ b/ftimer.c @@ -9,6 +9,7 @@ * ftimer_gettod: version that uses gettimeofday */ #include +#include #include #include "ftimer.h" @@ -101,6 +102,37 @@ static double get_etime(void) { (first_p.it_value.tv_usec - 
r_curr.it_value.tv_usec)*1e-6); } +// https://gist.github.com/diabloneo/9619917 +void timespec_diff(struct timespec *start, struct timespec *stop, + struct timespec *result) +{ + if ((stop->tv_nsec - start->tv_nsec) < 0) { + result->tv_sec = stop->tv_sec - start->tv_sec - 1; + result->tv_nsec = stop->tv_nsec - start->tv_nsec + 1000000000; + } else { + result->tv_sec = stop->tv_sec - start->tv_sec; + result->tv_nsec = stop->tv_nsec - start->tv_nsec; + } + return; +} +/* + * ftimer_clock - Use clock_gettime to estimate the running time of + * f(argp). Return the average of n runs. + */ +double ftimer_clock(ftimer_test_funct f, void *argp, int n) +{ + int i; + struct timespec stv, etv, diff; + + clock_gettime(CLOCK_MONOTONIC_RAW, &stv); + for (i = 0; i < n; i++) + f(argp); + clock_gettime(CLOCK_MONOTONIC_RAW, &etv); + timespec_diff(&stv, &etv, &diff); + double rdiff = 1E6*diff.tv_sec + 1E-3*diff.tv_nsec; // in microseconds + rdiff /= n; + return (1E-6*rdiff); // in seconds +} diff --git a/ftimer.h b/ftimer.h index 3400603..6417f75 100644 --- a/ftimer.h +++ b/ftimer.h @@ -12,3 +12,6 @@ double ftimer_itimer(ftimer_test_funct f, void *argp, int n); Return the average of n runs */ double ftimer_gettod(ftimer_test_funct f, void *argp, int n); +/* Estimate the running time of f(argp) using clock_gettime + Return the average of n runs */ +double ftimer_clock(ftimer_test_funct f, void *argp, int n); diff --git a/mdriver.c b/mdriver.c index 3817494..59b92c9 100644 --- a/mdriver.c +++ b/mdriver.c @@ -60,6 +60,7 @@ typedef struct { int num_ids; /* number of alloc/realloc ids */ int num_ops; /* number of distinct requests */ int weight; /* weight for this trace (unused) */ + double multiplier; /* multiply sizes by this amount */ traceop_t *ops; /* array of requests */ char **blocks; /* array of ptrs returned by malloc/realloc... */ size_t *block_sizes; /* ... 
and a corresponding array of payload sizes */ @@ -141,6 +142,12 @@ static void malloc_error(int tracenum, int opnum, char *msg); static void app_error(char *msg); static FILE * open_jsonfile(const char * filename_mask); +static int +max(int a, int b) +{ + return a < b ? b : a; +} + /************** * Main routine **************/ @@ -160,6 +167,7 @@ int main(int argc, char **argv) int nthreads = 0; /* If set to > 0, number of threads for multi-threaded testing. */ int autograder = 0; /* If set, emit summary info for autograder (-g) */ int use_mmap = 0; /* If set, have memlib use mmap() instead malloc() */ + int vary_size = 0; /* If set, run each trace multiple times with varied sizes */ /* temporaries used to compute the performance index */ double secs, ops, util, avg_mm_util, avg_mm_throughput = 0, p1, p2, perfindex; @@ -168,8 +176,11 @@ int main(int argc, char **argv) /* * Read and interpret the command line arguments */ - while ((c = getopt(argc, argv, "nf:t:hvVgalm:")) != EOF) { + while ((c = getopt(argc, argv, "nf:t:hvVgalm:s")) != EOF) { switch (c) { + case 's': + vary_size = 1; + break; case 'n': use_mmap = 1; break; @@ -303,24 +314,54 @@ int main(int argc, char **argv) mem_init(use_mmap); int max_total_size = 0; + + double * size_multipliers; + int n_multipliers; + double one[] = { 1.0 }; + double many[] = { .75, 1.0, 1.25 }; + if (vary_size) { + size_multipliers = many; + n_multipliers = sizeof(many)/sizeof(many[0]); + } else { + size_multipliers = one; + n_multipliers = 1; + } + /* Evaluate student's mm malloc package using the K-best scheme */ for (i=0; i < num_tracefiles; i++) { trace = read_trace(tracedir, tracefiles[i], verbose > 1); mm_stats[i].ops = trace->num_ops; - if (verbose > 1) - printf("Checking mm_malloc for correctness, "); - check_heap_bounds = 1; - mm_stats[i].valid = eval_mm_valid(trace, i, &ranges); - if (mm_stats[i].valid) { - if (verbose > 1) - printf("efficiency, "); + mm_stats[i].valid = 1; + mm_stats[i].util = 0.0; + 
mm_stats[i].secs = 0.0; + for (int mi = 0; mi < n_multipliers; mi++) { + trace->multiplier = size_multipliers[mi]; + if (verbose > 1 && vary_size) + printf("Using trace multiplier: %f\n", size_multipliers[mi]); - max_total_size = eval_mm_util(trace, i, &ranges); - mm_stats[i].util = ((double)max_total_size / (double)mem_heapsize()); if (verbose > 1) - printf("and performance.\n"); - mm_stats[i].secs = fsecs(eval_mm_speed, trace); + printf("Checking mm_malloc for correctness, "); + + check_heap_bounds = 1; + int thisrunvalid = eval_mm_valid(trace, i, &ranges); + if (!thisrunvalid) + mm_stats[i].valid = 0; + + if (mm_stats[i].valid) { + if (verbose > 1) + printf("efficiency, "); + + int hwm = eval_mm_util(trace, i, &ranges); + if (size_multipliers[mi] == 1.0) // record max high water mark + max_total_size = hwm; + mm_stats[i].util += ((double)hwm / (double)mem_heapsize()); + if (verbose > 1) + printf("and performance.\n"); + mm_stats[i].secs += fsecs(eval_mm_speed, trace); + } } + mm_stats[i].util /= n_multipliers; + mm_stats[i].secs /= n_multipliers; /* Test multithreaded behavior */ if (nthreads) { @@ -461,6 +502,14 @@ int main(int argc, char **argv) /* Write results to JSON file for submission */ FILE * json = open_jsonfile("results.%d.json"); fprintf(json, "{"); + fprintf(json, " \"version\": \"1.1\",\n"); + fprintf(json, " \"varysize\": %d,\n", vary_size); + fprintf(json, " \"nthreads\": %d,\n", nthreads); +#ifdef THREAD_SAFE + fprintf(json, " \"THREAD_SAFE\": true,\n"); +#else + fprintf(json, " \"THREAD_SAFE\": false,\n"); +#endif fprintf(json, " \"results\": "); printresults_as_json(json, num_tracefiles, tracefiles, mm_stats); if (errors == 0) { @@ -610,6 +659,8 @@ static trace_t *read_trace(char *tracedir, char *filename, int verbose) if ((trace = (trace_t *) malloc(sizeof(trace_t))) == NULL) unix_error("malloc 1 failed in read_trance"); + trace->multiplier = 1.0; + /* Read the trace file header */ strcpy(path, tracedir); strcat(path, filename); @@ -710,7 
+761,7 @@ reset_heap(int tracenum) /* * eval_mm_valid - Check the mm malloc package for correctness */ -static int eval_mm_valid(trace_t *trace, int tracenum, range_t **ranges) +static int eval_mm_valid(trace_t *trace, int tracenum, range_t **ranges) { if (!reset_heap(tracenum)) return 0; @@ -718,7 +769,7 @@ static int eval_mm_valid(trace_t *trace, int tracenum, range_t **ranges) return eval_mm_valid_inner(trace, tracenum, ranges); } -static int eval_mm_valid_inner(trace_t *trace, int tracenum, range_t **ranges) +static int eval_mm_valid_inner(trace_t *trace, int tracenum, range_t **ranges) { int i, j; int index; @@ -734,7 +785,8 @@ static int eval_mm_valid_inner(trace_t *trace, int tracenum, range_t **ranges) /* Interpret each operation in the trace in order */ for (i = 0; i < trace->num_ops; i++) { index = trace->ops[i].index; - size = trace->ops[i].size; + int rsize = (int) (trace->multiplier * trace->ops[i].size); + size = max(0, (int)rsize); switch (trace->ops[i].type) { @@ -875,7 +927,7 @@ static int eval_mm_util(trace_t *trace, int tracenum, range_t **ranges) case ALLOC: /* mm_alloc */ index = trace->ops[i].index; - size = trace->ops[i].size; + size = max(0, (int)(trace->multiplier * trace->ops[i].size)); if ((p = mm_malloc(size)) == NULL) app_error("mm_malloc failed in eval_mm_util"); @@ -895,7 +947,7 @@ static int eval_mm_util(trace_t *trace, int tracenum, range_t **ranges) case REALLOC: /* mm_realloc */ index = trace->ops[i].index; - newsize = trace->ops[i].size; + newsize = max(0, (int)(trace->multiplier * trace->ops[i].size)); oldsize = trace->block_sizes[index]; oldp = trace->blocks[index]; @@ -988,7 +1040,7 @@ static void eval_mm_speed_inner(void *ptr) case ALLOC: /* mm_malloc */ index = trace->ops[i].index; - size = trace->ops[i].size; + size = max(0, (int)(trace->multiplier * trace->ops[i].size)); if ((p = mm_malloc(size)) == NULL) app_error("mm_malloc error in eval_mm_speed"); trace->blocks[index] = p; @@ -996,7 +1048,7 @@ static void 
eval_mm_speed_inner(void *ptr) case REALLOC: /* mm_realloc */ index = trace->ops[i].index; - newsize = trace->ops[i].size; + newsize = max(0, (int)(trace->multiplier * trace->ops[i].size)); oldp = trace->blocks[index]; if ((newp = mm_realloc(oldp,newsize)) == NULL) app_error("mm_realloc error in eval_mm_speed"); @@ -1022,14 +1074,15 @@ static void eval_mm_speed_inner(void *ptr) */ static int eval_libc_valid(trace_t *trace, int tracenum) { - int i, newsize; + int i, newsize, size; char *p, *newp, *oldp; for (i = 0; i < trace->num_ops; i++) { switch (trace->ops[i].type) { case ALLOC: /* malloc */ - if ((p = malloc(trace->ops[i].size)) == NULL) { + size = max(0, (int)(trace->multiplier * trace->ops[i].size)); + if ((p = malloc(size)) == NULL) { malloc_error(tracenum, i, "libc malloc failed"); unix_error("System message"); } @@ -1037,7 +1090,7 @@ static int eval_libc_valid(trace_t *trace, int tracenum) break; case REALLOC: /* realloc */ - newsize = trace->ops[i].size; + newsize = max(0, (int)(trace->multiplier * trace->ops[i].size)); oldp = trace->blocks[trace->ops[i].index]; if ((newp = realloc(oldp, newsize)) == NULL) { malloc_error(tracenum, i, "libc realloc failed"); @@ -1074,7 +1127,7 @@ static void eval_libc_speed(void *ptr) switch (trace->ops[i].type) { case ALLOC: /* malloc */ index = trace->ops[i].index; - size = trace->ops[i].size; + size = max(0, (int)(trace->multiplier * trace->ops[i].size)); if ((p = malloc(size)) == NULL) unix_error("malloc failed in eval_libc_speed"); trace->blocks[index] = p; @@ -1082,7 +1135,7 @@ static void eval_libc_speed(void *ptr) case REALLOC: /* realloc */ index = trace->ops[i].index; - newsize = trace->ops[i].size; + newsize = max(0, (int)(trace->multiplier * trace->ops[i].size)); oldp = trace->blocks[index]; if ((newp = realloc(oldp, newsize)) == NULL) unix_error("realloc failed in eval_libc_speed\n"); @@ -1229,7 +1282,7 @@ void malloc_error(int tracenum, int opnum, char *msg) */ static void usage(void) { - fprintf(stderr, 
"Usage: mdriver [-hvVal] [-f ] [-t ]\n"); + fprintf(stderr, "Usage: mdriver [-shvVal] [-f ] [-m ] [-t ]\n"); fprintf(stderr, "Options\n"); fprintf(stderr, "\t-a Don't check the team structure.\n"); fprintf(stderr, "\t-f Use as the trace file.\n"); @@ -1240,4 +1293,6 @@ static void usage(void) fprintf(stderr, "\t-v Print per-trace performance breakdowns.\n"); fprintf(stderr, "\t-V Print additional debug info.\n"); fprintf(stderr, "\t-n Don't randomize addresses.\n"); + fprintf(stderr, "\t-s Vary amplitude of each trace.\n"); + fprintf(stderr, "\t-m Run with multiple threads (mdriver-ts only).\n"); } diff --git a/mm_ts.c b/mm_ts.c index ecc5051..bd104ae 100644 --- a/mm_ts.c +++ b/mm_ts.c @@ -7,6 +7,7 @@ * This is just a stop-gap solution. */ #include +#ifdef THREAD_SAFE static pthread_mutex_t malloc_lock = PTHREAD_MUTEX_INITIALIZER; void *_mm_malloc_thread_unsafe(size_t size); @@ -40,3 +41,7 @@ void *mm_realloc(void *ptr, size_t size) #define mm_free _mm_free_thread_unsafe #define mm_realloc _mm_realloc_thread_unsafe +#else +/* If THREAD_SAFE is not defined, we leave it as is in order + * to avoid the locking overhead. */ +#endif /* THREAD_SAFE */