// This program measures the speed of the matrix computation C = C + A * B, // which is a slight generalization of matrix multiplication. (If C is // initially 0, then this would be matrix multiplication.) The three // matrices A, B, and C are all of size n*n, where n is passed to this // program on its command line. The matrices are initialized with // pseudo-random numbers uniformly distributed in the range from 0.0 to 1.0. // // The actual multiplication is done by the procedure doComputations, // which gets passed two arguments: a thread number and the total number of // threads. To do ordinary single-threaded computation, the call would be // doComputations(0, 1); that is, do thread number 0 out of 1. For two threads, // doComputations(0, 2) and doComputations(1, 2); that is, threads 0 and 1 of 2. // This doComputations procedure would work for more than 2 threads as well, // though this program doesn't use that ability. // // The whole computation is done twice, once with one thread and once with // two threads. The speedup (speed multiplier) from using two threads // is reported. The results are also compared. The number of elements // in the result matrices that differ is reported. If this number is // nonzero, the largest and smallest differences are also reported, // together with the values that are involved. // // The times used to calculated the speedup are total elapsed time, // not CPU time (since CPU time wouldn't show a speedup, as it is // total across CPUs). This means the test must be done on an // otherwise idle system if it is to give meaningful results. #include #include #include #include #include #include static double *a, *b, *c; static int n; static void doComputations(int threadNumber, int threads){ int i, j, k, start, stop; start = threadNumber * n / threads; stop = (threadNumber + 1) * n / threads; for(i = start; i < stop; i++){ for(k = 0; k < n; k++){ for(j = 0; j < n; j++){ c[i*n+j] += a[i*n+k] * b[k*n + j]; } } } } static void *child(void *ignored){ // run in the child thread, when there are two doComputations(0, 2); return ignored; } static double randomDouble(){ // returns a number uniformly between 0.0 and 1.0 return random() / (double) 0x7fffffff; } static void initialize(){ int i; srandom(284); // reset the pseudo-random number generator for(i = 0; i < n*n; i++){ a[i] = randomDouble(); b[i] = randomDouble(); c[i] = randomDouble(); } } int main(int argc, char* argv[]){ // Get the value of n from the command line. assert(argc == 2); n = atoi(argv[1]); assert(n > 0); // Allocate space for three n*n arrays of double-precision floating point numbers. a = malloc(n*n*sizeof(double)); assert(a != 0); b = malloc(n*n*sizeof(double)); assert(b != 0); c = malloc(n*n*sizeof(double)); assert(c != 0); initialize(); // Get time before the matrix computation as a baseline. struct timeval before; assert(gettimeofday(&before, NULL) == 0); doComputations(0, 1); // single threaded for baseline time and correctness // Get the time after the matrix computation. struct timeval after; assert(gettimeofday(&after, NULL) == 0); double singleThreadedTime = ((after.tv_usec - before.tv_usec) * 1e-6) + (after.tv_sec - before.tv_sec); double *cReference = c; // save results for comparison c = malloc(n*n*sizeof(double)); // and allocate new copy assert(c != 0); initialize(); // go back to the same starting point // Get time before the matrix computation as a baseline. assert(gettimeofday(&before, NULL) == 0); pthread_t child_thread; // the child thread will be number 0 of 2 int code = pthread_create(&child_thread, NULL, child, NULL); if(code){ fprintf(stderr, "pthread_create failed with code %d\n", code); return 1; } doComputations(1, 2); // the parent thread is number 1 of 2 code = pthread_join(child_thread, NULL); if(code){ fprintf(stderr, "pthread_join failed with code %d\n", code); return 1; } // Get the time after the matrix computation. assert(gettimeofday(&after, NULL) == 0); double twoThreadedTime = ((after.tv_usec - before.tv_usec) * 1e-6) + (after.tv_sec - before.tv_sec); printf("speedup: %g\n", singleThreadedTime / twoThreadedTime); int i, positiveErrors = 0, negativeErrors = 0; int leastPositiveErrorPosition, mostPositiveErrorPosition; int leastNegativeErrorPosition, mostNegativeErrorPosition; double leastPositiveError = INFINITY, mostPositiveError = 0; double leastNegativeError = -INFINITY, mostNegativeError = 0; for(i = 0; i < n*n; i++){ double error = c[i]-cReference[i]; if(error > 0){ positiveErrors++; if(error < leastPositiveError){ leastPositiveError = error; leastPositiveErrorPosition = i; } if(error > mostPositiveError){ mostPositiveError = error; mostPositiveErrorPosition = i; } } else if(error < 0) { negativeErrors++; if(error > leastNegativeError){ leastNegativeError = error; leastNegativeErrorPosition = i; } if(error < mostNegativeError){ mostNegativeError = error; mostNegativeErrorPosition = i; } } } printf("%d errors\n", positiveErrors + negativeErrors); if(positiveErrors){ printf("most positive error: c[%d] = %g, but cReference[%d] = %g (difference %g)\n", mostPositiveErrorPosition, c[mostPositiveErrorPosition], mostPositiveErrorPosition, cReference[mostPositiveErrorPosition], mostPositiveError); printf("least positive error: c[%d] = %g, but cReference[%d] = %g (difference %g)\n", leastPositiveErrorPosition, c[leastPositiveErrorPosition], leastPositiveErrorPosition, cReference[leastPositiveErrorPosition], leastPositiveError); } if(negativeErrors){ printf("most negative error: c[%d] = %g, but cReference[%d] = %g (difference %g)\n", mostNegativeErrorPosition, c[mostNegativeErrorPosition], mostNegativeErrorPosition, cReference[mostNegativeErrorPosition], mostNegativeError); printf("least negative error: c[%d] = %g, but cReference[%d] = %g (difference %g)\n", leastNegativeErrorPosition, c[leastNegativeErrorPosition], leastNegativeErrorPosition, cReference[leastNegativeErrorPosition], leastNegativeError); } // Exit normally. return 0; }