// This program measures the speed of the matrix computation C = C + A * B, // which is a slight generalization of matrix multiplication. (If C is // initially 0, then this would be matrix multiplication.) The three // matrices A, B, and C are all of size n*n, where n is passed to this // program on its command line. The matrices are initialized with some // pseudo-random numbers. // // The computation is done twice, once with one thread and once with // two threads. The speedup (speed multiplier) from using two threads // is reported. The results are also compared. If any element of the // result matrices differs by more than 1e-6, a failure is reported. // // The times used to calculated the speedup are total elapsed time, // not CPU time (since CPU time wouldn't show a speedup, as it is // total across CPUs). This means the test must be done on an // otherwise idle system if it is to give meaningful results. #include #include #include #include #include #include static double *a, *b, *c; static int n; static void doComputations(int threadNumber, int threads){ int i, j, k, start, stop; start = threadNumber * n / threads; stop = (threadNumber + 1) * n / threads; for(i = start; i < stop; i++){ for(k = 0; k < n; k++){ for(j = 0; j < n; j++){ c[i*n+j] += a[i*n+k] * b[k*n + j]; } } } } static void *child(void *ignored){ doComputations(0, 2); } static double randomDouble(){ return random() / (double) 0x7fffffff; } static void initialize(){ int i; srandom(284); // reset the pseudo-random number generator for(i = 0; i < n*n; i++){ a[i] = randomDouble(); b[i] = randomDouble(); c[i] = randomDouble(); } } int main(int argc, char* argv[]){ // Get the value of n from the command line. assert(argc == 2); n = atoi(argv[1]); assert(n > 0); // Allocate space for three n*n arrays of double-precision floating point numbers. a = malloc(n*n*sizeof(double)); assert(a != 0); b = malloc(n*n*sizeof(double)); assert(b != 0); c = malloc(n*n*sizeof(double)); assert(c != 0); initialize(); // Get time before the matrix computation as a baseline. struct timeval before; assert(gettimeofday(&before, NULL) == 0); doComputations(0, 1); // single threaded for baseline time and correctness // Get the time after the matrix computation. struct timeval after; assert(gettimeofday(&after, NULL) == 0); double singleThreadedTime = ((after.tv_usec - before.tv_usec) * 1e-6) + (after.tv_sec - before.tv_sec); double *cReference = c; // save results for comparison c = malloc(n*n*sizeof(double)); // and allocate new copy assert(c != 0); initialize(); // go back to the same starting point // Get time before the matrix computation as a baseline. assert(gettimeofday(&before, NULL) == 0); pthread_t child_thread; // the child thread will be number 0 of 2 int code = pthread_create(&child_thread, NULL, child, NULL); if(code){ fprintf(stderr, "pthread_create failed with code %d\n", code); return 1; } doComputations(1, 2); // the parent thread is number 1 of 2 code = pthread_join(child_thread, NULL); if(code){ fprintf(stderr, "pthread_join failed with code %d\n", code); return 1; } // Get the time after the matrix computation. assert(gettimeofday(&after, NULL) == 0); double twoThreadedTime = ((after.tv_usec - before.tv_usec) * 1e-6) + (after.tv_sec - before.tv_sec); printf("%g\n", singleThreadedTime / twoThreadedTime); // print the speedup int i; for(i = 0; i < n*n; i++){ // check the results were the same if(fabs(c[i] - cReference[i]) > 1e-6){ fprintf(stderr, "c[%d] = %g, but cRreference[%d] = %g\n", i, c[i], i, cReference[i]); return 1; } } // Exit normally. return 0; }