// This program measures the speed of the matrix computation C = C + A * B,
// which is a slight generalization of matrix multiplication.  (If C is
// initially 0, then this would be matrix multiplication.)  The three
// matrices A, B, and C are all of size n*n, where n is passed to this
// program on its command line.  The reported result is the number of
// double-precision (64-bit) floating point operations per second.
//
// The operational speed is unlikely to be significantly influenced
// by the particular numbers being multiplied and added; the matrices are
// initialized to a specific pseudo-random sequence for repeatability.

// random(), srandom() and getrusage() are POSIX (XSI) interfaces rather than
// ISO C, so ask for their declarations explicitly; without this a strict
// -std=c99/-std=c11 build may not see their prototypes.
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 700
#endif

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/resource.h>

// Generate a pseudo-random double-precision floating point number by scaling
// the next value of random() by 1/0x7fffffff.
static double randomDouble(void)
{
    return random() / (double) 0x7fffffff;
}

// Entry point.  Expects exactly one command-line argument, the matrix
// dimension n (a positive decimal integer).  Prints the measured rate in
// floating point operations per second to stdout and returns 0; on a usage,
// allocation or getrusage() failure it prints a diagnostic to stderr and
// returns EXIT_FAILURE.
int main(int argc, char *argv[])
{
    // Get the value of n from the command line.  The checks are explicit
    // (not assert) so that they survive a build with NDEBUG defined.
    if (argc != 2) {
        fprintf(stderr, "usage: %s n\n", argc > 0 ? argv[0] : "matmul");
        return EXIT_FAILURE;
    }

    errno = 0;
    char *end = NULL;
    const long parsed = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || errno == ERANGE
        || parsed <= 0 || parsed > INT_MAX) {
        fprintf(stderr, "error: n must be a positive integer, got \"%s\"\n",
                argv[1]);
        return EXIT_FAILURE;
    }
    const size_t n = (size_t) parsed;

    // Refuse sizes for which n*n*sizeof(double) does not fit in size_t;
    // otherwise the allocation size below would silently wrap around.
    if (n > SIZE_MAX / sizeof(double) / n) {
        fprintf(stderr, "error: n = %s is too large\n", argv[1]);
        return EXIT_FAILURE;
    }
    const size_t count = n * n;

    // Allocate memory for three n*n arrays of double-precision floating
    // point numbers.
    double *a = malloc(count * sizeof *a);
    double *b = malloc(count * sizeof *b);
    double *c = malloc(count * sizeof *c);
    int status = EXIT_FAILURE;
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "error: out of memory for n = %s\n", argv[1]);
        goto cleanup;
    }

    // Initialize the arrays with a fixed sequence of pseudo-random numbers.
    // The a/b/c interleaving is part of that fixed sequence; keep it.
    srandom(284); // reset the pseudo-random number generator
    for (size_t i = 0; i < count; i++) {
        a[i] = randomDouble();
        b[i] = randomDouble();
        c[i] = randomDouble();
    }

    // Get resource consumption information before the matrix computation
    // as a baseline.
    struct rusage before;
    if (getrusage(RUSAGE_SELF, &before) != 0) {
        perror("getrusage");
        goto cleanup;
    }

    // Now comes the matrix computation itself; this is the part you would
    // change.
    for (size_t i = 0; i < n; i++) {
        for (size_t j = 0; j < n; j++) {
            for (size_t k = 0; k < n; k++) {
                c[i*n + j] += a[i*n + k] * b[k*n + j]; // <- this line is executed n*n*n times
            }
        }
    }

    // Get the resource consumption information after the matrix computation.
    struct rusage after;
    if (getrusage(RUSAGE_SELF, &after) != 0) {
        perror("getrusage");
        goto cleanup;
    }

    // Print the number of floating point operations per second.
    // This is calculated based on the total user-mode CPU time elapsed and
    // the fact that 2*n*n*n floating point operations are performed (one
    // floating point multiplication and one floating point addition each
    // of the n*n*n times that the line marked above is executed).
    const double elapsed =
        ((after.ru_utime.tv_usec - before.ru_utime.tv_usec) * 1e-6)
        + (after.ru_utime.tv_sec - before.ru_utime.tv_sec);
    if (elapsed <= 0.0) {
        // The division below then has a zero divisor; warn on stderr but
        // leave what is written to stdout unchanged.
        fprintf(stderr, "warning: no measurable user CPU time elapsed; "
                        "the reported rate is not meaningful\n");
    }
    printf("%.2E\n", 2.0 * n * n * n / elapsed);

    // Exit normally.
    status = 0;

cleanup:
    free(a);
    free(b);
    free(c);
    return status;
}