// This program measures the speed of the matrix computation C = C + A * B,
// which is a slight generalization of matrix multiplication. (If C is
// initially 0, then this would be matrix multiplication.) The three
// matrices A, B, and C are all of size n*n, where n is passed to this
// program on its command line. The reported result is the number of
// double-precision (64-bit) floating point operations per second.
// The operational speed is unlikely to be significantly influenced
// by the particular numbers being multiplied and added; the matrices are
// initialized to a specific pseudo-random sequence for repeatability.
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/resource.h>
// Produce the next pseudo-random double-precision value: the next number
// from random() divided by 0x7fffffff (2^31 - 1). The sequence is whatever
// random() yields for the seed most recently installed with srandom().
static double randomDouble(){
  const double scale = (double) 0x7fffffff;
  long raw = random();
  return raw / scale;
}
// Benchmark driver: times the computation C = C + A * B on n*n matrices of
// doubles and prints the achieved floating point operations per second.
//
// Usage: <program> n      (n is a positive decimal integer)
//
// Returns 0 on success. On a bad command line, an allocation failure, or a
// getrusage() failure it prints a diagnostic to stderr and returns
// EXIT_FAILURE. Explicit checks are used rather than assert() so that the
// validation, and the getrusage() calls themselves, are still performed
// when the program is built with -DNDEBUG.
int main(int argc, char* argv[]){
  int status = EXIT_FAILURE;
  double *a = NULL, *b = NULL, *c = NULL;
  struct rusage before, after;

  // Get the value of n from the command line.
  if(argc != 2){
    fprintf(stderr, "usage: %s n\n", argc > 0 ? argv[0] : "program");
    return EXIT_FAILURE;
  }
  char *end = NULL;
  errno = 0;
  long parsed = strtol(argv[1], &end, 10);
  if(end == argv[1] || *end != '\0' || errno == ERANGE || parsed <= 0){
    fprintf(stderr, "n must be a positive integer, got \"%s\"\n", argv[1]);
    return EXIT_FAILURE;
  }
  size_t n = (size_t) parsed;

  // All size and index arithmetic is done in size_t; make sure that
  // n*n*sizeof(double) is representable before computing it.
  if(n > SIZE_MAX / sizeof(double) / n){
    fprintf(stderr, "n = %zu is too large: n*n doubles do not fit in memory\n", n);
    return EXIT_FAILURE;
  }
  size_t count = n * n;

  // Allocate memory for three n*n arrays of double-precision floating point numbers.
  a = malloc(count * sizeof *a);
  b = malloc(count * sizeof *b);
  c = malloc(count * sizeof *c);
  if(a == NULL || b == NULL || c == NULL){
    fprintf(stderr, "out of memory allocating three %zu*%zu matrices\n", n, n);
    goto cleanup;
  }

  // Initialize the arrays with a fixed sequence of pseudo-random numbers.
  // The a/b/c interleaving per element is kept so the values are repeatable.
  srandom(284); // reset the pseudo-random number generator
  for(size_t i = 0; i < count; i++){
    a[i] = randomDouble();
    b[i] = randomDouble();
    c[i] = randomDouble();
  }

  // Get resource consumption information before the matrix computation as a baseline.
  if(getrusage(RUSAGE_SELF, &before) != 0){
    perror("getrusage");
    goto cleanup;
  }

  // Now comes the matrix computation itself; this is the part you would change.
  for(size_t i = 0; i < n; i++){
    for(size_t j = 0; j < n; j++){
      for(size_t k = 0; k < n; k++){
        c[i*n + j] += a[i*n + k] * b[k*n + j]; // <- this line is executed n*n*n times
      }
    }
  }

  // Get the resource consumption information after the matrix computation.
  if(getrusage(RUSAGE_SELF, &after) != 0){
    perror("getrusage");
    goto cleanup;
  }

  // Print the number of floating point operations per second.
  // This is calculated based on the total user-mode CPU time elapsed and
  // the fact that 2*n*n*n floating point operations are performed (one
  // floating point multiplication and one floating point addition each
  // of the n*n*n times that the line marked above is executed).
  // NOTE: if the measured user time difference is zero (very small n),
  // the quotient is a floating point division by zero.
  printf("%.2E\n",
         2.0 * n * n * n /
         (((after.ru_utime.tv_usec - before.ru_utime.tv_usec) * 1e-6) +
          (after.ru_utime.tv_sec - before.ru_utime.tv_sec)));

  // Exit normally.
  status = 0;

cleanup:
  // free(NULL) is a no-op, so this is safe after a partial allocation.
  free(c);
  free(b);
  free(a);
  return status;
}