// This program measures the speed of the matrix computation C = C + A * B,
// which is a slight generalization of matrix multiplication.  (If C is
// initially 0, then this would be matrix multiplication.)  The three
// matrices A, B, and C are all of size n*n, where n is passed to this
// program on its command line.  The reported result is the number of
// double-precision (64-bit) floating point operations per second.
// The operational speed is unlikely to be significantly influenced
// by the particular numbers being multiplied and added; the matrices are
// initialized to a specific pseudo-random sequence for repeatability.

#include <stdlib.h>
#include <assert.h>
#include <sys/resource.h>
#include <stdio.h>

// Generate a pseudo-random double-precision floating point number in the
// range [0, 1] by scaling the next value from random().  The sequence is
// determined by the most recent srandom() seed.
static double randomDouble(void){
  // 0x7fffffff is 2^31 - 1, the largest value POSIX specifies random() can
  // return, so the quotient never exceeds 1.0.
  return random() / (double) 0x7fffffff;
}

// Program entry point.
//
// Usage: <program> n
//
// Allocates three n*n matrices of doubles, fills them with a fixed
// pseudo-random sequence, performs C = C + A * B with the naive triple loop,
// and prints the achieved floating point operations per second (based on
// user-mode CPU time) to standard output in the form "%.2E\n".
//
// Returns EXIT_SUCCESS on success.  On a bad command line, an allocation
// failure, or a getrusage() failure, a diagnostic is written to standard
// error and EXIT_FAILURE is returned.
int main(int argc, char* argv[]){
  // Get the value of n from the command line.
  if(argc != 2){
    fprintf(stderr, "usage: %s n\n", argc > 0 ? argv[0] : "program");
    return EXIT_FAILURE;
  }

  // strtol() is used instead of atoi() so that non-numeric input and
  // trailing garbage can be detected; atoi() reports no errors and has
  // undefined behavior when the value is out of range.
  char *end;
  long parsed = strtol(argv[1], &end, 10);
  if(end == argv[1] || *end != '\0' || parsed <= 0){
    fprintf(stderr, "n must be a positive integer, got \"%s\"\n", argv[1]);
    return EXIT_FAILURE;
  }
  size_t n = (size_t) parsed;

  // Reject any n for which n*n*sizeof(double) would wrap around in size_t.
  // (This also rejects the LONG_MAX that strtol() returns on overflow.)
  const size_t maxSize = (size_t) -1;
  if(n > maxSize / sizeof(double) / n){
    fprintf(stderr, "n = %zu is too large\n", n);
    return EXIT_FAILURE;
  }
  size_t count = n * n; // number of elements in each matrix

  // Allocate memory for three n*n arrays of double-precision floating point numbers.
  int status = EXIT_FAILURE;
  struct rusage before, after;
  double *a = malloc(count * sizeof *a);
  double *b = malloc(count * sizeof *b);
  double *c = malloc(count * sizeof *c);
  if(a == NULL || b == NULL || c == NULL){
    fprintf(stderr, "cannot allocate three %zu x %zu matrices\n", n, n);
    goto cleanup;
  }

  // Initialize the arrays with a fixed sequence of pseudo-random numbers.
  // The a/b/c interleaving is kept so the generated values stay the same
  // from run to run.
  srandom(284); // reset the pseudo-random number generator
  for(size_t i = 0; i < count; i++){
    a[i] = randomDouble();
    b[i] = randomDouble();
    c[i] = randomDouble();
  }

  // Get resource consumption information before the matrix computation as a
  // baseline.  The call is deliberately not wrapped in assert(): compiling
  // with NDEBUG would remove the call along with the check.
  if(getrusage(RUSAGE_SELF, &before) != 0){
    perror("getrusage");
    goto cleanup;
  }

  // Now comes the matrix computation itself; this is the part you would change.
  for(size_t i = 0; i < n; i++){
    for(size_t j = 0; j < n; j++){
      for(size_t k = 0; k < n; k++){
        c[i*n + j] += a[i*n + k] * b[k*n + j]; // <- this line is executed n*n*n times
      }
    }
  }

  // Get the resource consumption information after the matrix computation.
  if(getrusage(RUSAGE_SELF, &after) != 0){
    perror("getrusage");
    goto cleanup;
  }

  // Print the number of floating point operations per second.
  // This is calculated based on the total user-mode CPU time elapsed and
  // the fact that 2*n*n*n floating point operations are performed (one
  // floating point multiplication and one floating point addition each
  // of the n*n*n times that the line marked above is executed).
  double seconds =
    ((after.ru_utime.tv_usec - before.ru_utime.tv_usec) * 1e-6) +
    (after.ru_utime.tv_sec - before.ru_utime.tv_sec);
  printf("%.2E\n", 2.0 * n * n * n / seconds);
  status = EXIT_SUCCESS;

cleanup:
  // free(NULL) is a no-op, so this is safe after a partial allocation failure.
  free(a);
  free(b);
  free(c);
  return status;
}