/*
 * Common functions used in matrix computations.
 */

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>

#include "matcom.h"

/*
 * Fills an m-by-n matrix with uniform random values.  The matrix is stored in
 * column-major order with leading dimension m, so its m*n elements are
 * contiguous and are written front to back.  Each element is computed as
 * alpha * drand48() - alpha/2.
 */
void create_random_matrix( double alpha, int m, int n, double *E )
{
	const double half_range = 0.50 * alpha;
	double *next = E;	// Next element to write, in storage order

	for ( int col = 0; col < n; col++ ) {
		for ( int row = 0; row < m; row++ ) {
			*next++ = alpha * drand48() - half_range;
		}
	}
}

/*
 * Generates a random n-by-n unit lower triangular matrix with leading dimension
 * n.  Matrix elements are stored in column-major order.  Elements above the
 * diagonal are set to zero, diagonal elements are set to one, and each element
 * below the diagonal is computed as alpha * drand48() - alpha/2.
 */
void create_random_unit_lower( double alpha, int n, double *E )
{
	const int ldim = n;

	for ( int j = 0; j < n; j++ ) {
		double *E_j = E + j*ldim;
		// Rows 0..j-1 of column j lie strictly above the diagonal, so exactly
		// j elements are cleared (none for the first column).
		memset( E_j, 0, (size_t) j * sizeof(double) );
		*(E_j + j) = 1.0;
		for ( int i = j+1; i < n; i++ ) {
			*(E_j + i) = alpha * drand48() - (0.50 * alpha);
		}
	}
}

/*
 * Generates a random n-by-n lower triangular matrix with leading dimension n.
 * Matrix elements are stored in column-major order.  Elements above the
 * diagonal are set to zero, and each element on or below the diagonal is
 * computed as alpha * drand48() - alpha/2.
 */
void create_random_lower( double alpha, int n, double *E )
{
	const int ldim = n;

	for ( int j = 0; j < n; j++ ) {
		double *E_j = E + j*ldim;
		// Rows 0..j-1 of column j lie strictly above the diagonal, so exactly
		// j elements are cleared (none for the first column).
		memset( E_j, 0, (size_t) j * sizeof(double) );
		for ( int i = j; i < n; i++ ) {
			*(E_j + i) = alpha * drand48() - (0.50 * alpha);
		}
	}
}


/*
 * Builds a random n-by-n upper triangular matrix in column-major order with
 * leading dimension n.  In every column the elements on and above the diagonal
 * are computed as alpha * drand48() - alpha/2, and the elements below the
 * diagonal are set to zero.
 */
void create_random_upper( double alpha, int n, double *E )
{
	const double half_range = 0.50 * alpha;

	for ( int col = 0; col < n; col++ ) {
		double *column = &E[col*n];
		const int below = n - col - 1;	// Rows strictly below the diagonal

		// Zero the tail of the column, then fill rows 0..col
		memset( &column[col+1], 0, below*sizeof(double) );
		for ( int row = 0; row <= col; row++ ) {
			column[row] = alpha * drand48() - half_range;
		}
	}
}

/*
 * Generates a random n-by-n matrix with leading dimension n, intended to be
 * nonsingular (invertible).  Matrix elements are stored in column-major order.
 * The matrix is formed as the product E = L*U, where L is a random unit lower
 * triangular matrix and U is a random upper triangular matrix; such a product
 * is nonsingular whenever every diagonal element of U is nonzero.  The scale
 * factor alpha is passed on to the generators of L and U.
 *
 * Two temporary n-by-n arrays are allocated and released internally.  If they
 * cannot be allocated, a message is written to stderr and the program exits
 * with EXIT_FAILURE.  Nothing is done when n <= 0.
 */
void create_random_nonsingular( double alpha, int n, double *E )
{
	const int ldim = n;
	double *L, *U;

	if ( n <= 0 ) {
		return;		// Empty matrix: nothing to generate
	}

	// Count elements in size_t so that n*n cannot overflow int
	const size_t count = (size_t) n * (size_t) n;

	L = (double *) malloc( count*sizeof(double) );
	U = (double *) malloc( count*sizeof(double) );
	if ( L == NULL || U == NULL ) {
		fprintf( stderr, "create_random_nonsingular: out of memory\n" );
		free( L );
		free( U );
		exit( EXIT_FAILURE );
	}

	create_random_unit_lower( alpha, n, L );
	create_random_upper( alpha, n, U );
	// Compute E = L*U; E is cleared first because multiply_matrix accumulates
	clear_matrix( n, n, E );
	multiply_matrix( n, n, n, ldim, L, ldim, U, ldim, E );

	free( L );
	free( U );
}

/*
 * Generates a random n-by-n symmetric positive definite (SPD) matrix with 
 * leading dimension n.  Matrix elements are stored in column-major order. 
 * The matrix is formed as E = M'M from a random n-by-n matrix M, so that
 * x'Ex = x'(M'M)x = (Mx)'(Mx) = ||Mx||^2 >= 0, with equality only for x = 0
 * when M is nonsingular.  The scale factor alpha is passed on to the generator
 * of M.
 *
 * Two temporary n-by-n arrays are allocated and released internally.  If they
 * cannot be allocated, a message is written to stderr and the program exits
 * with EXIT_FAILURE.  Nothing is done when n <= 0.
 */
void create_random_spd( double alpha, int n, double *E )
{
	const int ldim = n;
	double *M, *T;

	if ( n <= 0 ) {
		return;		// Empty matrix: nothing to generate
	}

	// Count elements in size_t so that n*n cannot overflow int
	const size_t count = (size_t) n * (size_t) n;

	M = (double *) malloc( count*sizeof(double) );
	T = (double *) malloc( count*sizeof(double) );
	if ( M == NULL || T == NULL ) {
		fprintf( stderr, "create_random_spd: out of memory\n" );
		free( M );
		free( T );
		exit( EXIT_FAILURE );
	}

	create_random_matrix( alpha, n, n, M );
	transpose_matrix( n, n, M, T );
	// Compute E = M'*M = T*M; E is cleared first because multiply_matrix
	// accumulates into its output
	clear_matrix( n, n, E );
	multiply_matrix( n, n, n, ldim, T, ldim, M, ldim, E );

	free( M );
	free( T );
}

/*
 * Builds a random n-by-n symmetric matrix in column-major order with leading
 * dimension n.  Every element on or below the diagonal is drawn as
 * alpha * drand48(), and each value drawn below the diagonal is stored in its
 * mirror position above the diagonal as well.
 */
void create_random_symmetric( double alpha, int n, double *E )
{
	for ( int col = 0; col < n; col++ ) {
		// Diagonal element E(col,col)
		E[col*n + col] = alpha * drand48();

		// Elements below the diagonal and their reflections above it
		for ( int row = col+1; row < n; row++ ) {
			const double value = alpha * drand48();

			E[col*n + row] = value;		// E(row,col)
			E[row*n + col] = value;		// E(col,row)
		}
	}
}

/*
 * Zeroes an m-by-n matrix stored in column-major order with leading dimension
 * m.  The matrix is cleared one column (m elements) at a time.
 */
void clear_matrix( int m, int n, double *E )
{
	int col = 0;

	while ( col < n ) {
		memset( &E[col*m], 0, m*sizeof(double) );
		col++;
	}
}

/*
 * Duplicates an m-by-n matrix E into matrix F.  Both matrices are stored in
 * column-major order with leading dimension m, and the copy is made one
 * column (m elements) at a time.
 */
void copy_matrix( int m, int n, const double *E, double *F )
{
	int col = 0;

	while ( col < n ) {
		const int offset = col*m;	// Start of this column in both arrays

		memcpy( &F[offset], &E[offset], m*sizeof(double) );
		col++;
	}
}


/*
 * Writes the transpose of the m-by-n matrix E into the n-by-m matrix F.  E has
 * leading dimension m and F has leading dimension n; both are stored in
 * column-major order.  E is read sequentially in storage order, and element
 * E(row,col) is written to F(col,row).
 */
void transpose_matrix( int m, int n, const double *E, double *F )
{
	const double *src = E;	// Walks E in storage order

	for ( int col = 0; col < n; col++ ) {
		for ( int row = 0; row < m; row++ ) {
			F[col + row*n] = *src++;
		}
	}
}

/*
 * Copies elements of an m-by-n matrix E to mm-by-nn matrix F with leading 
 * dimensions ldimE and ldimF, respectively.  Elements of matrix E are stored in
 * column-major order.  Array F stores bdim-by-bdim matrix blocks contiguously,
 * and within each block stores elements in column-major order.  Also, elements
 * of fringe blocks that do not belong to m-by-n matrix E are set to zero in 
 * array F.  (Note that contiguous blocks of matrix F are stored in column-major
 * order.)
 *
 * For the block whose top-left element is (i,j):
 *   p, q  are the block's row and column counts in the padded matrix F;
 *   r, s  are the numbers of those rows and columns that belong to E.
 * A block that lies entirely in the padding (r <= 0 or s <= 0) is cleared and
 * nothing is copied into it.  Nothing is done when bdim <= 0.
 */
void form_contig_blocks( int m, int n, int ldimE, const double *E, 
	int mm, int nn, int bdim, int ldimF, double *F )
{
	if ( bdim <= 0 ) {
		return;		// A non-positive block size would never advance the loops
	}

	for ( int j = 0; j < nn; j += bdim ) {
		int s = (j + bdim > n) ? (n - j) : bdim;
		int q = (j + bdim > nn) ? (nn - j) : bdim;
		for ( int i = 0; i < mm; i += bdim ) {
			int r = (i + bdim > m) ? (m - i) : bdim;
			int p = (i + bdim > mm) ? (mm - i) : bdim;
			// Block (i,j) starts after the i*q elements of the blocks above it
			// within block column j
			double *Fij = F + j*ldimF + i*q;

			// Clear fringe blocks by setting elements to zero
			if ( s != q || r != p ) {
				memset( Fij, 0, (size_t) p * (size_t) q * sizeof(double) );
			}
			// Block holds no element of E: r or s may be negative here, so
			// it must not be used as a copy length
			if ( r <= 0 || s <= 0 ) {
				continue;
			}
			const double *Eij = E + j*ldimE + i;
			for ( int k = 0; k < s; k++ ) {
				// Column k of the block: stride ldimE in E, stride p in F
				memcpy( Fij + k*p, Eij + k*ldimE, (size_t) r * sizeof(double) );
			}
		}
	}
}

/*
 * Copies elements of an m-by-n matrix E to mm-by-nn matrix F with leading 
 * dimensions ldimE and ldimF, respectively.  Elements of matrix E are stored in
 * column-major order.  First, matrix E is copied to a temporary array, where 
 * bdim-by-bdim blocks are stored contiguously. Then, these contiguous blocks 
 * are copied to array F, such that kdim-by-kdim sub-blocks of each block are 
 * stored contiguously.  That is, matrix F employs recursive contiguous block 
 * storage.  (Note that contiguous blocks of matrix F are stored in column-
 * major order, and contiguous sub-blocks within each block are stored in 
 * column-major order.)
 *
 * The temporary array holds mm*nn elements and is released before returning.
 * If it cannot be allocated, a message is written to stderr and the program
 * exits with EXIT_FAILURE.  Nothing is done when mm, nn, bdim or kdim is not
 * positive.
 */
void form_recur_blocks( int m, int n, int ldimE, const double *E, 
	int mm, int nn, int kdim, int bdim, int ldimF, double *F )
{
	// The scratch array is exactly mm-by-nn, so it is addressed with leading
	// dimension mm; using ldimF here would run past its end if ldimF > mm
	const int ldimW = mm;
	double *W;

	if ( mm <= 0 || nn <= 0 || bdim <= 0 || kdim <= 0 ) {
		return;
	}

	W = (double *) malloc( (size_t) mm * (size_t) nn * sizeof(double) );
	if ( W == NULL ) {
		fprintf( stderr, "form_recur_blocks: out of memory\n" );
		exit( EXIT_FAILURE );
	}
	// Form contiguous matrix blocks
	form_contig_blocks( m, n, ldimE, E, mm, nn, bdim, ldimW, W );
	
	// Within each matrix block, form contiguous matrix sub-blocks
	for ( int j = 0; j < nn; j += bdim ) {
		int s = (j + bdim > n) ? (n - j) : bdim;
		int q = (j + bdim > nn) ? (nn - j) : bdim;
		for ( int i = 0; i < mm; i += bdim ) {
			int r = (i + bdim > m) ? (m - i) : bdim;
			int p = (i + bdim > mm) ? (mm - i) : bdim;
			double *Wij = W + j*ldimW + i*q;
			double *Fij = F + j*ldimF + i*q;
			// Each block is treated as an r-by-s matrix with leading dimension
			// p that is repacked into a p-by-q block of kdim-sized sub-blocks
			form_contig_blocks( r, s, p, Wij, p, q, kdim, p, Fij );
		}
	}
	free( W );
}

/*
 * Copies elements of an mm-by-nn matrix E to m-by-n matrix F with leading 
 * dimensions ldimE and ldimF, respectively.  Array E stores bdim-by-bdim matrix
 * blocks contiguously, and within each block stores elements in column-major
 * order.  As matrix E is copied to array F, elements are unpacked and stored
 * in conventional column-major order.
 *
 * For the block whose top-left element is (i,j):
 *   p, q  are the block's row and column counts in the padded matrix E;
 *   r, s  are the numbers of those rows and columns that belong to F.
 * A block that lies entirely in the padding (r <= 0 or s <= 0) is skipped.
 * Nothing is done when bdim <= 0.
 */
void unpack_contig_blocks( int mm, int nn, int bdim, int ldimE,
	const double *E, int m, int n, int ldimF, double *F )
{	
	if ( bdim <= 0 ) {
		return;		// A non-positive block size would never advance the loops
	}

	for ( int j = 0; j < nn; j += bdim ) {
		int s = (j + bdim > n) ? (n - j) : bdim;
		int q = (j + bdim > nn) ? (nn - j) : bdim;
		for ( int i = 0; i < mm; i += bdim ) {
			int r = (i + bdim > m) ? (m - i) : bdim;
			int p = (i + bdim > mm) ? (mm - i) : bdim;

			// Block holds no element of F: r or s may be negative here, so
			// it must not be used as a copy length
			if ( r <= 0 || s <= 0 ) {
				continue;
			}
			// Block (i,j) starts after the i*q elements of the blocks above it
			// within block column j
			const double *Eij = E + j*ldimE + i*q;
			double *Fij = F + j*ldimF + i;
			for ( int k = 0; k < s; k++ ) {
				// Column k of the block: stride p in E, stride ldimF in F
				memcpy( Fij + k*ldimF, Eij + k*p, (size_t) r * sizeof(double) );
			}
		}
	}
}

/*
 * Copies elements of an mm-by-nn matrix E to m-by-n matrix F with leading 
 * dimensions ldimE and ldimF, respectively.  Array E employs recursive 
 * contiguous block storage, i.e., matrix blocks of size bdim-by-bdim are stored
 * contiguously, and within each block, sub-blocks of size kdim-by-kdim are 
 * stored contiguously.  First, matrix E is copied to a temporary array, where 
 * elements of each bdim-by-bdim block are unpacked and stored in column-major 
 * order.  Then the temporary array is copied to matrix F where the elements of
 * matrix E are unpacked and stored in conventional column-major order. 
 *
 * The temporary array holds mm*nn elements and is released before returning.
 * If it cannot be allocated, a message is written to stderr and the program
 * exits with EXIT_FAILURE.  Nothing is done when mm, nn, bdim or kdim is not
 * positive.
 */
void unpack_recur_blocks( int mm, int nn, int kdim, int bdim, int ldimE, 
	const double *E, int m, int n, int ldimF, double *F )
{
	// The scratch array is exactly mm-by-nn, so it is addressed with leading
	// dimension mm; using ldimE here would run past its end if ldimE > mm
	const int ldimW = mm;
	double *W;

	if ( mm <= 0 || nn <= 0 || bdim <= 0 || kdim <= 0 ) {
		return;
	}

	W = (double *) malloc( (size_t) mm * (size_t) nn * sizeof(double) );
	if ( W == NULL ) {
		fprintf( stderr, "unpack_recur_blocks: out of memory\n" );
		exit( EXIT_FAILURE );
	}
	// Within each matrix block, unpack contiguous sub-blocks
	for ( int j = 0; j < nn; j += bdim ) {
		int s = (j + bdim > n) ? (n - j) : bdim;
		int q = (j + bdim > nn) ? (nn - j) : bdim;
		for ( int i = 0; i < mm; i += bdim ) {
			int r = (i + bdim > m) ? (m - i) : bdim;
			int p = (i + bdim > mm) ? (mm - i) : bdim;
			const double *Eij = E + j*ldimE + i*q;
			double *Wij = W + j*ldimW + i*q;
			// Each p-by-q block of kdim-sized sub-blocks is unpacked into an
			// r-by-s column-major matrix with leading dimension p
			unpack_contig_blocks( p, q, kdim, p, Eij, r, s, p, Wij );
		}
	}

	// Unpack contiguous matrix blocks
	unpack_contig_blocks( mm, nn, bdim, ldimW, W, m, n, ldimF, F );
	free( W );
}

/*
 * Computes the relative and absolute errors in a matrix computation using the 
 * Frobenius norm ||F - E||, where F is the result of the floating point matrix
 * computation and E is the exact solution.  Both matrices are stored in 
 * column-major order with leading dimension m.
 */
void error_matrix_comp_frob( double *eps, double *err, int m, int n, 
	const double *E, const double *F )
{
	int		ldim = m;
	double	ssq_delta = 0.0;
	double	ssq_eij = 0.0;

	for ( int j = 0; j < n; j++ ) {
		const double *E_j = E + j*ldim;
		const double *F_j = F + j*ldim;
		for ( int i = 0; i < m; i++ ) {
			double delta = *(E_j + i) - *(F_j + i);
			ssq_delta +=  delta * delta;
			ssq_eij += *(E_j + i) * *(E_j + i);
		}
	}
	*err = sqrt( ssq_delta );
	*eps = *err / sqrt( ssq_eij );
}

/*
 * Computes the relative and absolute errors in a matrix computation using the 
 * matrix 1-norm (maximum absolute column sum), where F is the result of the
 * floating point matrix computation and E is the exact solution.  Both
 * matrices are m-by-n and stored in column-major order with leading
 * dimension m.
 *
 * On return *err holds the absolute error ||E - F||_1 and *eps holds the
 * relative error ||E - F||_1 / ||E||_1.  When the absolute error is zero the
 * relative error is reported as zero, even if E itself is the zero matrix.
 */
void error_matrix_comp_l1( double *eps, double *err, int m, int n, 
	const double *E, const double *F )
{
	int		ldim = m;
	double	max_abs_delta = 0.0;	// Largest column sum of |E - F| so far
	double	max_abs_eij = 0.0;		// Largest column sum of |E| so far

	for ( int j = 0; j < n; j++ ) {
		const double *E_j = E + j*ldim;
		const double *F_j = F + j*ldim;
		// The sums are per column, so they restart from zero for every j
		double sum_abs_delta = 0.0;
		double sum_abs_eij = 0.0;

		for ( int i = 0; i < m; i++ ) {
			double delta = *(E_j + i) - *(F_j + i);
			sum_abs_delta += fabs( delta );
			sum_abs_eij += fabs( *(E_j + i) );
		}
		if ( sum_abs_delta > max_abs_delta ) {
			max_abs_delta = sum_abs_delta;
		}
		if ( sum_abs_eij > max_abs_eij ) {
			max_abs_eij = sum_abs_eij;
		}
	}

	*err = max_abs_delta;
	// The two maxima may come from different columns; the ratio of the norms
	// is the relative error
	*eps = ( max_abs_delta > 0.0 ) ? max_abs_delta / max_abs_eij : 0.0;
}

/*
 * Accumulates a matrix product into C, i.e., C = C + A*B, with the loops
 * ordered j-k-i (SAXPY form): for each column j of C and each index k, the
 * inner-most loop adds B(k,j) times column k of A to column j of C.
 * A (m-by-p), B (p-by-n) and C (m-by-n) are rectangular matrices stored in
 * column-major order with leading dimensions ldimA, ldimB and ldimC,
 * respectively.  C is not cleared first; callers that want C = A*B must zero
 * it beforehand.
 */
void multiply_matrix( int m, int n, int p, int ldimA, const double *A, 
	int ldimB, const double *B, int ldimC, double *C ) 	
{
	for ( int col = 0; col < n; col++ ) {
		const double *b_col = &B[col*ldimB];	// Column col of B
		double *c_col = &C[col*ldimC];			// Column col of C

		for ( int inner = 0; inner < p; inner++ ) {
			const double *a_col = &A[inner*ldimA];	// Column inner of A
			const double scale = b_col[inner];		// B(inner,col), read once

			// c_col += scale * a_col
			for ( int row = 0; row < m; row++ ) {
				c_col[row] += a_col[row] * scale;
			}
		}
	}
}

