/*
 * Testing harness for unblocked and blocked algorithms implementing the 
 * factorization of matrices representing linear systems.  Matrix factorizations
 * include LU (Gaussian elimination), standard Cholesky, symmetric indefinite
 * (LDL'), and modified Cholesky (Gill-Murray-Wright and Cheng-Higham
 * algorithms).  The number of tests and error count are accumulated through a 
 * single execution of the mfactest program, and all test results are written to
 * an output file destination (terminal).
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <float.h>

#include "lufact.h"
#include "cholfact.h"
#include "ldltfact.h"
#include "modchol.h"
#include "matcom.h"
#include "lapack.h"

// Pass/fail bookkeeping helper called by every test routine below
static void test_assert( double eps, double tol, const char *test_name );
// LU factorization tests (run by main when LUFACT is defined)
static void test_lu_outer_product( void );
static void test_lu_saxpy( void );
static void test_lu_block( void );
static void test_lu_recur_block( void );
// LU factorization with partial pivoting tests (run when LUPIVOT is defined)
static void test_lu_pivot_outer_product( void );
static void test_lu_pivot_saxpy( void );
static void test_lu_pivot_block( void );
static void test_lu_pivot_lapack( void );
// Cholesky factorization tests (run when CHOLFACT is defined)
static void test_chol_outer_product( void );
static void test_chol_saxpy( void );
static void test_chol_block( void );
static void test_chol_rect_block( void );
static void test_chol_contig_block( void );
static void test_chol_recur_block( void );
static void test_chol_block_blas( void );
static void test_chol_contig_block_blas( void );
static void test_chol_lapack( void );
// LDL' factorization tests (run when LDLTFACT is defined)
static void test_ldlt_outer_product (void );
static void test_ldlt_saxpy( void );
static void test_ldlt_block( void );
static void test_ldlt_block_blas( void );
static void test_ldlt_lapack( void );
// Modified Cholesky factorization tests (run when MODCHOL is defined)
static void test_chol_gmw_outer_product( void );
static void test_chol_gmw_saxpy( void );
static void test_chol_gmw_block( void );
static void test_chol_gmw_block_blas( void );
static void test_chol_ch_outer_product( void );
static void test_chol_ch_saxpy( void );
static void test_chol_ch_block( void );
static void test_chol_ch_block_blas( void );
// Presumably prints an m-by-n matrix E with leading dimension ldim -- confirm
// against the definition
static void print_matrix( int m, int n, int ldim, const double *E );

// Running totals: updated by test_assert() and reported by main()
static int		tests = 0,			// Test count
				errs = 0;			// Error count
// Stream that receives all test output; main() points it at stdout
static FILE		*fp;

/*
 * Entry point of the test harness.  Runs every group of factorization tests
 * that was enabled at compile time (LUFACT, LUPIVOT, CHOLFACT, LDLTFACT,
 * MODCHOL) and then prints a summary of the accumulated test and error counts.
 *
 * Returns EXIT_SUCCESS when no errors were recorded and EXIT_FAILURE otherwise,
 * so that scripts and build systems can detect a failing run.
 */
int main( void )
{	
	fp = stdout;

	// Test LU factorization
#if defined(LUFACT)
	test_lu_outer_product();
	test_lu_saxpy();
	test_lu_block();
	test_lu_recur_block();
#endif

	// Test LU factorization with partial pivoting
#if defined(LUPIVOT)
	test_lu_pivot_outer_product();
	test_lu_pivot_saxpy();
	test_lu_pivot_block();
	test_lu_pivot_lapack();
#endif

	// Test Cholesky factorization
#if defined(CHOLFACT)
	test_chol_outer_product();
	test_chol_saxpy();
	test_chol_block();
	test_chol_rect_block();
	test_chol_contig_block();
	test_chol_recur_block();
	test_chol_block_blas();
	test_chol_contig_block_blas();
	test_chol_lapack();
#endif

	// Test LDL' factorization
#if defined(LDLTFACT)
	test_ldlt_outer_product();
	test_ldlt_saxpy();
	test_ldlt_block();
	test_ldlt_block_blas();
	test_ldlt_lapack();
#endif

	// Test modified Cholesky factorization
#if defined(MODCHOL)
	test_chol_gmw_outer_product();
	test_chol_gmw_saxpy();
	test_chol_gmw_block();
	test_chol_gmw_block_blas();
	test_chol_ch_outer_product();
	test_chol_ch_saxpy();
	test_chol_ch_block();
	test_chol_ch_block_blas();
#endif

	if ( errs == 0 ) {
		fprintf( fp, "Passed all %d tests.\n", tests );
	} else {
		fprintf( fp, "Total of %d error(s) encountered in %d tests.\n", 
			errs, tests );
	}
	// Propagate the outcome through the exit status as well as the report
	return ( errs == 0 ) ? EXIT_SUCCESS : EXIT_FAILURE;
}

/*
 * Records the outcome of one test and reports it on the output stream fp.
 * The test passes when eps <= tol; otherwise it fails and the error count is
 * incremented.  The test count is incremented in both cases.
 *
 *   eps        measured error of the routine under test
 *   tol        largest error that still counts as a pass
 *   test_name  description printed next to the PASSED/FAILED verdict
 *
 * The parameter is const-qualified to match the prototype at the top of the
 * file; the name is only read, never modified.
 */
static void test_assert( double eps, double tol, const char *test_name )
{
	tests++;
	if ( eps <= tol ) {
		fprintf( fp, "PASSED: %s\n(eps=%e <= tol=%e)\n", test_name, eps, tol );
	} else {
		fprintf( fp, "FAILED: %s\n(eps=%e > tol=%e)\n", test_name, eps, tol );
		errs++;
	}
}

/******************************************************************************/

/*
 * Exercises lu_outer_product() on three hard-coded square matrices of order
 * 3, 4 and 10.  Each matrix is factored in place and the result is compared,
 * through error_matrix_comp_frob(), with a stored array holding the expected
 * combined unit lower and upper triangular factors.  One test result is
 * recorded per matrix.
 */
void test_lu_outer_product( void )
{
	const double	tol = 1e-12;		// Error tolerance

	char	test_name[80];
	double	eps, err;

	// 3-by-3 input and its expected factorization
	double	A[] =	{ 2, 0.5, 0.25, 0.25, 4.0625, 2.03125, 0.5, 0.25, 3.125 };
	double	LUA[] =	{ 2, 0.25, 0.125, 0.25, 4, 0.5, 0.5, 0.125, 3 };
	// 4-by-4 input and its expected factorization
	double	B[] =	{ 6, 18, 12, 24, 1, 7, 26, 12, 
					  3, 11, 23, 41, 2, 6, 5, 16 };
	double	LUB[] =	{ 6, 3, 2, 4, 1, 4, 6, 2, 
					  3, 2, 5, 5, 2, 0, 1, 3 };
	// 10-by-10 input and its expected factorization
	double	C[] =	{  3,  6,  3, 15,  9,  0,  3,  9, 15, 15,
					   2,  5,  5, 12, 10,  1,  4, 11, 14, 15,
					   1,  7, 18, 25, 29, 13, 19, 32, 31, 32, 
					   2,  9, 18, 26, 29, 13, 19, 35, 38, 36, 
					   4, 11, 15, 39, 33, 29, 33, 46, 65, 49,
					   2,  4,  7, 40, 23, 46, 51, 36, 64, 25, 
					   4, 10, 11, 31, 23, 15, 28, 50, 69, 37,
					   1,  4, 11, 29, 23, 19, 29, 38, 52, 29,  
					   3,  7, 10, 38, 28, 28, 37, 46, 72, 51,
					   5, 14, 19, 45, 39, 26, 44, 68, 99, 82 };
	double	LUC[] =	{  3, 2, 1, 5, 3, 0, 1, 3, 5, 5,
					   2, 1, 3, 2, 4, 1, 2, 5, 4, 5, 
					   1, 5, 2, 5, 3, 4, 4, 2, 3, 1,
					   2, 5, 1, 1, 0, 4, 3, 2, 5, 0,   
					   4, 3, 2, 3, 3, 2, 2, 3, 4, 4,  
					   2, 0, 5, 5, 2, 2, 5, 2, 3, 1, 
					   4, 2, 1, 2, 0, 1, 5, 4, 5, 1, 
					   1, 2, 4, 0, 0, 1, 3, 3, 3, 2,   
					   3, 1, 4, 1, 3, 1, 2, 3, 2, 3,
					   5, 4, 2, 2, 2, 2, 3, 3, 4, 4 };

	// Each case pairs a matrix order with the matrix to factor in place and
	// the array holding the expected result
	struct {
		int		order;
		double	*mat;
		double	*expected;
	} cases[] = {
		{  3, A, LUA },
		{  4, B, LUB },
		{ 10, C, LUC }
	};
	const int	num_cases = (int) ( sizeof(cases) / sizeof(cases[0]) );

	for ( int k = 0; k < num_cases; k++ ) {
		const int order = cases[k].order;

		sprintf( test_name, "LU factorization, outer product, %dx%d matrix", 
			order, order );
		// Factor in place, then measure the deviation from the stored answer
		lu_outer_product( order, cases[k].mat );
		error_matrix_comp_frob( &eps, &err, order, order, 
			cases[k].expected, cases[k].mat );
		test_assert( eps, tol, test_name );
	}
}

/*
 * Checks lu_saxpy() on one 12-by-12 matrix produced by 
 * create_random_nonsingular().  The same matrix is also factored with 
 * lu_outer_product(), and the two in-place results are compared with 
 * error_matrix_comp_frob().  No pivoting is used; the original notes state 
 * that the generated matrices are diagonally dominant.
 *
 * If the work arrays cannot be allocated, the test is recorded as failed.
 */
void test_lu_saxpy( void )
{
	const int		n = 12;			// n-by-n matrix A
	const double	tol = 1e-12,	// Error tolerance
					alpha = 1.0;	// Scaling factor for random matrix
	const size_t	count = (size_t) n * n;	// Elements per matrix

	char	test_name[80];
	double	eps, err;
	double	*A = malloc( count * sizeof *A );
	double	*LU = malloc( count * sizeof *LU );

	snprintf( test_name, sizeof test_name, 
		"LU factorization, SAXPY, %dx%d matrix", n, n );
	if ( A != NULL && LU != NULL ) {
		create_random_nonsingular( alpha, n, A );
		copy_matrix( n, n, A, LU );

		// Factor A with the routine under test and LU with the reference
		// outer product method, then compare the two results
		lu_saxpy( n, A );
		lu_outer_product( n, LU );
		error_matrix_comp_frob( &eps, &err, n, n, LU, A );
		test_assert( eps, tol, test_name );
	} else {
		// Out of memory: explain on stderr and record a failed test rather
		// than handing a null pointer to the factorization routines
		fprintf( stderr, "%s: memory allocation failed\n", test_name );
		test_assert( DBL_MAX, tol, test_name );
	}
	free( A );
	free( LU );
}

/*
 * Checks lu_block() on matrices of order 12, 64 and 82 produced by 
 * create_random_nonsingular().  Each matrix is also factored with 
 * lu_outer_product(), and the two in-place results are compared with 
 * error_matrix_comp_frob().  The test names label the three sizes as smaller 
 * than, a multiple of, and not a multiple of the block dimension.
 *
 * If the work arrays for a size cannot be allocated, that test is recorded as
 * failed and the remaining sizes are still attempted.
 */
void test_lu_block( void )
{
	const int		mat_size[] = { 12, 64, 82 };
	const double	tol = 1e-12,	// Error tolerance
					alpha = 1.0;	// Scaling factor for random matrix
	// Test names, one per entry of mat_size and in the same order
	const char *const	test_name[] = {
		"LU factorization, simple blocking --\n"
		"matrix dimension less than block dimension",
		"LU factorization, simple blocking --\n"
		"matrix dimension a multiple of block dimension",
		"LU factorization, simple blocking --\n"
		"matrix dimension not a multiple of block dimension"
	};
	const size_t	num_sizes = sizeof mat_size / sizeof mat_size[0];

	for ( size_t i = 0; i < num_sizes; i++ ) {
		const int		n = mat_size[i];
		const size_t	count = (size_t) n * n;	// Elements per matrix
		double	eps, err;
		double	*A = malloc( count * sizeof *A );
		double	*LU = malloc( count * sizeof *LU );

		if ( A != NULL && LU != NULL ) {
			create_random_nonsingular( alpha, n, A );
			copy_matrix( n, n, A, LU );
			// Factor A with the routine under test and LU with the reference
			// outer product method, then compare the two results
			lu_block( n, A );
			lu_outer_product( n, LU );
			error_matrix_comp_frob( &eps, &err, n, n, LU, A );
			test_assert( eps, tol, test_name[i] );
		} else {
			// Out of memory: explain on stderr and record a failed test 
			// rather than handing a null pointer to the routines above
			fprintf( stderr, "%s: memory allocation failed\n", test_name[i] );
			test_assert( DBL_MAX, tol, test_name[i] );
		}
		free( A );
		free( LU );
	}
}

/*
 * Checks lu_recur_block() on matrices of order 22, 96 and 111 produced by 
 * create_random_nonsingular().  Each matrix is also factored with 
 * lu_outer_product(), and the two in-place results are compared with 
 * error_matrix_comp_frob().  The test names label the three sizes as smaller 
 * than, a multiple of, and not a multiple of the block dimension.
 *
 * If the work arrays for a size cannot be allocated, that test is recorded as
 * failed and the remaining sizes are still attempted.
 */
void test_lu_recur_block( void )
{
	const int		mat_size[] = { 22, 96, 111 };
	const double	tol = 1e-12,	// Error tolerance
					alpha = 1.0;	// Scaling factor for random matrix
	// Test names, one per entry of mat_size and in the same order
	const char *const	test_name[] = {
		"LU factorization, recursive contiguous blocking --\n"
		"matrix dimension less than block dimension",
		"LU factorization, recursive contiguous blocking --\n"
		"matrix dimension a multiple of block dimension",
		"LU factorization, recursive contiguous blocking --\n"
		"matrix dimension not a multiple of block dimension"
	};
	const size_t	num_sizes = sizeof mat_size / sizeof mat_size[0];

	for ( size_t i = 0; i < num_sizes; i++ ) {
		const int		n = mat_size[i];
		const size_t	count = (size_t) n * n;	// Elements per matrix
		double	eps, err;
		double	*A = malloc( count * sizeof *A );
		double	*LU = malloc( count * sizeof *LU );

		if ( A != NULL && LU != NULL ) {
			create_random_nonsingular( alpha, n, A );
			copy_matrix( n, n, A, LU );
			// Factor A with the routine under test and LU with the reference
			// outer product method, then compare the two results
			lu_recur_block( n, A );
			lu_outer_product( n, LU );
			error_matrix_comp_frob( &eps, &err, n, n, LU, A );
			test_assert( eps, tol, test_name[i] );
		} else {
			// Out of memory: explain on stderr and record a failed test 
			// rather than handing a null pointer to the routines above
			fprintf( stderr, "%s: memory allocation failed\n", test_name[i] );
			test_assert( DBL_MAX, tol, test_name[i] );
		}
		free( A );
		free( LU );
	}
}

/******************************************************************************/

/*
 * Checks lu_pivot_outer_product() on two hard-coded matrices of order 3 and 4.
 * Each matrix is factored in place and the result is compared, through
 * error_matrix_comp_frob(), with a stored array holding the expected combined
 * unit lower and upper triangular factors.
 *
 * The pivot and ordering work arrays are small and of known maximum size, so
 * they are kept on the stack; this removes the unchecked heap allocations.
 */
void test_lu_pivot_outer_product( void )
{
	const double	tol = 1e-12;		// Error tolerance
	
	char	test_name[80];
	int		n;
	int		piv[4], ord[4];		// Sized for the largest case (n = 4)
	double	eps, err;
	double	A[] =	{ 3, 2, 6, 17, 4, 18, 10, -2, -12 },		
			LUA[] =	{ 6, 0.5, 1/3.0, 18, 8, -0.25, -12, 16, 6 },
			B[] =	{ 6, 18, 12, 24, 1, 7, 26, 12, 
					  3, 11, 23, 41, 2, 6, 5, 16 },			
			LUB[] =	{ 24, 0.50, 0.75, 0.25, 12, 20, -0.10, -0.10, 
					  41, 2.50, -19.50, 14/39.0, 16, -3, -6.30, -1/26.0 };

	n = 3;
	snprintf( test_name, sizeof test_name,
"LU factorization with partial pivoting, outer product, %dx%d matrix", n, n );
	// Perform LU factorization and compare result with correct answer
	lu_pivot_outer_product( 'G', n, piv, ord, A );
	error_matrix_comp_frob( &eps, &err, n, n, LUA, A );
	test_assert( eps, tol, test_name );

	n = 4;
	snprintf( test_name, sizeof test_name,
"LU factorization with partial pivoting, outer product, %dx%d matrix", n, n );
	// Perform LU factorization and compare result with correct answer
	lu_pivot_outer_product( 'G', n, piv, ord, B );
	error_matrix_comp_frob( &eps, &err, n, n, LUB, B );
	test_assert( eps, tol, test_name );
}

/*
 * Checks lu_pivot_saxpy() on one 14-by-14 matrix produced by 
 * create_random_nonsingular().  The same matrix is also factored with 
 * lu_pivot_outer_product(), and the two in-place results are compared with 
 * error_matrix_comp_frob().
 *
 * If any work array cannot be allocated, the test is recorded as failed.
 */
void test_lu_pivot_saxpy( void )
{
	const int		n = 14;			// n-by-n matrix A
	const double	tol = 1e-12,	// Error tolerance
					alpha = 10.0;	// Scaling factor for random matrix
	const size_t	count = (size_t) n * n;	// Elements per matrix
	
	char	test_name[80];
	double	eps, err;
	double	*A = malloc( count * sizeof *A );
	double	*LU = malloc( count * sizeof *LU );
	int		*piv = malloc( (size_t) n * sizeof *piv );
	int		*ord = malloc( (size_t) n * sizeof *ord );
	
	snprintf( test_name, sizeof test_name, 
		"LU factorization with partial pivoting, SAXPY, %dx%d matrix", n, n );
	if ( A != NULL && LU != NULL && piv != NULL && ord != NULL ) {
		create_random_nonsingular( alpha, n, A );
		copy_matrix( n, n, A, LU );
	
		// Factor A with the routine under test and LU with the reference
		// outer product method, then compare the two results
		lu_pivot_saxpy( 'G', n, piv, ord, A );
		lu_pivot_outer_product( 'G', n, piv, ord, LU );
		error_matrix_comp_frob( &eps, &err, n, n, LU, A );
		test_assert( eps, tol, test_name );
	} else {
		// Out of memory: explain on stderr and record a failed test rather
		// than handing a null pointer to the factorization routines
		fprintf( stderr, "%s: memory allocation failed\n", test_name );
		test_assert( DBL_MAX, tol, test_name );
	}
	free( A );
	free( LU );
	free( piv );
	free( ord );
}

/*
 * Checks lu_pivot_block() on matrices of order 14, 48 and 82 produced by 
 * create_random_nonsingular().  Each matrix is also factored with 
 * lu_pivot_outer_product(), and the two in-place results are compared with 
 * error_matrix_comp_frob().  The test names label the three sizes as smaller 
 * than, a multiple of, and not a multiple of the block dimension.
 *
 * If the work arrays for a size cannot be allocated, that test is recorded as
 * failed and the remaining sizes are still attempted.
 */
void test_lu_pivot_block( void )
{
	const int		mat_size[] = { 14, 48, 82 };
	const double	tol = 1e-12,	// Error tolerance
					alpha = 10.0;	// Scaling factor for random matrix
	// Test names, one per entry of mat_size and in the same order
	const char *const	test_name[] = {
		"LU factorization with partial pivoting, simple blocking --\n"
		"matrix dimension less than block dimension",
		"LU factorization with partial pivoting, simple blocking --\n"
		"matrix dimension a multiple of block dimension",
		"LU factorization with partial pivoting, simple blocking --\n"
		"matrix dimension not a multiple of block dimension"
	};
	const size_t	num_sizes = sizeof mat_size / sizeof mat_size[0];

	for ( size_t i = 0; i < num_sizes; i++ ) {
		const int		n = mat_size[i];
		const size_t	count = (size_t) n * n;	// Elements per matrix
		double	eps, err;
		double	*A = malloc( count * sizeof *A );
		double	*LU = malloc( count * sizeof *LU );
		int		*piv = malloc( (size_t) n * sizeof *piv );
		int		*ord = malloc( (size_t) n * sizeof *ord );

		if ( A != NULL && LU != NULL && piv != NULL && ord != NULL ) {
			create_random_nonsingular( alpha, n, A );
			copy_matrix( n, n, A, LU );
		
			// Factor A with the routine under test and LU with the reference
			// outer product method, then compare the two results
			lu_pivot_block( 'G', n, piv, ord, A );
			lu_pivot_outer_product( 'G', n, piv, ord, LU );
			error_matrix_comp_frob( &eps, &err, n, n, LU, A );
			test_assert( eps, tol, test_name[i] );
		} else {
			// Out of memory: explain on stderr and record a failed test 
			// rather than handing a null pointer to the routines above
			fprintf( stderr, "%s: memory allocation failed\n", test_name[i] );
			test_assert( DBL_MAX, tol, test_name[i] );
		}
		free( A );
		free( LU );
		free( piv );
		free( ord );
	}
}

/*
 * Checks the wrapper lu_pivot_lapack() (LAPACK routine DGETRF, per the test
 * name) on one 42-by-42 matrix produced by create_random_nonsingular().  The 
 * same matrix is also factored with lu_pivot_outer_product().  The two results
 * are compared through the sum of absolute values of their entries, and the 
 * relative difference of those sums is the reported error.
 *
 * The difference is taken with fabs(); the integer abs() would truncate the
 * double argument and hide any discrepancy smaller than 1.
 *
 * If any work array cannot be allocated, the test is recorded as failed.
 */
void test_lu_pivot_lapack( void )
{
	const int		n = 42;			// n-by-n matrix A
	const double	tol = 1e-12,	// Error tolerance
					alpha = 10.0;	// Scaling factor for random matrix
	const size_t	count = (size_t) n * n;	// Elements per matrix
	
	char	test_name[80];
	double	eps, err, normA, normLU;
	double	*A = malloc( count * sizeof *A );
	double	*LU = malloc( count * sizeof *LU );
	int		*piv = malloc( (size_t) n * sizeof *piv );
	int		*ord = malloc( (size_t) n * sizeof *ord );
	
	snprintf( test_name, sizeof test_name, 
		"LU factorization, LAPACK routine DGETRF, %dx%d matrix", n, n );
	if ( A != NULL && LU != NULL && piv != NULL && ord != NULL ) {
		create_random_nonsingular( alpha, n, A );
		copy_matrix( n, n, A, LU );
	
		// Factor A through the LAPACK wrapper and LU with the reference 
		// outer product method
		lu_pivot_lapack( 'G', n, piv, ord, A );
		lu_pivot_outer_product( 'G', n, piv, ord, LU );
		// The factorization produced by LAPACK routine DGETRF takes the form 
		// A = P*L*U, whereas the factorization produced by 
		// lu_pivot_outer_product() takes the form P*A = L*U.  Therefore, 
		// verify that DGETRF is invoked correctly by comparing the norms of 
		// factors (matrices) computed by DGETRF and lu_pivot_outer_product(). 
		normA = 0.0;
		normLU = 0.0;
		for ( int j = 0; j < n; j++ ) {
			for ( int i = 0; i < n; i++ ) {
				normA += fabs( A[i + j*n] );
				normLU += fabs( LU[i + j*n] );
			}
		}
		err = fabs( normA - normLU );
		eps = err / normA;
		test_assert( eps, tol, test_name );
	} else {
		// Out of memory: explain on stderr and record a failed test rather
		// than handing a null pointer to the factorization routines
		fprintf( stderr, "%s: memory allocation failed\n", test_name );
		test_assert( DBL_MAX, tol, test_name );
	}
	free( A );
	free( LU );
	free( piv );
	free( ord );
}

/******************************************************************************/

/*
 * Exercises chol_outer_product() on three hard-coded symmetric matrices of
 * order 3, 4 and 8.  Each matrix is factored in place and the result is 
 * compared, through error_matrix_comp_frob(), with a stored array holding the
 * expected in-place result of the factorization A = L*L'.  One test result is
 * recorded per matrix.
 */
void test_chol_outer_product( void )
{
	const double	tol = 1e-12;		// Error tolerance

	char	test_name[80];
	double	eps, err;

	// 3-by-3 input and its expected result
	double	A[] =	{ 4, -2, -6, -2, 10, 9, -6, 9, 14 };
	double	LA[] =	{ 2, -1, -3, -2, 3, 2, -6, 9, 1 };
	// 4-by-4 input and its expected result
	double	B[] =	{ 16, 8, 12, 8, 8, 29, 11, 24,
					  12, 11, 46, 22, 8, 24, 22, 33 };
	double	LB[] =	{ 4, 2, 3, 2, 8, 5, 1, 4, 
					  12, 11, 6, 2, 8, 24, 22, 3 };
	// 8-by-8 input and its expected result
	double	C[] =	{ 25, 10, 15,  0, 20, 10, 25,  5, 
					  10,  8, 14,  8, 14,  4, 18, 12,
					  15, 14, 29, 24, 30,  8, 33, 29,  
					   0,  8, 24, 33, 28,  6, 23, 33,
					  20, 14, 30, 28, 66, 23, 47, 44, 
					  10,  4,  8,  6, 23, 19, 32, 19,
					  25, 18, 33, 23, 47, 32, 92, 54,  
					   5, 12, 29, 33, 44, 19, 54, 62 };
	double	LC[] = 	{  5,  2,  3,  0,  4,  2,  5,  1, 
					  10,  2,  4,  4,  3,  0,  4,  5,
					  15, 14,  2,  4,  3,  1,  1,  3, 
					   0,  8, 24,  1,  4,  2,  3,  1,
					  20, 14, 30, 28,  4,  1,  0,  3, 
					  10,  4,  8,  6, 23,  3,  5,  3,
					  25, 18, 33, 23, 47, 32,  4,  2, 
					   5, 12, 29, 33, 44, 19, 54,  2 };

	// Each case pairs a matrix order with the matrix to factor in place and
	// the array holding the expected result
	struct {
		int		order;
		double	*mat;
		double	*expected;
	} cases[] = {
		{ 3, A, LA },
		{ 4, B, LB },
		{ 8, C, LC }
	};
	const int	num_cases = (int) ( sizeof(cases) / sizeof(cases[0]) );

	for ( int k = 0; k < num_cases; k++ ) {
		const int order = cases[k].order;

		sprintf( test_name, 
			"Cholesky factorization, outer product, %dx%d matrix", 
			order, order );
		// Factor in place, then measure the deviation from the stored answer
		chol_outer_product( order, cases[k].mat );
		error_matrix_comp_frob( &eps, &err, order, order, 
			cases[k].expected, cases[k].mat );
		test_assert( eps, tol, test_name );
	}
}

/*
 * Checks chol_saxpy() on one 21-by-21 matrix produced by create_random_spd().
 * The same matrix is also factored with chol_outer_product(), and the two 
 * in-place results are compared with error_matrix_comp_frob().
 *
 * If the work arrays cannot be allocated, the test is recorded as failed.
 */
void test_chol_saxpy( void )
{
	const double	tol = 1e-12,	// Error tolerance
					alpha = 1.0;	// Scaling factor for random matrix
	const int		n = 21;			// n-by-n matrix A
	const size_t	count = (size_t) n * n;	// Elements per matrix

	char	test_name[80];
	double	eps, err;
	double	*A = malloc( count * sizeof *A );
	double	*L = malloc( count * sizeof *L );
	
	snprintf( test_name, sizeof test_name, 
		"Cholesky factorization, SAXPY, %dx%d matrix", n, n );
	if ( A != NULL && L != NULL ) {
		create_random_spd( alpha, n, A );
		copy_matrix( n, n, A, L );
	
		// Factor A with the routine under test and L with the reference
		// outer product method, then compare the two results
		chol_saxpy( n, A );
		chol_outer_product( n, L );
		error_matrix_comp_frob( &eps, &err, n, n, L, A );
		test_assert( eps, tol, test_name );
	} else {
		// Out of memory: explain on stderr and record a failed test rather
		// than handing a null pointer to the factorization routines
		fprintf( stderr, "%s: memory allocation failed\n", test_name );
		test_assert( DBL_MAX, tol, test_name );
	}
	free( A );
	free( L );
}

/*
 * Checks chol_block() on matrices of order 11, 48 and 77 produced by 
 * create_random_spd().  Each matrix is also factored with 
 * chol_outer_product(), and the two in-place results are compared with 
 * error_matrix_comp_frob().  The test names label the three sizes as smaller 
 * than, a multiple of, and not a multiple of the block dimension.
 *
 * If the work arrays for a size cannot be allocated, that test is recorded as
 * failed and the remaining sizes are still attempted.
 */
void test_chol_block( void )
{
	const int		mat_size[] = { 11, 48, 77 };
	const double	tol = 1e-12,	// Error tolerance
					alpha = 1.0;	// Scaling factor for random matrix
	// Test names, one per entry of mat_size and in the same order
	const char *const	test_name[] = {
		"Cholesky factorization, simple blocking --\n"
		"matrix dimension less than block dimension",
		"Cholesky factorization, simple blocking --\n"
		"matrix dimension a multiple of block dimension",
		"Cholesky factorization, simple blocking --\n"
		"matrix dimension not a multiple of block dimension"
	};
	const size_t	num_sizes = sizeof mat_size / sizeof mat_size[0];

	for ( size_t i = 0; i < num_sizes; i++ ) {
		const int		n = mat_size[i];
		const size_t	count = (size_t) n * n;	// Elements per matrix
		double	eps, err;
		double	*A = malloc( count * sizeof *A );
		double	*L = malloc( count * sizeof *L );

		if ( A != NULL && L != NULL ) {
			create_random_spd( alpha, n, A );
			copy_matrix( n, n, A, L );
			// Factor A with the routine under test and L with the reference
			// outer product method, then compare the two results
			chol_block( n, A );
			chol_outer_product( n, L );
			error_matrix_comp_frob( &eps, &err, n, n, L, A );
			test_assert( eps, tol, test_name[i] );
		} else {
			// Out of memory: explain on stderr and record a failed test 
			// rather than handing a null pointer to the routines above
			fprintf( stderr, "%s: memory allocation failed\n", test_name[i] );
			test_assert( DBL_MAX, tol, test_name[i] );
		}
		free( A );
		free( L );
	}
}

/*
 * Checks chol_rect_block() on matrices of order 12, 48 and 82 produced by 
 * create_random_spd().  Each matrix is also factored with 
 * chol_outer_product(), and the two in-place results are compared with 
 * error_matrix_comp_frob().  The test names label the three sizes as smaller 
 * than, a multiple of, and not a multiple of the block dimension.
 *
 * If the work arrays for a size cannot be allocated, that test is recorded as
 * failed and the remaining sizes are still attempted.
 */
void test_chol_rect_block( void )
{
	const int		mat_size[] = { 12, 48, 82 };
	const double	tol = 1e-12,	// Error tolerance
					alpha = 1.0;	// Scaling factor for random matrix
	// Test names, one per entry of mat_size and in the same order
	const char *const	test_name[] = {
		"Cholesky factorization, simple blocking, rectangular --\n"
		"matrix dimension less than block dimension",
		"Cholesky factorization, simple blocking, rectangular --\n"
		"matrix dimension a multiple of block dimension",
		"Cholesky factorization, simple blocking, rectangular --\n"
		"matrix dimension not a multiple of block dimension"
	};
	const size_t	num_sizes = sizeof mat_size / sizeof mat_size[0];

	for ( size_t i = 0; i < num_sizes; i++ ) {
		const int		n = mat_size[i];
		const size_t	count = (size_t) n * n;	// Elements per matrix
		double	eps, err;
		double	*A = malloc( count * sizeof *A );
		double	*L = malloc( count * sizeof *L );

		if ( A != NULL && L != NULL ) {
			create_random_spd( alpha, n, A );
			copy_matrix( n, n, A, L );
			// Factor A with the routine under test and L with the reference
			// outer product method, then compare the two results
			chol_rect_block( n, A );
			chol_outer_product( n, L );
			error_matrix_comp_frob( &eps, &err, n, n, L, A );
			test_assert( eps, tol, test_name[i] );
		} else {
			// Out of memory: explain on stderr and record a failed test 
			// rather than handing a null pointer to the routines above
			fprintf( stderr, "%s: memory allocation failed\n", test_name[i] );
			test_assert( DBL_MAX, tol, test_name[i] );
		}
		free( A );
		free( L );
	}
}

/*
 * Checks chol_contig_block() on matrices of order 12, 96 and 123 produced by 
 * create_random_spd().  Each matrix is also factored with 
 * chol_outer_product(), and the two in-place results are compared with 
 * error_matrix_comp_frob().  The test names label the three sizes as smaller 
 * than, a multiple of, and not a multiple of the block dimension.
 *
 * If the work arrays for a size cannot be allocated, that test is recorded as
 * failed and the remaining sizes are still attempted.
 */
void test_chol_contig_block( void )
{
	const int		mat_size[] = { 12, 96, 123 };
	const double	tol = 1e-12,	// Error tolerance
					alpha = 1.0;	// Scaling factor for random matrix
	// Test names, one per entry of mat_size and in the same order
	const char *const	test_name[] = {
		"Cholesky factorization, contiguous blocking --\n"
		"matrix dimension less than block dimension",
		"Cholesky factorization, contiguous blocking --\n"
		"matrix dimension a multiple of block dimension",
		"Cholesky factorization, contiguous blocking --\n"
		"matrix dimension not a multiple of block dimension"
	};
	const size_t	num_sizes = sizeof mat_size / sizeof mat_size[0];

	for ( size_t i = 0; i < num_sizes; i++ ) {
		const int		n = mat_size[i];
		const size_t	count = (size_t) n * n;	// Elements per matrix
		double	eps, err;
		double	*A = malloc( count * sizeof *A );
		double	*L = malloc( count * sizeof *L );

		if ( A != NULL && L != NULL ) {
			create_random_spd( alpha, n, A );
			copy_matrix( n, n, A, L );
			// Factor A with the routine under test and L with the reference
			// outer product method, then compare the two results
			chol_contig_block( n, A );
			chol_outer_product( n, L );
			error_matrix_comp_frob( &eps, &err, n, n, L, A );
			test_assert( eps, tol, test_name[i] );
		} else {
			// Out of memory: explain on stderr and record a failed test 
			// rather than handing a null pointer to the routines above
			fprintf( stderr, "%s: memory allocation failed\n", test_name[i] );
			test_assert( DBL_MAX, tol, test_name[i] );
		}
		free( A );
		free( L );
	}
}

/*
 * Checks chol_recur_block() on matrices of order 18, 96 and 107 produced by 
 * create_random_spd().  Each matrix is also factored with 
 * chol_outer_product(), and the two in-place results are compared with 
 * error_matrix_comp_frob().  The test names label the three sizes as smaller 
 * than, a multiple of, and not a multiple of the block dimension.
 *
 * If the work arrays for a size cannot be allocated, that test is recorded as
 * failed and the remaining sizes are still attempted.
 */
void test_chol_recur_block( void )
{
	const int		mat_size[] = { 18, 96, 107 }; 
	const double	tol = 1e-12,	// Error tolerance
					alpha = 1.0;	// Scaling factor for random matrix
	// Test names, one per entry of mat_size and in the same order
	const char *const	test_name[] = {
		"Cholesky factorization, recursive contiguous blocking --\n"
		"matrix dimension less than block dimension",
		"Cholesky factorization, recursive contiguous blocking --\n"
		"matrix dimension a multiple of block dimension",
		"Cholesky factorization, recursive contiguous blocking --\n"
		"matrix dimension not a multiple of block dimension"
	};
	const size_t	num_sizes = sizeof mat_size / sizeof mat_size[0];

	for ( size_t i = 0; i < num_sizes; i++ ) {
		const int		n = mat_size[i];
		const size_t	count = (size_t) n * n;	// Elements per matrix
		double	eps, err;
		double	*A = malloc( count * sizeof *A );
		double	*L = malloc( count * sizeof *L );

		if ( A != NULL && L != NULL ) {
			create_random_spd( alpha, n, A );
			copy_matrix( n, n, A, L );
			// Factor A with the routine under test and L with the reference
			// outer product method, then compare the two results
			chol_recur_block( n, A );
			chol_outer_product( n, L );
			error_matrix_comp_frob( &eps, &err, n, n, L, A );
			test_assert( eps, tol, test_name[i] );
		} else {
			// Out of memory: explain on stderr and record a failed test 
			// rather than handing a null pointer to the routines above
			fprintf( stderr, "%s: memory allocation failed\n", test_name[i] );
			test_assert( DBL_MAX, tol, test_name[i] );
		}
		free( A );
		free( L );
	}
}

/*
 * Checks chol_block_blas() on matrices of order 12, 64 and 82 produced by 
 * create_random_spd().  Each matrix is also factored with 
 * chol_outer_product(), and the two in-place results are compared with 
 * error_matrix_comp_frob().  The test names label the three sizes as smaller 
 * than, a multiple of, and not a multiple of the block dimension.
 *
 * If the work arrays for a size cannot be allocated, that test is recorded as
 * failed and the remaining sizes are still attempted.
 */
void test_chol_block_blas( void )
{
	const int		mat_size[] = { 12, 64, 82 };
	const double	tol = 1e-12,	// Error tolerance
					alpha = 1.0;	// Scaling factor for random matrix
	// Test names, one per entry of mat_size and in the same order
	const char *const	test_name[] = {
		"Cholesky, simple blocking using the BLAS library --\n"
		"matrix dimension less than block dimension",
		"Cholesky, simple blocking using the BLAS library --\n"
		"matrix dimension a multiple of block dimension",
		"Cholesky, simple blocking using the BLAS library --\n"
		"matrix dimension not a multiple of block dimension"
	};
	const size_t	num_sizes = sizeof mat_size / sizeof mat_size[0];

	for ( size_t i = 0; i < num_sizes; i++ ) {
		const int		n = mat_size[i];
		const size_t	count = (size_t) n * n;	// Elements per matrix
		double	eps, err;
		double	*A = malloc( count * sizeof *A );
		double	*L = malloc( count * sizeof *L );

		if ( A != NULL && L != NULL ) {
			create_random_spd( alpha, n, A );
			copy_matrix( n, n, A, L );
			// Factor A with the routine under test and L with the reference
			// outer product method, then compare the two results
			chol_block_blas( n, A );
			chol_outer_product( n, L );
			error_matrix_comp_frob( &eps, &err, n, n, L, A );
			test_assert( eps, tol, test_name[i] );
		} else {
			// Out of memory: explain on stderr and record a failed test 
			// rather than handing a null pointer to the routines above
			fprintf( stderr, "%s: memory allocation failed\n", test_name[i] );
			test_assert( DBL_MAX, tol, test_name[i] );
		}
		free( A );
		free( L );
	}
}

/*
 * Checks chol_contig_block_blas() on matrices of order 11, 64 and 87 produced
 * by create_random_spd().  Each matrix is also factored with 
 * chol_outer_product(), and the two in-place results are compared with 
 * error_matrix_comp_frob().  The test names label the three sizes as smaller 
 * than, a multiple of, and not a multiple of the block dimension.
 *
 * If the work arrays for a size cannot be allocated, that test is recorded as
 * failed and the remaining sizes are still attempted.
 */
void test_chol_contig_block_blas( void )
{
	const int		mat_size[] = { 11, 64, 87 };
	const double	tol = 1e-12,	// Error tolerance
					alpha = 1.0;	// Scaling factor for random matrix
	// Test names, one per entry of mat_size and in the same order
	const char *const	test_name[] = {
		"Cholesky, contiguous blocking using BLAS and LAPACK libraries --\n"
		"matrix dimension less than block dimension",
		"Cholesky, contiguous blocking using BLAS and LAPACK libraries --\n"
		"matrix dimension a multiple of block dimension",
		"Cholesky, contiguous blocking using BLAS and LAPACK libraries --\n"
		"matrix dimension not a multiple of block dimension"
	};
	const size_t	num_sizes = sizeof mat_size / sizeof mat_size[0];

	for ( size_t i = 0; i < num_sizes; i++ ) {
		const int		n = mat_size[i];
		const size_t	count = (size_t) n * n;	// Elements per matrix
		double	eps, err;
		double	*A = malloc( count * sizeof *A );
		double	*L = malloc( count * sizeof *L );

		if ( A != NULL && L != NULL ) {
			create_random_spd( alpha, n, A );
			copy_matrix( n, n, A, L );
			// Factor A with the routine under test and L with the reference
			// outer product method, then compare the two results
			chol_contig_block_blas( n, A );
			chol_outer_product( n, L );
			error_matrix_comp_frob( &eps, &err, n, n, L, A );
			test_assert( eps, tol, test_name[i] );
		} else {
			// Out of memory: explain on stderr and record a failed test 
			// rather than handing a null pointer to the routines above
			fprintf( stderr, "%s: memory allocation failed\n", test_name[i] );
			test_assert( DBL_MAX, tol, test_name[i] );
		}
		free( A );
		free( L );
	}
}

/*
 * Checks the wrapper chol_lapack() (LAPACK routine DPOTRF, per the test name)
 * on one 52-by-52 matrix produced by create_random_spd().  The same matrix is
 * also factored with chol_outer_product(), and the two in-place results are 
 * compared with error_matrix_comp_frob().
 *
 * If the work arrays cannot be allocated, the test is recorded as failed.
 */
void test_chol_lapack( void )
{
	const double	tol = 1e-12,	// Error tolerance
					alpha = 1.0;	// Scaling factor for random matrix
	const int		n = 52;			// n-by-n matrix A
	const size_t	count = (size_t) n * n;	// Elements per matrix

	char	test_name[80];
	double	eps, err;
	double	*A = malloc( count * sizeof *A );
	double	*L = malloc( count * sizeof *L );
	
	snprintf( test_name, sizeof test_name, 
		"Cholesky factorization, LAPACK routine DPOTRF, %dx%d matrix", n, n );
	if ( A != NULL && L != NULL ) {
		create_random_spd( alpha, n, A );
		copy_matrix( n, n, A, L );
		// Factor A through the LAPACK wrapper and L with the reference
		// outer product method, then compare the two results
		chol_lapack( n, A );
		chol_outer_product( n, L );
		error_matrix_comp_frob( &eps, &err, n, n, L, A );
		test_assert( eps, tol, test_name );
	} else {
		// Out of memory: explain on stderr and record a failed test rather
		// than handing a null pointer to the factorization routines
		fprintf( stderr, "%s: memory allocation failed\n", test_name );
		test_assert( DBL_MAX, tol, test_name );
	}
	free( A );
	free( L );
}

/******************************************************************************/

/*
 * Checks the outer product method (kji indexing) for symmetric indefinite
 * factorization (LDL') against hand-worked answers for three 4-by-4 symmetric
 * matrices.  The factorization PAP = LDL' (L unit lower triangular, D block
 * diagonal with blocks of order 1 or 2, P a permutation matrix) is run with
 * Bunch-Kaufman (partial, 'K'), bounded Bunch-Kaufman (rook, 'B') and 
 * Bunch-Parlett (complete, 'P') pivoting.  Each case copies an input matrix
 * into a scratch buffer, factors the scratch buffer in place and compares it
 * with the stored expected factors.
 */
void test_ldlt_outer_product( void )
{
	const int		n = 4;				// Order of every matrix below
	const double	tol = 1e-12;		// Error tolerance

	// Input matrices
	double	mat_a[] =	{ 1, 5, 7, 8, 5, 4, 12, 3, 
						  7, 12, 10, 9, 8, 3, 9, 6 },
			mat_b[] =	{ -4, 8, 2, -4, 8, 6, -12, 3, 
						  2, -12, 4, 2, -4, 3, 2, 3 },
			mat_c[] =	{ 4, 6, 1, -4, 6, 8, -12, 8, 
						  1, -12, 6, 10, -4, 8, 10, 4 };

	// Expected factors; the middle letter of each name is the pivoting code
	// (K, B or P) and the last letter is the input matrix it belongs to
	double	want_ka[] =	{ 6, 0.5, 1.5, 4.0/3.0, 5, 2.5, 7.5, -34.0/65.0,
						  7, 12, -3.5, 4.0/13.0, 8, 3, 9, -1483.0/195.0 },
			want_ba[] =	{ 6, 0.5, 1.5, 4.0/3.0, 5, 2.5, 7.5, -34.0/65.0,
						  7, 12, -3.5, 4.0/13.0, 8, 3, 9, -1483.0/195.0 },
			want_bb[] =	{ 6, -12, -7.0/15.0, -0.3, 8, 4, -0.9, -0.4, 
						  2, -12, 23.0/15.0, -12.0/23.0, -4, 3, 2, 197.0/46.0 },
			want_pb[] =	{ 6, -12, -0.3, -7.0/15.0, 8, 4, -0.4, -0.9, 
						  2, -12, 4.7, -8.0/47.0, -4, 3, 2, 197.0/141.0 },
			want_kc[] =	{ 4, 1.5, -1, 0.25, 6, -1, 14, 11.0/14.0, 
						  1, -12, 0, -89.0/98.0, -4, 8, 10, 1291.0/49.0 },
			want_bc[] =	{ 4, 1.5, -1, 0.25, 6, -1, 14, 11.0/14.0, 
						  1, -12, 0, -89.0/98.0, -4, 8, 10, 1291.0/49.0 },
			want_pc[] =	{ 8, -1.5, 1, 0.75, 6, -12, 22, -45.0/109.0, 
						  1, -12, -4, 25.0/109.0, -4, 8, 10, 1291.0/218.0 };

	// Scratch buffers that are factored in place, one per input matrix
	double	work_a[16], work_b[16], work_c[16];

	// One row per test, listed in the order in which the tests are run
	struct {
		char		pivot;			// Pivoting code handed to the routine
		const char	*pivot_name;	// Strategy name shown in the test label
		double		*input;			// Matrix to factor
		double		*work;			// Scratch copy that gets overwritten
		double		*expected;		// Correct answer
	} cases[] = {
		{ 'K', "Bunch-Kaufman",			mat_a, work_a, want_ka },
		{ 'B', "bounded Bunch-Kaufman",	mat_a, work_a, want_ba },
		{ 'B', "bounded Bunch-Kaufman",	mat_b, work_b, want_bb },
		{ 'P', "Bunch-Parlett",			mat_b, work_b, want_pb },
		{ 'K', "Bunch-Kaufman",			mat_c, work_c, want_kc },
		{ 'B', "bounded Bunch-Kaufman",	mat_c, work_c, want_bc },
		{ 'P', "Bunch-Parlett",			mat_c, work_c, want_pc }
	};
	const int	num_cases = (int) (sizeof(cases) / sizeof(cases[0]));

	for ( int k = 0; k < num_cases; k++ ) {
		char	test_name[80];
		int		*piv, *ord;
		double	eps, err;

		sprintf( test_name, 
			"LDL' factorization, %s pivoting, outer product, %dx%d matrix",
			cases[k].pivot_name, n, n );
		piv = (int *) malloc( n*sizeof(int) );
		ord = (int *) malloc( n*sizeof(int) );
		// Factor a fresh copy of the input and compare with the correct answer
		copy_matrix( n, n, cases[k].input, cases[k].work );
		ldlt_outer_product( cases[k].pivot, n, piv, ord, cases[k].work );
		error_matrix_comp_frob( &eps, &err, n, n, 
			cases[k].expected, cases[k].work );
		test_assert( eps, tol, test_name );
		free( piv );
		free( ord );
	}
}

/*
 * Checks the SAXPY (jki indexing) implementation of symmetric indefinite
 * factorization (LDL') on random 14-by-14 symmetric matrices.  One run uses
 * Bunch-Kaufman (partial, 'K') pivoting and the next uses bounded 
 * Bunch-Kaufman (rook, 'B') pivoting.  Every run generates its own random 
 * matrix, and the result is verified against the outer product method applied
 * to a copy of that matrix with the same pivoting code.
 */
void test_ldlt_saxpy( void )
{
	const int		n = 14;			// n-by-n matrix A
	const double	alpha = 10.0,	// Scaling factor for random matrix
					tol = 1e-12;	// Error tolerance
	const char		pivot[] = { 'K', 'B' };
	const char		*pivot_name[] = { "Bunch-Kaufman", 
									  "bounded Bunch-Kaufman" };
	const int		num_pivots = (int) (sizeof(pivot) / sizeof(pivot[0]));

	for ( int k = 0; k < num_pivots; k++ ) {
		char	test_name[80];
		int		*piv, *ord;
		double	eps, err;
		double	*saxpy_fact, *ref_fact;

		sprintf( test_name, 
			"LDL' factorization, %s pivoting, SAXPY, %dx%d matrix", 
			pivot_name[k], n, n );
		saxpy_fact = (double *) malloc( n*n*sizeof(double) );
		ref_fact = (double *) malloc( n*n*sizeof(double) );
		piv = (int *) malloc( n*sizeof(int) );
		ord = (int *) malloc( n*sizeof(int) );
		create_random_symmetric( alpha, n, saxpy_fact );
		copy_matrix( n, n, saxpy_fact, ref_fact );
		// Factor both copies in place; piv and ord are reused by the second
		// call, so they end up holding the outer product method's output
		ldlt_saxpy( pivot[k], n, piv, ord, saxpy_fact );
		ldlt_outer_product( pivot[k], n, piv, ord, ref_fact );
		error_matrix_comp_frob( &eps, &err, n, n, ref_fact, saxpy_fact );
		test_assert( eps, tol, test_name );
		free( saxpy_fact );
		free( ref_fact );
		free( piv );
		free( ord );
	}
}

/*
 * Checks simple blocking for symmetric indefinite factorization (LDL') on
 * random n-by-n symmetric matrices of order 14, 64 and 114.  The three sizes
 * are run for Bunch-Kaufman (partial, 'K'), then bounded Bunch-Kaufman (rook,
 * 'B'), then Bunch-Parlett (complete, 'P') pivoting.  Every run draws a new
 * random matrix and is verified against the outer product method applied to a
 * copy of it.
 */
void test_ldlt_block( void )
{
	enum { NUM_PIVOTS = 3, NUM_SIZES = 3 };

	const int		mat_size[NUM_SIZES] = { 14, 64, 114 };
	const double	alpha = 10.0,	// Scaling factor for random matrix
					tol = 1e-12;	// Error tolerance
	const char		pivot[NUM_PIVOTS] = { 'K', 'B', 'P' };

	// test_name[p][s] labels the run with pivoting code pivot[p] on a matrix
	// of order mat_size[s]
	const char	*test_name[NUM_PIVOTS][NUM_SIZES] = {
		{
			"LDL' factorization, Bunch-Kaufman pivoting, simple blocking --\n"
			"matrix dimension less than block dimension",
			"LDL' factorization, Bunch-Kaufman pivoting, simple blocking --\n"
			"matrix dimension a multiple of block dimension",
			"LDL' factorization, Bunch-Kaufman pivoting, simple blocking --\n"
			"matrix dimension not a multiple of block dimension"
		},
		{
			"LDL' factorization, bounded Bunch-Kaufman pivoting, simple blocking --\n"
			"matrix dimension less than block dimension",
			"LDL' factorization, bounded Bunch-Kaufman pivoting, simple blocking --\n"
			"matrix dimension a multiple of block dimension",
			"LDL' factorization, bounded Bunch-Kaufman pivoting, simple blocking --\n"
			"matrix dimension not a multiple of block dimension"
		},
		{
			"LDL' factorization, Bunch-Parlett pivoting, simple blocking --\n"
			"matrix dimension less than block dimension",
			"LDL' factorization, Bunch-Parlett pivoting, simple blocking --\n"
			"matrix dimension a multiple of block dimension",
			"LDL' factorization, Bunch-Parlett pivoting, simple blocking --\n"
			"matrix dimension not a multiple of block dimension"
		}
	};

	for ( int p = 0; p < NUM_PIVOTS; p++ ) {
		for ( int s = 0; s < NUM_SIZES; s++ ) {
			const int	n = mat_size[s];
			int			*piv, *ord;
			double		eps, err;
			double		*block_fact, *ref_fact;

			block_fact = (double *) malloc( n*n*sizeof(double) );
			ref_fact = (double *) malloc( n*n*sizeof(double) );
			piv = (int *) malloc( n*sizeof(int) );
			ord = (int *) malloc( n*sizeof(int) );
			create_random_symmetric( alpha, n, block_fact );
			copy_matrix( n, n, block_fact, ref_fact );
			// Perform LDL' factorization with both methods, then compare
			ldlt_block( pivot[p], n, piv, ord, block_fact );
			ldlt_outer_product( pivot[p], n, piv, ord, ref_fact );
			error_matrix_comp_frob( &eps, &err, n, n, ref_fact, block_fact );
			test_assert( eps, tol, test_name[p][s] );
			free( block_fact );
			free( ref_fact );
			free( piv );
			free( ord );
		}
	}
}

/*
 * Runs one comparison for test_ldlt_block_blas(): generates a random n-by-n
 * symmetric matrix scaled by alpha, factors it with ldlt_block_blas() and a 
 * copy of it with ldlt_outer_product() using the same pivoting code, and 
 * reports the comparison through test_assert() under the given label.
 */
static void ldlt_block_blas_versus_outer_product( char pivot, int n, 
	double alpha, double tol, const char *label )
{
	double	eps, err;
	double	*blas_fact = (double *) malloc( n*n*sizeof(double) );
	double	*ref_fact = (double *) malloc( n*n*sizeof(double) );
	int		*piv = (int *) malloc( n*sizeof(int) );
	int		*ord = (int *) malloc( n*sizeof(int) );

	create_random_symmetric( alpha, n, blas_fact );
	copy_matrix( n, n, blas_fact, ref_fact );
	// Perform LDL' factorization, compare result with outer product solution
	ldlt_block_blas( pivot, n, piv, ord, blas_fact );
	ldlt_outer_product( pivot, n, piv, ord, ref_fact );
	error_matrix_comp_frob( &eps, &err, n, n, ref_fact, blas_fact );
	test_assert( eps, tol, label );
	free( blas_fact );
	free( ref_fact );
	free( piv );
	free( ord );
}

/*
 * Checks the BLAS-based simple blocking implementation of symmetric indefinite
 * factorization (LDL') on random n-by-n symmetric matrices of order 27, 96 and
 * 133.  All sizes are run with Bunch-Kaufman (partial, 'K') pivoting first and
 * with bounded Bunch-Kaufman (rook, 'B') pivoting afterwards.  The results are
 * verified against those produced by the outer product method.
 */
void test_ldlt_block_blas( void )
{
	const int		mat_size[] = { 27, 96, 133 };
	const int		num_sizes = (int) (sizeof(mat_size) / sizeof(mat_size[0]));
	const double	alpha = 10.0,	// Scaling factor for random matrix
					tol = 1e-12;	// Error tolerance

	const char	*kaufman_name[] = {
		"LDL' factorization, Bunch-Kaufman pivoting, BLAS routines --\n"
		"matrix dimension less than block dimension",
		"LDL' factorization, Bunch-Kaufman pivoting, BLAS routines --\n"
		"matrix dimension a multiple of block dimension",
		"LDL' factorization, Bunch-Kaufman pivoting, BLAS routines --\n"
		"matrix dimension not a multiple of block dimension"
	};
	const char	*bounded_name[] = {
		"LDL' factorization, bounded Bunch-Kaufman pivoting, BLAS routines --\n"
		"matrix dimension less than block dimension",
		"LDL' factorization, bounded Bunch-Kaufman pivoting, BLAS routines --\n"
		"matrix dimension a multiple of block dimension",
		"LDL' factorization, bounded Bunch-Kaufman pivoting, BLAS routines --\n"
		"matrix dimension not a multiple of block dimension"
	};

	// Bunch-Kaufman pivoting
	for ( int i = 0; i < num_sizes; i++ ) {
		ldlt_block_blas_versus_outer_product( 'K', mat_size[i], alpha, tol, 
			kaufman_name[i] );
	}

	// Bounded Bunch-Kaufman pivoting
	for ( int i = 0; i < num_sizes; i++ ) {
		ldlt_block_blas_versus_outer_product( 'B', mat_size[i], alpha, tol, 
			bounded_name[i] );
	}
}

/*
 * Checks whether the wrapper function properly invokes LAPACK routine DSYTRF, 
 * which computes the factorization of a real symmetric indefinite matrix.
 * A random 18-by-18 symmetric matrix is factored by ldlt_lapack() and a copy
 * of it by ldlt_outer_product(), both with pivoting code 'K'.  Because the two
 * routines lay their factors out differently (see the note in the body), the
 * results are compared through the sum of the absolute values of all entries
 * rather than entry by entry.
 */
void test_ldlt_lapack( void )
{
	const double	tol = 1e-12,	// Error tolerance
					alpha = 10.0;	// Scaling factor for random matrix
	const int		n = 18;			// n-by-n matrix A

	double	eps, err, normA, normLD;
	char	test_name[80];
	int		*piv, *ord;
	double	*A, *LD;
	
	// For n = 18 this label occupies all 80 bytes of test_name, terminator 
	// included, so bound the write in case n or the text ever grows.
	snprintf( test_name, sizeof(test_name),
"LDL' factorization, Bunch-Kaufman pivoting, LAPACK routine DSYTRF, %dx%d matrix", 
		n, n );
	A = (double *) malloc( n*n*sizeof(double) );
	LD = (double *) malloc( n*n*sizeof(double) );
	piv = (int *) malloc( n*sizeof(int) );	
	ord = (int *) malloc( n*sizeof(int) );	
	create_random_symmetric( alpha, n, A );
	copy_matrix( n, n, A, LD );
	// Perform LDL' factorization with LAPACK routine DSYTRF,
	// and compare result with outer product solution
	ldlt_lapack( 'K', n, piv, ord, A );
	ldlt_outer_product( 'K', n, piv, ord, LD );

	// The factorization produced by LAPACK routine DSYTRF takes the form 
	// A = (P*L)*D*(P*L)', whereas the factorization produced by
	// ldlt_outer_product() takes the form P*A*P' = L*D*L'.  Therefore, verify 
	// that DSYTRF is called correctly by comparing the norms of factors 
	// (matrices) computed by DSYTRF and ldlt_outer_product(). 
	normA = 0.0;
	normLD = 0.0;
	for ( int j = 0; j < n; j++ ) {
		for ( int i = 0; i < n; i++ ) {
			normA += fabs( A[i + j*n] );
			normLD += fabs( LD[i + j*n] );
		}
	}
	// Use fabs(), not abs(): the C abs() takes an int, so the difference
	// would be truncated toward zero and any discrepancy smaller than 1.0
	// would be reported as no error at all.
	err = fabs( normA - normLD );
	eps = err / normA;
	test_assert( eps, tol, test_name );
	free( A );
	free( LD );
	free( piv );
	free( ord );
}

/******************************************************************************/

/*
 * Checks the outer product method (kji indexing) for the modified Cholesky
 * algorithm proposed by Gill, Murray & Wright on a hand-worked 4-by-4 
 * symmetric matrix.  The input matrix is factored in place with option code
 * 'D' and then compared with the stored correct unit lower triangular and 
 * modified diagonal factors.
 */
void test_chol_gmw_outer_product( void )
{
	const int		n = 4;				// Order of the test matrix
	const double	tol = 1e-12;		// Error tolerance

	double	input[] =		{ -4, 3, 8, 2, 8, 5, 6, 1, 
							  2, 6, -9, 12, 3, 1, 5, 8 };
	double	expected[] =	{ 16, 0.5, 0.75, 0.375, 8, 8, -0.5, 0,
							  2, 6, 3, -7.0/6.0, 3, 1, 5, 4.0/3.0 };
	char	label[80];
	double	eps, err;
	int		*piv = (int *) malloc( n*sizeof(int) );
	int		*ord = (int *) malloc( n*sizeof(int) );

	sprintf( label, 
"Modified Cholesky, Gill-Murray-Wright, outer product, %dx%d matrix", n, n );

	// Factor the input in place, then compare it with the correct answer
	chol_gmw_outer_product( 'D', n, piv, ord, input );
	error_matrix_comp_frob( &eps, &err, n, n, expected, input );
	test_assert( eps, tol, label );

	free( piv );
	free( ord );
}

/*
 * Checks the SAXPY (jki indexing) implementation of the modified Cholesky 
 * algorithm proposed by Gill, Murray & Wright on a random 22-by-22 symmetric
 * matrix.  The matrix and a copy of it are factored in place by 
 * chol_gmw_saxpy() and chol_gmw_outer_product() respectively, both with option
 * code 'D', and the two results are compared.
 */
void test_chol_gmw_saxpy( void )
{
	const double	alpha = 10.0,	// Scaling factor for random matrix
					tol = 1e-12;	// Error tolerance
	const int		n = 22;			// n-by-n matrix A

	char	label[80];
	double	eps, err;

	double	*saxpy_fact = (double *) malloc( n*n*sizeof(double) );
	double	*ref_fact = (double *) malloc( n*n*sizeof(double) );
	int		*piv = (int *) malloc( n*sizeof(int) );
	int		*ord = (int *) malloc( n*sizeof(int) );

	sprintf( label, 
		"Modified Cholesky, Gill-Murray-Wright, SAXPY, %dx%d matrix", n, n );

	create_random_symmetric( alpha, n, saxpy_fact );
	copy_matrix( n, n, saxpy_fact, ref_fact );

	// Implementation under test first, outer product reference second
	chol_gmw_saxpy( 'D', n, piv, ord, saxpy_fact );
	chol_gmw_outer_product( 'D', n, piv, ord, ref_fact );

	error_matrix_comp_frob( &eps, &err, n, n, ref_fact, saxpy_fact );
	test_assert( eps, tol, label );

	free( saxpy_fact );
	free( ref_fact );
	free( piv );
	free( ord );
}

/*
 * Checks simple blocking for the modified Cholesky algorithm proposed by Gill,
 * Murray & Wright on random n-by-n symmetric matrices of order 25, 96 and 107.
 * Each matrix and a copy of it are factored in place by chol_gmw_block() and
 * chol_gmw_outer_product() respectively (option code 'D'), and the two results
 * are compared.
 */
void test_chol_gmw_block( void )
{
	const double	alpha = 10.0,	// Scaling factor for random matrix
					tol = 1e-12;	// Error tolerance

	// Matrix order paired with the label reported for that run
	const struct {
		int			n;
		const char	*label;
	} cases[] = {
		{ 25,	"Modified Cholesky, Gill-Murray-Wright, simple blocking --\n"
				"matrix dimension less than block dimension" },
		{ 96,	"Modified Cholesky, Gill-Murray-Wright, simple blocking --\n"
				"matrix dimension a multiple of block dimension" },
		{ 107,	"Modified Cholesky, Gill-Murray-Wright, simple blocking --\n"
				"matrix dimension not a multiple of block dimension" }
	};
	const int	num_cases = (int) (sizeof(cases) / sizeof(cases[0]));

	for ( int k = 0; k < num_cases; k++ ) {
		const int	n = cases[k].n;
		double		eps, err;
		double		*block_fact = (double *) malloc( n*n*sizeof(double) );
		double		*ref_fact = (double *) malloc( n*n*sizeof(double) );
		int			*piv = (int *) malloc( n*sizeof(int) );
		int			*ord = (int *) malloc( n*sizeof(int) );

		create_random_symmetric( alpha, n, block_fact );
		copy_matrix( n, n, block_fact, ref_fact );
		// Perform modified Cholesky factorization with both methods,
		// and compare the blocked result with the outer product solution
		chol_gmw_block( 'D', n, piv, ord, block_fact );
		chol_gmw_outer_product( 'D', n, piv, ord, ref_fact );
		error_matrix_comp_frob( &eps, &err, n, n, ref_fact, block_fact );
		test_assert( eps, tol, cases[k].label );
		free( block_fact );
		free( ref_fact );
		free( piv );
		free( ord );
	}
}

/*
 * Checks the BLAS-based simple blocking implementation of the modified 
 * Cholesky algorithm proposed by Gill, Murray & Wright on random n-by-n 
 * symmetric matrices of order 25, 96 and 107.  The results are verified 
 * against those produced by the outer product method on a copy of each matrix;
 * both routines are called with option code 'D'.
 */
void test_chol_gmw_block_blas( void )
{
	const double	alpha = 10.0,	// Scaling factor for random matrix
					tol = 1e-12;	// Error tolerance
	const int		mat_size[] = { 25, 96, 107 };
	const int		num_sizes = (int) (sizeof(mat_size) / sizeof(mat_size[0]));

	// Labels, in the same order as mat_size
	const char	*test_name[] = {
		"Modified Cholesky, Gill-Murray-Wright, BLAS routines --\n"
		"matrix dimension less than block dimension",
		"Modified Cholesky, Gill-Murray-Wright, BLAS routines --\n"
		"matrix dimension a multiple of block dimension",
		"Modified Cholesky, Gill-Murray-Wright, BLAS routines --\n"
		"matrix dimension not a multiple of block dimension"
	};

	int	idx = 0;
	while ( idx < num_sizes ) {
		const int	n = mat_size[idx];
		int			*piv, *ord;
		double		eps, err;
		double		*blas_fact, *ref_fact;

		blas_fact = (double *) malloc( n*n*sizeof(double) );
		ref_fact = (double *) malloc( n*n*sizeof(double) );
		piv = (int *) malloc( n*sizeof(int) );
		ord = (int *) malloc( n*sizeof(int) );
		create_random_symmetric( alpha, n, blas_fact );
		copy_matrix( n, n, blas_fact, ref_fact );
		// Factor both copies in place, then compare the BLAS-based result
		// with the outer product solution
		chol_gmw_block_blas( 'D', n, piv, ord, blas_fact );
		chol_gmw_outer_product( 'D', n, piv, ord, ref_fact );
		error_matrix_comp_frob( &eps, &err, n, n, ref_fact, blas_fact );
		test_assert( eps, tol, test_name[idx] );
		free( blas_fact );
		free( ref_fact );
		free( piv );
		free( ord );
		idx++;
	}
}

/*
 * Checks the outer product method (kji indexing) for the modified Cholesky 
 * algorithm proposed by Cheng & Higham on a hand-worked 4-by-4 symmetric 
 * matrix.  The expected answer starts from the symmetric indefinite 
 * factorization of the matrix; four of its entries are then overwritten with
 * the modified values computed below before the comparison is made.  This test
 * uses a looser tolerance (1e-06) than the others in this file.
 */
void test_chol_ch_outer_product( void )
{
	const int		n = 4;				// Order of the test matrix
	const double	tol = 1e-06;		// Error tolerance

	char	label[80];
	int		*piv, *ord;
	double	eps, err;
	double	delta, root29, denom_plus, denom_minus;
	double	input[] =		{ 1, 5, 7, 8, 5, 4, 12, 3, 
							  7, 12, 10, 9, 8, 3, 9, 6 };
	double	expected[] =	{ 6, 0.5, 1.5, 4.0/3.0, 5, 2.5, 7.5, -34.0/65.0,
							  7, 12, -3.5, 4.0/13.0, 8, 3, 9, -1483.0/195.0 };

	sprintf( label, 
		"Modified Cholesky, Cheng-Higham, outer product, %dx%d matrix", n, n );
	piv = (int *) malloc( n*sizeof(int) );
	ord = (int *) malloc( n*sizeof(int) );

	// The expected array has been initialized with results from symmetric 
	// indefinite factorization, P*A*P = L*D*L'.  Update diagonal block D so 
	// that (A + dA) is positive definite i.e., P*(A+dA)*P = L*D*L'.
	delta = 38.0 * sqrt(0.5*DBL_EPSILON);
	root29 = sqrt(29.0);
	denom_plus = 58.0 + 4.0 * root29;
	denom_minus = 58.0 - 4.0 * root29;
	expected[5] = 12.5 * (3.0*root29 - 1.0) / denom_minus 
		+ 25.0 * delta / denom_plus;
	expected[6] = 2.5 * (89.0 - 7.0*root29) / denom_minus 
		+ 5.0 * delta * (2.0 + root29) / denom_plus;
	expected[10] = 0.5 * (103.0*root29 - 381.0) / denom_minus 
		+ delta * (33.0 + 4.0*root29) / denom_plus;
	expected[15] = delta;

	// Factor the input in place, then compare it with the correct answer
	chol_ch_outer_product( 'K', n, piv, ord, input );
	error_matrix_comp_frob( &eps, &err, n, n, expected, input );
	test_assert( eps, tol, label );

	free( piv );
	free( ord );
}

/*
 * Checks the SAXPY (jki indexing) implementation of the modified Cholesky 
 * algorithm proposed by Cheng & Higham on a random 20-by-20 symmetric matrix.
 * The matrix and a copy of it are factored in place by chol_ch_saxpy() and
 * chol_ch_outer_product() respectively, both with pivoting code 'K', and the
 * two results are compared.
 */
void test_chol_ch_saxpy( void )
{
	const double	alpha = 10.0,	// Scaling factor for random matrix
					tol = 1e-12;	// Error tolerance
	const int		n = 20;			// n-by-n matrix A

	char	label[80];
	double	eps, err;

	sprintf( label, 
		"Modified Cholesky, Cheng-Higham, SAXPY, %dx%d matrix", n, n );

	double	*saxpy_fact = (double *) malloc( n*n*sizeof(double) );
	double	*ref_fact = (double *) malloc( n*n*sizeof(double) );
	int		*piv = (int *) malloc( n*sizeof(int) );
	int		*ord = (int *) malloc( n*sizeof(int) );

	create_random_symmetric( alpha, n, saxpy_fact );
	copy_matrix( n, n, saxpy_fact, ref_fact );

	// Implementation under test first, outer product reference second
	chol_ch_saxpy( 'K', n, piv, ord, saxpy_fact );
	chol_ch_outer_product( 'K', n, piv, ord, ref_fact );

	error_matrix_comp_frob( &eps, &err, n, n, ref_fact, saxpy_fact );
	test_assert( eps, tol, label );

	free( saxpy_fact );
	free( ref_fact );
	free( piv );
	free( ord );
}

/*
 * Checks simple blocking for the modified Cholesky algorithm proposed by Cheng
 * & Higham on random n-by-n symmetric matrices of order 14, 64 and 87.  Each
 * matrix and a copy of it are factored in place by chol_ch_block() and 
 * chol_ch_outer_product() respectively (pivoting code 'K'), and the two 
 * results are compared.
 */
void test_chol_ch_block( void )
{
	const double	alpha = 10.0,	// Scaling factor for random matrix
					tol = 1e-12;	// Error tolerance

	// Matrix order paired with the label reported for that run
	const struct {
		int			n;
		const char	*label;
	} cases[] = {
		{ 14,	"Modified Cholesky, Cheng-Higham, simple blocking --\n"
				"matrix dimension less than block dimension" },
		{ 64,	"Modified Cholesky, Cheng-Higham, simple blocking --\n"
				"matrix dimension a multiple of block dimension" },
		{ 87,	"Modified Cholesky, Cheng-Higham, simple blocking --\n"
				"matrix dimension not a multiple of block dimension" }
	};
	const int	num_cases = (int) (sizeof(cases) / sizeof(cases[0]));

	// Bunch Kaufman pivoting
	for ( int k = 0; k < num_cases; k++ ) {
		const int	n = cases[k].n;
		double		eps, err;
		double		*block_fact = (double *) malloc( n*n*sizeof(double) );
		double		*ref_fact = (double *) malloc( n*n*sizeof(double) );
		int			*piv = (int *) malloc( n*sizeof(int) );
		int			*ord = (int *) malloc( n*sizeof(int) );

		create_random_symmetric( alpha, n, block_fact );
		copy_matrix( n, n, block_fact, ref_fact );
		// Perform modified Cholesky factorization with both methods,
		// and compare the blocked result with the outer product solution
		chol_ch_block( 'K', n, piv, ord, block_fact );
		chol_ch_outer_product( 'K', n, piv, ord, ref_fact );
		error_matrix_comp_frob( &eps, &err, n, n, ref_fact, block_fact );
		test_assert( eps, tol, cases[k].label );
		free( block_fact );
		free( ref_fact );
		free( piv );
		free( ord );
	}
}

/*
 * Runs one comparison for test_chol_ch_block_blas(): generates a random n-by-n
 * symmetric matrix scaled by alpha, factors it with chol_ch_block_blas() and a
 * copy of it with chol_ch_outer_product() (both with pivoting code 'K'), and
 * reports the comparison through test_assert() under the given label.
 */
static void chol_ch_block_blas_versus_outer_product( int n, double alpha, 
	double tol, const char *label )
{
	double	eps, err;
	double	*blas_fact = (double *) malloc( n*n*sizeof(double) );
	double	*ref_fact = (double *) malloc( n*n*sizeof(double) );
	int		*piv = (int *) malloc( n*sizeof(int) );
	int		*ord = (int *) malloc( n*sizeof(int) );

	create_random_symmetric( alpha, n, blas_fact );
	copy_matrix( n, n, blas_fact, ref_fact );
	// Perform modified Cholesky factorization,
	// and compare result with outer product solution
	chol_ch_block_blas( 'K', n, piv, ord, blas_fact );
	chol_ch_outer_product( 'K', n, piv, ord, ref_fact );
	error_matrix_comp_frob( &eps, &err, n, n, ref_fact, blas_fact );
	test_assert( eps, tol, label );
	free( blas_fact );
	free( ref_fact );
	free( piv );
	free( ord );
}

/*
 * Checks the BLAS-based simple blocking implementation of the modified 
 * Cholesky algorithm proposed by Cheng & Higham on random n-by-n symmetric 
 * matrices of order 14, 64 and 87.  The results are verified against those 
 * produced by the outer product method.
 */
void test_chol_ch_block_blas( void )
{
	const int		mat_size[] = { 14, 64, 87 };
	const int		num_sizes = (int) (sizeof(mat_size) / sizeof(mat_size[0]));
	const double	alpha = 10.0,	// Scaling factor for random matrix
					tol = 1e-12;	// Error tolerance

	// Labels, in the same order as mat_size
	const char	*test_name[] = {
		"Modified Cholesky, Cheng-Higham, BLAS routines --\n"
		"matrix dimension less than block dimension",
		"Modified Cholesky, Cheng-Higham, BLAS routines --\n"
		"matrix dimension a multiple of block dimension",
		"Modified Cholesky, Cheng-Higham, BLAS routines --\n"
		"matrix dimension not a multiple of block dimension"
	};

	// Bunch Kaufman pivoting
	for ( int i = 0; i < num_sizes; i++ ) {
		chol_ch_block_blas_versus_outer_product( mat_size[i], alpha, tol, 
			test_name[i] );
	}
}

