#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <float.h>
#include "ss.h"

/*
 * matalloc: allocates space for m-by-n matrix, returns pointer if
 *  successful, NULL if not.  The matrix elements are not initialized.
 *  The matrix elements are contiguous, since space for all of the rows
 *  is obtained with one malloc() call.  a[-1] (as well as a[0]) is set
 *  to point to the storage so that rows may be interchanged by swapping
 *  row pointers, and matfree() can still reference the storage through
 *  a[-1] even if a[0] was changed.
 */
double **matalloc( int m, int n)
{
  double **a;

  if( m < 1 || n < 1)
    return NULL;
 /*
  * first, get space for m+1 pointers to double
  */
  if( (a = malloc( (m+1) * sizeof(double *)) ) == NULL)
    return NULL;
 /*
  * get space for all of the rows, m*n doubles
  */
  if( (a[0] = a[1] = malloc( m*n*sizeof(double)) ) == NULL)
  {
    free(a);
    return NULL;
  }
 /*
  * set the pointers a[1]..a[m-1] to point to their row
  */
  ++a;
  for( int i = 1; i < m; i++)
    a[i] = a[i-1] + n;

  return a;
}

void matfree( double **a)
{
  if( a)
  {
    free(a[-1]); // free the matrix data
    free(a-1);   // free the row pointers
  }
}

//---
// print matrix
//
void matprint( const char *msg, const char *fmt, double **a, int m, int n)
{
  fprintf( out, "%s:\n", msg);
  for( int i = 0; i < m; ++i)
  {
    for( int j = 0; j < n; ++j)
    {
      putc( ' ', out);
      fprintf( out, fmt, a[i][j]);
    }
    putc( '\n', out);
  }
}

//---
// print vector
//
void vecprint( const char *msg, const char *fmt, double *a, int n)
{
  fprintf( out, "%s:\n", msg);
  for( int i = 0; i < n; ++i)
  {
    putc( ' ', out);
    fprintf( out, fmt, a[i]);
  }
  putc( '\n', out);
}

//---
// Singular Value Decomposition using orthogonalization by plane rotations.
// Algorithm #1 from the book by J. C. Nash, "Compact Numerical Methods
// for Computers: Linear Algebra and Function Minimisation," Wiley 1979.
//
// Adapted to orthogonalize the rows instead of columns for cache efficiency.

/* get sum-of-squares of vector */

static double get_z( double *u, int m)
{
    double r = 0.0;

    for( int i = 0; i < m; i++)
	r += u[i]*u[i];

    return r;
}

/* swap elements j & k of z */

static void swap( double *z, int j, int k)
{
  double t = z[j]; z[j] = z[k]; z[k] = t;
}

/* swap rows j & k of u */

static void swap_rows( double **u, int j, int k)
{
  double *t = u[j]; u[j] = u[k]; u[k] = t;
}

/* plane rotation on rows j & k of u */

static void rotate( double *uj, double *uk, double c, double s, int m)
{
  double t;

  for( int i = 0; i < m; i++)
  {
    t = uj[i];
    uj[i] = t*c + uk[i]*s;
    uk[i] = uk[i]*c - t*s;
  }
}

// SVD - given u (allocated and initialized), z (allocated),
// v (allocated, or NULL if not needed), and eps (DBL_EPSILON,
// or FLT_EPSILON if less precision needed)
//
int svd( double **u, double *z, double **v, int m, int n, double eps)
{
  int rank = 0, i, j, k, count;
  double tol, p, q, r, c, s, d;

  tol = n * eps; // tolerance for zero
  tol *= tol;

  if( v) // V <- I
  {
    for( i = 0; i < n; i++)
      for( j = 0; j < n; j++)
        v[i][j] = 0;

    for( i = 0; i < n; i++)
      v[i][i] = 1;
  }

  for( i = 0; i < n; i++)
    z[i] = get_z( u[i], m); // store squared row norms in z

  do
  {
    count = (n*(n-1)) / 2;

    for( j = 0; j < n-1; j++)
      for( k = j+1; k < n; k++) // make rows j & k of u orthogonal
      {
        q = z[j]; r = z[k];

        if( q < r) // reverse order
        {
	  swap_rows( u, j, k);
	  if( v) swap_rows( v, j, k);
	  swap( z, j, k);
        }
        else if(  q*r < tol) --count; // too small
	else
        {
          for( p = 0.0, i = 0; i < m; i++)
	    p += u[j][i] * u[k][i];

	  if(  (p*p)/(q*r) < tol) --count; // orthogonal
          else // rotate
	  {
            q -= r;
            d = sqrt(4.0*p*p+q*q);
            c = sqrt((d+q)/(2.0*d));
            s = p/(d*c);
            if( v) rotate( v[j], v[k], c, s, n);
            rotate( u[j], u[k], c, s, m);
            z[j] = get_z( u[j], m);
            z[k] = get_z( u[k], m);
          }
        }
      }
    }
    while( count != 0);

  // Note that the rows of U are left unnormalized
  // and z contains the squares of the singular values

  /* find rank */

  if( z[0] == 0.0) return 0;

  tol = 10.0 * m * eps; // relative tolerance

  tol *= tol; // tolerance for squares of the singular values

  for( j = 0; j < n; j++)
    if( z[j] >= tol*z[0]) ++rank; else break;

  return rank;
}