EVSL  1.1.0
EigenValues Slicing Library
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros
chebsi.c
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <float.h>
4 #include <math.h>
5 #include "def.h"
6 #include "blaslapack.h"
7 #include "struct.h"
8 #include "internal_proto.h"
9 
10 #define NBUF 2
11 
42 int ChebSI(int nev, double *intv, int maxit,
43  double tol, double *vinit, polparams *pol, int *nevo,
44  double **lamo, double **Yo, double **reso, FILE *fstats) {
45  /*-------------------- for stats */
46  double tm, tall=0.0, tmv=0.0;
47  int icol, nmv = 0;
48  double tolP = tol*0.01;
49  tall = evsl_timer();
50  // int max_deg = pol->max_deg, min_deg = pol->min_deg;
51  /*------------------- size of A */
52  int n = evsldata.n;
53  /*-------------------- some constants frequently used */
54  char cT = 'T';
55  char cN = 'N';
56  int one = 1;
57  int nev2 = nev*nev;
58  int nnev = n*nev;
59  double done=1.0,dmone=-1.0,dzero=0.0;
60  if (check_intv(intv, fstats) < 0) {
61  *nevo = 0;
62  return 0;
63  }
64  double aa = intv[0];
65  double bb = intv[1];
66  /*-------------------- frequency of convergence checks (check every cvcheck iterations) */
67  int cvcheck = 5;
68  /*------------------- misc */
69  int i,j;
70  // this code(step 1)is the same as in cheblan; to be moved
71  // to a separate file for reuse
72  /*-------------------- unpack some values from pol */
73  int deg =pol->deg;
74  double gamB=pol->gam, bar=pol->bar;
75  fprintf(fstats, "Cheb Poly- deg = %d, gam = %.15e, bar: %.15e\n",
76  deg, gamB, bar);
77  /*-------------------- gamB must be within [-1, 1] */
78  if (gamB > 1.0 || gamB < -1.0) {
79  fprintf(stdout, "gamB error %.15e\n", gamB);
80  return -1;
81  }
82  /*-------------------- filtered subspace iteration */
83  fprintf(fstats, "Step 2: ChebSI, block size = %d\n", nev);
84  /*-------------------- it = number of the current iteration */
85  int it = 0;
86  /*-------------------- memory for projected matrix T=V'p(A)V and its eigenpairs (evecT,evalT) */
87  double *T, *evecT, *evalT;
88  Malloc(T, nev2, double);
89  Malloc(evecT, nev2, double);
90  Malloc(evalT, nev, double);
92  // double *Y, *Lam;
93  // Malloc(Y, nev2, double);
94  // Malloc(Lam, nev2, double);
95 
96  /*-------------------- memory for block of approx. eigenvectors/evals and their products with p(A)*/
97  double *V, *Lam, *PV, *R, *res;
98  double *V_out, *Lam_out, *res_out;
99  Malloc(V, nnev, double);
100  Malloc(PV, nnev, double);
101  Malloc(Lam, nev, double);
102  Malloc(R, nnev, double); // block of residuals
103  Malloc(res, nev, double); // residual norms w.r.t. A
104 
105  /*-------------------- number of locked, and unconverged (active) pairs */
106  int nlock, nact, nlock_ab;
107  int *act_idx, *lock_idx; // arrays used to store idices of active and locked columns of V
108  Malloc(act_idx, nev, int);
109  Malloc(lock_idx, nev, int);
110  nlock = 0;
111  nact = nev;
112  nlock_ab = 0; // number of locked eigenvalues that are in [a,b]
113 
114  /*-------------------- alloc some work space */
115  double *work, *buf;
116  int work_size = 3*n;
117  Malloc(work, work_size, double);
118  Malloc(buf, nnev, double); // buffer for DGEMM calls
119 
120  /*-------------------- orthonormalize initial V and compute PV = P(A)V */
121  orth(vinit,n,nev,V,work);
122 
123  tm = evsl_timer();
124  for (icol = 0; icol<nev; icol++) {
125  ChebAv(pol, V+icol*n, PV+icol*n, work);
126  }
127  tmv += evsl_timer() - tm;
128  nmv += deg*nev;
129 
130  int find_more = 1;
131  /*-------------------- Filtered Subspace itertion: MAIN LOOP */
132  while ( (it < maxit) && (find_more) ) {
133 
134  /*--- V <- orth(PV) */
135  int nnlock = n*nlock; // pointer to the first active column
136  int nnact = n*nact;
137  DCOPY(&nnact, PV+nnlock, &one, V+nnlock, &one);
138  /*--- Orthogonalize active columns V(:,nlock:nev-1) against locked columns V(:,0:nlocked-1)*/
139  if ( nlock>0 ) {
140  DGEMM(&cT, &cN, &nlock, &nact, &n, &done, V, &n, V+nnlock, &n, &dzero, T, &nev);
141  DGEMM(&cN, &cN, &n, &nact, &nlock, &dmone, V, &n, T, &nev, &done, V+nnlock, &n);
142  }
143  /*--- Orthogonormalize columns of V(:,nlock:nev-1) */
144  orth(V+nnlock, n, nact, buf, work);
145  DCOPY(&nnact, buf, &one, V+nnlock, &one);
146  /*--- PV <- P(A)*V */
147  tm = evsl_timer();
148  //polyfilt(A, deg, mu, dd, cc, V+nnlock, nact, PV+nnlock, work);
149  for (icol = nlock; icol<nlock+nact; icol++)
150  ChebAv(pol, V+icol*n, PV+icol*n, work);
151  tmv += evsl_timer() - tm;
152  nmv += deg*nact;
153  // orthogonalize PVact against Vlocked
154  if ( nlock>0 ) {
155  DGEMM(&cT, &cN, &nlock, &nact, &n, &done, V, &n, PV+nnlock, &n, &dzero, T, &nev);
156  DGEMM(&cN, &cN, &n, &nact, &nlock, &dmone, V, &n, T, &nev, &done, PV+nnlock, &n);
157  }
158  // end orthogonalize PVact against Vlocked
159  /*--- Lock converged pairs */
160  if ( ((it+1)%cvcheck == 0) || ((it+1)==maxit) ) {
162  /*--- T = V'p(A)V; [evecT,evalT]=eig(T); */
163  DGEMM(&cT,&cN,&nact,&nact,&n,&done,V+nnlock,&n,PV+nnlock,&n,&dzero,T,&nev);
164  SymEigenSolver(nact, T, nev, evecT, nev, evalT+nlock);
165  /*--- V <- V*evecT; p(A)V <- PV*evecT (update only active columns) */
166  DGEMM(&cN,&cN,&n,&nact,&nact,&done,V+nnlock,&n,evecT,&nev,&dzero,buf,&n);
167  DCOPY(&nnact, buf, &one, V+nnlock, &one);
168  DGEMM(&cN,&cN,&n,&nact,&nact,&done,PV+nnlock,&n,evecT,&nev,&dzero,buf,&n);
169  DCOPY(&nnact, buf, &one, PV+nnlock, &one);
170  /*--- Compute active residuals R = PV - V*diag(evalT) */
171  for (i=nlock; i<nev; i++) {
172  double t = -evalT[i];
173  //DSCAL(&n, &t, R+i*n, &one);
174  for (j=0; j<n; j++) {
175  R[i*n+j] = PV[i*n+j]+t*V[i*n+j];
176  }
177  }
178  /*--- Detect converged eigenpairs w.r.t. p(A) and A (candidates). Move them to first nlock columns */
179  int nlock_new = 0; // number of newly locked pairs
180  int nlock_ab_new = 0;
181  nact = 0;
182  for (i=nlock; i<nev; i++) {
183  /*--- Compute norm of R(:,i) */
184  double resP = sqrt(DDOT(&n, R+i*n, &one, R+i*n, &one));
185  if (resP < tolP) {
186  /*--- Compute norm of AV(:,i) - V(:,i)*Lambda(i) */
187  tm = evsl_timer();
188  matvec_A(V+i*n, buf);
189  tmv += evsl_timer() - tm;
190  nmv++;
191  double rq = DDOT(&n, V+i*n, &one, buf, &one); // Rayleigh Quotient for A
192  for (j=0; j < n; j++) {
193  R[i*n+j] = buf[j] - rq*V[i*n+j];
194  }
195  double resA = sqrt(DDOT(&n, R+i*n, &one, R+i*n, &one));
196  if (resA < tol) {
197  lock_idx[nlock_new] = i;
198  res[nlock+nlock_new] = resA;
199  Lam[nlock+nlock_new] = rq;
200  nlock_new++;
201  /*--- Determine if the newly locked eigenvalue is in [a,b] */
202  if ( rq >= aa - DBL_EPSILON && rq <= bb + DBL_EPSILON ) {
203  nlock_ab_new++;
204  }
205  } else {
206  act_idx[nact] = i;
207  nact++;
208  }
209  } else {
210  act_idx[nact] = i;
211  nact++;
212  }
213  }
214 
215  /*--- Move newly locked eigenvectors to columns nlock:nlock+nlock_new-1 of V */
216  for (i = 0; i<nlock_new; i++) {
217  DCOPY(&n, V+lock_idx[i]*n, &one, buf+nnlock+i*n, &one);
218  DCOPY(&n, PV+lock_idx[i]*n, &one, R+nnlock+i*n, &one);
219  }
220  /*--- Move active columns to V(:, nlock:nev)*/
221  for (i = 0; i<nact; i++) {
222  DCOPY(&n, V+act_idx[i]*n, &one, buf+nnlock+(nlock_new+i)*n, &one);
223  DCOPY(&n, PV+act_idx[i]*n, &one, R+nnlock+(nlock_new+i)*n, &one);
224  }
225  DCOPY(&nnact, buf+nnlock, &one, V+nnlock, &one);
226  DCOPY(&nnact, R+nnlock, &one, PV+nnlock, &one);
227 
228  nlock += nlock_new;
229  nlock_ab += nlock_ab_new;
230 
231  fprintf(fstats, "it %4d: nMV %7d, nlock %3d, nlock_ab %3d\n",
232  it+1, nmv, nlock, nlock_ab);
233 
234  /*--- Decide if iteration should be stopped */
235  /* Stop if number of locked pairs that are in [a,b] are by NBUF smaller than total
236  number of locked pairs or if all nev pairs in the block have been locked */
237  if ( ((nlock-nlock_ab)>=NBUF) || (nlock == nev) ) {
238  find_more =0;
239  fprintf(fstats, "-------------------------------------\n");
240  fprintf(fstats, " No ev.s left to be computed\n");
241  fprintf(fstats, " Number of evals found = %d\n", nlock_ab);
242  fprintf(fstats, "-------------------------------------------------------------------\n");
243  }
244  }
245  it++;
246  }
247 
248  /*-------------------- Postprocessing : remove eigenpairs not associated with [a,b]*/
249  Malloc(Lam_out, nev, double);
250  Malloc(res_out, nev, double);
251  Malloc(V_out, nnev, double);
252  int idx=0;
253  for (i=0; i<nlock;i++) {
254  double t = Lam[i];
255  if ( t >= aa - DBL_EPSILON && t <= bb + DBL_EPSILON ) {
256  Lam_out[idx] = t;
257  res_out[idx] = res[i];
258  DCOPY(&n, V+i*n, &one, V_out+idx*n, &one);
259  idx++;
260  }
261  }
262 
263  /*-------------------- Done. output : */
264  *nevo = idx;
265  *lamo = Lam_out;
266  *Yo = V_out;
267  *reso = res_out;
268  /*-------------------- free arrays */
269  free(T);
270  free(evalT);
271  free(evecT);
272  free(R);
273  free(V);
274  free(PV);
275  free(Lam);
276  free(res);
277  free(act_idx);
278  free(lock_idx);
279  free(buf);
280  free(work);
281  /*-------------------- record stats */
282  tall = evsl_timer() - tall;
283  /*-------------------- print stat */
284  fprintf(fstats, "------This slice consumed: \n");
285  fprintf(fstats, "Matvecs : %d\n", nmv);
286  fprintf(fstats, "total time : %.2f\n", tall);
287  fprintf(fstats, "matvec time : %.2f\n", tmv);
288  fprintf(fstats,"======================================================\n");
289  return 0;
290 
291 }
292 
int deg
Definition: struct.h:70
void SymEigenSolver(int n, double *A, int lda, double *Q, int ldq, double *lam)
interface to LAPACK SYMMETRIC EIGEN-SOLVER
Definition: misc_la.c:156
defs in EVSL
void DGEMM(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc)
double DDOT(int *n, double *x, int *incx, double *y, int *incy)
double gam
Definition: struct.h:64
int ChebSI(int nev, double *intv, int maxit, double tol, double *vinit, polparams *pol, int *nevo, double **lamo, double **Yo, double **reso, FILE *fstats)
Chebyshev polynomial filtering Subspace Iteration.
Definition: chebsi.c:42
int n
Definition: struct.h:165
This file contains function prototypes and constant definitions internally used in EVSL...
#define NBUF
Definition: chebsi.c:10
void orth(double *V, int n, int k, double *Vo, double *work)
Orthogonalize columns of n-by-k matrix V.
Definition: misc_la.c:269
#define Malloc(base, nmem, type)
Definition: def.h:22
structs used in evsl
Defs for blaslapack routines.
int ChebAv(polparams *pol, double *v, double *y, double *w)
Computes y=P(A) v, where pn is a Cheb. polynomial expansion
Definition: chebpoly.c:510
evslData evsldata
global variable of EVSL
Definition: evsl.c:15
parameters for polynomial filter
Definition: struct.h:45
double evsl_timer()
evsl timer for mac
Definition: mactime.c:14
double bar
Definition: struct.h:65
void DCOPY(int *n, double *dx, int *incx, double *dy, int *incy)