EVSL  1.1.0
EigenValues Slicing Library
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros
chebpoly.c
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <math.h>
4 #include <float.h>
5 #include <string.h>
6 #include "def.h"
7 #include "blaslapack.h"
8 #include "struct.h"
9 #include "internal_proto.h"
19  pol->max_deg = 10000; // max degree allowed
20  pol->min_deg = 2; // min allowed degree
21  pol->damping = 2; // damping. 0 = no damping, 1 = Jackson, 2 = Lanczos
22  pol->thresh_ext = 0.20; // threshold for accepting polynomial for end intervals
23  pol->thresh_int = 0.8; // threshold for accepting polynomial for interior
24  pol->tol = 1e-3; // tolerance for LS approximation
25  pol->mu = NULL;
26  pol->deg = 0; // degree =0 means determine optimal degree.
27  pol->intvtol = 1e-9; // cut-off point of middle interval
28 }
29 
40 int dampcf(int m, int damping, double *jac){
41  double thetJ = 0.0, thetL = 0.0, a1 = 0.0, a2 = 0.0, dm = (double) m;
42  int k, k1;
43  if (damping==1){
44  thetJ = PI/(dm+2);
45  a1 = 1/(dm+2);
46  a2 = sin(thetJ);
47  }
48  if (damping == 2)
49  thetL = PI/(dm+1);
50  jac[0] = 0.5; // <<-- Note this is instead of 1 - in order
51  // to reflect the 1/2 factor in zeroth term
52  // of Chebyshev expansion
53 
54  for (k=1; k<=m; k++) {
55  if (damping == 0) {
56  jac[k] = 1.0;
57  } else if (damping == 1){
58  //-------------------- Note: slightly simpler formulas for jackson:
59  k1 = k+1;
60  jac[k] = a1*sin(k1*thetJ)/a2 + (1-k1*a1)*cos(k*thetJ) ;
61  //-------------------- Lanczos sigma-damping:
62  } else {
63  jac[k] = sin(k*thetL)/(k*thetL);
64  }
65  }
66  return (0);
67 }
68 
79 double dif_eval(double *v, int m, double thc, double *jac){
80  double fval = 0.0;
81  int j;
82  for (j=0; j<=m; j++)
83  fval += v[j]*cos(j*thc)*jac[j];
84  return (fval);
85 }
86 
106 int chebxPltd(int m, double *mu, int npts, double *xi, double *yi) {
107  int k, j, one = 1, n = npts;
108  double scal, *vkm1, *vkp1, *vk;
109  Malloc(vkm1, n, double);
110  Malloc(vkp1, n, double);
111  Malloc(vk, n, double);
112  double *tmp;
113  //-------------------- compute p(xi)
114  memset(vkm1, 0, n*sizeof(double));
115  vecset (n, 1.0, vk) ;
116  //-------------------- start yi = mu[0]*vk = mu[0]*ones
117  vecset (n, mu[0], yi) ;
118  //-------------------- POLYNOMIAL loop
119  for (k=0; k<m; k++) {
120  //-------------------- generates term of degree k+1
121  scal = (k == 0 ? 1.0 : 2.0);
122  for (j=0; j<n; j++)
123  vkp1[j] = scal*xi[j]*vk[j] - vkm1[j];
124  tmp = vkm1;
125  vkm1 = vk;
126  vk = vkp1;
127  vkp1 = tmp;
128  //-------------------- accumulation of vector.
129  DAXPY(&n, &mu[k+1], vk, &one, yi, &one) ;
130  }
131  //-------------------- done
132  free(vkm1);
133  free(vkp1);
134  free(vk);
135  return 0;
136 }
137 
171 void chext(polparams *pol, double aIn, double bIn){
172  int max_deg = pol->max_deg;
173  // int min_deg = pol->min_deg; NOT used
174  double thresh = pol->thresh_ext;
175  double *mu = pol->mu;
176  //-------------------- local variables
177  double del = 0.1*sqrt((bIn-aIn)*0.5);
178  //double del = 0.0;
179  double eps = 1e-13;
180  //double eps = 0.0;
181  double a, b, x, e, c, sigma, sigma1, sigma_new, g0, g1, gnew, s1, s2, s3;
182  double *t0, *t1, *tnew; // coef. of three consecutive cheby. expansions
183  double bar, gam;
184  int mbest=0;
185  int m1 = max_deg+1, j, k;
186  //-------------------- local work space
187  int work_size = 3*m1;
188  //-------------------- this is for the forced degree case
189  if (pol->deg > 0){
190  thresh = -1.0;
191  max_deg = pol->deg;
192  }
193 
194  if (bIn >= (1.0-eps)) {
195  /*-------------------- right interval case */
196  x = aIn;
197  b = x - del;
198  a = -1.0;
199  gam = 1.0;
200  } else {
201  /*-------------------- left interval case */
202  x = bIn;
203  a = x+del;
204  b = 1.0;
205  gam = -1.0;
206  }
207  e = (b-a)*0.5;
208  c = (b+a)*0.5;
209  sigma1 = e/(gam-c);
210  sigma = sigma1;
211  /*-------------------- alloc some work space for coef. of cheby. expansions*/
212  Calloc(t0, work_size, double);
213  t1 = t0+m1;
214  tnew = t1+m1;
215  // t0 = 1
216  t0[0] = 1.0;
217  // t1(t) = (t-c)*sig1/e
218  t1[0] = -c*(sigma1/e);
219  t1[1] = (sigma1/e);
220  //-------------------- for evaluating polyn. at x
221  g0 = 1.0;
222  g1 = (x-c)*sigma1/e;
223  if (g1 < thresh){
224  mbest = 1;
225  }else{
226  //-------------------- degree loop : [! make sure to start at k=2]
227  for(k=2;k<=max_deg;k++){
228  sigma_new = 1.0/(2.0/sigma1-sigma);
229  s1 = sigma_new/e;
230  s2 = 2*s1*c;
231  s3 = sigma*sigma_new;
232  for(j=2;j<=k;j++)
233  tnew[j-1] = s1*(t1[j]+t1[j-2]);
234  tnew[1] = tnew[1] + s1*t1[0];
235  tnew[0] = s1*t1[1];
236  tnew[k] = s1*t1[k-1];
237  for(j=0;j<=k;j++)
238  tnew[j] = tnew[j] - s2*t1[j] - s3*t0[j];
239  for(j=0;j<=k;j++){
240  t0[j] = t1[j];
241  t1[j] = tnew[j];
242  }
243  //-------------------- recurrence to evaluate pn(x)
244  gnew = 2*(x-c)*s1*g1 - s3*g0;
245  g0 = g1;
246  g1 = gnew;
247  //-------------------- best degree
248  mbest = k;
249  if (g1<thresh)
250  break;
251  sigma = sigma_new;
252  }
253  }
254  memcpy(mu,t1,(mbest+1)*sizeof(double));
255  bar = g1;
256  pol->deg = mbest;
257  pol->bar = bar;
258  pol->gam = gam;
259  free(t0);
260 }
261 
268 int indexofSmallestElement(double *array, int size){
269  int index = 0, i;
270  for(i = 1; i < size; i++){
271  if(array[i] < array[index])
272  index = i;
273  }
274  return index;
275 }
276 
287 int rootchb(int m, double *v, double* jac, double tha, double thb, double *mu,
288  double *thcOut){
289  int MaxIterBalan = 30; // max steps in Newton to balance interval
290  double tolBal;
291  // do 2 newton steps -- if OK exit otherwise
292  // continue to get root by solving eigv. pb
293  int j, it;
294  double fval = 0.0, d;
295  double fa, fb, thN, thc;
296  tolBal = fabs(tha-thb)*1.e-13;
297  thc = 0.5*(tha+thb);
298  /*-------------------- check whether or not this will work */
299  fb = dif_eval(v, m, thb, jac);
300  fa = dif_eval(v, m, tha, jac);
301  //--------------------this will not work -exit + use higher deg.
302  if ((fa>0) || (fb<0))
303  return 1 ;
304  /*-------------------- Newton iteration to balance the interval*/
305  for (it=0; it<=MaxIterBalan; it++) {
306  fval = dif_eval(v, m, thc, jac);
307  /*-------------------- do one newton step - d= derivative
308  thN = thetaNewton*/
309  d = 0.0;
310  for (j=1; j<=m; j++)
311  d += jac[j]*j*sin(j*(thc))*v[j];
312  thN = thc + fval/d;
313  /*-------------------- test for stopping */
314  if ((fabs(fval)<tolBal) || fabs(thc - thN)<DBL_EPSILON*abs(thc))
315  break;
316  /*-------------------- test for doing a form of bisection */
317  if (fval >0){
318  if((thN < thb) || (thN > tha))
319  thN = 0.5*(thc+tha);
320  thb = thc;
321  thc = thN;
322  } else {
323  if((thN < thb) || (thN > tha) )
324  thN = 0.5*(thc+thb);
325  tha = thc;
326  thc = thN;
327  }
328  }
329  /*-------------------- done - return mu and thethac */
330  for(j=0; j<=m; j++)
331  mu[j] = cos(j*(thc))*jac[j];
332  *thcOut = thc;
333  return 0;
334 }
335 
361 int find_pol(double *intv, polparams *pol) {
362  double *mu, *v, *jac, t=0.0, itv[2], vals[2];
363  int max_deg=pol->max_deg, min_deg=pol->min_deg, damping=pol->damping;
364  double tha=0.0, thb=0.0, thc=0.0;
365  double gam, thresh;
366  int m, j, nitv, mbest;
367  //-------------------- intervals related
368  if (check_intv(intv, stdout) < 0) {
369  return -1;
370  }
371  /*-------------------- A few parameters to be set or reset */
372  Malloc(mu, max_deg+1, double);
373  pol->mu = mu;
374  Malloc(v, max_deg+1, double);
375  Malloc(jac, max_deg+1, double);
376  /*-------------------- A parameter for interval check */
377  // double IntTol = 2*DBL_EPSILON; // If b*IntTol>1 accept [a b] extreme
378  //double IntTol = 0.0005;
379  double IntTol = pol->intvtol;
380  double aa, bb;
381  aa = max(intv[0], intv[2]); bb = min(intv[1], intv[3]);
382  if (intv[0] < intv[2] || intv[1] > intv[3]) {
383  fprintf(stdout, " warning [%s (%d)]: interval (%e, %e) is adjusted to (%e, %e)\n",
384  __FILE__, __LINE__, intv[0], intv[1], aa, bb);
385  }
386  double lmin = intv[2], lmax = intv[3];
387  /*-------------------- cc, rr: center and half-width of [lmin, lmax] */
388  double cc = 0.5 * (lmax + lmin);
389  double dd = 0.5 * (lmax - lmin);
390  pol->cc = cc;
391  pol->dd = dd;
392  /*-------------------- adjust intervals just in case. */
393  //a = max(a, lmin);
394  //b = min(b, lmax);
395  /* transform [lmin, lmax] to [-1,1] by y = (x-cc) / dd
396  * transform [a, b] to [aT, bT] accordingly */
397  aa = (aa - cc) / dd;
398  bb = (bb - cc) / dd;
399  aa = max(aa, -1.0);
400  bb = min(bb, 1.0);
401  //printf("transformed interval [%.15e %.15e]\n", a,b);
402  thb = acos(bb);
403  tha = acos(aa);
404  //printf(" a = %e b = %e tha = %e thb = %e\n",a,b,tha,thb);
405  // right interval. Same thing on other end
406  /*-------------------- deal with extremal interval cases */
407  if (aa-IntTol <= -1.0) {
408  /*-------------------- left interval */
409  thc = tha;
410  //-------------------- note: p(itv) == where to evaluate p to
411  // obtain bar values.
412  nitv = 1;
413  aa = -1.0;
414  gam = -1.0; // set center for a
415  pol->type = 1;
416  } else if (bb + IntTol >= 1.0) {
417  /*-------------------- right interval */
418  thc = thb;
419  nitv = 1;
420  bb = 1;
421  gam = 1; // set center for b
422  pol->type = 2;
423  } else {
424  /*-------------------- middle interval */
425  itv[0] = aa;
426  itv[1] = bb;
427  nitv = 2;
428  pol->type = 0;
429  }
430  /*-------------------- threshold for finding the best poly */
431  if (nitv == 1) {
432  /*-------------------- end interval case done separately */
433  chext(pol,aa,bb);
434  } else {
435  /*-------------------- give a starting degree - around 1/(half gap) */
436  min_deg = 2 + (int) 0.5/(bb-aa);
437  // min_deg = max(min_deg,2);
438  // min_deg = 2;
439  thresh = pol->thresh_int;
440  //-------------------- this is a short-circuit for the
441  // case of a forced degree
442  if (pol->deg > 0){
443  thresh = -1;
444  max_deg = pol->deg;
445  }
446  /*-------------------- initialize v vector */
447  for (j=0; j<min_deg; j++)
448  v[j] = cos(j*thb) - cos(j*tha);
449  /*-------------------- DEGREE LOOP -------------------- */
450  for (m=min_deg; m < max_deg; m++){
451  dampcf(m, damping, jac);
452  //-------------------- update v: add one more entry
453  v[m] = cos(m*thb)-cos(m*tha);
454  //---------------------Balacing the interval + get new mu
455  if (rootchb(m, v, jac, tha, thb, mu, &thc)) {
456  //-------------------- if m<0 degree is too low - skip
457  //printf("rootchb == 1, m = %d, [%.15e, %.15e], tha %e thb %e\n",
458  // m, aa, bb, tha, thb);
459  continue;
460  }
461  //----------------------New center
462  gam = cos(thc);
463  //-------------------- for scaling
464  chebxPltd(m, mu, 1, &gam, &t);
465  chebxPltd(m, mu, nitv, itv, vals);
466  //-------------------- test for acceptance of this pol.
467  //printf("mindeg=%d, m = %d, val %e %e, t %e, thresh %e\n",
468  // min_deg, m, vals[0], vals[1], t, thresh);
469  if (vals[0] <= t*thresh && vals[1] <= t*thresh) {
470  m++;
471  break;
472  }
473  }
474  mbest = m - 1;
475  //-------------------- scale the polynomial
476  for (j=0; j<=m; j++)
477  mu[j] /= t;
478  pol->bar = min(vals[0], vals[1])/t;
479  pol->gam = gam;
480  pol->deg = mbest;
481  }
482  //save_vec(pol->deg+1, mu, "OUT/mu.mtx");
483  free(v);
484  free(jac);
485  return 0;
486 }
487 
488 void free_pol(polparams *pol) {
489  if (pol->mu) {
490  free(pol->mu);
491  }
492 }
493 
510 int ChebAv(polparams *pol, double *v, double *y, double *w) {
511  double tt = evsl_timer();
512  const int ifGenEv = evsldata.ifGenEv;
513  int n = evsldata.n;
514  /*-------------------- unpack pol */
515  double *mu = pol->mu;
516  double dd = pol->dd;
517  double cc = pol->cc;
518  double ncc = -cc;
519  int m = pol->deg;
520  int one = 1;
521  double dmone = -1.0;
522  /*-------------------- pointers to v_[k-1],v_[k], v_[k+1] from w */
523  double *vk = w;
524  double *vkp1 = w+n;
525  double *vkm1 = vkp1+n;
526  double *w2 = ifGenEv ? vkm1 + n : NULL;
527  /*-------------------- */
528  int k;
529  double t, s, *tmp, t1= 1.0 / dd, t2 = 2.0 / dd;
530  /*-------------------- vk <- v; vkm1 <- zeros(n,1) */
531  /* we have to do this copy, because we don't want to alter the
532  * elements in v */
533  memcpy(vk, v, n*sizeof(double));
534  //memset(vkm1, 0, n*sizeof(double));
535  /*-------------------- special case: k == 0 */
536  s = mu[0];
537  /*
538  for (i=0; i<n; i++) {
539  y[i] = s*vk[i];
540  }
541  */
542  memcpy(y, v, n*sizeof(double));
543  DSCAL(&n, &s, y, &one);
544 
545  /*-------------------- degree loop. k IS the degree */
546  for (k=1; k<=m; k++) {
547  /*-------------------- y = mu[k]*Vk + y */
548  t = k == 1 ? t1 : t2;
549  /*-------------------- */
550  s = mu[k];
551  if (ifGenEv) {
552  /*-------------------- Vkp1 = A*B\Vk - cc*Vk */
553  solve_B(vk, w2);
554  matvec_A(w2, vkp1);
555  } else {
556  /*-------------------- Vkp1 = A*Vk - cc*Vk */
557  matvec_A(vk, vkp1);
558  }
559 
560  /*
561  for (i=0; i<n; i++) {
562  vkp1[i] = t*(vkp1[i]-cc*vk[i]) - vkm1[i];
563  y[i] += s*vkp1[i];
564  }
565  */
566  double ts = evsl_timer();
567  DAXPY(&n, &ncc, vk, &one, vkp1, &one);
568  DSCAL(&n, &t, vkp1, &one);
569  if (k > 1) {
570  DAXPY(&n, &dmone, vkm1, &one, vkp1, &one);
571  }
572  DAXPY(&n, &s, vkp1, &one, y, &one);
573  evslstat.t_sth += evsl_timer() - ts;
574 
575  /*-------------------- next: rotate vectors via pointer exchange */
576  tmp = vkm1;
577  vkm1 = vk;
578  vk = vkp1;
579  vkp1 = tmp;
580  }
581  evslstat.n_polAv ++;
582  evslstat.t_polAv += evsl_timer() - tt;
583 
584  return 0;
585 }
586 
int deg
Definition: struct.h:70
void free_pol(polparams *pol)
Definition: chebpoly.c:488
double * mu
Definition: struct.h:61
defs in EVSL
int ifGenEv
Definition: struct.h:166
void vecset(int n, double t, double *v)
Definition: vect.c:48
int find_pol(double *intv, polparams *pol)
Sets the values in pol.
Definition: chebpoly.c:361
double gam
Definition: struct.h:64
int chebxPltd(int m, double *mu, int npts, double *xi, double *yi)
function yi = chebxPltd computes yi = p_mu (xi),
Definition: chebpoly.c:106
int min_deg
Definition: struct.h:49
double t_polAv
Definition: struct.h:211
void chext(polparams *pol, double aIn, double bIn)
Determines polynomial for end interval cases.
Definition: chebpoly.c:171
int n
Definition: struct.h:165
void DSCAL(int *n, double *a, double *x, int *incx)
#define Calloc(base, nmem, type)
Definition: def.h:32
This file contains function prototypes and constant definitions internally used in EVSL...
#define min(a, b)
Definition: def.h:62
double thresh_ext
Definition: struct.h:51
size_t n_polAv
Definition: struct.h:219
double t_sth
Definition: struct.h:213
void set_pol_def(polparams *pol)
set default values for polparams struct.
Definition: chebpoly.c:18
double dd
Definition: struct.h:63
#define Malloc(base, nmem, type)
Definition: def.h:22
double thresh_int
Definition: struct.h:52
evslStat evslstat
global statistics of EVSL
Definition: evsl.c:21
int rootchb(int m, double *v, double *jac, double tha, double thb, double *mu, double *thcOut)
Finds the roots of linear combination of chebyshev polynomials.
Definition: chebpoly.c:287
structs used in evsl
int type
Definition: struct.h:59
void DAXPY(int *n, double *alpha, double *x, int *incx, double *y, int *incy)
double tol
Definition: struct.h:53
int max_deg
Definition: struct.h:48
#define max(a, b)
Definition: def.h:56
Defs for blaslapack routines.
#define PI
Definition: def.h:15
int damping
Definition: struct.h:50
int ChebAv(polparams *pol, double *v, double *y, double *w)
Computes y=P(A) v, where pn is a Cheb. polynomial expansion
Definition: chebpoly.c:510
double intvtol
Definition: struct.h:54
evslData evsldata
global variable of EVSL
Definition: evsl.c:15
parameters for polynomial filter
Definition: struct.h:45
int dampcf(int m, int damping, double *jac)
Computes damping coefficient for cheb. expansions.
Definition: chebpoly.c:40
double cc
Definition: struct.h:62
double evsl_timer()
evsl timer for mac
Definition: mactime.c:14
double dif_eval(double *v, int m, double thc, double *jac)
function dif_eval(v, m, thc, jac) evaluates the difference between the right and left values of the p...
Definition: chebpoly.c:79
double bar
Definition: struct.h:65
int indexofSmallestElement(double *array, int size)
Find the indexofSmallestElement in array.
Definition: chebpoly.c:268