subroutine cg (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp, a iparm,rparm,ier) external suba, subat, subql, subqlt, subqr, subqrt, subadp integer iparm(30), jcoef(2), iwksp(1) dimension rhs(1), u(1), ubar(1), rparm(30), coef(1), wksp(1) c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c ier = 0 call needw ('cg',0,irpnt,3*n+2*itmax,ier) if (ier .lt. 0) return nw = lenr - irpnt + 1 call cgw (suba,subql,coef,jcoef,wksp,iwksp, a n,u,ubar,rhs,wksp(irpnt),nw,iparm,rparm,ier) irmax = irpnt + nw - 1 return end subroutine si (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp, a iparm,rparm,ier) external suba, subat, subql, subqlt, subqr, subqrt, subadp integer iparm(30), jcoef(2), iwksp(1) dimension rhs(1), u(1), ubar(1), rparm(30), coef(1), wksp(1) c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c ier = 0 call needw ('si',0,irpnt,4*n,ier) if (ier .lt. 0) return nw = lenr - irpnt + 1 call siw (suba,subql,coef,jcoef,wksp,iwksp, a n,u,ubar,rhs,wksp(irpnt),nw,iparm,rparm,ier) irmax = irpnt + nw - 1 return end subroutine srcg (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp, a iparm,rparm,ier) external suba, subat, subql, subqlt, subqr, subqrt, subadp integer iparm(30), jcoef(2), iwksp(1) dimension rhs(1), u(1), ubar(1), rparm(30), coef(1), wksp(1) c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c ier = 0 call needw ('srcg',0,irpnt,3*n+2*itmax,ier) if (ier .lt. 0) return nw = lenr - irpnt + 1 call srcgw (suba,subql,subadp,coef,jcoef,wksp,iwksp, a n,u,ubar,rhs,wksp(irpnt),nw,iparm,rparm,ier) irmax = irpnt + nw - 1 return end subroutine srsi (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp, a iparm,rparm,ier) external suba, subat, subql, subqlt, subqr, subqrt, subadp integer iparm(30), jcoef(2), iwksp(1) dimension rhs(1), u(1), ubar(1), rparm(30), coef(1), wksp(1) c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c ier = 0 call needw ('srsi',0,irpnt,4*n,ier) if (ier .lt. 0) return nw = lenr - irpnt + 1 call srsiw (suba,subql,subadp,coef,jcoef,wksp,iwksp, a n,u,ubar,rhs,wksp(irpnt),nw,iparm,rparm,ier) irmax = irpnt + nw - 1 return end subroutine sor (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp, a iparm,rparm,ier) external suba, subat, subql, subqlt, subqr, subqrt, subadp integer iparm(30), jcoef(2), iwksp(1) dimension rhs(1), u(1), ubar(1), rparm(30), coef(1), wksp(1) c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c ier = 0 call needw ('sor',0,irpnt,2*n,ier) if (ier .lt. 0) return nw = lenr - irpnt + 1 call sorw (suba,subql,coef,jcoef,wksp,iwksp, a n,u,ubar,rhs,wksp(irpnt),nw,iparm,rparm,ier) irmax = irpnt + nw - 1 return end subroutine basic (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call basicw (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine me (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call mew (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine odir (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call odirw (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine omin (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call ominw (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine ores (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call oresw (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine iom (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call iomw (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine gmres (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call gmresw (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine cgnr (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call cgnrw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine lsqr (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call lsqrw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine usymlq (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call uslqw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine usymqr (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call usqrw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine landir (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call ldirw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine lanmin (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call lminw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine lanres (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call lresw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine cgcr (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wk,iwk,iparm,rparm,ier) c c this routine implements the constrained residual method of c j. r. wallis, coupled with truncated/restarted orthomin. for c further information about the algorithm, see "constrained residual c acceleration of conjugate residual methods", by j. r. wallis, c r. p. kendall and t. e. little of j. s. nolen and assocs. inc.; c report spe 13536, society of petroleum engineers, 1985. c c right preconditioning only is allowed in this algorithm. c c unfortunately, this routine is limited -- all blocks must be the c same size. but the idea can be easily generalized. c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wk(1), iwk(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp external nullpl, cgcrpr logical ipl, ipr, iql, iqr c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d common / itcom6 / method, iscale, iperm, nstore, a ifact, kblsz, lvfill, ltrunc, ndeg, a ipropa, isymm, ifctv c c ... data common blocks common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax common / ccgcr / nblk, nband, ictac, ieta, ivcgcr c c time to proceed ... c if (nstore.ne.2 .and. nstore.ne.3) go to 998 c irpsav = irpnt iql = iqlr.eq.1 .or. iqlr.eq.3 iqr = iqlr.eq.2 .or. iqlr.eq.3 if (iql) go to 998 c ipl = .false. ipr = .true. iplr = 0 if (ipl) iplr = iplr + 1 if (ipr) iplr = iplr + 2 c c form the c**(t)*a*c matrix c 1 if (nbl1d.le.0 .or. nbl2d.le.0) go to 998 nbl0d = 1 if (mod(nbl2d,nbl1d).ne.0 .or. mod(nbl1d,nbl0d).ne.0) go to 998 nblk = n / nbl2d if (nblk .eq. 1) nblk = n / nbl1d ictac = irpnt nwgb = lenr - ictac + 1 ierpp = 0 call getblk (coef,jcoef,n,nblk,nband,wk(ictac),nwgb,ierpp) irmax = max0 (irmax,ictac-1+nwgb) if (ierpp .lt. 0) go to 999 irpnt = ictac + nblk*nband c c perform first-iterate calculations c ieta = irpnt ivcgcr = ieta + nblk iv2 = ivcgcr + n irmax = max0(irmax,iv2-1+n) if (irmax .gt. lenr) go to 997 c call suba (coef,jcoef,wk,iwk,n,u,wk(ivcgcr)) call vexopy (n,wk(ivcgcr),rhs,wk(ivcgcr),2) call tmult (n,nblk,nband,wk(ictac),wk(ieta),wk(ivcgcr), a wk(ivcgcr)) call vexopy (n,u,u,wk(ivcgcr),1) c c pass it on to orthomin ... c irpnt = iv2 nw = lenr - irpnt + 1 call omingw (suba,subql,subqr,nullpl,cgcrpr,coef,jcoef, a wk,iwk,n,u,ubar,rhs,wk(irpnt),nw,iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) c irpnt = irpsav return c c error returns ... c c insuff. real workspace ... 997 ier = -2 call ershow (ier,'cgcr') return c c unimplemented option ... 998 ier = -16 call ershow (ier,'cgcr') return c c generic handler ... 999 ier = ierpp return end subroutine bcgs (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call bcgsw (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine cgw (suba,subq,coef,jcoef,wfac,jwfac,nn,u,ubar,rhs, a wksp,nw,iparm,rparm,ier) c c cgw drives the conjugate gradient algorithm. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c n input integer. order of the system (= nn) c u input/output vector. on input, u contains the c initial guess to the solution. on output, it c contains the latest estimate to the solution. c ubar input vector containing the true solution c (optional) c rhs input vector. contains the right hand side c of the matrix problem. c wksp vector used for working space. c nw length of wksp array. if this length is less than c the amount needed, nw will give the needed amount c upon output. c iparm integer vector of length 30. allows user to c specify some integer parameters which affect c the method. c rparm real vector of length 30. allows user to c specify some real parameters which affect c the method. c ier output integer. error flag. c c ... specifications for parameters c external suba, subq integer iparm(30), jcoef(2), jwfac(1) dimension rhs(1), u(1), ubar(1), wksp(1), rparm(30), coef(1), a wfac(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 logical omgadp common / itcom5 / omgadp, omega, alphab, betab, fff, specr c c *** end -- package common c c ... initialize common blocks c ier = 0 n = nn t1 = timer (dummy) iacel = 1 timit = 0.0 digit1 = 0.0 digit2 = 0.0 call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 35 if (level .ge. 2) write (nout,10) 10 format (1x,'cg') c c ... compute workspace base addresses and check for sufficient c ... workspace. c iw1 = 1 iw2 = iw1 + n iw3 = iw2 + n iw4 = iw3 + n nwksp = 3*n + 2*itmax if (nw .ge. nwksp) go to 15 ier = -2 call ershow (ier,'cgw') go to 30 15 continue call nmcalc (coef,jcoef,wfac,jwfac,1,subq,n,rhs,ubar,wksp,ier) if (ier .lt. 0) go to 30 c c ... zero out workspace c call vfill (nwksp,wksp,0.0) c c ... iteration sequence c call itcg (suba,subq,coef,jcoef,wfac,jwfac,n,u,ubar,rhs, a wksp(iw1),wksp(iw2),wksp(iw3),wksp(iw4),ier) c if (ier .lt. 0 .or. ier .eq. 1) go to 25 c c ... method has converged c if (level .ge. 1) write (nout,20) in 20 format (/1x,'cg has converged in ',i5,' iterations' ) c c ... optional error analysis c 25 if (idgts .lt. 0) go to 30 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wksp,digit1, a digit2,idgts) c c ... set return parameters in iparm and rparm c 30 t2 = timer (dummy) nw = 3*n + 2*in timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 rparm(9) = omega rparm(10) = alphab rparm(11) = betab rparm(12) = specr c 35 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) c return end subroutine siw (suba,subq,coef,jcoef,wfac,jwfac,nn,u,ubar,rhs, a wksp,nw,iparm,rparm,ier) c c siw drives the chebyshev acceleration algorithm. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c n input integer. order of the system (= nn) c u input/output vector. on input, u contains the c initial guess to the solution. on output, it c contains the latest estimate to the solution. c ubar input vector containing the true solution c (optional) c rhs input vector. contains the right hand side c of the matrix problem. c wksp vector used for working space. c nw length of wksp array. if this length is less than c the amount needed, nw will give the needed amount c upon output. c iparm integer vector of length 30. allows user to c specify some integer parameters which affect c the method. c rparm real vector of length 30. allows user to c specify some real parameters which affect c the method. c ier output integer. error flag. c c ... specifications for parameters c external suba, subq integer iparm(30), jcoef(2), jwfac(1) dimension rhs(1), u(1), ubar(1), wksp(1), rparm(30), coef(1), a wfac(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 logical omgadp common / itcom5 / omgadp, omega, alphab, betab, fff, specr c c *** end -- package common c c ... initialize common blocks c ier = 0 n = nn t1 = timer (dummy) iacel = 2 timit = 0.0 digit1 = 0.0 digit2 = 0.0 call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 35 if (level .ge. 2) write (nout,10) 10 format (1x,'si') c c ... compute workspace base addresses and check for sufficient c ... workspace. c iw1 = 1 iw2 = iw1 + n iw3 = iw2 + n iw4 = iw3 + n nwksp = 4*n if (nw .ge. nwksp) go to 15 ier = -2 call ershow (ier,'siw') go to 30 15 continue call nmcalc (coef,jcoef,wfac,jwfac,1,subq,n,rhs,ubar,wksp,ier) if (ier .lt. 0) go to 30 c c ... compute an initial rayleigh quotient and adjust emax, emin. c call vfill (n,wksp,1.0) call subq (coef,jcoef,wfac,jwfac,n,wksp,wksp(iw2)) call suba (coef,jcoef,wfac,jwfac,n,wksp(iw2),wksp(iw3)) rq = vdot (n,wksp(iw2),wksp(iw3)) / a vdot (n,wksp(iw2),wksp) rqmax = rq rqmin = rq if (maxadd) emax = amax1 (emax,rqmax) if (minadd) emin = amin1 (emin,rqmin) if (minadd) emin = amax1 (emin,0.0) c c ... zero out workspace c call vfill (nwksp,wksp,0.0) c c ... iteration sequence c call itsi (suba,subq,coef,jcoef,wfac,jwfac,n,u,ubar,rhs, a wksp(iw1),wksp(iw2),wksp(iw3),wksp(iw4),ier) c if (ier .lt. 0 .or. ier .eq. 1) go to 25 c c ... method has converged c if (level .ge. 1) write (nout,20) in 20 format (/1x,'si has converged in ',i5,' iterations ') c c ... optional error analysis c 25 if (idgts .lt. 0) go to 30 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wksp,digit1, a digit2,idgts) c c ... set return parameters in iparm and rparm c 30 t2 = timer (dummy) nw = 4*n timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 rparm(9) = omega rparm(10) = alphab rparm(11) = betab rparm(12) = specr c 35 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) c return end subroutine srcgw (suba,subq,subadp,coef,jcoef,wfac,jwfac, a nn,u,ubar,rhs,wksp,nw,iparm,rparm,ier) c c srcgw drives the ssor conjugate gradient algorithm. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c subadp adpation routine c n input integer. order of the system (= nn) c u input/output vector. on input, u contains the c initial guess to the solution. on output, it c contains the latest estimate to the solution. c ubar input vector containing the true solution c (optional) c rhs input vector. contains the right hand side c of the matrix problem. c wksp vector used for working space. c nw length of wksp array. if this length is less than c the amount needed, nw will give the needed amount c upon output. c iparm integer vector of length 30. allows user to c specify some integer parameters which affect c the method. c rparm real vector of length 30. allows user to c specify some real parameters which affect c the method. c ier output integer. error flag. c c ... specifications for parameters c external suba, subq, subadp integer iparm(30), jcoef(2), jwfac(1) dimension rhs(1), u(1), ubar(1), wksp(1), rparm(30), coef(1), a wfac(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 logical omgadp common / itcom5 / omgadp, omega, alphab, betab, fff, specr c c *** end -- package common c c ... initialize common blocks c ier = 0 n = nn t1 = timer (dummy) iacel = 1 timit = 0.0 digit1 = 0.0 digit2 = 0.0 call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 35 if (level .ge. 2) write (nout,10) 10 format (1x,'srcg') c c ... compute workspace base addresses and check for sufficient c ... workspace. c iw1 = 1 iw2 = iw1 + n iw3 = iw2 + n iw4 = iw3 + n nwksp = 3*n + 2*itmax if (nw .ge. nwksp) go to 15 ier = -2 call ershow (ier,'srcgw') go to 30 15 continue c c ... zero out workspace c call vfill (nwksp,wksp,0.0) c c ... iteration sequence c call itsrcg (suba,subq,subadp,coef,jcoef,wfac,jwfac,n,u,ubar, a rhs,wksp(iw1),wksp(iw2),wksp(iw3),wksp(iw4),ier) c if (ier .lt. 0 .or. ier .eq. 1) go to 25 c c ... method has converged c if (level .ge. 1) write (nout,20) in 20 format (/1x,'srcg has converged in ',i5,' iterations' ) c c ... optional error analysis c 25 if (idgts .lt. 0) go to 30 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wksp,digit1, a digit2,idgts) c c ... set return parameters in iparm and rparm c 30 t2 = timer (dummy) timit = t2 - t1 nw = 3*n + 2*in iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 rparm(9) = omega rparm(10) = alphab rparm(11) = betab rparm(12) = specr c 35 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) c return end subroutine srsiw (suba,subq,subadp,coef,jcoef,wfac,jwfac, a nn,u,ubar,rhs,wksp,nw,iparm,rparm,ier) c c srsiw drives the ssor chebyshev acceleration algorithm. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c subadp adpation routine c n input integer. order of the system (= nn) c u input/output vector. on input, u contains the c initial guess to the solution. on output, it c contains the latest estimate to the solution. c ubar input vector containing the true solution c (optional) c rhs input vector. contains the right hand side c of the matrix problem. c wksp vector used for working space. c nw length of wksp array. if this length is less than c the amount needed, nw will give the needed amount c upon output. c iparm integer vector of length 30. allows user to c specify some integer parameters which affect c the method. c rparm real vector of length 30. allows user to c specify some real parameters which affect c the method. c ier output integer. error flag. c c ... specifications for parameters c external suba, subq, subadp integer iparm(30), jcoef(2), jwfac(1) dimension rhs(1), u(1), ubar(1), wksp(1), rparm(30), coef(1), a wfac(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 logical omgadp common / itcom5 / omgadp, omega, alphab, betab, fff, specr c c *** end -- package common c c ... initialize common blocks c ier = 0 n = nn t1 = timer (dummy) iacel = 2 timit = 0.0 digit1 = 0.0 digit2 = 0.0 call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 35 if (level .ge. 2) write (nout,10) 10 format (1x,'srsi') c c ... compute workspace base addresses and check for sufficient c ... workspace. c iw1 = 1 iw2 = iw1 + n iw3 = iw2 + n iw4 = iw3 + n nwksp = 4*n if (nw .ge. nwksp) go to 15 ier = -2 call ershow (ier,'srsiw') go to 30 15 continue c c ... compute an initial rayleigh quotient and adjust emax, emin. c call vfill (n,wksp,1.0) call subq (coef,jcoef,wfac,jwfac,n,wksp,wksp(iw2)) call suba (coef,jcoef,wfac,jwfac,n,wksp(iw2),wksp(iw3)) rq = vdot (n,wksp(iw2),wksp(iw3)) / a vdot (n,wksp(iw2),wksp) rqmax = 1.0 rqmin = rq c c ... adjust emax, emin. c emax = 1.0 maxadd = .false. if (minadd) emin = amin1 (emin,rqmin) if (minadd) emin = amax1 (emin,0.0) c c ... zero out workspace c call vfill (nwksp,wksp,0.0) c c ... iteration sequence c call itsrsi (suba,subq,subadp,coef,jcoef,wfac,jwfac,n,u,ubar, a rhs,wksp(iw1),wksp(iw2),wksp(iw3),wksp(iw4),ier) c if (ier .lt. 0 .or. ier .eq. 1) go to 25 c c ... method has converged c if (level .ge. 1) write (nout,20) in 20 format (/1x,'srsi has converged in ',i5,' iterations ') c c ... optional error analysis c 25 if (idgts .lt. 0) go to 30 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wksp,digit1, a digit2,idgts) c c ... set return parameters in iparm and rparm c 30 t2 = timer (dummy) timit = t2 - t1 nw = 4*n iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 rparm(9) = omega rparm(10) = alphab rparm(11) = betab rparm(12) = specr c 35 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) c return end subroutine sorw (suba,subq,coef,jcoef,wfac,jwfac,nn, a u,ubar,rhs,wksp,nw,iparm,rparm,ier) c c sorw drives the successive over-relaxation algorithm. c c ... parameters -- c c suba matrix-vector multiplication routine c subq routine to do an sor pass c n input integer. order of the system (= nn) c u input/output vector. on input, u contains the c initial guess to the solution. on output, it c contains the latest estimate to the solution. c ubar input vector containing the true solution c (optional) c rhs input vector. contains the right hand side c of the matrix problem. c wksp vector used for working space. c nw length of wksp array. if this length is less than c the amount needed, nw will give the needed amount c upon output. c iparm integer vector of length 30. allows user to c specify some integer parameters which affect c the method. c rparm real vector of length 30. allows user to c specify some real parameters which affect c the method. c ier output integer. error flag. c c ... specifications for parameters c external suba, subq integer iparm(30), jcoef(2), jwfac(1) dimension rhs(1), u(1), ubar(1), wksp(1), rparm(30), coef(1), a wfac(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 logical omgadp common / itcom5 / omgadp, omega, alphab, betab, fff, specr c c *** end -- package common c c ... initialize common blocks c ier = 0 n = nn t1 = timer (dummy) iacel = 3 timit = 0.0 digit1 = 0.0 digit2 = 0.0 call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 35 if (level .ge. 2) write (nout,10) 10 format (1x,'sor') c c ... compute workspace base addresses and check for sufficient c ... workspace. c nwksp = 2*n if (nw .ge. nwksp) go to 15 ier = -2 call ershow (ier,'sorw') go to 30 c c ... zero out workspace c 15 call vfill (nwksp,wksp,0.0) c c ... iteration sequence c call itsor (subq,coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wksp,ier) c if (ier .lt. 0 .or. ier .eq. 1) go to 25 c c ... method has converged c if (level .ge. 1) write (nout,20) in 20 format (/1x,'sor has converged in ',i5,' iterations' ) c c ... optional error analysis c 25 if (idgts .lt. 0) go to 30 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wksp,digit1, a digit2,idgts) c c ... set return parameters in iparm and rparm c 30 t2 = timer (dummy) timit = t2 - t1 nw = 2*n iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 rparm(9) = omega rparm(10) = alphab rparm(11) = betab rparm(12) = specr c 35 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) c return end subroutine itcg (suba,subq,coef,jcoef,wfac,jwfac,nn,u,ubar, a rhs,r,p,z,tri,ier) c c itcg does the conjugate gradient iterations. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c n order of system (= nn) c u current solution c ubar known solution (optional) c rhs right hand side vector c r,p,z workspace vectors of length n each c tri tridiagonal matrix associated with the c eigenvalues of the tridiagonal matrix. c ier error code c c ... specifications for parameters c external suba, subq integer jcoef(2), jwfac(1) dimension coef(1), wfac(1) dimension u(1), ubar(1), rhs(1), r(1), p(1), z(1), tri(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c n = nn in = 0 is = 0 rzdot = 0.0 alpha = 0.0 beta = 0.0 alphao = 0.0 maxadp = maxadd minadp = minadd c c compute r = residual c call suba (coef,jcoef,wfac,jwfac,n,u,r) do 10 i = 1,n 10 r(i) = rhs(i) - r(i) go to 25 c c***** begin iteration loop ***** c 15 do 20 i = 1,n 20 r(i) = r(i) - alpha*z(i) c c ... do preconditioning step -- solve q*z = r for z. c 25 call subq (coef,jcoef,wfac,jwfac,n,r,z) c c ... compute rzdot = (r,z) c dkm1 = rzdot rzdot = 0.0 do 30 i = 1,n 30 rzdot = rzdot + r(i)*z(i) if (rzdot .gt. 0.0) go to 35 ier = -7 call ershow (ier,'itcg') return c c ... determine whether or not to stop. c 35 call pstops (n,r,z,u,ubar,ier) if (level .ge. 2) call iterm (n,u) if (halt .or. ier .lt. 0) return if (in .lt. itmax) go to 40 ier = 1 call ershow (ier,'itcg') zeta = stptst return c c ... compute beta = rzdot/dkm1 c 40 if (in .eq. 0) go to 45 beta = rzdot/dkm1 c c ... compute p = z + beta*p c 45 do 50 i = 1,n 50 p(i) = z(i) + beta*p(i) c c ... compute alpha = rzdot / (p,a*p) c call suba (coef,jcoef,wfac,jwfac,n,p,z) alphao = alpha pap = 0.0 do 55 i = 1,n 55 pap = pap + p(i)*z(i) alpha = rzdot / pap if (pap .gt. 0.0) go to 60 ier = -6 call ershow (ier,'itcg') return c c ... compute latest eigenvalue estimates. c 60 if (maxadp .or. minadp) call chgcon (tri,ier) c c ... compute new solution u = u + alpha*p c do 65 i = 1,n 65 u(i) = u(i) + alpha*p(i) in = in + 1 is = is + 1 go to 15 end subroutine itsi (suba,subq,coef,jcoef,wfac,jwfac,nn,u,ubar, a rhs,r,p,z,wksp,ier) c c itsi does the semi-iterative iterations. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c n order of system (= nn) c u current solution c ubar known solution (optional) c rhs right hand side vector c r,p,z, workspace vectors of length n each c wksp volatile workspace c ier error code c c ... specifications for parameters c external suba, subq integer jcoef(2), jwfac(1) dimension coef(1), wfac(1) dimension u(1), ubar(1), rhs(1), r(1), p(1), z(1), wksp(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c n = nn in = 0 c c ... new chebychev sequence. c 10 is = 0 alpha = 0.0 beta = 0.0 rho = 1.0 rzdot = 0.0 gamma = 2.0/(emax + emin) sigma = (emax - emin)/(emax + emin) term = sqrt (1.0 - sigma*sigma) rr = (1.0 - term)/(1.0 + term) maxadp = maxadd minadp = minadd c c compute r = residual c call suba (coef,jcoef,wfac,jwfac,n,u,r) do 15 i = 1,n 15 r(i) = rhs(i) - r(i) go to 30 c c***** begin iteration loop ***** c 20 do 25 i = 1,n 25 r(i) = r(i) - alpha*z(i) c c ... do preconditioning step -- solve q*z = r for z. c 30 call subq (coef,jcoef,wfac,jwfac,n,r,z) c c ... compute rzdot = (r,z) c dkm1 = rzdot rzdot = 0.0 do 35 i = 1,n 35 rzdot = rzdot + r(i)*z(i) if (is .eq. 0) dkq = rzdot if (rzdot .ge. 0.0) go to 40 ier = -7 call ershow (ier,'itsi') return c c ... determine whether or not to stop. c 40 call pstops (n,r,z,u,ubar,ier) if (level .ge. 2) call iterm (n,u) if (halt .or. ier .lt. 0) return if (in .lt. itmax) go to 45 ier = 1 call ershow (ier,'itsi') zeta = stptst return c c ... compute iteration parameters. c 45 call parsi c c ... compute p = z + beta*p c ... u = u + alpha*p c do 50 i = 1,n p(i) = z(i) + beta*p(i) u(i) = u(i) + alpha*p(i) 50 continue c c ... adapt on emin and emax c in = in + 1 if (.not. maxadp .and. .not. minadp) go to 55 call chgsi (suba,coef,jcoef,wfac,jwfac,n,z,wksp,icode,ier) if (ier .lt. 0) return c c ... check if new estimates of emax, emin are to be used. c if (icode .eq. 1) go to 10 c c ... estimates of emax, emin are still good. c 55 is = is + 1 call suba (coef,jcoef,wfac,jwfac,n,p,z) go to 20 end subroutine itsrcg (suba,subq,subadp,coef,jcoef,wfac,jwfac, a nn,u,ubar,rhs,r,p,z,tri,ier) c c itsrcg does the ssor conjugate gradient iterations. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c subadp adpation routine c n order of system (= nn) c u current solution c ubar known solution (optional) c rhs right hand side vector c r,p,z workspace vectors of length n each c tri tridiagonal matrix associated with the c eigenvalues of the tridiagonal matrix. c ier error code c c ... specifications for parameters c external suba, subq, subadp integer jcoef(2), jwfac(1) dimension coef(1), wfac(1) dimension u(1), ubar(1), rhs(1), r(1), p(1), z(1), tri(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c n = nn in = 0 isw = 1 5 is = 0 rzdot = 0.0 alpha = 0.0 beta = 0.0 alphao = 0.0 maxadp = maxadd minadp = minadd c c recompute bnorm c call nmcalc (coef,jcoef,wfac,jwfac,isw,subq,n,rhs,ubar,r,ier) if (ier .lt. 0) return isw = 2 c c compute r = residual c call suba (coef,jcoef,wfac,jwfac,n,u,r) do 10 i = 1,n 10 r(i) = rhs(i) - r(i) go to 25 c c***** begin iteration loop ***** c 15 do 20 i = 1,n 20 r(i) = r(i) - alpha*z(i) c c ... do preconditioning step -- solve q*z = r for z. c 25 call subq (coef,jcoef,wfac,jwfac,n,r,z) c c ... compute rzdot = (r,z) c dkm1 = rzdot rzdot = 0.0 do 30 i = 1,n 30 rzdot = rzdot + r(i)*z(i) if (rzdot .ge. 0.0) go to 35 ier = -7 call ershow (ier,'itsrcg') return c c ... determine whether or not to stop. c 35 call pstops (n,r,z,u,ubar,ier) if (level .ge. 2) call iterm (n,u) if (halt .or. ier .lt. 0) return if (in .lt. itmax) go to 40 ier = 1 call ershow (ier,'itsrcg') zeta = stptst return c c ... compute beta = rzdot/dkm1 c 40 if (is .eq. 0) go to 45 beta = rzdot/dkm1 c c ... compute p = z + beta*p c 45 do 50 i = 1,n 50 p(i) = z(i) + beta*p(i) c c ... compute alpha = rzdot / (p,a*p) c call suba (coef,jcoef,wfac,jwfac,n,p,z) alphao = alpha pap = 0.0 do 55 i = 1,n 55 pap = pap + p(i)*z(i) alpha = rzdot / pap if (pap .ge. 0.0) go to 60 ier = -6 call ershow (ier,'itsrcg') return c c ... compute latest eigenvalue estimates. c 60 if (minadp) call chgcon (tri,ier) c c ... compute new solution u = u + alpha*p c do 65 i = 1,n 65 u(i) = u(i) + alpha*p(i) is = is + 1 in = in + 1 call ssorad (subadp,coef,jcoef,wfac,jwfac,n,p,z,r,icode) if (icode .eq. 0) go to 15 go to 5 end subroutine itsrsi (suba,subq,subadp,coef,jcoef,wfac,jwfac, a nn,u,ubar,rhs,r,p,z,wksp,ier) c c itsrsi does the ssor semi-iterative iterations. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c subadp adpation routine c n order of system (= nn) c u current solution c ubar known solution (optional) c rhs right hand side vector c r,p,z, workspace vectors of length n each c wksp volatile workspace c ier error code c c ... specifications for parameters c external suba, subq, subadp integer jcoef(2), jwfac(1) dimension coef(1), wfac(1) dimension u(1), ubar(1), rhs(1), r(1), p(1), z(1), wksp(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c n = nn c in = 0 isw = 1 c c recompute bnorm c 5 call nmcalc (coef,jcoef,wfac,jwfac,isw,subq,n,rhs,ubar,r,ier) if (ier .lt. 0) return isw = 2 c c ... update rayleigh quotient . c if (in .eq. 0) go to 10 call subq (coef,jcoef,wfac,jwfac,n,p,z) call suba (coef,jcoef,wfac,jwfac,n,z,r) rq = vdot (n,z,r) / vdot (n,z,p) rqmin = rq if (minadd) emin = rqmin c c ... new chebychev sequence. c 10 is = 0 alpha = 0.0 beta = 0.0 rho = 1.0 rzdot = 0.0 gamma = 2.0/(emax + emin) sigma = (emax - emin)/(emax + emin) term = sqrt (1.0 - sigma*sigma) rr = (1.0 - term)/(1.0 + term) minadp = minadd c c compute r = residual c call suba (coef,jcoef,wfac,jwfac,n,u,r) do 15 i = 1,n 15 r(i) = rhs(i) - r(i) go to 30 c c***** begin iteration loop ***** c 20 do 25 i = 1,n 25 r(i) = r(i) - alpha*z(i) c c ... do preconditioning step -- solve q*z = r for z. c 30 call subq (coef,jcoef,wfac,jwfac,n,r,z) c c ... compute rzdot = (r,z) c dkm1 = rzdot rzdot = 0.0 do 35 i = 1,n 35 rzdot = rzdot + r(i)*z(i) if (is .eq. 0) dkq = rzdot if (rzdot .ge. 0.0) go to 40 ier = -7 call ershow (ier,'itsrsi') return c c ... determine whether or not to stop. c 40 call pstops (n,r,z,u,ubar,ier) if (level .ge. 2) call iterm (n,u) if (halt .or. ier .lt. 0) return if (in .lt. itmax) go to 45 ier = 1 call ershow (ier,'itsrsi') zeta = stptst return c c ... compute iteration parameters. c 45 call parsi c c ... compute p = z + beta*p c ... u = u + alpha*p c do 50 i = 1,n p(i) = z(i) + beta*p(i) u(i) = u(i) + alpha*p(i) 50 continue c c ... adapt on emin and emax c in = in + 1 if (.not. minadp) go to 55 call chgsi (suba,coef,jcoef,wfac,jwfac,n,z,wksp,icode,ier) if (ier .lt. 0) return c c ... check if new estimates of emax, emin are to be used. c if (icode .eq. 1) go to 10 c c ... estimates of emax, emin are still good. c 55 is = is + 1 call suba (coef,jcoef,wfac,jwfac,n,p,z) call ssorad (subadp,coef,jcoef,wfac,jwfac,n,p,z,r,icode) if (icode .eq. 0) go to 20 go to 5 end subroutine itsor (subq,coef,jcoef,wfac,jwfac,nn,u,ubar, a rhs,wksp,ier) c c ... itsor does the sor iterations c c ... parameters -- c c subq routine to do an sor pass c n size of system c rhs right hand side c u solution vector c ubar known solution (optional) c wksp workspace vector of length 2*n c c ... specifications for parameters c integer jcoef(2), jwfac(1) dimension coef(1), wfac(1) dimension rhs(1), u(1), ubar(1), wksp(1) external subq logical change c c *** begin -- itpack common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 logical omgadp common / itcom5 / omgadp, omega, alphab, betab, fff, specr c c *** end -- itpack common c c c ... set initial parameters not already set c n = nn in = 0 is = 0 ip = 0 iss = 0 iphat = 2 delnnm = 0.0 delsnm = 0.0 call sorstp (n,u,ubar,0.0,0.0) change = omgadp ib2 = n + 1 if (.not. omgadp) go to 10 omegap = omega omega = 1.0 ipstar = 4 if (omegap .le. 1.0) change = .false. c c ... start iterating. c 10 do 55 iter = 1,itmax+1 c c ... output intermediate information c if (level .ge. 2) call iterm (n,u) if (halt) return if (.not. change) go to 15 change = .false. is = is + 1 ip = 0 iss = 0 omega = amin1 (omegap,tau(is)) iphat = max0 ( 3 , int ( (omega-1.0)/(2.0-omega) ) ) ipstar = ipstr (omega) c c ... compute u (in + 1) and norm of del(s,p) c 15 delsnm = delnnm spcrm1 = specr do 20 i = 1,n 20 wksp(i) = rhs(i) call subq (coef,jcoef,wfac,jwfac,n,u,wksp,wksp(ib2)) do 25 i = 1,n 25 wksp(i) = u(i) - wksp(n+i) sum = 0.0 do 28 i = 1,n 28 sum = sum + wksp(i)*wksp(i) delnnm = sqrt (sum) do 30 i = 1,n 30 u(i) = wksp(i+n) if (delnnm .eq. 0.0) go to 35 if (in .ne. 0) specr = delnnm / delsnm if (ip .lt. iphat) go to 50 c c ... stopping test, set h c if (specr .ge. 1.0) go to 50 if (.not. (specr .gt. (omega - 1.0))) go to 35 h = specr go to 40 35 iss = iss + 1 h = omega - 1.0 c c ... perform stopping test. c 40 continue dnrm = delnnm**2 call sorstp (n,u,ubar,dnrm,h) if (halt) go to 50 c c ... method has not converged yet, test for changing omega c if (.not. omgadp) go to 50 if (ip .lt. ipstar) go to 50 if (omega .gt. 1.0) go to 45 emax = sqrt (abs (specr)) omegap = 2.0 / (1.0 + sqrt (abs (1.0 - specr))) change = .true. go to 50 45 if (iss .ne. 0) go to 50 if (specr .le. (omega - 1.0)**fff) go to 50 if ((specr + 0.00005) .le. spcrm1) go to 50 c c ... change parameters c emax = (specr + omega - 1.0) / (sqrt (abs (specr))*omega) omegap = 2.0 / (1.0 + sqrt (abs (1.0 - emax*emax))) change = .true. c 50 ip = ip + 1 in = in + 1 55 continue ier = 1 in = in - 1 call ershow (ier,'itsor') zeta = stptst return end subroutine basicw (suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw, a iparm,rparm,ier) c c code to run the basic (unaccelerated) iterative method, c with preconditioning. that is, it applies the fixed point method c to the preconditioned system. c two-sided preconditioning is efficiently implemented. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) logical iql, iqr external suba, subql, subqr dimension iparm(30), rparm(30) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c preliminary calculations c iacel = 0 ier = 0 nwusd = 0 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 996 if (level .ge. 2) write (nout,496) 496 format (' basic') c use knowledge about spectrum to optimally extrapolate ... extrap = (emax+emin)/2.0 iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 c c initialize the stopping test ... c call inithv (0) zthave = .true. nwpstp = nw call pstop (0,suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 735 c c bust up workspace ... c izt = 1 iv1 = izt + n iwfree = iv1 + n if (iqlr .eq. 0) iwfree = iv1 nwusd = max0(nwusd,iwfree-1) c c check the memory usage ... c if (nwusd .gt. nw) go to 999 c c do preliminary calculations ... c in = 0 is = 0 go to (151,152,153,154),iqlr + 1 c 151 call suba (coef,jcoef,wfac,jwfac,n,u,wk(izt)) call vexopy (n,wk(izt),rhs,wk(izt),2) go to 10 c 152 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(izt)) go to 10 c 153 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subqr (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(izt)) go to 10 c 154 call suba (coef,jcoef,wfac,jwfac,n,u,wk(izt)) call vexopy (n,wk(izt),rhs,wk(izt),2) call subql (coef,jcoef,wfac,jwfac,n,wk(izt),wk(iv1)) call subqr (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(izt)) go to 10 c c-----------------------begin iteration loop---------------------- c c determine whether or not to stop -- c 10 call inithv (1) nwpstp = nw - (iwfree-1) call pstop (1,suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,xxx,xxx,wk(izt), a wk(iwfree),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iwfree-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c form iterate ... c call vtriad (n,u,u,1e0/extrap,wk(izt),1) c c form residuals, as necessary ... c go to (161,162,163,164),iqlr + 1 c 161 call suba (coef,jcoef,wfac,jwfac,n,u,wk(izt)) call vexopy (n,wk(izt),rhs,wk(izt),2) go to 110 c 162 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(izt)) go to 110 c 163 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subqr (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(izt)) go to 110 c 164 call suba (coef,jcoef,wfac,jwfac,n,u,wk(izt)) call vexopy (n,wk(izt),rhs,wk(izt),2) call subql (coef,jcoef,wfac,jwfac,n,wk(izt),wk(iv1)) call subqr (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(izt)) go to 110 c c proceed to next iteration c 110 in = in + 1 is = is + 1 go to 10 c c--------------------------------finish up------------------------- c 900 if (halt) go to 715 ier = 1 call ershow (ier,'basicw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' basic method converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk, a digit1,digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 996 call ershow (ier,'basicw') go to 735 c c insuff. real wksp ... 999 ier = -2 call ershow (ier,'basicw') go to 735 c end subroutine mew (suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw, a iparm,rparm,ier) c c this routine runs the minimal error algorithm of fridman. c the reference is: v. m. fridman, "the method of minimum iterations c ...", ussr computational math. and math. phys., vol. 2, 1962, c pp. 362-3. c c two-sided preconditioning is implemented. the iteration matrix c should be symmetric for this algorithm to work. c c we have introduced periodic scaling of the direction vectors, to c prevent overflow. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) external suba, subql, subqr dimension iparm(30), rparm(30) logical iql, iqr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c the following indexing functions are used to access the old c direction vectors -- c indp(i) = ip + mod(i,2)*n indpt(i) = ipt + mod(i,2)*n c c various preliminary calculations. c dot = 0e0 nwusd = 0 ier = 0 iacel = 4 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 997 if (level .ge. 2) write (nout,496) 496 format (' me') iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 c c initialize the stopping test ... c call inithv (0) zhave = .true. nwpstp = nw call pstop (0,suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c c memory allocation, etc. c c nomenclature -- r -- residual of the original system. c z -- inv(ql)*r c zt -- inv(qr)*z c ip = 1 ipt = ip + 2*n iz = ipt + 2*n ir = iz + n iv1 = ir + n if (.not. rcalp) iv1 = ir izt = iv1 + n iv2 = izt + n if (.not. ztcalp) iv2 = izt iqlap = iv1 iqrlap = iv2 iwfree = iv2 + n c c note that memory usage has been overlapped whenever possible, c in order to save space. c nwusd = max0(nwusd,iwfree-1) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 rhave = rcalp zthave = ztcalp c c perform first-iterate calculations c call suba (coef,jcoef,wfac,jwfac,n,u,wk(ir)) call vexopy (n,wk(ir),rhs,wk(ir),2) call subql (coef,jcoef,wfac,jwfac,n,wk(ir),wk(iz)) call subqr (coef,jcoef,wfac,jwfac,n,wk(iz),wk(izt)) c c---------------------------- begin iteration loop ---------------------------- c c determine whether or not to stop -- c note that we have already done the calculations necessary so that suba c and subql are not actually used by pstop. c 10 call inithv (1) nwpstp = nw - (iwfree-1) call pstop (1,suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk(ir),wk(iz),wk(izt), a wk(iwfree),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iwfree-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c compute p(n), the direction vector, and inv(qr)*p(n) (=pt(n)). c scal = 1e0 c c first, case of in .eq. 0 c if (in .ne. 0) go to 100 toplam = vdot (n,wk(iz),wk(iz)) call suba (coef,jcoef,wfac,jwfac,n,wk(izt),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(indp(in))) call subqr (coef,jcoef,wfac,jwfac,n,wk(indp(in)),wk(indpt(in))) go to 120 c c case in .gt. 0 c 100 toplam = vdot (n,wk(indp(in-1)),wk(iz)) bet1 = - vdot (n,wk(indp(in-1)),wk(iqlap)) / dot if (in .ne. 1) go to 110 c c case in .eq. 1 c call vtriad (n,wk(indp(in)),wk(iqlap),bet1,wk(indp(in-1)),1) call vtriad (n,wk(indpt(in)),wk(iqrlap),bet1,wk(indpt(in-1)),1) go to 120 c c case in .gt. 1 c 110 bet2 = - vdot (n,wk(indp(in-2)),wk(iqlap)) / dotold call vtriad (n,wk(indp(in)), wk(iqlap), bet2,wk(indp(in-2)), 1) call vtriad (n,wk(indpt(in)),wk(iqrlap),bet2,wk(indpt(in-2)),1) call vtriad (n,wk(indp(in)), wk(indp(in)), bet1,wk(indp(in-1)), 1) call vtriad (n,wk(indpt(in)),wk(indpt(in)),bet1,wk(indpt(in-1)),1) c c at this point, we are finished forming the latest direction vector. c we proceed to calculate lambda and update the solution and the c residual. c 120 dotold = dot dot = vdot (n,wk(indp(in)),wk(indp(in))) c if (dot .lt. srelpr) go to 998 c c scale direction vector if necessary ... if (dot.lt.srelpr**2 .or. dot.gt.(1e0/srelpr**2)) then scal = sqrt(dot) call vtriad (n,wk(indp(in)), xxx,1e0/scal,wk(indp(in)), 2) call vtriad (n,wk(indpt(in)),xxx,1e0/scal,wk(indpt(in)),2) dot = 1e0 end if c 124 vlamda = toplam / dot / scal c c u -- c call vtriad (n,u,u,vlamda,wk(indpt(in)),1) c c r -- c call suba (coef,jcoef,wfac,jwfac,n,wk(indpt(in)),wk(iv2)) if (rhave) call vtriad (n,wk(ir),wk(ir),-vlamda,wk(iv2),1) c c z -- c call subql (coef,jcoef,wfac,jwfac,n,wk(iv2),wk(iqlap)) call vtriad (n,wk(iz),wk(iz),-vlamda,wk(iqlap),1) c c zt -- c call subqr (coef,jcoef,wfac,jwfac,n,wk(iqlap),wk(iqrlap)) if (zthave) call vtriad (n,wk(izt),wk(izt),-vlamda,wk(iqrlap),1) c c proceed to next iteration c in = in + 1 is = is + 1 go to 10 c c-------------------------------finish up----------------------------- c 900 if (halt) go to 715 ier = 1 call ershow (ier,'mew') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' me converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 997 call ershow (ier,'mew') go to 735 c 998 ier = -15 call ershow (ier,'mew') go to 725 c 999 ier = -2 call ershow (ier,'mew') go to 735 c end subroutine cgnrw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c code to run the conjugate gradient algorithm on the normal equations. c in this variant, the residual of the original system is minimized c per iteration. currently, only left preconditioning is implemented. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) external suba, subat, subql, subqlt, subqr, subqrt dimension iparm(30), rparm(30) logical iql, iqr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c preliminary calculations. c nwusd = 0 ier = 0 iacel = 5 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 997 iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 if (iqr) go to 995 if (level .ge. 2) write (nout,496) 496 format (' cgnr') maxadp = maxadd minadp = minadd alphao = 0e0 alpha = 0e0 beta = 0e0 c c initialize the stopping test ... c call inithv (0) zthave = .true. nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c itri = 1 ip = itri if ( .not. (maxadd .or. minadd) ) go to 850 ip = itri + 2*itmax call vfill (2*itmax,wk(itri),0e0) 850 ir = ip + n iv1 = ir + n iv2 = iv1 + n nwusd = max0(nwusd,iv2-1+n) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(ir)) c c--------------------------begin iteration loop------------------------ c c determine whether or not to stop -- c 10 call inithv (1) nwpstp = nw - (iv1-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,wk(ir), a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c if (in .ne. 0) go to 110 c c perform first-iterate calculations c call subqlt (coef,jcoef,wfac,jwfac,n,wk(ir),wk(iv1)) call subat (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(ip)) ard = vdot (n,wk(ip),wk(ip)) go to 111 c c perform subsequent-iterate calculations c 110 ardold = ard c if (abs(ardold) .lt. srelpr) go to 996 call subqlt (coef,jcoef,wfac,jwfac,n,wk(ir),wk(iv1)) call subat (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) ard = vdot (n,wk(iv2),wk(iv2)) an = ard/ardold call vtriad (n,wk(ip),wk(iv2),an,wk(ip),1) beta = an c c proceed to form the iterate. c 111 call suba (coef,jcoef,wfac,jwfac,n,wk(ip),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) pap = vdot (n,wk(iv2),wk(iv2)) if (abs(pap) .lt. srelpr**2) go to 998 vlamda = ard/pap c call vtriad (n,u,u,vlamda,wk(ip),1) call vtriad (n,wk(ir),wk(ir),-vlamda,wk(iv2),1) c c update eigenvalue estimates c alphao = alpha alpha = vlamda if (maxadp .or. minadp) call chgcon (wk(itri),ier) if (ier .lt. 0) go to 725 c c proceed to next iteration c in = in + 1 is = is + 1 go to 10 c c-------------------------------finish up--------------------------- c 900 if (halt) go to 715 ier = 1 call ershow (ier,'cgnrw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' cgnr converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 995 ier = -16 call ershow (ier,'cgnrw') return c 996 ier = -13 call ershow (ier,'cgnrw') go to 725 c 997 call ershow (ier,'cgnrw') go to 735 c 998 ier = -15 call ershow (ier,'cgnrw') go to 725 c 999 ier = -2 call ershow (ier,'cgnrw') go to 735 c end subroutine lsqrw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c code to run the lsqr algorithm. the algorithm is taken from c the article 'lsqr -- an algorithm for sparse linear equations c and sparse least squares.' c by c. c. paige amd m. a. saunders, in acm transactions on c mathematical software, vol. 8, no. 1, march 1982, pp. 43-71. c the iterates produced are the same as those of cgnr, in exact c arithmetic, but this should be more stable. only left c preconditioning is currently implemented. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) integer vect1, vect2, os external suba, subat, subql, subqlt, subqr, subqrt dimension iparm(30), rparm(30) logical iql, iqr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c preliminary calculations. c nwusd = 0 ier = 0 iacel = 6 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 996 iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 if (iqr) go to 995 if (level .ge. 2) write (nout,496) 496 format (' lsqr') c c initialize the stopping test ... c call inithv (0) zdhav = .true. nwpstp = nw call pstop (0,suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 735 c c ... associated integer variables. c iu = 1 iv = iu + n iw = iv + n iv1 = iw + n iv2 = iv1 + n nwusd = max0(nwusd,iv2-1+n) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 c c now, perform first-iterate calculations c call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) beta = sqrt(vdot (n,wk(iv2),wk(iv2))) if (abs(beta) .lt. srelpr) go to 997 call vtriad (n,wk(iu),xxx,1.0/beta,wk(iv2),2) c call subqlt (coef,jcoef,wfac,jwfac,n,wk(iu),wk(iv1)) call subat (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) alpha = sqrt(vdot (n,wk(iv2),wk(iv2))) if (abs(alpha) .lt. srelpr) go to 997 call vtriad (n,wk(iv),xxx,1.0/alpha,wk(iv2),2) c call vcopy (n,wk(iv),wk(iw)) phibar = beta rhobar = alpha zdot = phibar**2 c if u(0) is zero, then the norm of u(n) can be calculated for free. c otherwise, i don't know. c c---------------------------begin iteration loop--------------------- c c determine whether or not to stop -- c 10 call inithv (1) zdhav = .true. nwpstp = nw - (iv1-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c ... compute the lanczos vectors. c call suba (coef,jcoef,wfac,jwfac,n,wk(iv),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) call vtriad (n,wk(iu),wk(iv2),-alpha,wk(iu),1) beta = sqrt(vdot (n,wk(iu),wk(iu))) if (abs(beta) .lt. srelpr) go to 997 call vtriad (n,wk(iu),xxx,1.0/beta,wk(iu),2) c call subqlt (coef,jcoef,wfac,jwfac,n,wk(iu),wk(iv1)) call subat (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) call vtriad (n,wk(iv),wk(iv2),-beta,wk(iv),1) alpha = sqrt(vdot (n,wk(iv),wk(iv))) if (abs(alpha) .lt. srelpr) go to 997 call vtriad (n,wk(iv),xxx,1.0/alpha,wk(iv),2) c c continue by calculating various scalars. c rho = sqrt(rhobar**2+beta**2) if (rho .lt. srelpr) go to 998 c = rhobar/rho s = beta/rho theta = s*alpha rhobar = -c*alpha phi = c*phibar phibar = s*phibar c c now generate the new u and w vectors. c call vtriad (n,u,u,phi/rho,wk(iw),1) call vtriad (n,wk(iw),wk(iv),-theta/rho,wk(iw),1) c c proceed to next iteration c zdot = phibar**2 in = in + 1 is = is + 1 go to 10 c c-----------------------------finish up------------------------- c 900 if (halt) go to 715 ier = 1 call ershow (ier,'lsqrw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' lsqr converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 995 ier = -16 call ershow (ier,'lsqrw') return c 996 call ershow (ier,'lsqrw') go to 735 c 997 ier = -13 call ershow (ier,'lsqrw') go to 725 c 998 ier = -14 call ershow (ier,'lsqrw') go to 725 c 999 ier = -2 call ershow (ier,'lsqrw') go to 735 c end subroutine odirw (suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c this routine implements orthodir with truncation and c restarting and with 2-sided preconditioning. the effective value c of the z matrix is (inv(ql)*a*inv(qr))**t. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) logical iql, iqr external suba, subql, subqr dimension iparm(30), rparm(30) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c the following indexing functions are used to access the old c direction vectors and dot products -- c indpt(i) = ipt + mod(i,nv)*n indqap(i) = iqapt + mod(i,nv)*n inddot(i) = idot + mod(i,nv) c c various preliminary calculations. c c nwusd = 0 ier = 0 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 997 write (nout,496) 496 format (' orthodir') iacel = 7 iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 c c initialize the stopping test ... c call inithv (0) zhave = .true. zthave = .true. nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 997 c c memory allocation, etc. c nv = max0(1,min0(ns1,ns2-1)) ipt = 1 iqapt = ipt + nv*n idot = iqapt + nv*n iz = idot + nv izt = iz + n if (.not. iqr) izt = iz isv = izt + n iv1 = isv + n iv2 = iv1 + n c if (iql) nwusd = max0(nwusd,iv2-1+n) if (.not. iql) nwusd = max0(nwusd,iv1-1+n) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 c c perform first-iterate calculations c if (iql) go to 122 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iz)) call vexopy (n,wk(iz),rhs,wk(iz),2) go to 121 122 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iz)) 121 if (iqr) call subqr (coef,jcoef,wfac,jwfac,n,wk(iz),wk(izt)) c if (.not. iqr) zdot = vdot (n,wk(iz),wk(iz)) c c ========================= begin iteration loop ==================== c c c determine whether or not to stop ... c 10 call inithv (1) nwpstp = nw - (iv1-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,wk(iz),wk(izt), a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c proceed to calculate the direction vectors. c c first, case of no old p vectors. c np = min0(mod(in,ns2),ns1) if (np .ne. 0) go to 100 c if (is .eq. 0) call vcopy (n,wk(izt),wk(indpt(in))) if (is .ne. 0) call vcopy (n,wk(isv),wk(indpt(in))) if (iql) go to 123 call suba (coef,jcoef,wfac,jwfac,n,wk(indpt(in)),wk(indqap(in))) go to 120 123 call suba (coef,jcoef,wfac,jwfac,n,wk(indpt(in)),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(indqap(in))) go to 120 c c case of at least one old p vector. c this case is handled in a tricky way, to optimize the workspace. c 100 if (iql) go to 124 call suba (coef,jcoef,wfac,jwfac,n,wk(isv),wk(iv1)) go to 125 124 call suba (coef,jcoef,wfac,jwfac,n,wk(isv),wk(iv2)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv2),wk(iv1)) c 125 top = vdot (n,wk(indqap(in-np)),wk(iv1)) bet = - top / wk(inddot(in-np)) call vtriad (n,wk(indpt(in)),wk(isv),bet,wk(indpt(in-np)),1) call vtriad (n,wk(indqap(in)),wk(iv1),bet,wk(indqap(in-np)),1) ibegin = in - np + 1 iend = in - 1 if (ibegin .gt. iend) go to 613 do 612 i = ibegin,iend top = vdot (n,wk(indqap(i)),wk(iv1)) bet = - top / wk(inddot(i)) call vtriad (n,wk(indpt(in)),wk(indpt(in)),bet,wk(indpt(i)),1) 612 call vtriad (n,wk(indqap(in)),wk(indqap(in)),bet,wk(indqap(i)),1) 613 continue c c periodically scale the direction vector, to prevent overflow ... c 120 continue dot = vdot (n,wk(indqap(in)),wk(indqap(in))) if (dot.lt.srelpr**2 .or. dot.gt.(1e0/srelpr**2)) then call vtriad (n,wk(indpt(in)), xxx,1e0/dot,wk(indpt(in)), 2) call vtriad (n,wk(indqap(in)),xxx,1e0/dot,wk(indqap(in)),2) dot = 1e0 end if c c at this point, we are finished forming the latest direction vector. c we proceed to calculate lambda and update the solution and c the residuals. c 129 continue c if (abs(dot) .lt. srelpr) go to 998 wk(inddot(in)) = dot top = vdot (n,wk(indqap(in)),wk(iz)) vlamda = top / dot c the following commented-out line is unstable. but it can be fixed. c if (.not. iqr) zdot = zdot - 2*vlamda*top + vlamda**2*dot c c u -- c call vtriad (n,u,u,vlamda,wk(indpt(in)),1) c c z -- c call vtriad (n,wk(iz),wk(iz),-vlamda,wk(indqap(in)),1) c c zt -- c call subqr (coef,jcoef,wfac,jwfac,n,wk(indqap(in)),wk(isv)) if (iqr) call vtriad (n,wk(izt),wk(izt),-vlamda,wk(isv),1) c c proceed to next iteration c in = in + 1 is = is + 1 if (is .eq. ns2) is = 0 go to 10 c c-------------------------------finish up---------------------------- c 900 if (.not. halt) go to 996 if (level .ge. 1) write (nout,720) in 720 format (/' orthodir converged in ',i5,' iterations.') c 725 if (idgts .ge. 0) a call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk, a digit1,digit2,idgts) c pack revised parms into iparm, rparm ... t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c c error returns c c no convergence ... 996 ier = 1 call ershow (ier,'odirw') zeta = stptst go to 725 c c generic error handler ... 997 call ershow (ier,'odirw') go to 735 c c breakdown ... 998 ier = -15 call ershow (ier,'odirw') go to 725 c c insufficient real wksp ... 999 ier = -2 call ershow (ier,'odirw') go to 735 end subroutine ominw (suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c this routine implements the truncated/restarted orthomin algorithm. c eigenvalue estimation is implemented. c note that this also implements the gcr algorithm. c c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) external suba, subql, subqr external nullpl, nullpr dimension iparm(30), rparm(30) c ier = 0 call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) return c c pass on to workhorse routine ... c call omingw (suba,subql,subqr,nullpl,nullpr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) return end subroutine omingw (suba,subql,subqr,precl,precr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c this is a generalized version of the omingw routine which allows a c more general computational form for the preconditioning. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) logical ipl, ipr external suba, subql, subqr, precl, precr dimension iparm(30), rparm(30) logical ztget, havest, hadest, evest c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c the following indexing functions are used to access the old c direction vectors and dot products -- c indpt(i) = ipt + mod(i,nv)*n indqap(i) = iqapt + mod(i,nv)*n inddot(i) = idot + mod(i,nv+1) indhes(i,j) = ihess + (i-1) + (j-1)*nhess inapar(i) = iapar + mod(i,nv) indlam(i) = ilam + mod(i,nv+1) c c various preliminary calculations. c t1 = timer (dummy) c ipl = iplr .eq. 1 .or. iplr .eq. 3 ipr = iplr .eq. 2 .or. iplr .eq. 3 c iacel = 8 nwusd = 0 if (level .ge. 1) write (nout,497) 497 format (' omin') c c initialize the stopping test ... c call inithv (0) zhave = .true. zthave = .true. nwpstp = nw call pstopg (0,suba,subql,subqr,precl,precr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 997 ztget = ztcalp zthave = ztget c c memory allocation, etc. c numbig = 1000 methev = 1 if (iabs(ns3) .ge. numbig) then if (ns3 .gt. 0) ns3 = ns3 - numbig if (ns3 .lt. 0) ns3 = ns3 + numbig methev = 2 end if c evest = ns3.ne.0 .and. (maxadd.or.minadd) nhess = 2 + min0(itmax,ns2) nv = max0(1,min0(ns1,ns2-1)) ipt = 1 iqapt = ipt + nv*n idot = iqapt + nv*n iapar = idot + (nv+1) ihess = iapar + nv ilam = ihess + nhess*(nv+2) if (.not. evest) ilam = ihess iz = ilam + (nv+1) izt = iz + n if (.not. ipr) izt = iz iv1 = izt + n iv2 = iv1 + n ir = iz if (ipl) ir = iv1 c nwtmp = iv1 - 1 + n if (ipl) nwtmp = iv2 - 1 + n nwusd = max0(nwusd,nwtmp) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 c c perform first-iterate calculations c call suba (coef,jcoef,wfac,jwfac,n,u,wk(ir)) call vexopy (n,wk(ir),rhs,wk(ir),2) if (ipl) call precl (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, a wk(ir),wk(iz)) hadest = .false. c c ------------------------- begin iteration loop ---------------------- c c determine whether or not to stop ... c 10 if (.not. ztget) go to 710 if (ipr) call precr (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, a wk(iz),wk(izt)) c 710 call inithv (1) nwpstp = nw - (iv1-1) call pstopg (1,suba,subql,subqr,precl,precr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,wk(iz),wk(izt), a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c if (zthave) go to 711 if (ipr) call precr (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, a wk(iz),wk(izt)) c c------------------proceed to calculate the direction vectors---------------- c c first, case of no old p vectors. c 711 np = min0(mod(in,ns2),ns1) if (np .ne. 0) go to 100 c call vcopy (n,wk(izt),wk(indpt(in))) if (.not. ipl) then call suba (coef,jcoef,wfac,jwfac,n,wk(indpt(in)),wk(indqap(in))) else call suba (coef,jcoef,wfac,jwfac,n,wk(indpt(in)),wk(iv1)) call precl (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, a wk(iv1),wk(indqap(in))) end if go to 120 c c case of at least one old p vector. c this case is handled in a tricky way, to optimize the workspace. c 100 if (.not. ipl) then call suba (coef,jcoef,wfac,jwfac,n,wk(izt),wk(iv1)) else call suba (coef,jcoef,wfac,jwfac,n,wk(izt),wk(iv2)) call precl (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, a wk(iv2),wk(iv1)) end if c top = vdot (n,wk(indqap(in-np)),wk(iv1)) wk(inapar(in-np)) = top bet = - top / wk(inddot(in-np)) call vtriad (n,wk(indpt(in)),wk(izt),bet,wk(indpt(in-np)),1) call vtriad (n,wk(indqap(in)),wk(iv1),bet,wk(indqap(in-np)),1) c do 612 i = in-np+1, in-1 top = vdot (n,wk(indqap(i)),wk(iv1)) wk(inapar(i)) = top bet = - top / wk(inddot(i)) call vtriad (n,wk(indpt(in)), wk(indpt(in)), bet,wk(indpt(i)), 1) 612 call vtriad (n,wk(indqap(in)),wk(indqap(in)),bet,wk(indqap(i)),1) c c at this point, we are finished forming the latest direction vector. c we proceed to calculate lambda and update the solution and c the residuals. c 120 continue apap = vdot (n,wk(indqap(in)),wk(indqap(in))) c if (abs(apap) .lt. srelpr**2) go to 998 if (abs(apap) .eq. 0e0) go to 998 wk(inddot(in)) = apap top = vdot (n,wk(indqap(in)),wk(iz)) vlamda = top / apap c if (.not. ipr) zzdot = zzdot - 2*vlamda*top + vlamda**2*apap c c u -- call vtriad (n,u,u,vlamda,wk(indpt(in)),1) c c z -- call vtriad (n,wk(iz),wk(iz),-vlamda,wk(indqap(in)),1) c c----------------------------hess matrix update--------------------------- c c there are two schemes here, based on two different ways of projecting c the iteration matrix. c c update hessenberg matrix: scheme 1 c if (.not. evest) go to 955 wk(indlam(in)) = vlamda if (is .eq. 0) call vfill (nhess*(nv+2),wk(ihess),0e0) if (methev .ne. 1) go to 746 c do 954 i=in-np,in if (i .eq. in) apar = apap if (i .ne. in) apar = wk(inapar(i)) wk(indhes(i+1+(is-in),in-i+2)) = wk(indhes(i+1+(is-in),in-i+2)) a + apar/wk(indlam(in)) / sqrt(wk(inddot(in))*wk(inddot(i))) if (is .ne. 0) a wk(indhes(i+1+(is-in),in-i+1)) = wk(indhes(i+1+(is-in),in-i+1)) a - apar/wk(indlam(in-1)) / sqrt(wk(inddot(in-1))*wk(inddot(i))) 954 continue iesize = is go to 747 c c update hessenberg matrix: scheme 2 c 746 iesize = is + 1 wk(indhes(is+2,1)) = -1e0 / vlamda wk(indhes(is+1,2)) = 1e0 / vlamda if (np .eq. 0) go to 749 do 748 i=in-np,in-1 id = in - i + 1 wk(indhes(is+3-id,id )) = wk(indhes(is+3-id,id )) a - wk(inapar(i))/wk(inddot(i))/wk(indlam(i)) 748 wk(indhes(is+2-id,id+1)) = wk(indhes(is+2-id,id+1)) a + wk(inapar(i))/wk(inddot(i))/wk(indlam(i)) 749 continue c c estimate eigenvalues ... c 747 nwhe = nw - (iv1-1) call hesest (wk(ihess),nhess,nv+2,iesize,ns3,havest, a emaxnw,eminnw,wk(iv1),nwhe,ier) nwusd = max0 (nwusd,iv1-1+nwhe) if (ier .ne. 0) go to 995 if (.not. havest) go to 955 if (hadest) go to 956 if (maxadd) emax = emaxnw if (minadd) emin = eminnw hadest = .true. go to 955 956 if (maxadd) emax = amax1 (emax,emaxnw) if (minadd) emin = amin1 (emin,eminnw) c c---------------------------proceed to next iteration------------------------ c 955 in = in + 1 is = is + 1 if (is .eq. ns2) is = 0 go to 10 c c-------------------------------finish up------------------------------------ c 900 if (.not. halt) go to 996 if (level .ge. 1) write (nout,720) in 720 format (/' orthomin converged in ',i5,' iterations.') c 725 if (idgts .ge. 0) a call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) c pack revised parms into iparm, rparm ... t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c c--------------------------------error returns----------------------------- c c unimplemented option ... 995 ier = -16 call ershow (ier,'omingw') go to 725 c c no convergence ... 996 ier = 1 call ershow (ier,'omingw') zeta = stptst go to 725 c c generic error handler ... 997 call ershow (ier,'omingw') go to 735 c c breakdown ... 998 ier = -15 call ershow (ier,'omingw') go to 725 c c insufficient real wksp ... 999 ier = -2 call ershow (ier,'omingw') go to 735 end subroutine oresw (suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c this routine implements orthores with truncation and c restarting and with 2-sided preconditioning. the value of z is c the identity. the code is optimal in speed and workspace c requirements, for general a, ql and qr. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) external suba, subql, subqr dimension iparm(30), rparm(30) logical iql, iqr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c the following indexing functions are used to access the old c direction vectors and dot products -- c indu(i) = iu + mod(i,nv)*n indz(i) = iz + mod(i,nv)*n inddot(i) = idot + mod(i,nv) c c various preliminary calculations. c nwusd = 0 ier = 0 iacel = 9 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 997 if (level .ge. 2) write (nout,496) 496 format (' orthores') iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 c c initialize the stopping test ... c call inithv (0) zhave = .true. zthave = .true. nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c c memory allocation, etc. c c nomenclature -- r -- residual of the original system. c z -- inv(ql)*r c zt -- inv(qr)*z c nv = max0(1,min0(ns1+1,ns2)) iu = 1 iz = iu + nv*n idot = iz + nv*n iv1 = idot + nv iv2 = iv1 + n nwusd = max0(nwusd,iv2-1+n) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c in = 0 c c perform first-iterate calculations. c note -- we will use the vector 'u' to store ztilde. the u vectors c will be stored in the table. wk(iv1) will hold r. c call vcopy (n,u,wk(indu(0))) call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(indz(0))) call subqr (coef,jcoef,wfac,jwfac,n,wk(indz(0)),u) wk(inddot(0)) = vdot (n,wk(indz(0)),wk(indz(0))) c c-------------------------begin iteration loop----------------------- c c determine whether or not to stop -- c 10 call inithv (1) nwpstp = nw - (iv2-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,wk(indu(in)),ubar,rhs,xxx,wk(indz(in)),u, a wk(iv2),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv2-1) if (level .ge. 2) call iterm (n,wk(indu(in))) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c proceed to calculate the iterates. c np = min0(mod(in,ns2)+1,ns1+1) call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) top = vdot (n,wk(indz(in+1-np)),wk(iv2)) sig = top / wk(inddot(in+1-np)) call vtriad (n,wk(indz(in+1)),wk(iv2),-sig,wk(indz(in+1-np)),1) call vtriad (n,wk(indu(in+1)),u,sig,wk(indu(in+1-np)),1) sigsum = sig ibegin = in - np + 2 iend = in if (ibegin .gt. iend) go to 613 do 612 i = ibegin,iend top = vdot (n,wk(indz(i)),wk(iv2)) den = wk(inddot(i)) if (abs(den) .lt. srelpr) go to 998 sig = top / den call vtriad (n,wk(indz(in+1)),wk(indz(in+1)),-sig,wk(indz(i)),1) call vtriad (n,wk(indu(in+1)),wk(indu(in+1)),sig,wk(indu(i)),1) 612 sigsum = sigsum + sig 613 continue if (abs(sigsum) .lt. srelpr) go to 998 vlamda = 1.0/sigsum call vtriad (n,wk(indz(in+1)),xxx,-vlamda,wk(indz(in+1)),2) call vtriad (n,wk(indu(in+1)),xxx,vlamda,wk(indu(in+1)),2) wk(inddot(in+1)) = vdot (n,wk(indz(in+1)),wk(indz(in+1))) c call subqr (coef,jcoef,wfac,jwfac,n,wk(indz(in+1)),u) c c proceed to next iteration c in = in + 1 go to 10 c c-----------------------------finish up---------------------------- c 900 call vcopy (n,wk(indu(in)),u) if (halt) go to 715 ier = 1 call ershow (ier,'oresw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' orthores converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 997 call ershow (ier,'oresw') go to 735 c 998 ier = -15 call ershow (ier,'oresw') go to 725 c 999 ier = -2 call ershow (ier,'oresw') go to 735 c end subroutine iomw (suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c code to run the (truncated) iom algorithm. the reference is c youcef saad, "krylov subspace methods ...", mathematics of c computation, vol. 37, july 1981, pp. 105f. c c in the symmetric case this algorithm reduces to the symmlq c algorithm of paige and saunders, except paige and saunders have c implemented a trick to avoid breakdown before convergence. this c trick is not implemented here. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) integer idotw, vect1, vect2, dots1, dots2, os logical uneed external suba, subql, subqr dimension iparm(30), rparm(30) dimension gdum(1), wkxxx(1) logical iql, iqr logical exact, gamize c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c next, the indexing functions. c indv1(i) = vect1 + mod(i,nv)*n indbe2(i) = ibeta2 + mod(i,os) indc(i) = icos + mod(i,os) inds(i) = isin + mod(i,os) indu(i) = iu + mod(i,os+1) indw(i) = iw + n*mod(i,os) c c preliminary calculations. c nwusd = 0 ier = 0 iacel = 10 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 996 iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 gamize = .true. if (iqr) go to 995 if (level .ge. 2) write (nout,496) 496 format (' iom') c the following flag tells us whether the truncating actually c throws out important information. it should actually be set to c true if the matrix is symmetric. exact = .false. c c initialize the stopping test ... c call inithv (0) zdhav = .true. nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c c ... associated integer variables. c os = iabs(ns1) iv = 1 nv = os idotw = 1 iw = 1 vect1 = iw + iv*n*os vect2 = vect1 dots1 = vect2 + iv*n*nv dots2 = dots1 ibeta1 = dots2 + idotw*os ibeta2 = ibeta1 icos = ibeta2 + os isin = icos + os iu = isin + os iv1 = iu + os+1 iv2 = iv1 + n nwusd = max0(nwusd,iv2-1+n) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 uneed = rcalp .or. zcalp .or. ztcalp .or. udhav a .or. ntest .eq. 6 .or. level .ge. 3 c c--------------------------begin iteration loop--------------------- c c perform first-iterate calculations ... c 10 if (is .ne. 0) go to 100 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) call pvec (n,nv,iv,1,os,idotw,is,1,1,wk(vect1),wk(dots1),0, a wk(ibeta1),gdum,gamize,wk(iv2),wkxxx,ier) gamma1 = gdum(1) if (ier .lt. 0) go to 997 gamma2 = gamma1 vnorm1 = 1.0/gamma1 vnorm2 = 1.0/gamma2 zdot = vnorm1**2 ucnp1= 0.0 c 100 call inithv (1) zdhav = .true. nwpstp = nw - (iv1-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c c ... compute q(n+1), etc -- the direction vectors c call suba (coef,jcoef,wfac,jwfac,n,wk(indv1(is)),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) call pvec (n,nv,iv,1,os,idotw,is+1,1,1,wk(vect1),wk(dots1),0, a wk(ibeta1),gdum,gamize,wk(iv2),wkxxx,ier) gamma1 = gdum(1) if (ier .lt. 0) go to 997 gamma2 = gamma1 c c ... now record norms. c vn1old = vnorm1 vnorm1 = 1.0/gamma1 vn2old = vnorm2 vnorm2 = 1.0/gamma2 c c ... now update the factorization c ucnbar = ucnp1 ibgn = max0(0,is+1-os) do 1 i = ibgn,is 1 wk(indu(i+1)) = -wk(indbe2(i)) if (ibgn .gt. 0) wk(indu(ibgn))= 0.0 call qrupd (is+1,os+1,os,wk(icos),wk(isin),ucnbar,ucn,wk(iu), a vn2old,ier) if (ier .lt. 0) go to 998 ucnp1 = wk(indu(is+1)) c c ... update the old w vector. c if (is .ne. 0) a call vtriad (n,wk(indw(is-1)),xxx,ucnbar/ucn,wk(indw(is-1)),2) c c ... now generate the new w vector. c if (abs(ucnp1) .lt. srelpr) go to 998 call vcopy (n,wk(indv1(is)),wk(iv1)) ibgn = max0(1,is-os+1) iend = is if (iend .lt. ibgn) go to 200 do 201 i = ibgn,iend 201 call vtriad (n,wk(iv1),wk(iv1),-wk(indu(i)),wk(indw(i-1)),1) 200 continue call vtriad (n,wk(indw(is)),xxx,1.0/ucnp1,wk(iv1),2) if (is .ne. 0) go to 205 c c ... update iterate u(0). c zold= 0.0 zbar = vn1old if (uneed) call vtriad (n,u,u,zbar,wk(indw(0)),1) go to 210 c c ... update subsequent iterates u(n). c 205 zold = wk(indc(is))*zbar zbold = zbar zbar =-wk(inds(is))*zbar factor = zold if (uneed) factor = factor - zbold*ucn/ucnbar call vtriad (n,u,u,factor,wk(indw(is-1)),1) if (uneed) call vtriad (n,u,u,zbar,wk(indw(is)),1) c to avoid breakdown for the symmetric indefinite case, we'd only add c in w(is-1) here, i believe. 210 continue zdot = (zbar/ucnp1*vnorm1)**2 c c proceed to next iteration c in = in + 1 is = is + 1 go to 10 c c-----------------------------finish up------------------------------ c 900 if (.not. uneed) call vtriad (n,u,u,zbar,wk(indw(is-1)),1) if (halt) go to 715 ier = 1 call ershow (ier,'iomw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' iom converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 995 ier = -16 call ershow (ier,'iomw') return c 996 call ershow (ier,'iomw') go to 735 c 997 ier = -13 call ershow (ier,'iomw') go to 725 c 998 ier = -14 call ershow (ier,'iomw') go to 725 c 999 ier = -2 call ershow (ier,'iomw') go to 735 c end subroutine gmresw (suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c code to run the truncated/restarted gmres algorithm. a detailed c description of this useful algorithm may be found in the paper, c "gmres: a generalized minimal residual algorithm for solving c nonsymmetric linear systems", youcef saad and martin h. schultz, c siam j. sci. stat. comput., v. 7, no. 3, july 1986. c c further scoop on how to set up qr factorizations can be obtained in c "practical use of some krylov subspace methods for solving c indefinite and unsymmetric linear systems", youcef saad, siam j. sci. c stat. comput., v. 5, no. 1, march 1984. c c the advantage of this algorithm over its competitors orthomin and gcr c is that work and storage are saved by avoiding the computation of c certain vectors. c c this routine now handles right and 2-sided preconditioning. the main c thing to note about this is that a new table of basis vecttors is now c necessary, to use to update the solution. c c this routine also avoids explicit scaling of the p and w vectors. c c for the pure restarted case, we actually compute the final arnoldi c vector, rather than just estimating its norm. this is a diversion c from the saad/schultz paper. this was done because in some cases it c was found that the norm estimation was subject to significant c numerical error. c c modified feb. 1990 to make the restarted method more efficient. c specifically, new formulas were installed for the scalar part of c the computation to give an optimal asymptotic dependence on ns2. c c---------------------------------------------------------------------- c dimension coef(*), jcoef(*), wfac(*), jwfac(*) dimension u(*), ubar(*), rhs(*), wk(*) logical uneed, zneed external suba, subql, subqr dimension iparm(30), rparm(30) logical iql, iqr logical trunc, exact, rstrt, rstrtd, zhvold logical havest, hadest, evadpt c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c-------------------------- indexing functions -------------------------- c c the following function accesses the arnoldi vectors. indp(i) = ip + mod(i,nv)*n c c the following accesses q-r times the arnoldi vectors indpt(i) = ipt + mod(i,nvt)*n c c fudge factor for the arnoldi vectors. p(actual) = p(stored)*pfudge. c (we do the same trick with a*p.) indpf(i) = ipf + mod(i,nv) c c the following accesses the w-vectors. indw(i) = iw + n*mod(i,nv) c c fudge factors for the w vectors ... c (similarly, the vector "xi" is fudged.) indwf(i) = iwf + mod(i,nv) c c the following accesses the hessenberg matrix -- stored by diagonals ... indhes(i,j) = ihess + (i-1) + (j-i+1)*nhess c c the following are the cosines and sines of the rotations. indc(i) = icos + mod(i,nrot) inds(i) = isin + mod(i,nrot) c c the following accesses the u matrix -- stored by columns ... indu(i,j) = iu + j-i+1 + mod(j-1,nuc)*nbwuh c c the following accesses the z-vector ... indzc(i) = izc + mod(i-1,nzc) c c----------------------- preliminary calculations ---------------------- c nwusd = 0 ier = 0 iacel = 11 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 996 iql = iqlr.eq.1 .or. iqlr.eq.3 iqr = iqlr.eq.2 .or. iqlr.eq.3 iadpt = ns3 evadpt = (maxadd.or.minadd) .and. iadpt.ne.0 trunc = ns1 .lt. (ns2-1) exact = .not. trunc if (ns1 .lt. 2) go to 995 if (level .ge. 2) write (nout,496) 496 format (' gmres') c c--------------------------initialize the stopping test------------------ c call inithv (0) zdhav = .not. (trunc .and. .not.exact) nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c c uneed tells us whether u must be computed explicitly per iteration. c similarly for zneed. uneed = rcalp .or. udhav a .or. ntest .eq. 6 .or. level .ge. 3 zneed = zcalp hadest = .false. c c------------------------associated integer variables--------------------- c c---effective ns2--- ns2e = min0(ns2,itmax) c---length of diags of hess matrix--- nhess = ns2e + 2 c---bandwidth of the hess matrix--- nbwh = min0(ns1+1,ns2e+1) c---bandwidth of u-or-h--- nbwuh = min0(ns1+2, ns2e+1) c---number columns stored of the u matrix--- if ( trunc) nuc = 1 if (.not.trunc) nuc = ns2e c---size of arnoldi-vector tables--- nv = min0(ns1,ns2e+1) nvt = nv if (iqr .and. .not.trunc) nvt = nv - 1 if (iqr .and. trunc) nvt = 1 c---number of givens rotations to store--- nrot = min0(ns1,ns2e) c---number of elts of z-vector to store--- if ( trunc) nzc = 2 if (.not.trunc) nzc = ns2e + 1 c c------------------------------memory layout--------------------------- c ihess = 1 ipt = ihess + nhess*nbwh if (.not.evadpt) ipt = ihess ip = ipt + n*nvt if (.not.iqr) ip = ipt izc = ip + n*nv icos = izc + nzc isin = icos + nrot iy = isin + nrot iu = iy + ns2e if (trunc .or. .not.uneed) iu = iy ipz = iu + nbwuh*nuc ipf = ipz + ns2e+1 if (trunc) ipf = ipz iz = ipf + nv iw = iz + n iwf = iw + n*nv ixi = iwf + nv iv1 = ixi + n if (.not. trunc) iv1 = iw iv2 = iv1 + n nwusd = max0(nwusd,iv2+n-1) c c --- check the memory usage --- c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 rstrtd = .true. c c-------------------------------------------------------------------- c---------------------------begin iteration loop--------------------- c-------------------------------------------------------------------- c c handle first iteration after restart ... c 10 call inithv (1) zdhav = .not.(trunc.and..not.exact) .and. in.ne.0 if (.not. rstrtd) go to 100 c ---get resid--- if (.not. zhave) then if (iql) then call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iz)) else call suba (coef,jcoef,wfac,jwfac,n,u,wk(iz)) call vexopy (n,wk(iz),rhs,wk(iz),2) end if zhave = .true. end if c ---get resid norm--- if (.not. zdhav) then zdot = vdot (n,wk(iz),wk(iz)) zdhav = .true. end if if (zdot .lt. 0e0) go to 994 vnorm = sqrt(zdot) if (vnorm .lt. srelpr**2) go to 997 call vcopy (n,wk(iz),wk(indp(is))) wk(indpf(is)) = 1e0/vnorm wk(indzc(is+1)) = vnorm c c --- perform stopping test --- c 100 nwpstp = nw - (iv1-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,wk(iz),xxx, a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c rstrt tells us whether this is the last step before restarting ... rstrt = (is+1 .eq. ns2) if (evadpt .and. is.eq.0) call vfill (nhess*nbwh,wk(ihess),0e0) c c-----------------------compute the new arnoldi vector---------------- c c pn(is+1)*p(is+1) = a*p(is) + sum (i=0 to is) (beta(is+1,i)*p(i)), c c---get a times old vec--- if (iqr) call subqr (coef,jcoef,wfac,jwfac,n,wk(indp (is)), a wk(indpt(is))) if (iql) then call suba (coef,jcoef,wfac,jwfac,n,wk(indpt(is)),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) else call suba (coef,jcoef,wfac,jwfac,n,wk(indpt(is)),wk(iv2)) end if apf = wk(indpf(is)) c---compute arnoldi vector--- ibeg = max0(is+1-ns1,0) iend = is if (ibeg .gt. 0) wk(indu(ibeg,is+1)) = 0e0 pfnew = apf do 199 i = ibeg,iend h = vdot (n,wk(indp(i)),wk(iv2)) * wk(indpf(i))*apf wk(indu(i+1,is+1)) = h if (evadpt) wk(indhes(i+1,is+1)) = h if (i .eq. ibeg) call vtriad (n,wk(indp(is+1)),wk(iv2), a -h*wk(indpf(i))/pfnew,wk(indp(i)),1) if (i .ne. ibeg) call vtriad (n,wk(indp(is+1)),wk(indp(is+1)), a -h*wk(indpf(i))/pfnew,wk(indp(i)),1) 199 continue wk(indpf(is+1)) = pfnew c---get norm--- dot = vdot (n,wk(indp(is+1)),wk(indp(is+1))) * pfnew**2 vnorm = sqrt(dot) if (vnorm .lt. srelpr**2) go to 192 wk(indu(is+2,is+1)) = vnorm if (evadpt) wk(indhes(is+2,is+1)) = vnorm c---scale--- wk(indpf(is+1)) = wk(indpf(is+1))/vnorm if (abs(wk(indpf(is+1))).lt.srelpr .or. a abs(wk(indpf(is+1))).gt.1e0/srelpr) then call vtriad (n,wk(indp(is+1)),xxx,wk(indpf(is+1)), a wk(indp(is+1)),2) wk(indpf(is+1)) = 1e0 end if c c-----------------------update the qr factorization------------------- c 192 continue c---apply old rotations--- ibgn = max(0,is-ns1) iuold = indu(ibgn+1,is+1) do 7977 i = ibgn, is-1 iunew = indu(i+2,is+1) ut = wk(iuold) h = wk(iunew) ctmp = wk(indc(i+1)) stmp = wk(inds(i+1)) wk(iuold) = ctmp*ut + stmp*h wk(iunew) = -stmp*ut + ctmp*h 7977 iuold = iunew iunew = indu(is+2,is+1) c---calc new rotation--- v1 = wk(iuold) v2 = wk(iunew) denom = sqrt (v1**2 + v2**2) if (denom .lt. srelpr) go to 998 wk(indc(is+1)) = v1/denom wk(inds(is+1)) = v2/denom c---apply new rotation--- wk(iuold) = denom wk(iunew) = 0e0 c c--------------------------compute w, if needed------------------------ c uc = wk(indu(is+1,is+1)) if (abs(uc) .lt. srelpr**2) go to 998 if (.not.trunc) go to 572 c c ... case of explicit w calc ... c if (is .eq. 0) then call vcopy (n,wk(indpt(is)),wk(indw(1))) wk(indwf(is+1)) = wk(indpf(is))/uc c call vtriad (n,wk(indw(is+1)),xxx,1.0/uc,wk(indpt(is)),2) go to 572 end if wfnew = wk(indpf(is)) ibeg = max0(1,is+1-ns1) iend = is do 574 i = ibeg, iend if (i .eq. ibeg) call vtriad (n,wk(indw(is+1)),wk(indpt(is)), a -wk(indu(i,is+1))*wk(indwf(i))/wfnew,wk(indw(i)),1) if (i .ne. ibeg) call vtriad (n,wk(indw(is+1)),wk(indw(is+1)), a -wk(indu(i,is+1))*wk(indwf(i))/wfnew,wk(indw(i)),1) 574 continue wk(indwf(is+1)) = wfnew/uc if (abs(wk(indwf(is+1))).lt.srelpr .or. a abs(wk(indwf(is+1))).gt.1e0/srelpr) then call vtriad (n,wk(indw(is+1)),xxx,wk(indwf(is+1)), a wk(indw(is+1)),2) wk(indwf(is+1)) = 1e0 end if 572 continue c c---get new zc entries--- wk(indzc(is+2)) = -wk(inds(is+1))*wk(indzc(is+1)) wk(indzc(is+1)) = wk(indc(is+1))*wk(indzc(is+1)) c c------------------- u-vector computation section --------------------- c if (trunc) then c c---truncated case--- call vtriad (n,u,u,wk(indzc(is+1))*wk(indwf(is+1)), . wk(indw(is+1)),1) else c c---non-truncated case--- if (.not.(uneed .or. rstrt)) go to 410 iynew = iv1 nwusd = max0(nwusd,iynew+ns2e-1) if (nwusd .gt. nw) go to 999 c ---do back solve on u-matrix--- nm = is + 1 do 623 i = nm, 1, -1 sum = wk(indzc(i)) do 624 j = i+1, nm 624 sum = sum - wk(iynew-1+j)*wk(indu(i,j)) 623 wk(iynew-1+i) = sum/wk(indu(i,i)) c ---form iterate--- do 625 i = 0, nm-1 val = wk(iynew+i) if (uneed .and. i.ne.nm-1) val = val - wk(iy+i) 625 call vtriad (n,u,u,val*wk(indpf(i)),wk(indpt(i)),1) if (uneed) call vcopy (nm,wk(iynew),wk(iy)) endif 410 continue c c--------------------- residual computation section ------------------- c zhvold = zhave zhave = .false. if (trunc) go to 671 c c---non-truncated case--- c c do it if resid needed by pstop or if restarting. if (zneed .or. rstrt) then ipznew = iv1 nwusd = max0(nwusd,ipznew+ns2e) if (nwusd .gt. nw) go to 999 call vcopy (is+1,wk(izc),wk(ipznew)) wk(ipznew+is+1) = 0e0 c ---apply rotations--- do 644 i = is+1, 1, -1 v1 = wk(indc(i))*wk(ipznew+i-1) - wk(inds(i))*wk(ipznew+i) v2 = wk(inds(i))*wk(ipznew+i-1) + wk(indc(i))*wk(ipznew+i) wk(ipznew+i-1) = v1 644 wk(ipznew+i) = v2 c ---form resid--- do 645 i = 0, is+1 val = wk(ipznew+i) if (zhvold .and. i.ne.is+1) val = val - wk(ipz) 645 call vtriad (n,wk(iz),wk(iz),-val*wk(indpf(i)), a wk(indp(i)),1) call vcopy (is+2,wk(ipznew),wk(ipz)) zhave = .true. end if go to 425 c c---truncated case--- c c do it if pstop needs it or if we may restart later. 671 if ( zneed .or. (itmax.gt.ns2) ) then c ---update xi--- if (is .eq. 0) then call vcopy (n,wk(indp(is)),wk(ixi)) xif = wk(indpf(is)) else xif = xif*(-wk(inds(is))) call vtriad (n,wk(ixi),wk(ixi), a wk(indc(is))*wk(indpf(is))/xif,wk(indp(is)),1) end if if (abs(xif).lt.srelpr .or. a abs(xif).gt.1e0/srelpr) then call vtriad (n,wk(ixi),xxx,xif,wk(ixi),2) xif = 1e0 end if c ---form resid--- call vtriad (n,wk(iz),wk(iz), . -wk(indzc(is+1))*wk(indc(is+1))*xif,wk(ixi),1) call vtriad (n,wk(iz),wk(iz), . -wk(indzc(is+1))*wk(inds(is+1))*wk(indpf(is+1)), a wk(indp(is+1)),1) zhave = .true. endif 425 continue c c---get resid norm--- c if (exact) then zdot = wk(indzc(is+2))**2 end if c c--------------------------------ev est------------------------------- c if (evadpt) then nwhe = nw - (iv1-1) call hesest (wk(ihess),nhess,nv+2,is+1,iadpt,havest, a emaxnw,eminnw,wk(iv1),nwhe,ier) nwusd = max0(nwusd,iv1-1+nwhe) if (ier .ne. 0) go to 996 if (.not. havest) go to 874 if (hadest) go to 876 if (maxadd) emax = emaxnw if (minadd) emin = eminnw hadest = .true. go to 874 876 if (maxadd) emax = amax1 (emax,emaxnw) if (minadd) emin = amin1 (emin,eminnw) end if c c-------------------------finish up the iteration---------------------- c 874 in = in + 1 is = is + 1 if (rstrt) is = 0 rstrtd = rstrt go to 10 c c---------------------------------------------------------------------- c------------------------------wrap it up------------------------------ c---------------------------------------------------------------------- c c---form u, if not up-to-date--- c 900 if (uneed .or. rstrtd .or. trunc) go to 901 iynew = iv1 nwusd = max0(nwusd,iynew+ns2e-1) if (nwusd .gt. nw) go to 999 c ---do back solve on u-matrix--- nm = is do 663 i = nm, 1, -1 sum = wk(indzc(i)) do 664 j = i+1, nm 664 sum = sum - wk(iynew-1+j)*wk(indu(i,j)) 663 wk(iynew-1+i) = sum/wk(indu(i,i)) c ---form iterate--- do 665 i = 0, nm-1 val = wk(iynew+i) 665 call vtriad (n,u,u,val*wk(indpf(i)),wk(indpt(i)),1) c c---------------------------------------------------------------------- c-----------------------------head out of here------------------------- c---------------------------------------------------------------------- c 901 continue if (halt) go to 715 ier = 1 call ershow (ier,'gmresw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' gmres converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c----------------------------error returns----------------------------- c 994 ier = -15 call ershow (ier,'gmresw') return c 995 ier = -16 call ershow (ier,'gmresw') return c 996 call ershow (ier,'gmresw') go to 735 c 997 ier = -13 call ershow (ier,'gmresw') go to 725 c 998 ier = -14 call ershow (ier,'gmresw') go to 725 c 999 ier = -2 call ershow (ier,'gmresw') go to 735 c end subroutine uslqw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c code to run the usymlq algorithm. see: m. a. saunders, h. d. simon c and e. l. yip, "two conjugate-gradient-type methods for sparse c unsymmetric linear equations, report eta-tr-18, boeing computer c services, seattle, washington, 1984, to appear in siam journal on c numerical analysis. c c note -- this routine is still not quite optimal. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) integer vect1, vect2, os logical uneed external suba, subat, subql, subqlt, subqr, subqrt dimension iparm(30), rparm(30) logical iql, iqr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c