c     $Id: convsetsf,v 1.1.2.1 2003/03/13 18:56:54 muchomas Exp $
c     $Log: convsetsf,v $
c     Revision 1.1.2.1  2003/03/13 18:56:54  muchomas
c     Move convsets.f to fortran-source text file convsetsf for use with f2c.
c     Put f2c result in convsets.c, so as to eliminate need for fortran compilers.
c
c     Revision 1.1.2.2  2002/10/28 22:07:29  matthew
c     Non-orthogonal Laplaciancvs add coeff_d.c!  Works for orthorhombic cells, still in need of debugging for other unit cells.
c
c     Revision 1.1.1.1  2001/04/18 17:15:14  daykov
c     starting project
c
c     Revision 1.1  1999/09/14 19:40:30  torkel
c     Initial revision
c
c     Revision 1.12  1999/07/04 20:48:04  torkel
c     Full O implemented.
c
c     Revision 1.11  1999/07/02 07:04:05  torkel
c     the convolution in Osame seems to work for this version.
c
c     Revision 1.10  1999/07/02 03:11:48  torkel
c     In the middle of making Osame work
c
c     Revision 1.9  1999/06/23 22:20:05  torkel
c     revised version where Obelow is dagger of Oabove for grid without
c     >> siblings.
c
c     Revision 1.8  1999/06/21 18:56:14  torkel
c     updated version for multilevel operators
c
c     Revision 1.7  1999/06/15 23:24:45  torkel
c     debugging, trying to make Obelow work
c
c     Revision 1.6  1999/06/11 12:53:50  torkel
c     including convolutions for obelow
c
c     Revision 1.4  1999/04/25 22:32:56  muchomas
c     *** empty log message ***
c
c     Revision 1.3  1999/04/25 18:35:39  muchomas
c     Contains new routines for same-level operator convolutions
c     Working toward more arbitrary length convolution routines
c
c     Revision 1.2  1999/04/20 15:59:37  muchomas
c     Fully documented code
c
c     Revision 1.1  1999/04/19 18:12:11  muchomas
c     Initial revision
c
c     Revision 1.6  1999/04/17 22:00:09  muchomas
c     Working dagger operators, still slow...
c
c     Revision 1.5  1999/04/14 19:48:37  muchomas
c     Working transpose transform, but doesn't take input/output arrays -- yet!
c
c     Revision 1.4  1999/04/13 15:11:36  muchomas
c     Does ghosts, making compact version, but still in larger array
c
c     Revision 1.3  1999/04/13 14:32:53  muchomas
c     Idag that computes only only ghosts, leaving sparse data distribution
c
c     Revision 1.2  1999/04/11 19:53:46  muchomas
c     Working forward/inverse xform package
c
c     Revision 1.1  1999/04/10 23:04:29  muchomas
c     Initial revision
c
c     Revision 1.3  1999/04/10 22:40:41  muchomas
c     Now using log functionality
c


c     Back and forth, linear convolution of sets of points using padding
c        for filters of extent +/- 3 (IN PLACE)
c
c     out: input/output array for in place computation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers in output array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsets3(out,outm3,outm2,outm1,outp1,outp2,outp3,
     &     area,stride,n,npad,tr,tf)
      implicit none

      double precision out(*)
      double precision outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*)
      integer stride,area,n,npad
      double precision tr(*),tf(*)
      double precision t0,t1,t2,t3

      integer i,j,out2

      
c     First, make sure pad is zero
      do i=n,npad-1
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=0.
         enddo
      enddo
         
c     Reverse phase
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)
      do i=npad-1,3,-1
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=
     &           t0*out(j)+t1*outm1(j)+t2*outm2(j)+t3*outm3(j)
         enddo
      enddo

      out2=2*stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)+t1*outm1(j)+t2*outm2(j)
      enddo

      out2=stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)+t1*outm1(j)
      enddo
      
      out2=0
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)
      enddo

c     Forward phase (padding now ensures room!)
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      do i=0,n-1
         out2=i*stride

         do j=1+out2,area+out2
            out(j)=
     &           t0*out(j)+t1*outp1(j)+t2*outp2(j)+t3*outp3(j)
         enddo
      enddo

      return
      end


c     Back and forth, linear convolution of sets of points using padding
c        for filters of extent +/- 5 (IN PLACE)
c
c     out: input/output array for in place computation
c     outm5-outp5,out: pointers in in/output array to
c         start of data sets
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsets5(out,outm5,outm4,outm3,outm2,outm1,
     &     outp1,outp2,outp3,outp4,outp5,
     &     area,stride,n,npad,tr,tf)
      implicit none

      double precision out(*)
      double precision outm5(*),outm4(*),outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*),outp4(*),outp5(*)
      integer stride,area,n,npad
      double precision tr(*),tf(*)
      double precision t0,t1,t2,t3,t4,t5

      integer i,j,out2

      
c     First, make sure pad is zero
      do i=n,npad-1
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=0.
         enddo
      enddo
      
c     Reverse phase
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)
      t4=tr(5)
      t5=tr(6)


      do i=npad-1,5,-1
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=
     $           t0*out(j)+t1*outm1(j)+t2*outm2(j)+t3*outm3(j)
     $           +t4*outm4(j)+t5*outm5(j)
         enddo
      enddo
      
      out2=4*stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)+t1*outm1(j)+t2*outm2(j)+t3*outm3(j)+t4*outm4(j)
      enddo

      out2=3*stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)+t1*outm1(j)+t2*outm2(j)+t3*outm3(j)
      enddo

      out2=2*stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)+t1*outm1(j)+t2*outm2(j)
      enddo

      out2=1*stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)+t1*outm1(j)
      enddo
      
      out2=0*stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)
      enddo

c     Forward phase (padding now ensures room!)
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      t4=tf(5)
      t5=tf(6)
      do i=0,n-1
         out2=i*stride

         do j=1+out2,area+out2
            out(j)=
     &           t0*out(j)+t1*outp1(j)+t2*outp2(j)+t3*outp3(j)
     $           +t4*outp4(j)+t5*outp5(j)
         enddo
      enddo

      return
      end


c     Back and forth, linear convolution of sets of points using padding
c        for filters of extent +/- 5 (IN PLACE)
c
c     out: input/output array for in place computation
c     outm5-outp5,out: pointers in in/output array to
c         start of data sets
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetsper5(out,outm5,outm4,outm3,outm2,outm1,
     &     outp1,outp2,outp3,outp4,outp5,
     &     area,stride,n,npad,tr,tf)
      implicit none
      
      double precision out(*)
      double precision outm5(*),outm4(*),outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*),outp4(*),outp5(*)
      integer stride,area,n,npad
      double precision tr(*),tf(*)
      double precision t0,t1,t2,t3,t4,t5

      integer i,j,out2

      
c     First, make sure pad is zero
      do i=n,npad-1
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=0.
         enddo
      enddo
      
c     Reverse phase
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)
      t4=tr(5)
      t5=tr(6)

      i=npad-1
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t0*out(j-(i-4)*stride)+t1*out(j-(i-3)*stride)
     $        +t2*out(j-(i-2)*stride)+t3*out(j-(i-1)*stride)
     $        +t4*out(j-i*stride)+t5*outm5(j)
      enddo

      i=npad-2
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t0*out(j-(i-3)*stride)+t1*out(j-(i-2)*stride)
     $        +t2*out(j-(i-1)*stride)+t3*out(j-i*stride)
     $        +t4*outm4(j)+t5*outm5(j)
      enddo

      i=npad-3
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t0*out(j-(i-2)*stride)+t1*out(j-(i-1)*stride)
     $        +t2*out(j-i*stride)+t3*outm3(j)
     $        +t4*outm4(j)+t5*outm5(j)
      enddo

      i=npad-4
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t0*out(j-(i-1)*stride)+t1*out(j-i*stride)
     $        +t2*outm2(j)+t3*outm3(j)
     $        +t4*outm4(j)+t5*outm5(j)
      enddo

      i=npad-5
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t0*out(j-i*stride)+t1*outm1(j)
     $        +t2*outm2(j)+t3*outm3(j)
     $        +t4*outm4(j)+t5*outm5(j)
      enddo

      do i=npad-6,5,-1
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=
     $           t0*out(j)+t1*outm1(j)+t2*outm2(j)+t3*outm3(j)
     $           +t4*outm4(j)+t5*outm5(j)
         enddo
      enddo
      
      out2=4*stride
      do j=1+out2,area+out2
         out(j)=out(j+(npad-5)*stride)
c     &        t0*out(j)+t1*outm1(j)+t2*outm2(j)+t3*outm3(j)+t4*outm4(j)
      enddo

      out2=3*stride
      do j=1+out2,area+out2
         out(j)=out(j+(npad-5)*stride)
c     &        t0*out(j)+t1*outm1(j)+t2*outm2(j)+t3*outm3(j)
      enddo

      out2=2*stride
      do j=1+out2,area+out2
         out(j)=out(j+(npad-5)*stride)
c     &        t0*out(j)+t1*outm1(j)+t2*outm2(j)
      enddo

      out2=1*stride
      do j=1+out2,area+out2
         out(j)=out(j+(npad-5)*stride)
c     &        t0*out(j)+t1*outm1(j)
      enddo
      
      out2=0*stride
      do j=1+out2,area+out2
         out(j)=out(j+(npad-5)*stride)
c     &        t0*out(j)
      enddo

c     Forward phase (padding now ensures room!)
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      t4=tf(5)
      t5=tf(6)
      do i=0,n-1
         out2=i*stride

         do j=1+out2,area+out2
            out(j)=
     &           t0*out(j)+t1*outp1(j)+t2*outp2(j)+t3*outp3(j)
     $           +t4*outp4(j)+t5*outp5(j)
         enddo
      enddo

      return
      end


c     Back and forth, linear convolution of sets of points using padding
c        for filters of extent +/- 5 (IN PLACE)
c
c     out: input/output array for in place computation
c     outm5-outp5,out: pointers in in/output array to
c         start of data sets
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetsio5(datout,
     &     work,workm5,workm4,workm3,workm2,workm1,
     &     workp1,workp2,workp3,workp4,workp5,
     &     area,stride,n,npad,
     &     stridel,tr,tf)
      
      double precision datout(*)
      double precision work(*)
      double precision workm5(*),workm4(*),workm3(*),workm2(*),workm1(*)
      double precision workp1(*),workp2(*),workp3(*),workp4(*),workp5(*)
      integer stride,area,n,npad,stridel
      double precision tr(*),tf(*)
      double precision t0,t1,t2,t3,t4,t5

      integer i,j,work2,out2s,jl

c     First, make sure pad is zero
      do i=n,npad-1
         work2=i*stride
         do j=1+work2,area+work2
            work(j)=0.
         enddo
      enddo
      
c     Reverse phase
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)
      t4=tr(5)
      t5=tr(6)
      
      
      do i=npad-1,5,-1
         work2=i*stride
         do j=1+work2,area+work2
            work(j)=
     $           t0*work(j)+t1*workm1(j)+t2*workm2(j)+t3*workm3(j)
     $           +t4*workm4(j)+t5*workm5(j)
         enddo
      enddo
      
      work2=4*stride
      do j=1+work2,area+work2
         work(j)=
     &        t0*work(j)+t1*workm1(j)+t2*workm2(j)
     &        +t3*workm3(j)+t4*workm4(j)
      enddo
      
      work2=3*stride
      do j=1+work2,area+work2
         work(j)=
     &        t0*work(j)+t1*workm1(j)+t2*workm2(j)+t3*workm3(j)
      enddo
      
      work2=2*stride
      do j=1+work2,area+work2
         work(j)=
     &        t0*work(j)+t1*workm1(j)+t2*workm2(j)
      enddo
      
      work2=1*stride
      do j=1+work2,area+work2
         work(j)=
     &        t0*work(j)+t1*workm1(j)
      enddo
      
      work2=0*stride
      do j=1+work2,area+work2
         work(j)=
     &        t0*work(j)
      enddo

c     Forward phase (padding now ensures room!)
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      t4=tf(5)
      t5=tf(6)
      do i=0,n-1
         work2=i*stride
         out2s=i*(stridel-stride)

         do j=1+work2,area+work2
            jl=j+out2s
            datout(jl)=
     &           t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &           +t4*workp4(j)+t5*workp5(j)
         enddo
      enddo

      return
      end


c     Back and forth, linear convolution of sets of points using padding
c        for filters of extent +/- 5 (IN PLACE)
c
c     out: input/output array for in place computation
c     outm5-outp5,out: pointers in in/output array to
c         start of data sets
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetsio5full(datout,
     &     work,workm5,workm4,workm3,workm2,workm1,
     &     workp1,workp2,workp3,workp4,workp5,
     &     area,stride,n,npad,
     &     stridel,filter)
      
      double precision datout(*)
      double precision work(*)
      double precision workm5(*),workm4(*),workm3(*),workm2(*),workm1(*)
      double precision workp1(*),workp2(*),workp3(*),workp4(*),workp5(*)
      integer stride,area,n,npad,stridel
      double precision filter(*)
      double precision t0,t1,t2,t3,t4,t5

      integer i,j,work2,out2s,jl

c     First, make sure pad is zero
      do i=n,npad-1
         work2=i*stride
         do j=1+work2,area+work2
            work(j)=0.
         enddo
      enddo
      
c     Reverse phase
      t0=filter(1)
      t1=filter(2)
      t2=filter(3)
      t3=filter(4)
      t4=filter(5)
      t5=filter(6)

      if (t0 /= 0.0) then
c     symmetric filter
      i=0
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo

      i=1
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo

      i=2
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=t2*workm2(j)+t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo

      i=3
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=t3*workm3(j)
     &        +t2*workm2(j)+t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo
      
      i=4
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=t4*workm4(j)+t3*workm3(j)
     &        +t2*workm2(j)+t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo
      
      do i=5,n-6
         work2=i*stride
         out2s=i*(stridel-stride)
         
         do j=1+work2,area+work2
            jl=j+out2s
            datout(jl)=t5*workm5(j)+t4*workm4(j)+t3*workm3(j)
     &           +t2*workm2(j)+t1*workm1(j)
     &           +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &           +t4*workp4(j)+t5*workp5(j)
         enddo
      enddo
      
      i=n-5
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=t5*workm5(j)+t4*workm4(j)+t3*workm3(j)
     &        +t2*workm2(j)+t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)
      enddo

      i=n-4
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=t5*workm5(j)+t4*workm4(j)+t3*workm3(j)
     &        +t2*workm2(j)+t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
      enddo

      i=n-3
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=t5*workm5(j)+t4*workm4(j)+t3*workm3(j)
     &        +t2*workm2(j)+t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)
      enddo

      i=n-2
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=t5*workm5(j)+t4*workm4(j)+t3*workm3(j)
     &        +t2*workm2(j)+t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)
      enddo
      
      i=n-1
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=t5*workm5(j)+t4*workm4(j)+t3*workm3(j)
     &        +t2*workm2(j)+t1*workm1(j)+t0*work(j)
      enddo
c     end of symmetric part
      else
c     antisymmetric filter
      i=0
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo

      i=1
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=-1.*t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo

      i=2
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=-1.*t2*workm2(j)-t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo

      i=3
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=-1.*t3*workm3(j)
     &        -t2*workm2(j)-t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo
      
      i=4
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=-1.*t4*workm4(j)-t3*workm3(j)
     &        -t2*workm2(j)-t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo
      
      do i=5,n-6
         work2=i*stride
         out2s=i*(stridel-stride)
         
         do j=1+work2,area+work2
            jl=j+out2s
            datout(jl)=-1.*t5*workm5(j)-t4*workm4(j)-t3*workm3(j)
     &           -t2*workm2(j)-t1*workm1(j)
     &           +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &           +t4*workp4(j)+t5*workp5(j)
         enddo
      enddo
      
      i=n-5
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=-1.*t5*workm5(j)-t4*workm4(j)-t3*workm3(j)
     &        -t2*workm2(j)-t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)
      enddo

      i=n-4
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=-1.*t5*workm5(j)-t4*workm4(j)-t3*workm3(j)
     &        -t2*workm2(j)-t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
      enddo

      i=n-3
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=-1.*t5*workm5(j)-t4*workm4(j)-t3*workm3(j)
     &        -t2*workm2(j)-t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)
      enddo

      i=n-2
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=-1.*t5*workm5(j)-t4*workm4(j)-t3*workm3(j)
     &        -t2*workm2(j)-t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)
      enddo
      
      i=n-1
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=-1.*t5*workm5(j)-t4*workm4(j)-t3*workm3(j)
     &        -t2*workm2(j)-t1*workm1(j)+t0*work(j)
      enddo
c     end of antisymmetric part
      endif

      return
      end


c     Back and forth, linear convolution of sets of points using padding
c        for filters of extent +/- 5 (IN PLACE)
c
c     out: input/output array for in place computation
c     outm5-outp5,out: pointers in in/output array to
c         start of data sets
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetsioppercum5(datout,
     &     work,workm5,workm4,workm3,workm2,workm1,
     &     workp1,workp2,workp3,workp4,workp5,
     &     area,stride,n,npad,
     &     stridel,tr,tf)
      
      
      double precision datout(*)
      double precision work(*)
      double precision workm5(*),workm4(*),workm3(*),workm2(*),workm1(*)
      double precision workp1(*),workp2(*),workp3(*),workp4(*),workp5(*)
      integer stride,area,n,npad,stridel
      double precision tr(*),tf(*)
      double precision t0,t1,t2,t3,t4,t5

      integer i,j,work2,out2s,jl

c     First, make sure pad is zero
      do i=n,npad-1
         work2=i*stride
         do j=1+work2,area+work2
            work(j)=0.
         enddo
      enddo
      
c     Reverse phase
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)
      t4=tr(5)
      t5=tr(6)

      i=npad-1
      work2=i*stride
      do j=1+work2,area+work2
         work(j)=t0*work(j-(i-4)*stride)+t1*work(j-(i-3)*stride)
     $        +t2*work(j-(i-2)*stride)+t3*work(j-(i-1)*stride)
     $        +t4*work(j-i*stride)+t5*workm5(j)
      enddo

      i=npad-2
      work2=i*stride
      do j=1+work2,area+work2
         work(j)=t0*work(j-(i-3)*stride)+t1*work(j-(i-2)*stride)
     $        +t2*work(j-(i-1)*stride)+t3*work(j-i*stride)
     $        +t4*workm4(j)+t5*workm5(j)
      enddo

      i=npad-3
      work2=i*stride
      do j=1+work2,area+work2
         work(j)=t0*work(j-(i-2)*stride)+t1*work(j-(i-1)*stride)
     $        +t2*work(j-i*stride)+t3*workm3(j)
     $        +t4*workm4(j)+t5*workm5(j)
      enddo

      i=npad-4
      work2=i*stride
      do j=1+work2,area+work2
         work(j)=t0*work(j-(i-1)*stride)+t1*work(j-i*stride)
     $        +t2*workm2(j)+t3*workm3(j)
     $        +t4*workm4(j)+t5*workm5(j)
      enddo

      i=npad-5
      work2=i*stride
      do j=1+work2,area+work2
         work(j)=t0*work(j-i*stride)+t1*workm1(j)
     $        +t2*workm2(j)+t3*workm3(j)
     $        +t4*workm4(j)+t5*workm5(j)
      enddo

      do i=npad-6,5,-1
         work2=i*stride
         do j=1+work2,area+work2
            work(j)=
     $           t0*work(j)+t1*workm1(j)+t2*workm2(j)+t3*workm3(j)
     $           +t4*workm4(j)+t5*workm5(j)
         enddo
      enddo
      
      work2=4*stride
      do j=1+work2,area+work2
         work(j)=work(j+(npad-5)*stride)
c     &        t0*work(j)+t1*workm1(j)+t2*workm2(j)+t3*workm3(j)+t4*workm4(j)
      enddo

      work2=3*stride
      do j=1+work2,area+work2
         work(j)=work(j+(npad-5)*stride)
c     &        t0*work(j)+t1*workm1(j)+t2*workm2(j)+t3*workm3(j)
      enddo

      work2=2*stride
      do j=1+work2,area+work2
         work(j)=work(j+(npad-5)*stride)
c     &        t0*work(j)+t1*workm1(j)+t2*workm2(j)
      enddo

      work2=1*stride
      do j=1+work2,area+work2
         work(j)=work(j+(npad-5)*stride)
c     &        t0*work(j)+t1*workm1(j)
      enddo
      
      work2=0*stride
      do j=1+work2,area+work2
         work(j)=work(j+(npad-5)*stride)
c     &        t0*work(j)
      enddo
           
c     Forward phase (padding now ensures room!)
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      t4=tf(5)
      t5=tf(6)
      do i=0,n-1
         work2=i*stride
         out2s=i*(stridel-stride)

         do j=1+work2,area+work2
            jl=j+out2s
            datout(jl)=
     &           t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &           +t4*workp4(j)+t5*workp5(j)
         enddo
      enddo

      return
      end


c     Back and forth, linear convolution of sets of points using padding
c        for filters of extent +/- 5 (IN PLACE)
c
c     out: input/output array for in place computation
c     outm5-outp5,out: pointers in in/output array to
c         start of data sets
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetsiop5(datout,
     &     work,workm5,workm4,workm3,workm2,workm1,
     &     workp1,workp2,workp3,workp4,workp5,
     &     area,stride,n,npad,
     &     stridel,tr,tf)
      
      double precision datout(*)
      double precision work(*)
      double precision workm5(*),workm4(*),workm3(*),workm2(*),workm1(*)
      double precision workp1(*),workp2(*),workp3(*),workp4(*),workp5(*)
      integer stride,area,n,npad,stridel
      double precision tr(*),tf(*)
      double precision t0,t1,t2,t3,t4,t5

      integer i,j,work2,out2s,jl

c     First, make sure pad is zero
      do i=n,npad-1
         work2=i*stride
         do j=1+work2,area+work2
            work(j)=0.
         enddo
      enddo
      
c     Reverse phase
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)
      t4=tr(5)
      t5=tr(6)
      
      
      do i=npad-1,5,-1
         work2=i*stride
         do j=1+work2,area+work2
            work(j)=
     $           t0*work(j)+t1*workm1(j)+t2*workm2(j)+t3*workm3(j)
     $           +t4*workm4(j)+t5*workm5(j)
         enddo
      enddo
      
      work2=4*stride
      do j=1+work2,area+work2
         work(j)=
     &        t0*work(j)+t1*workm1(j)+t2*workm2(j)
     &        +t3*workm3(j)+t4*workm4(j)
      enddo
      
      work2=3*stride
      do j=1+work2,area+work2
         work(j)=
     &        t0*work(j)+t1*workm1(j)+t2*workm2(j)+t3*workm3(j)
      enddo
      
      work2=2*stride
      do j=1+work2,area+work2
         work(j)=
     &        t0*work(j)+t1*workm1(j)+t2*workm2(j)
      enddo
      
      work2=1*stride
      do j=1+work2,area+work2
         work(j)=
     &        t0*work(j)+t1*workm1(j)
      enddo
      
      work2=0*stride
      do j=1+work2,area+work2
         work(j)=
     &        t0*work(j)
      enddo

c     Forward phase (padding now ensures room!)
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      t4=tf(5)
      t5=tf(6)
      do i=0,n-1
         work2=i*stride
         out2s=i*(stridel-stride)

         do j=1+work2,area+work2
            jl=j+out2s
            datout(jl)=datout(jl)+
     &           t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &           +t4*workp4(j)+t5*workp5(j)
         enddo
      enddo

      return
      end


c     Back and forth, linear convolution of sets of points using padding
c        for filters of extent +/- 5 (IN PLACE)
c
c     out: input/output array for in place computation
c     outm5-outp5,out: pointers in in/output array to
c         start of data sets
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetsiop5full(datout,
     &     work,workm5,workm4,workm3,workm2,workm1,
     &     workp1,workp2,workp3,workp4,workp5,
     &     area,stride,n,npad,
     &     stridel,filter)
      
      double precision datout(*)
      double precision work(*)
      double precision workm5(*),workm4(*),workm3(*),workm2(*),workm1(*)
      double precision workp1(*),workp2(*),workp3(*),workp4(*),workp5(*)
      integer stride,area,n,npad,stridel
      double precision filter(*)
      double precision t0,t1,t2,t3,t4,t5

      integer i,j,work2,out2s,jl

c     First, make sure pad is zero
      do i=n,npad-1
         work2=i*stride
         do j=1+work2,area+work2
            work(j)=0.
         enddo
      enddo
      
c     Reverse phase
      t0=filter(1)
      t1=filter(2)
      t2=filter(3)
      t3=filter(4)
      t4=filter(5)
      t5=filter(6)

      if (t0 /= 0.0) then
c     symmetric part
      i=0
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)+t0*work(j)+t1*workp1(j)+t2*workp2(j)
     &        +t3*workp3(j)+t4*workp4(j)+t5*workp5(j)
      enddo

      i=1
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)+t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo

      i=2
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)+t2*workm2(j)+t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo

      i=3
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)+t3*workm3(j)
     &        +t2*workm2(j)+t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo
      
      i=4
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)+t4*workm4(j)+t3*workm3(j)
     &        +t2*workm2(j)+t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo
      
      do i=5,n-6
         work2=i*stride
         out2s=i*(stridel-stride)
         
         do j=1+work2,area+work2
            jl=j+out2s
            datout(jl)=datout(jl)+t5*workm5(j)+t4*workm4(j)+t3*workm3(j)
     &           +t2*workm2(j)+t1*workm1(j)
     &           +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &           +t4*workp4(j)+t5*workp5(j)
         enddo
      enddo
      
      i=n-5
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)+t5*workm5(j)+t4*workm4(j)+t3*workm3(j)
     &        +t2*workm2(j)+t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)
      enddo

      i=n-4
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)+t5*workm5(j)+t4*workm4(j)+t3*workm3(j)
     &        +t2*workm2(j)+t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
      enddo

      i=n-3
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)+t5*workm5(j)+t4*workm4(j)+t3*workm3(j)
     &        +t2*workm2(j)+t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)
      enddo

      i=n-2
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)+t5*workm5(j)+t4*workm4(j)+t3*workm3(j)
     &        +t2*workm2(j)+t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)
      enddo
      
      i=n-1
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)+t5*workm5(j)+t4*workm4(j)+t3*workm3(j)
     &        +t2*workm2(j)+t1*workm1(j)+t0*work(j)
      enddo
c     end of symmetric part
      else
c     antisymmetric part
      i=0
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)+t0*work(j)+t1*workp1(j)+t2*workp2(j)
     &        +t3*workp3(j)+t4*workp4(j)+t5*workp5(j)
      enddo

      i=1
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)-t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo

      i=2
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)-t2*workm2(j)-t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo

      i=3
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)-t3*workm3(j)
     &        -t2*workm2(j)-t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo
      
      i=4
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)-t4*workm4(j)-t3*workm3(j)
     &        -t2*workm2(j)-t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)+t5*workp5(j)
      enddo
      
      do i=5,n-6
         work2=i*stride
         out2s=i*(stridel-stride)
         
         do j=1+work2,area+work2
            jl=j+out2s
            datout(jl)=datout(jl)-t5*workm5(j)-t4*workm4(j)-t3*workm3(j)
     &           -t2*workm2(j)-t1*workm1(j)
     &           +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &           +t4*workp4(j)+t5*workp5(j)
         enddo
      enddo
      
      i=n-5
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)-t5*workm5(j)-t4*workm4(j)-t3*workm3(j)
     &        -t2*workm2(j)-t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &        +t4*workp4(j)
      enddo

      i=n-4
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)-t5*workm5(j)-t4*workm4(j)-t3*workm3(j)
     &        -t2*workm2(j)-t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
      enddo

      i=n-3
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)-t5*workm5(j)-t4*workm4(j)-t3*workm3(j)
     &        -t2*workm2(j)-t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)+t2*workp2(j)
      enddo

      i=n-2
      work2=i*stride
      out2s=i*(stridel-stride)
      
      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)-t5*workm5(j)-t4*workm4(j)-t3*workm3(j)
     &        -t2*workm2(j)-t1*workm1(j)
     &        +t0*work(j)+t1*workp1(j)
      enddo
      
      i=n-1
      work2=i*stride
      out2s=i*(stridel-stride)

      do j=1+work2,area+work2
         jl=j+out2s
         datout(jl)=datout(jl)-t5*workm5(j)-t4*workm4(j)-t3*workm3(j)
     &        -t2*workm2(j)-t1*workm1(j)+t0*work(j)
      enddo
c     end of antisymmetric part
      end if

      return
      end

c     Back and forth, linear convolution of sets of points using padding
c        for filters of extent +/- 5 (IN PLACE)
c
c     out: input/output array for in place computation
c     outm5-outp5,out: pointers in in/output array to
c         start of data sets
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetsiopper5(datout,
     &     work,workm5,workm4,workm3,workm2,workm1,
     &     workp1,workp2,workp3,workp4,workp5,
     &     area,stride,n,npad,
     &     stridel,tr,tf)
      
      double precision datout(*)
      double precision work(*)
      double precision workm5(*),workm4(*),workm3(*),workm2(*),workm1(*)
      double precision workp1(*),workp2(*),workp3(*),workp4(*),workp5(*)
      integer stride,area,n,npad,stridel
      double precision tr(*),tf(*)
      double precision t0,t1,t2,t3,t4,t5

      integer i,j,work2,out2s,jl

c     First, make sure pad is zero
      do i=n,npad-1
         work2=i*stride
         do j=1+work2,area+work2
            work(j)=0.
         enddo
      enddo
      
c     Reverse phase
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)
      t4=tr(5)
      t5=tr(6)
      
      i=npad-1
      work2=i*stride
      do j=1+work2,area+work2
         work(j)=t0*work(j-(i-4)*stride)+t1*work(j-(i-3)*stride)
     $        +t2*work(j-(i-2)*stride)+t3*work(j-(i-1)*stride)
     $        +t4*work(j-i*stride)+t5*workm5(j)
      enddo

      i=npad-2
      work2=i*stride
      do j=1+work2,area+work2
         work(j)=t0*work(j-(i-3)*stride)+t1*work(j-(i-2)*stride)
     $        +t2*work(j-(i-1)*stride)+t3*work(j-i*stride)
     $        +t4*workm4(j)+t5*workm5(j)
      enddo

      i=npad-3
      work2=i*stride
      do j=1+work2,area+work2
         work(j)=t0*work(j-(i-2)*stride)+t1*work(j-(i-1)*stride)
     $        +t2*work(j-i*stride)+t3*workm3(j)
     $        +t4*workm4(j)+t5*workm5(j)
      enddo

      i=npad-4
      work2=i*stride
      do j=1+work2,area+work2
         work(j)=t0*work(j-(i-1)*stride)+t1*work(j-i*stride)
     $        +t2*workm2(j)+t3*workm3(j)
     $        +t4*workm4(j)+t5*workm5(j)
      enddo

      i=npad-5
      work2=i*stride
      do j=1+work2,area+work2
         work(j)=t0*work(j-i*stride)+t1*workm1(j)
     $        +t2*workm2(j)+t3*workm3(j)
     $        +t4*workm4(j)+t5*workm5(j)
      enddo

      do i=npad-6,5,-1
         work2=i*stride
         do j=1+work2,area+work2
            work(j)=
     $           t0*work(j)+t1*workm1(j)+t2*workm2(j)+t3*workm3(j)
     $           +t4*workm4(j)+t5*workm5(j)
         enddo
      enddo
      
      work2=4*stride
      do j=1+work2,area+work2
         work(j)=work(j+(npad-5)*stride)
c     &        t0*work(j)+t1*workm1(j)+t2*workm2(j)+t3*workm3(j)+t4*workm4(j)
      enddo

      work2=3*stride
      do j=1+work2,area+work2
         work(j)=work(j+(npad-5)*stride)
c     &        t0*work(j)+t1*workm1(j)+t2*workm2(j)+t3*workm3(j)
      enddo

      work2=2*stride
      do j=1+work2,area+work2
         work(j)=work(j+(npad-5)*stride)
c     &        t0*work(j)+t1*workm1(j)+t2*workm2(j)
      enddo

      work2=1*stride
      do j=1+work2,area+work2
         work(j)=work(j+(npad-5)*stride)
c     &        t0*work(j)+t1*workm1(j)
      enddo
      
      work2=0*stride
      do j=1+work2,area+work2
         work(j)=work(j+(npad-5)*stride)
c     &        t0*work(j)
      enddo
           
c     Forward phase (padding now ensures room!)
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      t4=tf(5)
      t5=tf(6)
      do i=0,n-1
         work2=i*stride
         out2s=i*(stridel-stride)

         do j=1+work2,area+work2
            jl=j+out2s
            datout(jl)=datout(jl)+
     &           t0*work(j)+t1*workp1(j)+t2*workp2(j)+t3*workp3(j)
     &           +t4*workp4(j)+t5*workp5(j)
         enddo
      enddo

      return
      end

c     Back and forth, linear convolution of sets of points using padding,
c     computing data ONLY on ghostpoints (IN PLACE)
c
c     out: input/output array for in place computation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers in output array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine pconvsets3(out,outm3,outm2,outm1,outp1,outp2,outp3,
     &     area,stride,n,npad,tr,tf)
      implicit none

      double precision out(*)
      double precision outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*)
      integer stride,area,n,npad
      double precision tr(*),tf(*)
      double precision t0,t1,t2,t3

      integer i,j,out2

      
c     First, make sure pad is zero for forward phase.
c     For pretty code the last number would be npad-1, but technically
c     npad-2 (for extra speed) is all that is needed.
      do i=n,npad-2
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=0.
         enddo
      enddo
         
c     Reverse phase
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)
      do i=n-1,3,-1
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=
     &           t0*out(j)+t1*outm1(j)+t2*outm2(j)+t3*outm3(j)
         enddo
      enddo

      out2=2*stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)+t1*outm1(j)+t2*outm2(j)
      enddo

      out2=stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)+t1*outm1(j)
      enddo
      
      out2=0
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)
      enddo

c     Forward phase (padding now ensures room!)
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      do i=0,n-1,2
         out2=i*stride

         do j=1+out2,area+out2
            out(j)=
     &           t0*out(j)+t1*outp1(j)+t2*outp2(j)+t3*outp3(j)
         enddo
      enddo

      return
      end

c     Back and forth, linear convolution of sets of points using padding,
c     computing data ONLY on ghostpoints (IN PLACE)
c
c     out: input/output array for in place computation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers in output array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine pconvsetsio3full(out,in,inm3,inm2,inm1,inp1,inp2,inp3,
     &     area,stride,n,npad,filt)
      implicit none

      double precision out(*)
      double precision in(*)
      double precision inm3(*),inm2(*),inm1(*)
      double precision inp1(*),inp2(*),inp3(*)
      integer stride,area,n,npad
      double precision filt(*)
      double precision t0,t1,t3

      integer i,j,out2

c     Forward phase (padding now ensures room!)
      t0=filt(1)
      t1=filt(2)
      t3=filt(4)

      i=0
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t3*inp3(j)
      enddo

      i=2
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=in(j)+t1*inp1(j)+t3*inp3(j)
      enddo

      do i=4,n-6,2
         out2=i*stride
         
         do j=1+out2,area+out2
            out(j)=t3*inm3(j)+t1*inm1(j)+
     &           in(j)+t1*inp1(j)+t3*inp3(j)
         enddo
      enddo

      i=n-4
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t3*inm3(j)+t1*inm1(j)+in(j)
      enddo
      
      i=n-2
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t3*inm3(j)
      enddo

      end

c     Back and forth, linear convolution of sets of points using padding,
c     computing data ONLY on ghostpoints (IN PLACE)
c
c     out: input/output array for in place computation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers in output array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine pconvsetsio8full(out,in,inm8,inm7,inm6,inm5,inm4,
     &     inm3,inm2,inm1,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     &     area,stride,n,npad,filt)
      implicit none

      double precision out(*)
      double precision in(*)
      double precision inm8(*),inm7(*),inm6(*),inm5(*),inm4(*)
      double precision inm3(*),inm2(*),inm1(*)
      double precision inp1(*),inp2(*),inp3(*)
      double precision inp4(*),inp5(*),inp6(*),inp7(*),inp8(*)
      integer stride,area,n,npad
      double precision filt(*)
      double precision t0,t2,t1,t3,t4,t5,t6,t7,t8

      integer i,j,out2

c     Forward phase (padding now ensures room!)
      t0=filt(1)
      t1=filt(2)
      t2=filt(3)
      t3=filt(4)
      t4=filt(5)
      t5=filt(6)
      t6=filt(7)
      t7=filt(8)
      t8=filt(9)

      if (t0 /= 0.0) then
c     symmetric part
      i=0
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t8*inp8(j)
      enddo

      i=2
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
      enddo

      i=4
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t4*inp4(j)+t5*inp5(j)+t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
      enddo

      i=6
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t2*inp2(j)+t3*inp3(j)+t4*inp4(j)
     &    +t5*inp5(j)+t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
      enddo

      i=8
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t0*in(j)+t1*inp1(j)+t2*inp2(j)+t3*inp3(j)+t4*inp4(j)
     &        +t5*inp5(j)+t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
      enddo

      i=10
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t2*inm2(j)+t1*inm1(j)+t0*in(j)+t1*inp1(j)
     &        +t2*inp2(j)+t3*inp3(j)+t4*inp4(j)
     &        +t5*inp5(j)+t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
      enddo

      i=12
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t4*inm4(j)+t3*inm3(j)+t2*inm2(j)+t1*inm1(j)
     &        +t0*in(j)+t1*inp1(j)+t2*inp2(j)+t3*inp3(j)+t4*inp4(j)
     &        +t5*inp5(j)+t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
      enddo

      i=14
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t6*inm6(j)+t5*inm5(j)+t4*inm4(j)+t3*inm3(j)
     &        +t2*inm2(j)+t1*inm1(j)+t0*in(j)+t1*inp1(j)
     &        +t2*inp2(j)+t3*inp3(j)+t4*inp4(j)
     &        +t5*inp5(j)+t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
      enddo

      do i=16,n-18,2
         out2=i*stride
         
         do j=1+out2,area+out2
            out(j)=t8*inm8(j)+t7*inm7(j)+t6*inm6(j)+t5*inm5(j)
     &           +t4*inm4(j)+t3*inm3(j)
     &           +t2*inm2(j)+t1*inm1(j)+t0*in(j)+t1*inp1(j)
     &           +t2*inp2(j)+t3*inp3(j)+t4*inp4(j)
     &           +t5*inp5(j)+t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
         enddo
      enddo

      i=n-16
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t7*inp7(j)+t6*inp6(j)+t5*inp5(j)+t4*inp4(j)
     &        +t3*inp3(j)+t2*inp2(j)+t1*inp1(j)+t0*in(j)
     &        +t1*inm1(j)+t2*inm2(j)+t3*inm3(j)+t4*inm4(j)
     &        +t5*inm5(j)+t6*inm6(j)+t7*inm7(j)+t8*inm8(j)
      enddo

      i=n-14
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t5*inp5(j)+t4*inp4(j)+t3*inp3(j)+t2*inp2(j)
     &        +t1*inp1(j)+t0*in(j)+t1*inm1(j)+t2*inm2(j)
     &        +t3*inm3(j)+t4*inm4(j)+t5*inm5(j)
     &        +t6*inm6(j)+t7*inm7(j)+t8*inm8(j)
      enddo

      i=n-12
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t3*inp3(j)+t2*inp2(j)+t1*inp1(j)+t0*in(j)
     &    +t1*inm1(j)+t2*inm2(j)+t3*inm3(j)+t4*inm4(j)+t5*inm5(j)
     &        +t6*inm6(j)+t7*inm7(j)+t8*inm8(j)
      enddo

      i=n-10
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t1*inp1(j)+t0*in(j)+t1*inm1(j)+t2*inm2(j)
     &        +t3*inm3(j)+t4*inm4(j)+t5*inm5(j)
     &        +t6*inm6(j)+t7*inm7(j)+t8*inm8(j)
      enddo

      i=n-8
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t1*inm1(j)+t2*inm2(j)+t3*inm3(j)+t4*inm4(j)+t5*inm5(j)
     &        +t6*inm6(j)+t7*inm7(j)+t8*inm8(j)
      enddo

      i=n-6
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t3*inm3(j)+t4*inm4(j)+t5*inm5(j)
     &        +t6*inm6(j)+t7*inm7(j)+t8*inm8(j)
      enddo

      i=n-4
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t5*inm5(j)+t6*inm6(j)+t7*inm7(j)+t8*inm8(j)
      enddo

      i=n-2
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t7*inm7(j)+t8*inm8(j)
      enddo
c     end of symmetric part
      else
c     antisymmetric part
      i=0
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t8*inp8(j)
      enddo

      i=2
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
      enddo

      i=4
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t4*inp4(j)+t5*inp5(j)+t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
      enddo

      i=6
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t2*inp2(j)+t3*inp3(j)+t4*inp4(j)
     &    +t5*inp5(j)+t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
      enddo

      i=8
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t0*in(j)+t1*inp1(j)+t2*inp2(j)+t3*inp3(j)+t4*inp4(j)
     &        +t5*inp5(j)+t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
      enddo

      i=10
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=-1.*t2*inm2(j)-t1*inm1(j)+t0*in(j)+t1*inp1(j)
     &        +t2*inp2(j)+t3*inp3(j)+t4*inp4(j)
     &        +t5*inp5(j)+t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
      enddo

      i=12
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=-1.*t4*inm4(j)-t3*inm3(j)-t2*inm2(j)-t1*inm1(j)
     &        +t0*in(j)+t1*inp1(j)+t2*inp2(j)+t3*inp3(j)+t4*inp4(j)
     &        +t5*inp5(j)+t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
      enddo

      i=14
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=-1.*t6*inm6(j)-t5*inm5(j)-t4*inm4(j)-t3*inm3(j)
     &        -t2*inm2(j)-t1*inm1(j)+t0*in(j)+t1*inp1(j)
     &        +t2*inp2(j)+t3*inp3(j)+t4*inp4(j)
     &        +t5*inp5(j)+t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
      enddo

      do i=16,n-18,2
         out2=i*stride
         
         do j=1+out2,area+out2
            out(j)=-1.*t8*inm8(j)-t7*inm7(j)-t6*inm6(j)-t5*inm5(j)
     &           -t4*inm4(j)-t3*inm3(j)
     &           -t2*inm2(j)-t1*inm1(j)+t0*in(j)+t1*inp1(j)
     &           +t2*inp2(j)+t3*inp3(j)+t4*inp4(j)
     &           +t5*inp5(j)+t6*inp6(j)+t7*inp7(j)+t8*inp8(j)
         enddo
      enddo

      i=n-16
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t7*inp7(j)+t6*inp6(j)+t5*inp5(j)+t4*inp4(j)
     &        +t3*inp3(j)+t2*inp2(j)+t1*inp1(j)+t0*in(j)
     &        -t1*inm1(j)-t2*inm2(j)-t3*inm3(j)-t4*inm4(j)
     &        -t5*inm5(j)-t6*inm6(j)-t7*inm7(j)-t8*inm8(j)
      enddo

      i=n-14
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t5*inp5(j)+t4*inp4(j)+t3*inp3(j)+t2*inp2(j)
     &        +t1*inp1(j)+t0*in(j)-t1*inm1(j)-t2*inm2(j)
     &        -t3*inm3(j)-t4*inm4(j)-t5*inm5(j)
     &        -t6*inm6(j)-t7*inm7(j)-t8*inm8(j)
      enddo

      i=n-12
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t3*inp3(j)+t2*inp2(j)+t1*inp1(j)+t0*in(j)
     &    -t1*inm1(j)-t2*inm2(j)-t3*inm3(j)-t4*inm4(j)-t5*inm5(j)
     &        -t6*inm6(j)-t7*inm7(j)-t8*inm8(j)
      enddo

      i=n-10
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=t1*inp1(j)+t0*in(j)-t1*inm1(j)-t2*inm2(j)
     &        -t3*inm3(j)-t4*inm4(j)-t5*inm5(j)
     &        -t6*inm6(j)-t7*inm7(j)-t8*inm8(j)
      enddo

      i=n-8
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=-1.*t1*inm1(j)-t2*inm2(j)-t3*inm3(j)-t4*inm4(j)
     &        -t5*inm5(j)-t6*inm6(j)-t7*inm7(j)-t8*inm8(j)
      enddo

      i=n-6
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=-1.*t3*inm3(j)-t4*inm4(j)-t5*inm5(j)
     &        -t6*inm6(j)-t7*inm7(j)-t8*inm8(j)
      enddo

      i=n-4
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=-1.*t5*inm5(j)-t6*inm6(j)-t7*inm7(j)-t8*inm8(j)
      enddo

      i=n-2
      out2=i*stride
      do j=1+out2,area+out2
         out(j)=-1.*t7*inm7(j)-t8*inm8(j)
      enddo
c     end of antisymmetric part
      end if

      end

c     Back and forth, linear convolution of sets of points using padding,
c     computing data ONLY on ghostpoints (IN PLACE)
c
c     out: input/output array for in place computation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers in output array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine pconvsets3new(outx,out,outm3,outm2,outm1,outp1,
     &     outp2,outp3,
     &     area,stride,n,npad,tr,tf)
      implicit none

      double precision out(*)
      double precision outx(*)
      double precision outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*)
      integer stride,area,n,npad
      double precision tr(*),tf(*)
      double precision t0,t1,t2,t3

      integer i,j,out2

      
c     First, make sure pad is zero for forward phase.
c     For pretty code the last number would be npad-1, but technically
c     npad-2 (for extra speed) is all that is needed.
      do i=n,npad-2
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=0.
         enddo
      enddo
         
c     Reverse phase
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)
      do i=n-1,3,-1
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=
     &           t0*out(j)+t1*outm1(j)+t2*outm2(j)+t3*outm3(j)
         enddo
      enddo

      out2=2*stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)+t1*outm1(j)+t2*outm2(j)
      enddo

      out2=stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)+t1*outm1(j)
      enddo
      
      out2=0
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)
      enddo

c     Forward phase (padding now ensures room!)
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      do i=0,n-1,2
         out2=i*stride

         do j=1+out2,area+out2
            outx(j)=
     &           t0*out(j)+t1*outp1(j)+t2*outp2(j)+t3*outp3(j)
         enddo
      enddo

      return
      end

c     Back and forth, linear convolution of sets of points using padding,
c     computing data ONLY on ghostpoints (IN PLACE)
c
c     out: input/output array for in place computation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers in output array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine pconvsets8(
     &     out,outm8,outm7,outm6,outm5,outm4,
     &     outm3,outm2,outm1,
     &     outp1,outp2,outp3,
     &     outp4,outp5,outp6,outp7,outp8,
     &     area,stride,n,npad,tr,tf)
      implicit none

      double precision out(*)
      double precision outm8(*),outm7(*)
      double precision outm6(*),outm5(*),outm4(*)
      double precision outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*)
      double precision outp4(*),outp5(*),outp6(*)
      double precision outp7(*),outp8(*)
      integer stride,area,n,npad
      double precision tr(*),tf(*)
      double precision t0,t1,t2,t3,t4,t5,t6,t7,t8

      integer i,j,out2

c     First, make sure pad is zero for forward phase.
c     For pretty code the last number would be npad-1, but technically
c     npad-2 (for extra speed) is all that is needed.
      do i=n,npad-2
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=0.
         enddo
      enddo
         
c     Reverse phase
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)
      t4=tr(5)
      t5=tr(6)
      t6=tr(7)
      t7=tr(8)
      t8=tr(9)
      do i=n-1,8,-1
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=
     &           t0*out(j)+t1*outm1(j)+t2*outm2(j)+t3*outm3(j)
     &           +t4*outm4(j)+t5*outm5(j)+t6*outm6(j)+t7*outm7(j)
     &           +t8*outm8(j)
         enddo
      enddo
      
      out2=7*stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)+t1*outm1(j)+t2*outm2(j)
     &        +t3*outm3(j)+t4*outm4(j)+t5*outm5(j)
     &        +t6*outm6(j)+t7*outm7(j)
      enddo

      out2=6*stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)+t1*outm1(j)+t2*outm2(j)
     &        +t3*outm3(j)+t4*outm4(j)+t5*outm5(j)
     &        +t6*outm6(j)
      enddo

      out2=5*stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)+t1*outm1(j)+t2*outm2(j)
     &        +t3*outm3(j)+t4*outm4(j)+t5*outm5(j)
      enddo

      out2=4*stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)+t1*outm1(j)+t2*outm2(j)
     &        +t3*outm3(j)+t4*outm4(j)
      enddo

      out2=3*stride
      do j=1+out2,area+out2
         out(j)=
     &        t0*out(j)+t1*outm1(j)+t2*outm2(j)
     &        +t3*outm3(j)
      enddo

      out2=2*stride
      do j=1+out2,area+out2
         out(j)= t0*out(j)+t1*outm1(j)+t2*outm2(j)
      enddo

      out2=1*stride
      do j=1+out2,area+out2
         out(j)= t0*out(j)+t1*outm1(j)
      enddo

      out2=0*stride
      do j=1+out2,area+out2
         out(j)= t0*out(j)
      enddo

c     Forward phase (padding now ensures room!)
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      t4=tf(5)
      t5=tf(6)
      t6=tf(7)
      t7=tf(8)
      t8=tf(9)
      do i=0,n-1,2
         out2=i*stride

         do j=1+out2,area+out2
            out(j)=
     &           t0*out(j)+t1*outp1(j)+t2*outp2(j)+t3*outp3(j)
     &           +t4*outp4(j)+t5*outp5(j)+t6*outp6(j)+t7*outp7(j)
     &           +t8*outp8(j)
         enddo
      enddo

      return
      end

c     Back and forth, linear convolution of sets of points using padding
c     computing data ONLY on ghostpoints, AND folding in input of a
c     different data set (IN PLACE)
c
c     out: input/output array for in place computation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers in output array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     in: input/output array for in place computation
c     inm3,inm2,inm1,inp1,inp2,inp3: pointers in input array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of out sets
c     stride: separation between out data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     stridel: separation between in data sets (stride=&in-&inm1)
c     fptl, lptl: first and last data components actually present in input
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
c     eflag: mod(,2) of present line (tells if special ghost handling needed)
c     zcflag: =1 means we can count on zero ghosts and need special handling
      subroutine pconvsetsi3old(
     $     out,outm3,outm2,outm1,outp1,outp2,outp3,
     $     in,inm3,inm2,inm1,inp1,inp2,inp3,
     $     area,stride,n,npad,
     $     stridel,fptl,lptl,
     $     tr,tf,eflag,zcflag)
      implicit none

      double precision out(*)
      double precision outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*)

      double precision in(*)
      double precision inm3(*),inm2(*),inm1(*)
      double precision inp1(*),inp2(*),inp3(*)

      integer stride,area,n,npad
      integer stridel,fptl,lptl
      double precision tr(*),tf(*)
      integer eflag,zcflag

      double precision t0,t1,t2,t3
      integer i,j,out2

c     Reverse phase
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)

c     First, make sure pad is zero for forward phase.
c     For pretty code the last number would be npad-1, but technically
c     npad-2 (for extra speed) is all that is needed.
      do i=n,npad-2
         call zeroline3(out,in,inm1,inm2,inm3,i,
     $        stridel,stride,fptl,lptl,area,
     $        t0,t1,t2,t3)
      enddo

      call zeroline3(out,in,inm1,inm2,inm3,n-1,
     $     stridel,stride,fptl,lptl,area,
     $     t0,t1,t2,t3)

      if (eflag.eq.0 .and. zcflag.eq.1) then
c     For Idag (zcflag=1) we are coming from real space, and so can't
c     depend on the ghostpoints being zero.  Thus, on the even planes (eflag),
c     we must skip over the data in the appropriate lines for the even
c     points.  This is what the line??[eo] routines do.

c     For the UNUSED lines below, we point to "inm3" to avoid seg faults
c     because we know "inm3" will always be in range (unless the box is
c     ridiculously small).  Although multiplied by zero, the code still
c     accesses the dummy data!
         call lineioe3(out,inm3,inm3,inm3,inm3,n-2,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,t3)
         
         call lineioo3(out,inm3,inm3,inm2,inm3,n-3,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,t2,t3)

         call lineioe3(out,inm3,inm1,inm2,inm3,n-4,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,t1,t2,t3)

         do i=n-5,5,-1
            if (mod(i,2).eq.0) then
               call lineioe3(out,in,inm1,inm2,inm3,i,
     $              stridel,stride,fptl,lptl,area,
     $              t0,t1,t2,t3)
            else
               call lineioo3(out,in,inm1,inm2,inm3,i,
     $              stridel,stride,fptl,lptl,area,
     $              t0,t1,t2,t3)
            endif
         enddo

         call lineioe3(out,in,inm1,inm2,in,4,
     $        stridel,stride,fptl,lptl,area,
     $        t0,t1,t2,0.d0)

         call lineioo3(out,in,inm1,in,in,3,
     $        stridel,stride,fptl,lptl,area,
     $        t0,t1,0.d0,0.d0)

         call lineioe3(out,in,in,in,in,2,
     $        stridel,stride,fptl,lptl,area,
     $        t0,0.d0,0.d0,0.d0)

      else

c     For the UNUSED lines below, we point to "inm3" to avoid seg faults
c     because we know "inm3" will always be in range (unless the box is
c     ridiculously small).  Although multiplied by zero, the code still
c     accesses the dummy data!

         call lineio3(out,inm3,inm3,inm3,inm3,n-2,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,t3)
         
         call lineio3(out,inm3,inm3,inm2,inm3,n-3,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,t2,t3)

         call lineio3(out,inm3,inm1,inm2,inm3,n-4,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,t1,t2,t3)

         do i=n-5,5,-1
            call lineio3(out,in,inm1,inm2,inm3,i,
     $           stridel,stride,fptl,lptl,area,
     $           t0,t1,t2,t3)
         enddo

c     For the UNUSED lines below, we must point to "in" to avoid seg
c     faults.  On this end of the convolution, we know that "in" will
c     always be in range.

         call lineio3(out,in,inm1,inm2,in,4,
     $        stridel,stride,fptl,lptl,area,
     $        t0,t1,t2,0.d0)

         call lineio3(out,in,inm1,in,in,3,
     $        stridel,stride,fptl,lptl,area,
     $        t0,t1,0.d0,0.d0)

         call lineio3(out,in,in,in,in,2,
     $        stridel,stride,fptl,lptl,area,
     $        t0,0.d0,0.d0,0.d0)

      endif

      call zeroline3(out,in,inm1,inm2,inm3,1,
     $     stridel,stride,fptl,lptl,area,
     $     t0,t1,t2,t3)


      call zeroline3(out,in,inm1,inm2,inm3,0,
     $     stridel,stride,fptl,lptl,area,
     $     t0,t1,t2,t3)


c     Forward phase (padding now ensures room!)
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      do i=0,n-1,2
         out2=i*stride

         do j=1+out2,area+out2
            out(j)=
     &           t0*out(j)+t1*outp1(j)+t2*outp2(j)+t3*outp3(j)
         enddo
      enddo

      return
      end


c     Back and forth, linear convolution of sets of points using padding
c     computing data ONLY on ghostpoints, AND folding in input of a
c     different data set (IN PLACE)
c
c     out: input/output array for in place computation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers in output array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     in: input/output array for in place computation
c     inm3,inm2,inm1,inp1,inp2,inp3: pointers in input array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of out sets
c     stride: separation between out data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     stridel: separation between in data sets (stride=&in-&inm1)
c     fptl, lptl: first and last data components actually present in input
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
c     eflag: mod(,2) of present line (tells if special ghost handling needed)
c     zcflag: =1 means we can count on zero ghosts and need special handling
      subroutine pconvsetsi3new(out,
     $     in,inm3,inm2,inm1,inp1,inp2,inp3,
     $     area,stride,n,npad,
     $     stridel,fptl,lptl,
     $     filt,eflag,zcflag)
      implicit none
      
      double precision out(*)

      double precision in(*)
      double precision inm3(*),inm2(*),inm1(*)
      double precision inp1(*),inp2(*),inp3(*)

      integer stride,area,n,npad
      integer stridel,fptl,lptl
      double precision filt(*)
      integer eflag,zcflag

      double precision t0,t1,t2,t3
      integer i

c     Filter coefficients
      t0=filt(1)
      t1=filt(2)
      t2=filt(3)
      t3=filt(4)

      do i=0,npad-2
         call zeroline3(out,in,inm1,inm2,inm3,i,
     $        stridel,stride,fptl,lptl,area,
     $        t0,t1,t2,t3)
      enddo

c     For the UNUSED lines below, we point to "inm3" or "inp3" to avoid
c     seg faults because we know "inm3" or "inp3" will always be in range 
c     (unless the box is ridiculously small).  Although multiplied 
c     by zero, the code still accesses the dummy data!
      
      if (eflag.eq.0 .and. zcflag.eq.1) then

         call lineio3full(out,inp3,inp3,inp3,inp3,inp3,inp3,inp3,0,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,t3)
         
         call lineio3full(out,inp3,inp3,inp3,in,inp1,inp2,inp3,2,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,t0,t1,t2,t3)
         
         call lineio3full(out,inp3,inp3,inm1,in,inp1,inp2,inp3,4,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,t2,t1,t0,t1,t2,t3)
      
         do i=6,n-8,2
            call lineio3full(out,inm3,inm2,inm1,in,inp1,inp2,inp3,i,
     $           stridel,stride,fptl,lptl,area,
     $           t3,t2,t1,t0,t1,t2,t3)
         enddo
         
         call lineio3full(out,inm3,inm2,inm1,
     $        in,inp1,inm3,inm3,n-6,
     $        stridel,stride,fptl,lptl,area,
     $        t3,t2,t1,t0,t1,0.d0,0.d0)
         
         call lineio3full(out,inm3,inm2,inm1,
     $        inm3,inm3,inm3,inm3,n-4,
     $        stridel,stride,fptl,lptl,area,
     $        t3,t2,t1,0.d0,0.d0,0.d0,0.d0)
         
         call lineio3full(out,inm3,inm3,inm3,
     $        inm3,inm3,inm3,inm3,n-2,
     $        stridel,stride,fptl,lptl,area,
     $        t3,0.d0,0.d0,0.d0,0.d0,0.d0,0.d0)
      
      else

         call lineio3fullacc(out,inp3,inp3,inp3,inp3,inp3,inp3,inp3,0,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,t3)
         
         call lineio3fullacc(out,inp3,inp3,inp3,in,inp1,inp2,inp3,2,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,t0,t1,t2,t3)
         
         call lineio3fullacc(out,inp3,inp3,inm1,in,inp1,inp2,inp3,4,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,t2,t1,t0,t1,t2,t3)
         
         do i=6,n-8,2
            call lineio3fullacc(out,inm3,inm2,inm1,in,inp1,inp2,inp3,i,
     $           stridel,stride,fptl,lptl,area,
     $           t3,t2,t1,t0,t1,t2,t3)
         enddo
         
         call lineio3fullacc(out,inm3,inm2,inm1,
     $        in,inp1,inm3,inm3,n-6,
     $        stridel,stride,fptl,lptl,area,
     $        t3,t2,t1,t0,t1,0.d0,0.d0)
         
         call lineio3fullacc(out,inm3,inm2,inm1,
     $        inm3,inm3,inm3,inm3,n-4,
     $        stridel,stride,fptl,lptl,area,
     $        t3,t2,t1,0.d0,0.d0,0.d0,0.d0)
         
         call lineio3fullacc(out,inm3,inm3,inm3,
     $        inm3,inm3,inm3,inm3,n-2,
     $        stridel,stride,fptl,lptl,area,
     $        t3,0.d0,0.d0,0.d0,0.d0,0.d0,0.d0)
         
      endif

      return
      end

c     Back and forth, linear convolution of sets of points using padding
c     computing data ONLY on ghostpoints, AND folding in input of a
c     different data set (IN PLACE)
c
c     out: input/output array for in place computation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers in output array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     in: input/output array for in place computation
c     inm3,inm2,inm1,inp1,inp2,inp3: pointers in input array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of out sets
c     stride: separation between out data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     stridel: separation between in data sets (stride=&in-&inm1)
c     fptl, lptl: first and last data components actually present in input
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
c     eflag: mod(,2) of present line (tells if special ghost handling needed)
c     zcflag: =1 means we can count on zero ghosts and need special handling
      subroutine pconvsetsi8new(out,
     $     in,
     $     inm8,inm7,inm6,inm5,inm4,inm3,inm2,inm1,
     $     inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $     area,stride,n,npad,
     $     stridel,fptl,lptl,
     $     filt,eflag,zcflag)
      implicit none
      
      double precision out(*)
      
      double precision in(*)
      double precision inm8(*),inm7(*),inm6(*)
      double precision inm5(*),inm4(*),inm3(*),inm2(*),inm1(*)
      double precision inp1(*),inp2(*),inp3(*),inp4(*)
      double precision inp5(*),inp6(*),inp7(*),inp8(*)
      
      integer stride,area,n,npad
      integer stridel,fptl,lptl
      double precision filt(*)
      integer eflag,zcflag
      
      double precision t0,t1,t2,t3,t4,t5,t6,t7,t8
      integer i
      
c     Filter coefficients
      t0=filt(1)
      t1=filt(2)
      t2=filt(3)
      t3=filt(4)
      t4=filt(5)
      t5=filt(6)
      t6=filt(7)
      t7=filt(8)
      t8=filt(9)

c     For the UNUSED lines below, we point to "inm3" or "inp3" to avoid
c     seg faults because we know "inm3" or "inp3" will always be in range 
c     (unless the box is ridiculously small).  Although multiplied 
c     by zero, the code still accesses the dummy data!

      if (t0 /= 0.0) then
c     symmetric filter

      if (2.GT.3) then
c      if (eflag.eq.0 .and. zcflag.eq.1) then

         call lineio8full(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        0,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,t3)
         
         call lineio8full(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp6,inp7,inp8,
     $        2,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,t6,t7,t8)
         
         
         call lineio8full(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp8,inp8,inp4,inp5,inp6,inp7,inp8,
     $        4,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        t4,t5,t6,t7,t8)
         
         call lineio8full(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        6,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8full(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        8,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8full(out,inp8,inp8,inp8,inp8,inp8,inp8,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        10,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        t2,t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8full(out,inp8,inp8,inp8,inp8,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        12,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,t4,t3,
     $        t2,t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8full(out,inp8,inp8,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        14,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,t6,t5,t4,t3,
     $        t2,t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         do i=16,n-18,2
            call lineio8full(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $           inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $           i,
     $           stridel,stride,fptl,lptl,area,
     $           t8,t7,t6,t5,t4,t3,
     $           t2,t1,t0,t1,t2,t3,
     $           t4,t5,t6,t7,t8)
         enddo

         call lineio8full(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inm8,
     $        n-16,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        t2,t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,0.d0)

         call lineio8full(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inm8,inm8,inm8,
     $        n-14,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        t2,t1,t0,t1,t2,t3,
     $        t4,t5,0.d0,0.d0,0.d0)
         
         call lineio8full(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inm8,inm8,inm8,inm8,inm8,
     $        n-12,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        t2,t1,t0,t1,t2,t3,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         call lineio8full(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-10,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        t2,t1,t0,t1,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         
         call lineio8full(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-8,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        t2,t1,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         
         call lineio8full(out,inm8,inm7,inm6,inm5,inm4,inm3,inm8,
     $        inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-6,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         
         call lineio8full(out,inm8,inm7,inm6,inm5,inm8,inm8,inm8,
     $        inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-4,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         call lineio8full(out,inm8,inm7,inm8,inm8,inm8,inm8,inm8,
     $        inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-2,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
      else

         call lineio8fullacc(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        0,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,t8)
         
         call lineio8fullacc(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp6,inp7,inp8,
     $        2,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,t6,t7,t8)
         
         call lineio8fullacc(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp8,inp8,inp4,inp5,inp6,inp7,inp8,
     $        4,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        t4,t5,t6,t7,t8)
         
         call lineio8fullacc(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        6,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8fullacc(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        8,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8fullacc(out,inp8,inp8,inp8,inp8,inp8,inp8,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        10,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        t2,t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8fullacc(out,inp8,inp8,inp8,inp8,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        12,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,t4,t3,
     $        t2,t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8fullacc(out,inp8,inp8,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        14,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,t6,t5,t4,t3,
     $        t2,t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         do i=16,n-18,2
            call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $           inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $           i,
     $           stridel,stride,fptl,lptl,area,
     $           t8,t7,t6,t5,t4,t3,
     $           t2,t1,t0,t1,t2,t3,
     $           t4,t5,t6,t7,t8)
         enddo
         
         call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inm8,
     $        n-16,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        t2,t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,0.d0)
         
         call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inm8,inm8,inm8,
     $        n-14,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        t2,t1,t0,t1,t2,t3,
     $        t4,t5,0.d0,0.d0,0.d0)
         
         call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inm8,inm8,inm8,inm8,inm8,
     $        n-12,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        t2,t1,t0,t1,t2,t3,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-10,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        t2,t1,t0,t1,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-8,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        t2,t1,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm4,inm3,inm8,
     $        inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-6,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm8,inm8,inm8,
     $        inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-4,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         call lineio8fullacc(out,inm8,inm7,inm8,inm8,inm8,inm8,inm8,
     $        inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-2,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
      endif
c     end of symmetric part
      else
c     antisymmetric filter

      if (2.GT.3) then
c      if (eflag.eq.0 .and. zcflag.eq.1) then

         call lineio8full(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        0,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,t3)
         
         call lineio8full(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp6,inp7,inp8,
     $        2,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,t6,t7,t8)
         
         
         call lineio8full(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp8,inp8,inp4,inp5,inp6,inp7,inp8,
     $        4,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        t4,t5,t6,t7,t8)
         
         call lineio8full(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        6,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8full(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        8,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8full(out,inp8,inp8,inp8,inp8,inp8,inp8,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        10,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        t2,t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8full(out,inp8,inp8,inp8,inp8,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        12,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,t4,t3,
     $        t2,t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8full(out,inp8,inp8,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        14,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,t6,t5,t4,t3,
     $        t2,t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         do i=16,n-18,2
            call lineio8full(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $           inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $           i,
     $           stridel,stride,fptl,lptl,area,
     $           t8,t7,t6,t5,t4,t3,
     $           t2,t1,t0,t1,t2,t3,
     $           t4,t5,t6,t7,t8)
         enddo

         call lineio8full(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inm8,
     $        n-16,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        t2,t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,0.d0)

         call lineio8full(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inm8,inm8,inm8,
     $        n-14,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        t2,t1,t0,t1,t2,t3,
     $        t4,t5,0.d0,0.d0,0.d0)
         
         call lineio8full(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inm8,inm8,inm8,inm8,inm8,
     $        n-12,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        t2,t1,t0,t1,t2,t3,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         call lineio8full(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-10,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        t2,t1,t0,t1,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         
         call lineio8full(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-8,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        t2,t1,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         
         call lineio8full(out,inm8,inm7,inm6,inm5,inm4,inm3,inm8,
     $        inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-6,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,t4,t3,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         
         call lineio8full(out,inm8,inm7,inm6,inm5,inm8,inm8,inm8,
     $        inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-4,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,t6,t5,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         call lineio8full(out,inm8,inm7,inm8,inm8,inm8,inm8,inm8,
     $        inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-2,
     $        stridel,stride,fptl,lptl,area,
     $        t8,t7,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
c     Is any of the above code ever called?  Is 2 ever greater than 3?
      else

         call lineio8fullacc(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        0,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,t8)
         
         call lineio8fullacc(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp8,inp8,inp8,inp8,inp6,inp7,inp8,
     $        2,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,t6,t7,t8)
         
         call lineio8fullacc(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp8,inp8,inp4,inp5,inp6,inp7,inp8,
     $        4,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        t4,t5,t6,t7,t8)
         
         call lineio8fullacc(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,inp8,inp8,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        6,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8fullacc(out,inp8,inp8,inp8,inp8,inp8,inp8,inp8,
     $        inp8,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        8,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8fullacc(out,inp8,inp8,inp8,inp8,inp8,inp8,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        10,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        -1.*t2,-1.*t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8fullacc(out,inp8,inp8,inp8,inp8,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        12,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,0.d0,0.d0,-1.*t4,-1.*t3,
     $        -1.*t2,-1.*t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         call lineio8fullacc(out,inp8,inp8,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $        14,
     $        stridel,stride,fptl,lptl,area,
     $        0.d0,0.d0,-1.*t6,-1.*t5,-1.*t4,-1.*t3,
     $        -1.*t2,-1.*t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,t8)
         
         do i=16,n-18,2
            call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $           inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     $           i,
     $           stridel,stride,fptl,lptl,area,
     $           -1.*t8,-1.*t7,-1.*t6,-1.*t5,-1.*t4,-1.*t3,
     $           -1.*t2,-1.*t1,t0,t1,t2,t3,
     $           t4,t5,t6,t7,t8)
         enddo
         
         call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inm8,
     $        n-16,
     $        stridel,stride,fptl,lptl,area,
     $        -1.*t8,-1.*t7,-1.*t6,-1.*t5,-1.*t4,-1.*t3,
     $        -1.*t2,-1.*t1,t0,t1,t2,t3,
     $        t4,t5,t6,t7,0.d0)
         
         call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inp4,inp5,inm8,inm8,inm8,
     $        n-14,
     $        stridel,stride,fptl,lptl,area,
     $        -1.*t8,-1.*t7,-1.*t6,-1.*t5,-1.*t4,-1.*t3,
     $        -1.*t2,-1.*t1,t0,t1,t2,t3,
     $        t4,t5,0.d0,0.d0,0.d0)
         
         call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inp2,inp3,inm8,inm8,inm8,inm8,inm8,
     $        n-12,
     $        stridel,stride,fptl,lptl,area,
     $        -1.*t8,-1.*t7,-1.*t6,-1.*t5,-1.*t4,-1.*t3,
     $        -1.*t2,-1.*t1,t0,t1,t2,t3,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,in,inp1,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-10,
     $        stridel,stride,fptl,lptl,area,
     $        -1.*t8,-1.*t7,-1.*t6,-1.*t5,-1.*t4,-1.*t3,
     $        -1.*t2,-1.*t1,t0,t1,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm4,inm3,inm2,
     $        inm1,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-8,
     $        stridel,stride,fptl,lptl,area,
     $        -1.*t8,-1.*t7,-1.*t6,-1.*t5,-1.*t4,-1.*t3,
     $        -1.*t2,-1.*t1,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm4,inm3,inm8,
     $        inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-6,
     $        stridel,stride,fptl,lptl,area,
     $        -1.*t8,-1.*t7,-1.*t6,-1.*t5,-1.*t4,-1.*t3,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         call lineio8fullacc(out,inm8,inm7,inm6,inm5,inm8,inm8,inm8,
     $        inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-4,
     $        stridel,stride,fptl,lptl,area,
     $        -1.*t8,-1.*t7,-1.*t6,-1.*t5,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
         call lineio8fullacc(out,inm8,inm7,inm8,inm8,inm8,inm8,inm8,
     $        inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,inm8,
     $        n-2,
     $        stridel,stride,fptl,lptl,area,
     $        -1.*t8,-1.*t7,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,
     $        0.d0,0.d0,0.d0,0.d0,0.d0)
         
      endif
c     end of antisymmetric part
      endif

      return
      end
      

c     Back and forth, linear convolution of sets of points using padding
c     computing data ONLY on ghostpoints, AND folding in input of a
c     different data set (IN PLACE)
c
c     out: input/output array for in place computation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers in output array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     in: input/output array for in place computation
c     inm3,inm2,inm1,inp1,inp2,inp3: pointers in input array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of out sets
c     stride: separation between out data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     stridel: separation between in data sets (stride=&in-&inm1)
c     fptl, lptl: first and last data components actually present in input
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
c     eflag: mod(,2) of present line (tells if special ghost handling needed)
c     zcflag: =1 means we can count on zero ghosts and need special handling
      subroutine pconvsetsi8(
     $     out,outm8,outm7,outm6,outm5,outm4,
     $     outm3,outm2,outm1,outp1,outp2,outp3,
     $     outp4,outp5,outp6,outp7,outp8,
     $     in,inm8,inm7,inm6,inm5,inm4,
     $     inm3,inm2,inm1,inp1,inp2,inp3,
     $     inp4,inp5,inp6,inp7,inp8,
     $     area,stride,n,npad,
     $     stridel,fptl,lptl,
     $     tr,tf,eflag,zcflag)
      implicit none

      double precision out(*)
      double precision outm8(*),outm7(*)
      double precision outm6(*),outm5(*),outm4(*)
      double precision outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*)
      double precision outp4(*),outp5(*),outp6(*)
      double precision outp7(*),outp8(*)

      double precision in(*)
      double precision inm8(*),inm7(*)
      double precision inm6(*),inm5(*),inm4(*)
      double precision inm3(*),inm2(*),inm1(*)
      double precision inp1(*),inp2(*),inp3(*)
      double precision inp4(*),inp5(*),inp6(*)
      double precision inp7(*),inp8(*)
      
      integer stride,area,n,npad
      integer stridel,fptl,lptl
      double precision tr(*),tf(*)
      integer eflag,zcflag
      
      double precision t0,t1,t2,t3,t4,t5,t6,t7,t8
      integer i,j,out2
      
c     Reverse phase
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)
      t4=tr(5)
      t5=tr(6)
      t6=tr(7)
      t7=tr(8)
      t8=tr(9)
      
c     First, make sure pad is zero for forward phase.
c     For pretty code the last number would be npad-1, but technically
c     npad-2 (for extra speed) is all that is needed.
      do i=n,npad-2
         call zeroline8(out,in,inm1,inm2,inm3,inm4,
     $        inm5,inm6,inm7,inm8,i,
     $        stridel,stride,fptl,lptl,area,
     $        t0,t1,t2,t3,t4,t5,t6,t7,t8)
      enddo
      
c      call zeroline8(out,in,inm1,inm2,inm3,
c     $     inm4,inm5,inm6,inm7,inm8,n-1,
c     $     stridel,stride,fptl,lptl,area,
c     $     t0,t1,t2,t3,t4,t5,t6,t7,t8)
      
      
c      CALL flush(6)

c     For the UNUSED lines below, we point to "inm8" to avoid seg faults
c     because we know "inm8" will always be in range (unless the box is
c     ridiculously small).  Although multiplied by zero, the code still
c     accesses the dummy data!

      call lineio8(out,inm8,inm8,inm8,inm8,inm8,
     $     inm8,inm8,inm8,inm8,n-1,
     $     stridel,stride,fptl,lptl,area,
     $     0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,t8)

      call lineio8(out,inm8,inm8,inm8,inm8,inm8,
     $     inm8,inm8,inm7,inm8,n-2,
     $     stridel,stride,fptl,lptl,area,
     $     0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,t7,t8)

      call lineio8(out,inm8,inm8,inm8,inm8,inm8,
     $     inm8,inm6,inm7,inm8,n-3,
     $     stridel,stride,fptl,lptl,area,
     $     0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,t6,t7,t8)

      call lineio8(out,inm8,inm8,inm8,inm8,inm8,
     $     inm5,inm6,inm7,inm8,n-4,
     $     stridel,stride,fptl,lptl,area,
     $     0.d0,0.d0,0.d0,0.d0,0.d0,t5,t6,t7,t8)
      
      call lineio8(out,inm8,inm8,inm8,inm8,inm4,
     $     inm5,inm6,inm7,inm8,n-5,
     $     stridel,stride,fptl,lptl,area,
     $     0.d0,0.d0,0.d0,0.d0,t4,t5,t6,t7,t8)

      call lineio8(out,inm8,inm8,inm8,inm3,inm4,
     $     inm5,inm6,inm7,inm8,n-6,
     $     stridel,stride,fptl,lptl,area,
     $     0.d0,0.d0,0.d0,t3,t4,t5,t6,t7,t8)
      
      call lineio8(out,inm8,inm8,inm2,inm3,inm4,
     $     inm5,inm6,inm7,inm8,n-7,
     $     stridel,stride,fptl,lptl,area,
     $     0.d0,0.d0,t2,t3,t4,t5,t6,t7,t8)
      
      call lineio8(out,inm8,inm1,inm2,inm3,inm4,
     $     inm5,inm6,inm7,inm8,n-8,
     $     stridel,stride,fptl,lptl,area,
     $     0.d0,t1,t2,t3,t4,t5,t6,t7,t8)
      
      
      do i=n-9,16,-1
         call lineio8(out,in,inm1,inm2,inm3,
     $        inm4,inm5,inm6,inm7,inm8,i,
     $        stridel,stride,fptl,lptl,area,
     $        t0,t1,t2,t3,t4,t5,t6,t7,t8)
      enddo

c     For the UNUSED lines below, we must point to "in" to avoid seg
c     faults.  On this end of the convolution, we know that "in" will
c     always be in range.

         call lineio8(out,in,inm1,inm2,inm3,inm4,inm5,inm6,inm7,in,15,
     $        stridel,stride,fptl,lptl,area,
     $        t0,t1,t2,t3,t4,t5,t6,t7,0.d0)

         call lineio8(out,in,inm1,inm2,inm3,inm4,inm5,inm6,in,in,14,
     $        stridel,stride,fptl,lptl,area,
     $        t0,t1,t2,t3,t4,t5,t6,0.d0,0.d0)

         call lineio8(out,in,inm1,inm2,inm3,inm4,inm5,in,in,in,13,
     $        stridel,stride,fptl,lptl,area,
     $        t0,t1,t2,t3,t4,t5,0.d0,0.d0,0.d0)

         call lineio8(out,in,inm1,inm2,inm3,inm4,in,in,in,in,12,
     $        stridel,stride,fptl,lptl,area,
     $        t0,t1,t2,t3,t4,0.d0,0.d0,0.d0,0.d0)

         call lineio8(out,in,inm1,inm2,inm3,in,in,in,in,in,11,
     $        stridel,stride,fptl,lptl,area,
     $        t0,t1,t2,t3,0.d0,0.d0,0.d0,0.d0,0.d0)

         call lineio8(out,in,inm1,inm2,in,in,in,in,in,in,10,
     $        stridel,stride,fptl,lptl,area,
     $        t0,t1,t2,0.d0,0.d0,0.d0,0.d0,0.d0,0.d0)

         call lineio8(out,in,inm1,in,in,in,in,in,in,in,9,
     $        stridel,stride,fptl,lptl,area,
     $        t0,t1,0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,0.d0)

         call lineio8(out,in,in,in,in,in,in,in,in,in,8,
     $        stridel,stride,fptl,lptl,area,
     $        t0,0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,0.d0,0.d0)

         do i=7,0,-1
            call zeroline8(out,in,inm1,inm2,inm3,
     $           inm4,inm5,inm6,inm7,inm8,i,
     $           stridel,stride,fptl,lptl,area,
     $           t0,t1,t2,t3,t4,t5,t6,t7,t8)
         enddo

      
c     Forward phase (padding now ensures room!)
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      t4=tf(5)
      t5=tf(6)
      t6=tf(7)
      t7=tf(8)
      t8=tf(9)
      
      do i=0,n-1,2
         out2=i*stride
         
         do j=1+out2,area+out2
            out(j)=
     &           t0*out(j)+t1*outp1(j)+t2*outp2(j)+t3*outp3(j)
     &           +t4*outp4(j)+t5*outp5(j)+t6*outp6(j)+t7*outp7(j)
     &           +t8*outp8(j)
         enddo
      enddo
      
      return
      end

c     Computes convolution for a single data set ("line") during reverse
c     phases (INPUT/OUTPUT)
c
c     out: output set
c     in,inm1,inm2,inm3: input set
c     i: index of line to be done
c     stridel: stride of input
c     stride: stride of output
c     fptl: first point of input actually used
c     lptl: last point of input actually used
c     area: full area to be generated
c     t0,t1,t2,t3: filter coefficients
      subroutine lineio3(out,in,inm1,inm2,inm3,i,
     $     stridel,stride,fptl,lptl,area,
     $     t0,t1,t2,t3)
      implicit none
      double precision out(*),in(*),inm1(*),inm2(*),inm3(*)
      integer i,stridel,stride,fptl,lptl,area
      double precision t0,t1,t2,t3

      integer out2,dout2,j

      out2=i*stridel
      dout2=i*(stride-stridel)

c     Zero out data before fptl
      do j=1+out2,fptl+out2
         out(j+dout2)=0.
      enddo

c     Do the line for "active" points
      do j=1+fptl+out2,1+lptl+out2
         out(j+dout2)=
     &        t0*in(j)+t1*inm1(j)+t2*inm2(j)+t3*inm3(j)
      enddo

c     Zero out data after lptl
      do j=1+lptl+out2+1,area+out2
         out(j+dout2)=0.
      enddo

      return
      end

c     Computes convolution for a single data set ("line") during reverse
c     phases (INPUT/OUTPUT)
c
c     out: output set
c     in,inm1,inm2,inm3: input set
c     i: index of line to be done
c     stridel: stride of input
c     stride: stride of output
c     fptl: first point of input actually used
c     lptl: last point of input actually used
c     area: full area to be generated
c     t0,t1,t2,t3: filter coefficients
      subroutine lineio3full(out,inm3,inm2,inm1,in,inp1,inp2,inp3,i,
     $     stridel,stride,fptl,lptl,area,
     $     tl3,tl2,tl1,t0,tr1,tr2,tr3)
      implicit none
      double precision out(*),in(*),inm1(*),inm2(*),inm3(*)
      double precision inp1(*),inp2(*),inp3(*)
      integer i,stridel,stride,fptl,lptl,area
      double precision tl3,tl2,tl1,t0,tr1,tr2,tr3

      integer out2,dout2,j

      out2=i*stridel
      dout2=i*(stride-stridel)


      do j=1+out2+2,lptl+out2+1,2
c      do j=1+out2+1,lptl+out2+1
         out(j+dout2)=tr3*inp3(j)+tr1*inp1(j)+
     &        tl1*inm1(j)+tl3*inm3(j)
         out(j+dout2+1)=tr3*inp3(j+1)+tr1*inp1(j+1)+t0*in(j+1)+
     &        tl1*inm1(j+1)+tl3*inm3(j+1)
      enddo

c     Zero out data after lptl
      do j=1+lptl+out2+1,area+out2
         out(j+dout2)=0.
      enddo

      return
      end

c     Computes convolution for a single data set ("line") during reverse
c     phases (INPUT/OUTPUT)
c
c     out: output set
c     in,inm1,inm2,inm3: input set
c     i: index of line to be done
c     stridel: stride of input
c     stride: stride of output
c     fptl: first point of input actually used
c     lptl: last point of input actually used
c     area: full area to be generated
c     t0,t1,t2,t3: filter coefficients
      subroutine lineio8full(out,inm8,inm7,inm6,inm5,inm4,
     $     inm3,inm2,inm1,in,inp1,inp2,inp3,
     $     inp4,inp5,inp6,inp7,inp8,
     $     i,
     $     stridel,stride,fptl,lptl,area,
     $     tl8,tl7,tl6,tl5,tl4,tl3,tl2,tl1,
     $     t0,tr1,tr2,tr3,tr4,tr5,tr6,tr7,tr8)

      implicit none
      double precision out(*),in(*),inm1(*),inm2(*),inm3(*)
      double precision inm4(*),inm5(*),inm6(*),inm7(*),inm8(*)
      double precision inp1(*),inp2(*),inp3(*),inp4(*)
      double precision inp5(*),inp6(*),inp7(*),inp8(*)
      integer i,stridel,stride,fptl,lptl,area
      double precision tl8,tl7,tl6,tl5,tl4,tl3,tl2,tl1
      double precision t0,tr1,tr2,tr3,tr4,tr5,tr6,tr7,tr8

      integer out2,dout2,j

      out2=i*stridel
      dout2=i*(stride-stridel)


      do j=1+out2+2,lptl+out2+1,2
         out(j+dout2)=tr8*inp8(j)+tr7*inp7(j)+tr6*inp6(j)
     &        +tr5*inp5(j)+tr4*inp4(j)+tr3*inp3(j)+tr2*inp2(j)
     &        +tr1*inp1(j)+tl1*inm1(j)+tl2*inm2(j)+tl3*inm3(j)
     &        +tl3*inm3(j)+tl4*inm4(j)+tl5*inm4(j)+tl6*inm6(j)
     &        +tl7*inm7(j)+tl8*inm8(j)
         out(j+dout2+1)=tr8*inp8(j+1)+tr7*inp7(j+1)+tr6*inp6(j+1)
     &        +tr5*inp5(j+1)+tr4*inp4(j+1)+tr3*inp3(j+1)+tr2*inp2(j+1)
     &        +tr1*inp1(j+1)+tl1*inm1(j+1)+tl2*inm2(j+1)+tl3*inm3(j+1)
     &        +tl3*inm3(j+1)+tl4*inm4(j+1)+tl5*inm4(j+1)+tl6*inm6(j+1)
     &        +tl7*inm7(j+1)+tl8*inm8(j+1)
      enddo
      
c     Zero out data after lptl
      do j=1+lptl+out2+1,area+out2
         out(j+dout2)=0.
      enddo

      return
      end

c     Computes convolution for a single data set ("line") during reverse
c     phases (INPUT/OUTPUT)
c
c     out: output set
c     in,inm1,inm2,inm3: input set
c     i: index of line to be done
c     stridel: stride of input
c     stride: stride of output
c     fptl: first point of input actually used
c     lptl: last point of input actually used
c     area: full area to be generated
c     t0,t1,t2,t3: filter coefficients
      subroutine lineio3fullacc(out,inm3,inm2,inm1,in,inp1,inp2,inp3,i,
     $     stridel,stride,fptl,lptl,area,
     $     tl3,tl2,tl1,t0,tr1,tr2,tr3)
      implicit none
      double precision out(*),in(*),inm1(*),inm2(*),inm3(*)
      double precision inp1(*),inp2(*),inp3(*)
      integer i,stridel,stride,fptl,lptl,area
      double precision tl3,tl2,tl1,t0,tr1,tr2,tr3

      integer out2,dout2,j

      out2=i*stridel
      dout2=i*(stride-stridel)

      do j=1+out2+fptl,lptl+out2+1
c      do j=1+out2+1,lptl+out2+1
         out(j+dout2)=tr3*inp3(j)+tr1*inp1(j)+t0*in(j)+
     &        tl1*inm1(j)+tl3*inm3(j)
      enddo

c     Zero out data after lptl
      do j=1+lptl+out2+1,area+out2
         out(j+dout2)=0.
      enddo

      return
      end


c     Computes convolution for a single data set ("line") during reverse
c     phases (INPUT/OUTPUT)
c
c     out: output set
c     in,inm1,inm2,inm3: input set
c     i: index of line to be done
c     stridel: stride of input
c     stride: stride of output
c     fptl: first point of input actually used
c     lptl: last point of input actually used
c     area: full area to be generated
c     t0,t1,t2,t3: filter coefficients
      subroutine lineio8fullacc(out,inm8,inm7,inm6,inm5,inm4,
     $     inm3,inm2,inm1,in,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,i,
     $     stridel,stride,fptl,lptl,area,
     $     tl8,tl7,tl6,tl5,tl4,tl3,tl2,tl1,
     $     t0,tr1,tr2,tr3,tr4,tr5,tr6,tr7,tr8)
      implicit none
      double precision out(*),in(*),inm1(*),inm2(*),inm3(*)
      double precision inm4(*),inm5(*),inm6(*),inm7(*),inm8(*)
      double precision inp1(*),inp2(*),inp3(*),inp4(*)
      double precision inp5(*),inp6(*),inp7(*),inp8(*)
      integer i,stridel,stride,fptl,lptl,area
      double precision tl8,tl7,tl6,tl5,tl4,tl3,tl2,tl1
      double precision t0,tr1,tr2,tr3,tr4,tr5,tr6,tr7,tr8

      integer out2,dout2,j

      out2=i*stridel
      dout2=i*(stride-stridel)

c     Zero out data before fptl
      do j=1+out2,fptl+out2
         out(j+dout2)=0.
      enddo

      do j=1+out2+fptl,lptl+out2+1
c      do j=1+out2+2,lptl+out2+1
c      do j=1+out2+1,lptl+out2+1
         out(j+dout2)=tr8*inp8(j)+tr7*inp7(j)+tr6*inp6(j)+tr5*inp5(j)
     &        +tr4*inp4(j)+tr3*inp3(j)+tr2*inp2(j)+tr1*inp1(j)+t0*in(j)
     &        +tl1*inm1(j)+tl2*inm2(j)+tl3*inm3(j)+tl4*inm4(j)
     &        +tl5*inm5(j)+tl6*inm6(j)+tl7*inm7(j)+tl8*inm8(j)
         
      enddo
      
c     Zero out data after lptl
      do j=1+lptl+out2+1,area+out2
         out(j+dout2)=0.
      enddo
      
      return
      end


c     Computes convolution for a single data set ("line") during reverse
c     phases (INPUT/OUTPUT)
c
c     out: output set
c     in,inm1,inm2,inm3,inm4,inm5,inm6,inm7,inm8: input set
c     i: index of line to be done
c     stridel: stride of input
c     stride: stride of output
c     fptl: first point of input actually used
c     lptl: last point of input actually used
c     area: full area to be generated
c     t0,t1,t2,t3,t4,t5,t6,t7,t8: filter coefficients
      subroutine lineio8(out,in,inm1,inm2,inm3,
     $     inm4,inm5,inm6,inm7,inm8,i,
     $     stridel,stride,fptl,lptl,area,
     $     t0,t1,t2,t3,t4,t5,t6,t7,t8)
      implicit none
      double precision out(*),in(*),inm1(*),inm2(*),inm3(*)
      double precision inm4(*),inm5(*),inm6(*),inm7(*),inm8(*)
      integer i,stridel,stride,fptl,lptl,area
      double precision t0,t1,t2,t3,t4,t5,t6,t7,t8

      integer out2,dout2,j

      out2=i*stridel
      dout2=i*(stride-stridel)

c     Zero out data before fptl
      do j=1+out2,fptl+out2
         out(j+dout2)=0.
      enddo

c     Do the line for "active" points
      do j=1+fptl+out2,1+lptl+out2
         out(j+dout2)=
     &        t0*in(j)+t1*inm1(j)+t2*inm2(j)+t3*inm3(j)
     &        +t4*inm4(j)+t5*inm5(j)+t6*inm6(j)+t7*inm7(j)
     &        +t8*inm8(j)
      enddo

c     Zero out data after lptl
      do j=1+lptl+out2+1,area+out2
         out(j+dout2)=0.
      enddo

      return
      end


c     Zeros an output line (OUTPUT)
c     consistent calling sequence with lineio
c
c     out: output set
c     in,inm1,inm2,inm3: input set
c     i: index of line to be done
c     stridel: stride of input
c     stride: stride of output
c     fptl: first point of input actually used
c     lptl: last point of input actually used
c     area: full area to be generated
c     t0,t1,t2,t3: filter coefficients
      subroutine zeroline3(out,in,inm1,inm2,inm3,i,
     $     stridel,stride,fptl,lptl,area,
     $     t0,t1,t2,t3)
      implicit none
      double precision out(*),in(*),inm1(*),inm2(*),inm3(*)
      integer i,stridel,stride,fptl,lptl,area
      double precision t0,t1,t2,t3

      integer out2,j

      out2=i*stride

      do j=1+out2,area+out2
         out(j)=0.
      enddo

      return
      end

c     Zeros an output line (OUTPUT)
c     consistent calling sequence with lineio
c
c     out: output set
c     i: index of line to be done
c     stride: stride of output
c     area: full area to be generated
      subroutine zeroline8(out,in,inm1,inm2,inm3,
     $     inm4,inm5,inm6,inm7,inm8,i,
     $     stridel,stride,fptl,lptl,area,
     $     t0,t1,t2,t3,t4,t5,t6,t7,t8)
      implicit none
      double precision out(*),in(*),inm1(*),inm2(*),inm3(*)
      double precision inm4(*),inm5(*),inm6(*),inm7(*),inm8(*)
      integer i,stridel,stride,fptl,lptl,area
      double precision t0,t1,t2,t3,t4,t5,t6,t7,t8

      integer out2,j

      out2=i*stride

      do j=1+out2,area+out2
         out(j)=0.
      enddo

      return
      end

c     Computes convolution for a single "even" data set ("line") during
c     reverse phases (INPUT/OUTPUT)
c
c     out: output set
c     in,inm1,inm2,inm3: input set
c     i: index of line to be done
c     stridel: stride of input
c     stride: stride of output
c     fptl: first point of input actually used
c     lptl: last point of input actually used
c     area: full area to be generated
c     t0,t1,t2,t3: filter coefficients
      subroutine lineioe3(out,in,inm1,inm2,inm3,i,
     $     stridel,stride,fptl,lptl,area,
     $     t0,t1,t2,t3)
      implicit none
      double precision out(*),in(*),inm1(*),inm2(*),inm3(*)
      integer i,stridel,stride,fptl,lptl,area
      double precision t0,t1,t2,t3

      integer out2,dout2,j

      out2=i*stridel
      dout2=i*(stride-stridel)

c     Zero out data before fptl
      do j=1+out2,fptl+out2
         out(j+dout2)=0.
      enddo

c     Do the line for "active" points:
c     Ignore any even input data on even input lines!
      do j=1+fptl+out2,1+lptl+out2,2
         out(j+dout2)=
     &                 t1*inm1(j)           +t3*inm3(j)
         out(j+1+dout2)=
     &        t0*in(j+1)+t1*inm1(j+1)+t2*inm2(j+1)+t3*inm3(j+1)
      enddo

c     Zero out data after lptl
      do j=1+lptl+out2+1,area+out2
         out(j+dout2)=0.
      enddo

      return
      end

c     Computes convolution for a single "even" data set ("line") during
c     reverse phases (INPUT/OUTPUT)
c
c     out: output set
c     in,inm1,inm2,inm3,inm4,inm5,inm6,inm7,inm8: input set
c     i: index of line to be done
c     stridel: stride of input
c     stride: stride of output
c     fptl: first point of input actually used
c     lptl: last point of input actually used
c     area: full area to be generated
c     t0,t1,t2,t3,t4,t5,t6,t7,t8: filter coefficients
      subroutine lineioe8(out,in,inm1,inm2,inm3,inm4,inm5,inm6,
     $     inm7,inm8,i,
     $     stridel,stride,fptl,lptl,area,
     $     t0,t1,t2,t3,t4,t5,t6,t7,t8)
      implicit none
      double precision out(*),in(*),inm1(*),inm2(*),inm3(*)
      double precision inm4(*),inm5(*),inm6(*),inm7(*),inm8(*)
      integer i,stridel,stride,fptl,lptl,area
      double precision t0,t1,t2,t3,t4,t5,t6,t7,t8

      integer out2,dout2,j

      out2=i*stridel
      dout2=i*(stride-stridel)

c     Zero out data before fptl
      do j=1+out2,fptl+out2
         out(j+dout2)=0.
      enddo

c     Do the line for "active" points:
c     Ignore any even input data on even input lines!
      do j=1+fptl+out2,1+lptl+out2,2
         out(j+dout2)=
     &                 t1*inm1(j)           +t3*inm3(j)
         out(j+1+dout2)=
     &        t0*in(j+1)+t1*inm1(j+1)+t2*inm2(j+1)+t3*inm3(j+1)
     &        +t4*inm4(j+1)+t5*inm5(j+1)+t6*inm6(j+1)+t7*inm7(j+1)
     &        +t8*inm8(j+1)
      enddo

c     Zero out data after lptl
      do j=1+lptl+out2+1,area+out2
         out(j+dout2)=0.
      enddo

      return
      end


c     Computes convolution for a single "odd" data set ("line") during
c     reverse phases (INPUT/OUTPUT)
c
c     out: output set
c     in,inm1,inm2,inm3: input set
c     i: index of line to be done
c     stridel: stride of input
c     stride: stride of output
c     fptl: first point of input actually used
c     lptl: last point of input actually used
c     area: full area to be generated
c     t0,t1,t2,t3: filter coefficients
      subroutine lineioo3(out,in,inm1,inm2,inm3,i,
     $     stridel,stride,fptl,lptl,area,
     $     t0,t1,t2,t3)
      implicit none
      double precision out(*),in(*),inm1(*),inm2(*),inm3(*)
      integer i,stridel,stride,fptl,lptl,area
      double precision t0,t1,t2,t3

      integer out2,dout2,j

      out2=i*stridel
      dout2=i*(stride-stridel)

c     Zero out data before fptl
      do j=1+out2,fptl+out2
         out(j+dout2)=0.
      enddo

c     Do the line for "active" points:
c     Ignore any even input data on even input lines!
      do j=1+fptl+out2,1+lptl+out2,2
         out(j+dout2)=
     &        t0*in(j)           +t2*inm2(j)           
         out(j+1+dout2)=
     &        t0*in(j+1)+t1*inm1(j+1)+t2*inm2(j+1)+t3*inm3(j+1)
      enddo

c     Zero out data after lptl
      do j=1+lptl+out2+1,area+out2
         out(j+dout2)=0.
      enddo

      return
      end


c     Computes convolution for a single "odd" data set ("line") during
c     reverse phases (INPUT/OUTPUT)
c
c     out: output set
c     in,inm1,inm2,inm3,inm4,inm5,inm6,inm7,inm8: input set
c     i: index of line to be done
c     stridel: stride of input
c     stride: stride of output
c     fptl: first point of input actually used
c     lptl: last point of input actually used
c     area: full area to be generated
c     t0,t1,t2,t3,t4,t5,t6,t7,t8: filter coefficients
      subroutine lineioo8(out,in,inm1,inm2,inm3,
     $     inm4,inm5,inm6,inm7,inm8,i,
     $     stridel,stride,fptl,lptl,area,
     $     t0,t1,t2,t3,t4,t5,t6,t7,t8)
      implicit none
      double precision out(*),in(*),inm1(*),inm2(*),inm3(*)
      double precision inm4(*),inm5(*),inm6(*),inm7(*),inm8(*)
      integer i,stridel,stride,fptl,lptl,area
      double precision t0,t1,t2,t3,t4,t5,t6,t7,t8

      integer out2,dout2,j

      out2=i*stridel
      dout2=i*(stride-stridel)

c     Zero out data before fptl
      do j=1+out2,fptl+out2
         out(j+dout2)=0.
      enddo

c     Do the line for "active" points:
c     Ignore any even input data on even input lines!
      do j=1+fptl+out2,1+lptl+out2,2
         out(j+dout2)=
     &        t0*in(j)           +t2*inm2(j)           
         out(j+1+dout2)=
     &        t0*in(j+1)+t1*inm1(j+1)+t2*inm2(j+1)+t3*inm3(j+1)
     &        +t4*inm4(j+1)+t5*inm5(j+1)+t6*inm6(j+1)
     &        +t8*inm8(j+1)
      enddo

c     Zero out data after lptl
      do j=1+lptl+out2+1,area+out2
         out(j+dout2)=0.
      enddo

      return
      end


c     Back and forth, linear convolution of sets of points for +/- 3
c     onvolutions  using padding, and ignoring input data on non-ghost
c     points (IN PLACE) 
c
c     out: input/output array for in place computation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers in output array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetsp3(out,in,inm3,inm2,inm1,inp1,inp2,inp3,
     &     area,stride,n,npad,tr,tf)
      implicit none

      double precision out(*)
      double precision in(*)
      double precision inm3(*),inm2(*),inm1(*)
      double precision inp1(*),inp2(*),inp3(*)
      integer stride,area,n,npad
      double precision tr(*),tf(*)
      double precision t0,t1,t3

      integer i,j,out2

c     Forward phase (padding now ensures room!)
c     No change here relative to convsets

      t0=tf(1)
      t1=tf(2)
      t3=tf(4)

      do i=3,n-5,2
         out2=i*stride

         do j=1+out2,area+out2
            out(j)=t3*inm3(j)+t1*inm1(j)+
     &           t1*inp1(j)+t3*inp3(j)
         enddo
      enddo

      do i=2,n-5,2
         out2=i*stride

         do j=1+out2,area+out2
            out(j)=in(j)
         enddo
      enddo

      return
      end

c     Back and forth, linear convolution of sets of points for +/- 8
c     onvolutions  using padding, and ignoring input data on non-ghost
c     points (IN PLACE) 
c
c     out: input/output array for in place computation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers in output array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetsp8(out,
     $     outm8,outm7,outm6,outm5,outm4,outm3,outm2,outm1,
     $     outp1,outp2,outp3,outp4,outp5,outp6,outp7,outp8,
     $     area,stride,n,npad,tr,tf)
      implicit none

      double precision out(*)
      double precision outm8(*),outm7(*),outm6(*),outm5(*)
      double precision outm4(*),outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*),outp4(*)
      double precision outp5(*),outp6(*),outp7(*),outp8(*)
      integer stride,area,n,npad
      double precision tr(*),tf(*)
      double precision t0,t1,t2,t3,t4,t5,t6,t7,t8

      integer i,j,out2


c     Reverse direction
c     Relative to convsets, we wiped out the non-ghost references
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)
      t4=tr(5)
      t5=tr(6)
      t6=tr(7)
      t7=tr(8)
      t8=tr(9)

c     Pad region
c      out2=(npad-1)*stride
c      do j=1+out2,area+out2
c         out(j)=0.
c      enddo

c      out2=(npad-2)*stride
c      do j=1+out2,area+out2
c         out(j)=t8*outm8(j)
c      enddo

c      out2=(npad-3)*stride
c      do j=1+out2,area+out2
c         out(j)=t7*outm7(j)
c      enddo

c      out2=(npad-4)*stride
c      do j=1+out2,area+out2
c         out(j)=t6*outm6(j)+t8*outm8(j)
c      enddo

c      out2=(npad-5)*stride
c      do j=1+out2,area+out2
c         out(j)=t5*outm5(j)+t7*outm7(j)
c      enddo

c      out2=(npad-6)*stride
c      do j=1+out2,area+out2
c         out(j)=t4*outm4(j)+t6*outm6(j)+t8*outm8(j)
c      enddo

c      out2=(npad-7)*stride
c      do j=1+out2,area+out2
c         out(j)=t3*outm3(j)+t5*outm5(j)+t7*outm7(j)
c     enddo

c      out2=(npad-8)*stride
c      do j=1+out2,area+out2
c         out(j)=t2*outm2(j)+t4*outm4(j)+t6*outm6(j)+t8*outm8(j)
c      enddo

c     Main reverse phase exploiting zeros! 
      do i=n-1,8,-2
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=t1*outm1(j)+t3*outm3(j)+t5*outm5(j)+t7*outm7(j)
         enddo

         out2=(i-1)*stride
         do j=1+out2,area+out2
            out(j)=t0*out(j)
     $           +t2*outm2(j)+t4*outm4(j)+t6*outm6(j)+t8*outm8(j)
         enddo
      enddo

c     Fence posting
c      out2=7*stride
c      do j=1+out2,area+out2
c         out(j)=t1*outm1(j)+t3*outm3(j)+t5*outm5(j)+t7*outm7(j)
c      enddo

c      out2=6*stride
c      do j=1+out2,area+out2
c         out(j)=t0*out(j)+t2*outm2(j)+t4*outm4(j)+t6*outm6(j)
c      enddo

c      out2=5*stride
c      do j=1+out2,area+out2
c         out(j)=t1*outm1(j)+t3*outm3(j)+t5*outm5(j)
c      enddo

c      out2=4*stride
c      do j=1+out2,area+out2
c         out(j)=t0*out(j)+t2*outm2(j)+t4*outm4(j)
c      enddo

c      out2=3*stride
c      do j=1+out2,area+out2
c         out(j)=t1*outm1(j)+t3*outm3(j)
c      enddo

c      out2=2*stride
c      do j=1+out2,area+out2
c         out(j)=t0*out(j)+t2*outm2(j)
c      enddo

c      out2=1*stride
c      do j=1+out2,area+out2
c         out(j)=t1*outm1(j)
c      enddo

c      out2=0*stride
c      do j=1+out2,area+out2
c         out(j)=t0*out(j)
c      enddo

c     Forward phase (padding now ensures room!)
c     No change here relative to convsets
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      t4=tf(5)
      t5=tf(6)
      t6=tf(7)
      t7=tf(8)
      t8=tf(9)

c Need to verify that this is OK.....
c      do i=0,n-1
      do i=8,n-8
         out2=i*stride

         do j=1+out2,area+out2
            out(j)=
     &           t0*out(j)
     $           +t1*outp1(j)+t2*outp2(j)+t3*outp3(j)+t4*outp4(j)
     $           +t5*outp5(j)+t6*outp6(j)+t7*outp7(j)+t8*outp8(j)
         enddo
      enddo

      return
      end


c     Back and forth, linear convolution of sets of points for +/- 3
c     onvolutions  using padding, and ignoring input data on non-ghost
c     points (IN PLACE) 
c
c     out: input/output array for in place computation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers in output array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetsp8full(out,in,inm8,inm7,inm6,inm5,inm4,
     &     inm3,inm2,inm1,inp1,inp2,inp3,inp4,inp5,inp6,inp7,inp8,
     &     area,stride,n,npad,filter)
      implicit none

      double precision out(*)
      double precision in(*)
      double precision inm8(*),inm7(*),inm6(*),inm5(*),inm4(*)
      double precision inm3(*),inm2(*),inm1(*)
      double precision inp1(*),inp2(*),inp3(*)
      double precision inp4(*),inp5(*),inp6(*),inp7(*),inp8(*)
      integer stride,area,n,npad
      double precision filter(*)
      double precision t0,t1,t2,t3,t4,t5,t6,t7,t8

      integer i,j,out2

c     Forward phase (padding now ensures room!)
c     No change here relative to convsets

      t0=filter(1)
      t1=filter(2)
      t2=filter(3)
      t3=filter(4)
      t4=filter(5)
      t5=filter(6)
      t6=filter(7)
      t7=filter(8)
      t8=filter(9)

      if (t0 /= 0.0) then
c     symmetric filter

c     thin func
      do i=9,n-8,2
         out2=i*stride
         
         do j=1+out2,area+out2
            out(j)=t7*inm7(j)+t5*inm5(j)+t3*inm3(j)+t1*inm1(j)+
     &           t1*inp1(j)+t3*inp3(j)+t5*inp5(j)+t7*inp7(j)
         enddo
      enddo
      
c     fat func
      do i=8,n-8,2
         out2=i*stride

         do j=1+out2,area+out2
            out(j)=t8*inm8(j)+t6*inm6(j)+t4*inm4(j)+t2*inm2(j)+
     &           t0*in(j)+t2*inp2(j)+t4*inp4(j)+t6*inp6(j)+t8*inp8(j)
         enddo
      enddo
c     end of symmetric part
      else
c     antisymmetric filter

c     thin func
      do i=9,n-8,2
         out2=i*stride
         
         do j=1+out2,area+out2
            out(j)=-1.*t7*inm7(j)-t5*inm5(j)-t3*inm3(j)-t1*inm1(j)+
     &           t1*inp1(j)+t3*inp3(j)+t5*inp5(j)+t7*inp7(j)
         enddo
      enddo
      
c     fat func
      do i=8,n-8,2
         out2=i*stride

         do j=1+out2,area+out2
            out(j)=-1.*t8*inm8(j)-t6*inm6(j)-t4*inm4(j)-t2*inm2(j)+
     &           t0*in(j)+t2*inp2(j)+t4*inp4(j)+t6*inp6(j)+t8*inp8(j)
         enddo
      enddo
c     end of antisymmetric part
      endif

      return
      end
      
c     Back and forth, linear convolution of sets of points using padding,
c     ignoring input data on non-ghost points (IN/OUT)
c
c     lower: output array
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers to input work array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     stridel: stride on output
c     fptl,lptl: first,last actual points in output set
c     fsetl,lsetl: first,last actual sets in output
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetspo3(lower,out,
     $     outm3,outm2,outm1,outp1,outp2,outp3,
     $     area,stride,n,npad,
     $     stridel,fptl,lptl,fsetl,lsetl,
     $     tr,tf)
      implicit none
      double precision lower(*)
      double precision out(*)
      double precision outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*)
      integer stride,area,n,npad
      integer stridel,fptl,lptl,fsetl,lsetl
      double precision tr(*),tf(*)
      double precision t0,t1,t2,t3

      integer i,j,out2,out2s,jl

c     Reverse direction
c     No change here relative to convsetsp
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)

c     Pad region
      out2=(npad-1)*stride
      do j=1+out2,area+out2
         out(j)=0.
      enddo

      out2=(npad-2)*stride
      do j=1+out2,area+out2
         out(j)=                                   t3*outm3(j)
      enddo

      out2=(npad-3)*stride
      do j=1+out2,area+out2
         out(j)=                       t2*outm2(j)
      enddo


c     Main reverse phase exploiting zeros! 
      do i=n-1,3,-2
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=
     $                     t1*outm1(j)            +t3*outm3(j)
         enddo

         out2=(i-1)*stride
         do j=1+out2,area+out2
            out(j)=
     $           t0*out(j)            +t2*outm2(j)
         enddo
      enddo

c     Fence posting
      out2=stride
      do j=1+out2,area+out2
         out(j)=
     $                  t1*outm1(j)
      enddo
      
      out2=0
      do j=1+out2,area+out2
         out(j)=
     $        t0*out(j)
      enddo

c     Forward phase (padding now ensures room!)
c     Relative to convsetsp, output now places into lower instead
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      do i=fsetl,lsetl
         out2=i*stride
         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
c$$$            out(j)=
c$$$     $           t0*out(j)+t1*outp1(j)+t2*outp2(j)+t3*outp3(j)
            lower(jl)=
     $           t0*out(j)+t1*outp1(j)+t2*outp2(j)+t3*outp3(j)
         enddo
      enddo

      return
      end

c     lower: output array
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers to input work array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     stridel: stride on output
c     fptl,lptl: first,last actual points in output set
c     fsetl,lsetl: first,last actual sets in output
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetspoadd3(lower,addon,out,
     $     outm3,outm2,outm1,outp1,outp2,outp3,
     $     area,stride,n,npad,
     $     stridel,fptl,lptl,fsetl,lsetl,
     $     tr,tf)
      implicit none
      double precision lower(*)
      double precision addon(*)
      double precision out(*)
      double precision outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*)
      integer stride,area,n,npad
      integer stridel,fptl,lptl,fsetl,lsetl
      double precision tr(*),tf(*)
      double precision t0,t1,t2,t3

      integer i,j,out2,out2s,jl

c     Reverse direction
c     No change here relative to convsetsp
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)

c     Pad region
      out2=(npad-1)*stride
      do j=1+out2,area+out2
         out(j)=0.
      enddo

      out2=(npad-2)*stride
      do j=1+out2,area+out2
         out(j)=                                   t3*outm3(j)
      enddo

      out2=(npad-3)*stride
      do j=1+out2,area+out2
         out(j)=                       t2*outm2(j)
      enddo


c     Main reverse phase exploiting zeros! 
      do i=n-1,3,-2
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=
     $                     t1*outm1(j)            +t3*outm3(j)
         enddo

         out2=(i-1)*stride
         do j=1+out2,area+out2
            out(j)=
     $           t0*out(j)            +t2*outm2(j)
         enddo
      enddo

c     Fence posting
      out2=stride
      do j=1+out2,area+out2
         out(j)=
     $                  t1*outm1(j)
      enddo
      
      out2=0
      do j=1+out2,area+out2
         out(j)=
     $        t0*out(j)
      enddo

c     Forward phase (padding now ensures room!)
c     Relative to convsetsp, output now places into lower instead
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      do i=fsetl,lsetl
         out2=i*stride
         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
c$$$            out(j)=
c$$$     $           t0*out(j)+t1*outp1(j)+t2*outp2(j)+t3*outp3(j)
            lower(jl)=addon(jl)+
     $           t0*out(j)+t1*outp1(j)+t2*outp2(j)+t3*outp3(j)
         enddo
      enddo

      return
      end


c     Back and forth, linear convolution of sets of points for +/- 3
c      filters using padding, ignoring input data on non-ghost points
c      (IN/+=OUT) 
c
c     lower: output array for accumulation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers to input work array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     stridel: stride on output
c     fptl,lptl: first,last actual points in output set
c     fsetl,lsetl: first,last actual sets in output
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetspop3(lower,out,
     $     outm3,outm2,outm1,outp1,outp2,outp3,
     $     area,stride,n,npad,
     $     stridel,fptl,lptl,fsetl,lsetl,
     $     filt)
      implicit none
      double precision lower(*)
      double precision out(*)
      double precision outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*)
      integer stride,area,n,npad
      integer stridel,fptl,lptl,fsetl,lsetl
      double precision filt(*)
      double precision t0,t1,t3

      integer i,j,out2,out2s,jl


c     Forward phase (padding now ensures room!)
c     Relative to convsetspo, only change is accumulation into lower
      t0=filt(1)
      t1=filt(2)
      t3=filt(4)

      do i=fsetl+1,lsetl,2
         out2=i*stride
         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
            lower(jl)=lower(jl)+t3*outm3(j)+t1*outm1(j)+
     $           t1*outp1(j)+t3*outp3(j)
         enddo
      enddo

      do i=fsetl,lsetl,2
         out2=i*stride
         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
            lower(jl)=lower(jl)+out(j)
         enddo
      enddo
      
      return
      end

c     Back and forth, linear convolution of sets of points for +/- 3
c      filters using padding, ignoring input data on non-ghost points
c      (IN/+=OUT) 
c
c     lower: output array for accumulation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers to input work array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     stridel: stride on output
c     fptl,lptl: first,last actual points in output set
c     fsetl,lsetl: first,last actual sets in output
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetspom3(lower,out,
     $     outm3,outm2,outm1,outp1,outp2,outp3,
     $     area,stride,n,npad,
     $     stridel,fptl,lptl,fsetl,lsetl,
     $     filt)
      implicit none
      double precision lower(*)
      double precision out(*)
      double precision outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*)
      integer stride,area,n,npad
      integer stridel,fptl,lptl,fsetl,lsetl
      double precision filt(*)
      double precision t0,t1,t3

      integer i,j,out2,out2s,jl


c     Forward phase (padding now ensures room!)
c     Relative to convsetspo, only change is accumulation into lower
      t0=filt(1)
      t1=filt(2)
      t3=filt(4)

      do i=fsetl+1,lsetl,2
         out2=i*stride
         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
            lower(jl)=lower(jl)-t3*outm3(j)-t1*outm1(j)-
     $           t1*outp1(j)-t3*outp3(j)
         enddo
      enddo

      do i=fsetl,lsetl,2
         out2=i*stride
         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
            lower(jl)=lower(jl)-out(j)
         enddo
      enddo
      
      return
      end

c     Back and forth, linear convolution of sets of points for +/- 8
c      filters using padding, ignoring input data on non-ghost points
c      (IN/+=OUT) 
c
c     lower: output array for accumulation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers to input work array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     stridel: stride on output
c     fptl,lptl: first,last actual points in output set
c     fsetl,lsetl: first,last actual sets in output
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetspop8(lower,out,
     $     outm8,outm7,outm6,outm5,outm4,outm3,outm2,outm1,
     $     outp1,outp2,outp3,outp4,outp5,outp6,outp7,outp8,
     &     area,stride,n,npad,
     $     stridel,fptl,lptl,fsetl,lsetl,
     $     tr,tf)
      implicit none

      double precision lower(*)
      double precision out(*)
      double precision outm8(*),outm7(*),outm6(*),outm5(*)
      double precision outm4(*),outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*),outp4(*)
      double precision outp5(*),outp6(*),outp7(*),outp8(*)
      integer stride,area,n,npad
      integer stridel,fptl,lptl,fsetl,lsetl
      double precision tr(*),tf(*)
      double precision t0,t1,t2,t3,t4,t5,t6,t7,t8

      integer i,j,out2,out2s,jl


c     Reverse direction
c     Relative to convsets, we wiped out the non-ghost references
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)
      t4=tr(5)
      t5=tr(6)
      t6=tr(7)
      t7=tr(8)
      t8=tr(9)

c     Pad region
      out2=(npad-1)*stride
      do j=1+out2,area+out2
         out(j)=0.
      enddo

      out2=(npad-2)*stride
      do j=1+out2,area+out2
         out(j)=t8*outm8(j)
      enddo

      out2=(npad-3)*stride
      do j=1+out2,area+out2
         out(j)=t7*outm7(j)
      enddo

      out2=(npad-4)*stride
      do j=1+out2,area+out2
         out(j)=t6*outm6(j)+t8*outm8(j)
      enddo

      out2=(npad-5)*stride
      do j=1+out2,area+out2
         out(j)=t5*outm5(j)+t7*outm7(j)
      enddo

      out2=(npad-6)*stride
      do j=1+out2,area+out2
         out(j)=t4*outm4(j)+t6*outm6(j)+t8*outm8(j)
      enddo

      out2=(npad-7)*stride
      do j=1+out2,area+out2
         out(j)=t3*outm3(j)+t5*outm5(j)+t7*outm7(j)
      enddo

      out2=(npad-8)*stride
      do j=1+out2,area+out2
         out(j)=t2*outm2(j)+t4*outm4(j)+t6*outm6(j)+t8*outm8(j)
      enddo

c     Main reverse phase exploiting zeros! 
      do i=n-1,8,-2
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=t7*outm7(j)+t5*outm5(j)+t3*outm3(j)+t1*outm1(j)
         enddo

         out2=(i-1)*stride
         do j=1+out2,area+out2
            out(j)=t0*out(j)
     $           +t2*outm2(j)+t4*outm4(j)+t6*outm6(j)+t8*outm8(j)
         enddo
      enddo

c     Fence posting
      out2=7*stride
      do j=1+out2,area+out2
         out(j)=t1*outm1(j)+t3*outm3(j)+t5*outm5(j)+t7*outm7(j)
      enddo

      out2=6*stride
      do j=1+out2,area+out2
         out(j)=t0*out(j)+t2*outm2(j)+t4*outm4(j)+t6*outm6(j)
      enddo

      out2=5*stride
      do j=1+out2,area+out2
         out(j)=t1*outm1(j)+t3*outm3(j)+t5*outm5(j)
      enddo

      out2=4*stride
      do j=1+out2,area+out2
         out(j)=t0*out(j)+t2*outm2(j)+t4*outm4(j)
      enddo

      out2=3*stride
      do j=1+out2,area+out2
         out(j)=t1*outm1(j)+t3*outm3(j)
      enddo

      out2=2*stride
      do j=1+out2,area+out2
         out(j)=t0*out(j)+t2*outm2(j)
      enddo

      out2=1*stride
      do j=1+out2,area+out2
         out(j)=t1*outm1(j)
      enddo

      out2=0*stride
      do j=1+out2,area+out2
         out(j)=t0*out(j)
      enddo

c     Forward phase (padding now ensures room!)
c     No change here relative to convsets
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      t4=tf(5)
      t5=tf(6)
      t6=tf(7)
      t7=tf(8)
      t8=tf(9)

      do i=fsetl,lsetl
         out2=i*stride

         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
c$$$            out(j)=
c$$$     $           t0*out(j)+t1*outp1(j)+t2*outp2(j)+t3*outp3(j)
            lower(jl)=lower(jl)+
     &           t0*out(j)
     $           +t1*outp1(j)+t2*outp2(j)+t3*outp3(j)+t4*outp4(j)
     $           +t5*outp5(j)+t6*outp6(j)+t7*outp7(j)+t8*outp8(j)
         enddo
      enddo

      return
      end


c     Back and forth, linear convolution of sets of points for +/- 3
c      filters using padding, ignoring input data on non-ghost points
c      (IN/+=OUT) 
c
c     lower: output array for accumulation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers to input work array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     stridel: stride on output
c     fptl,lptl: first,last actual points in output set
c     fsetl,lsetl: first,last actual sets in output
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetspop8full(lower,out,
     $     outm8,outm7,outm6,outm5,outm4,outm3,outm2,outm1,
     $     outp1,outp2,outp3,outp4,outp5,outp6,outp7,outp8,
     $     area,stride,n,npad,
     $     stridel,fptl,lptl,fsetl,lsetl,
     $     filt)
      implicit none
      double precision lower(*)
      double precision out(*)
      double precision outm8(*),outm7(*),outm6(*),outm5(*)
      double precision outm4(*),outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*),outp4(*)
      double precision outp5(*),outp6(*),outp7(*),outp8(*)
      integer stride,area,n,npad
      integer stridel,fptl,lptl,fsetl,lsetl
      double precision filt(*)
      double precision t0,t1,t2,t3,t4,t5,t6,t7,t8

      integer i,j,out2,out2s,jl


c     Forward phase (padding now ensures room!)
c     Relative to convsetspo, only change is accumulation into lower
      t0=filt(1)
      t1=filt(2)
      t2=filt(3)
      t3=filt(4)
      t4=filt(5)
      t5=filt(6)
      t6=filt(7)
      t7=filt(8)
      t8=filt(9)

      if (t0 /= 0.0) then
c     symmetric filter
      do i=fsetl+1,lsetl,2
         out2=i*stride
         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
            lower(jl)=lower(jl)
     $           +t7*outm7(j)+t5*outm5(j)+t3*outm3(j)+t1*outm1(j)
     $           +t1*outp1(j)+t3*outp3(j)+t5*outp5(j)+t7*outp7(j)
         enddo
      enddo

      do i=fsetl,lsetl,2
         out2=i*stride
         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
            lower(jl)=lower(jl)
     $           +t8*outm8(j)+t6*outm6(j)+t4*outm4(j)+t2*outm2(j)
     $           +t0*out(j)+t2*outp2(j)+t4*outp4(j)+t6*outp6(j)
     $           +t8*outp8(j)
         enddo
      enddo
c     end of symmetric part
      else
c     antisymmetric filter
      do i=fsetl+1,lsetl,2
         out2=i*stride
         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
            lower(jl)=lower(jl)
     $           -t7*outm7(j)-t5*outm5(j)-t3*outm3(j)-t1*outm1(j)
     $           +t1*outp1(j)+t3*outp3(j)+t5*outp5(j)+t7*outp7(j)
         enddo
      enddo

      do i=fsetl,lsetl,2
         out2=i*stride
         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
            lower(jl)=lower(jl)
     $           -t8*outm8(j)-t6*outm6(j)-t4*outm4(j)-t2*outm2(j)
     $           +t0*out(j)+t2*outp2(j)+t4*outp4(j)+t6*outp6(j)
     $           +t8*outp8(j)
         enddo
      enddo
c     end of antisymmetric part
      endif 

      return
      end


c     Back and forth, linear convolution of sets of points for +/- 3
c      filters using padding, ignoring input data on non-ghost points
c      (IN/+=OUT) 
c
c     lower: output array for accumulation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers to input work array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     stridel: stride on output
c     fptl,lptl: first,last actual points in output set
c     fsetl,lsetl: first,last actual sets in output
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetspo8full(lower,out,
     $     outm8,outm7,outm6,outm5,outm4,outm3,outm2,outm1,
     $     outp1,outp2,outp3,outp4,outp5,outp6,outp7,outp8,
     $     area,stride,n,npad,
     $     stridel,fptl,lptl,fsetl,lsetl,
     $     filt)
      implicit none
      double precision lower(*)
      double precision out(*)
      double precision outm8(*),outm7(*),outm6(*),outm5(*)
      double precision outm4(*),outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*),outp4(*)
      double precision outp5(*),outp6(*),outp7(*),outp8(*)
      integer stride,area,n,npad
      integer stridel,fptl,lptl,fsetl,lsetl
      double precision filt(*)
      double precision t0,t1,t2,t3,t4,t5,t6,t7,t8

      integer i,j,out2,out2s,jl


c     Forward phase (padding now ensures room!)
c     Relative to convsetspo, only change is accumulation into lower
      t0=filt(1)
      t1=filt(2)
      t2=filt(3)
      t3=filt(4)
      t4=filt(5)
      t5=filt(6)
      t6=filt(7)
      t7=filt(8)
      t8=filt(9)

      if (t0 /= 0.0) then
c     symmetric filter
      do i=fsetl+1,lsetl,2
         out2=i*stride
         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
            lower(jl)=t7*outm7(j)+t5*outm5(j)+t3*outm3(j)+t1*outm1(j)
     $           +t1*outp1(j)+t3*outp3(j)+t5*outp5(j)+t7*outp7(j)
         enddo
      enddo

      do i=fsetl,lsetl,2
         out2=i*stride
         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
            lower(jl)=t8*outm8(j)+t6*outm6(j)+t4*outm4(j)+t2*outm2(j)
     $           +t0*out(j)+t2*outp2(j)+t4*outp4(j)+t6*outp6(j)
     $           +t8*outp8(j)
         enddo
      enddo
c     end of symmetric part
      else
c     antisymmetric filter
      do i=fsetl+1,lsetl,2
         out2=i*stride
         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
            lower(jl)=-1.*t7*outm7(j)-t5*outm5(j)-t3*outm3(j)
     $           -t1*outm1(j)+t1*outp1(j)+t3*outp3(j)+t5*outp5(j)
     $           +t7*outp7(j)
         enddo
      enddo

      do i=fsetl,lsetl,2
         out2=i*stride
         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
            lower(jl)=-1.*t8*outm8(j)-t6*outm6(j)-t4*outm4(j)
     $           -t2*outm2(j)+t0*out(j)+t2*outp2(j)+t4*outp4(j)
     $           +t6*outp6(j)+t8*outp8(j)
         enddo
      enddo
c     end of antisymmetric part
      endif

      return
      end


c     Back and forth, linear convolution of sets of points for +/- 8
c      filters using padding, ignoring input data on non-ghost points
c      (IN/+=OUT) 
c
c     lower: output array for accumulation
c     outm3,outm2,outm1,outp1,outp2,outp3: pointers to input work array to
c         start of data sets, 3 previous, 2 previous, 1 previous, 1 forward,
c         2 forward, 3 forward
c     area: total size of each set
c     stride: separation between data sets (stride=&out-&outm1)
c     n: actual number of data sets being convolved
c     npad: size of padded region needed for "leakage" of foward filters
c     stridel: stride on output
c     fptl,lptl: first,last actual points in output set
c     fsetl,lsetl: first,last actual sets in output
c     tr: reverse-time filter coefficients	
c     tf: forward-time filter coefficients
      subroutine convsetspo8(lower,out,
     $     outm8,outm7,outm6,outm5,outm4,outm3,outm2,outm1,
     $     outp1,outp2,outp3,outp4,outp5,outp6,outp7,outp8,
     &     area,stride,n,npad,
     $     stridel,fptl,lptl,fsetl,lsetl,
     $     tr,tf)
      implicit none

      double precision lower(*)
      double precision out(*)
      double precision outm8(*),outm7(*),outm6(*),outm5(*)
      double precision outm4(*),outm3(*),outm2(*),outm1(*)
      double precision outp1(*),outp2(*),outp3(*),outp4(*)
      double precision outp5(*),outp6(*),outp7(*),outp8(*)
      integer stride,area,n,npad
      integer stridel,fptl,lptl,fsetl,lsetl
      double precision tr(*),tf(*)
      double precision t0,t1,t2,t3,t4,t5,t6,t7,t8

      integer i,j,out2,out2s,jl


c     Reverse direction
c     Relative to convsets, we wiped out the non-ghost references
      t0=tr(1)
      t1=tr(2)
      t2=tr(3)
      t3=tr(4)
      t4=tr(5)
      t5=tr(6)
      t6=tr(7)
      t7=tr(8)
      t8=tr(9)

c     Pad region
      out2=(npad-1)*stride
      do j=1+out2,area+out2
         out(j)=0.
      enddo

      out2=(npad-2)*stride
      do j=1+out2,area+out2
         out(j)=t8*outm8(j)
      enddo

      out2=(npad-3)*stride
      do j=1+out2,area+out2
         out(j)=t7*outm7(j)
      enddo

      out2=(npad-4)*stride
      do j=1+out2,area+out2
         out(j)=t6*outm6(j)+t8*outm8(j)
      enddo

      out2=(npad-5)*stride
      do j=1+out2,area+out2
         out(j)=t5*outm5(j)+t7*outm7(j)
      enddo

      out2=(npad-6)*stride
      do j=1+out2,area+out2
         out(j)=t4*outm4(j)+t6*outm6(j)+t8*outm8(j)
      enddo

      out2=(npad-7)*stride
      do j=1+out2,area+out2
         out(j)=t3*outm3(j)+t5*outm5(j)+t7*outm7(j)
      enddo

      out2=(npad-8)*stride
      do j=1+out2,area+out2
         out(j)=t2*outm2(j)+t4*outm4(j)+t6*outm6(j)+t8*outm8(j)
      enddo

c     Main reverse phase exploiting zeros! 
      do i=n-1,8,-2
         out2=i*stride
         do j=1+out2,area+out2
            out(j)=t7*outm7(j)+t5*outm5(j)+t3*outm3(j)+t1*outm1(j)
         enddo

         out2=(i-1)*stride
         do j=1+out2,area+out2
            out(j)=t0*out(j)
     $           +t2*outm2(j)+t4*outm4(j)+t6*outm6(j)+t8*outm8(j)
         enddo
      enddo

c     Fence posting
      out2=7*stride
      do j=1+out2,area+out2
         out(j)=t1*outm1(j)+t3*outm3(j)+t5*outm5(j)+t7*outm7(j)
      enddo

      out2=6*stride
      do j=1+out2,area+out2
         out(j)=t0*out(j)+t2*outm2(j)+t4*outm4(j)+t6*outm6(j)
      enddo

      out2=5*stride
      do j=1+out2,area+out2
         out(j)=t1*outm1(j)+t3*outm3(j)+t5*outm5(j)
      enddo

      out2=4*stride
      do j=1+out2,area+out2
         out(j)=t0*out(j)+t2*outm2(j)+t4*outm4(j)
      enddo

      out2=3*stride
      do j=1+out2,area+out2
         out(j)=t1*outm1(j)+t3*outm3(j)
      enddo

      out2=2*stride
      do j=1+out2,area+out2
         out(j)=t0*out(j)+t2*outm2(j)
      enddo

      out2=1*stride
      do j=1+out2,area+out2
         out(j)=t1*outm1(j)
      enddo

      out2=0*stride
      do j=1+out2,area+out2
         out(j)=t0*out(j)
      enddo

c     Forward phase (padding now ensures room!)
c     No change here relative to convsets
      t0=tf(1)
      t1=tf(2)
      t2=tf(3)
      t3=tf(4)
      t4=tf(5)
      t5=tf(6)
      t6=tf(7)
      t7=tf(8)
      t8=tf(9)

      do i=fsetl,lsetl
         out2=i*stride

         out2s=i*(stridel-stride)
         do j=1+out2+fptl,1+out2+lptl
            jl=j+out2s
c$$$            out(j)=
c$$$     $           t0*out(j)+t1*outp1(j)+t2*outp2(j)+t3*outp3(j)
            lower(jl)=
     &           t0*out(j)
     $           +t1*outp1(j)+t2*outp2(j)+t3*outp3(j)+t4*outp4(j)
     $           +t5*outp5(j)+t6*outp6(j)+t7*outp7(j)+t8*outp8(j)
         enddo
      enddo

      return
      end