mirror of
https://github.com/exaloop/codon.git
synced 2025-06-03 15:03:52 +08:00
2263 lines
78 KiB
Python
2263 lines
78 KiB
Python
# Copyright (C) 2022-2025 Exaloop Inc. <https://exaloop.io>
|
|
|
|
# Adapted from PocketFFT - https://github.com/mreineck/pocketfft
|
|
|
|
from ..util import free, sqrt, sin, cos, cast, count, sizeof, PI
|
|
|
|
def _special_mul(v1, v2, fwd: bool):
|
|
if type(v1) is complex and type(v2) is complex64:
|
|
return _special_mul(v1, complex(v2), fwd)
|
|
|
|
if type(v1) is complex64 and type(v2) is complex:
|
|
return _special_mul(complex(v1), v2, fwd)
|
|
|
|
T = type(v1)
|
|
if fwd:
|
|
return T(
|
|
v1.real * v2.real + v1.imag * v2.imag,
|
|
v1.imag * v2.real - v1.real * v2.imag
|
|
)
|
|
else:
|
|
return T(
|
|
v1.real * v2.real - v1.imag * v2.imag,
|
|
v1.real * v2.imag + v1.imag * v2.real
|
|
)
|
|
|
|
def ROT90(a):
|
|
T = type(a)
|
|
return T(-a.imag, a.real)
|
|
|
|
def ROTX90(a, fwd: bool):
|
|
T = type(a)
|
|
if fwd:
|
|
return T(a.imag, -a.real)
|
|
else:
|
|
return T(-a.imag, a.real)
|
|
|
|
def ROTX45(a, fwd: bool):
|
|
T = type(a)
|
|
T0 = type(a.real)
|
|
hsqt2 = cast(0.707106781186547524400844362104849, T0)
|
|
if fwd:
|
|
return T(hsqt2*(a.real+a.imag), hsqt2*(a.imag-a.real))
|
|
else:
|
|
return T(hsqt2*(a.real-a.imag), hsqt2*(a.imag+a.real))
|
|
|
|
def ROTX135(a, fwd: bool):
|
|
T = type(a)
|
|
T0 = type(a.real)
|
|
hsqt2 = cast(0.707106781186547524400844362104849, T0)
|
|
if fwd:
|
|
return T(hsqt2*(a.imag-a.real), hsqt2*(-a.real-a.imag))
|
|
else:
|
|
return T(hsqt2*(-a.real-a.imag), hsqt2*(a.real-a.imag))
|
|
|
|
@tuple
|
|
class arr:
|
|
p: Ptr[T]
|
|
sz: int
|
|
T: type
|
|
|
|
def __new__(sz: int) -> arr[T]:
|
|
return (Ptr[T](sz), sz)
|
|
|
|
def dealloc(self):
|
|
free(self.p)
|
|
|
|
def __getitem__(self, idx: int):
|
|
return self.p[idx]
|
|
|
|
def __setitem__(self, idx: int, val: T):
|
|
self.p[idx] = val
|
|
|
|
def size(self):
|
|
return self.sz
|
|
|
|
def data(self):
|
|
return self.p
|
|
|
|
@tuple
|
|
class sincos_2pibyn:
|
|
N: int
|
|
mask: int
|
|
shift: int
|
|
v1: arr[complex]
|
|
v2: arr[complex]
|
|
T: type
|
|
|
|
def calc(x: int, n: int, ang: float):
|
|
x <<= 3
|
|
if x < 4*n:
|
|
if x < 2*n:
|
|
if x < n:
|
|
y = cast(x, float) * ang
|
|
return complex(cos(y), sin(y))
|
|
else:
|
|
y = cast(2*n - x, float) * ang
|
|
return complex(sin(y), cos(y))
|
|
else:
|
|
x -= 2*n
|
|
if x < n:
|
|
y = cast(x, float) * ang
|
|
return complex(-sin(y), cos(y))
|
|
else:
|
|
y = cast(2*n - x, float) * ang
|
|
return complex(-cos(y), sin(y))
|
|
else:
|
|
x = 8*n - x
|
|
if x < 2*n:
|
|
if x < n:
|
|
y = cast(x, float) * ang
|
|
return complex(cos(y), -sin(y))
|
|
else:
|
|
y = cast(2*n - x, float) * ang
|
|
return complex(sin(y), -cos(y))
|
|
else:
|
|
x -= 2*n
|
|
if x < n:
|
|
y = cast(x, float) * ang
|
|
return complex(-sin(y), -cos(y))
|
|
else:
|
|
y = cast(2*n - x, float) * ang
|
|
return complex(-cos(y), -sin(y))
|
|
|
|
def __new__(n: int) -> sincos_2pibyn[T]:
|
|
ang = 0.25 * PI / n
|
|
nval = (n + 2) >> 1
|
|
shift = 1
|
|
while (1 << shift) * (1 << shift) < nval:
|
|
shift += 1
|
|
mask = (1 << shift) - 1
|
|
|
|
v1 = arr[complex](mask + 1)
|
|
v1[0] = complex(1, 0)
|
|
for i in range(1, v1.size()):
|
|
v1[i] = sincos_2pibyn.calc(i, n, ang)
|
|
|
|
v2 = arr[complex]((nval + mask) // (mask + 1))
|
|
v2[0] = complex(1, 0)
|
|
for i in range(1, v2.size()):
|
|
v2[i] = sincos_2pibyn.calc(i*(mask+1), n, ang)
|
|
|
|
return (n, mask, shift, v1, v2)
|
|
|
|
def cmplx(self, re, im):
|
|
if T is float:
|
|
return complex(cast(re, float), cast(im, float))
|
|
elif T is float32:
|
|
return complex64(cast(re, float32), cast(im, float32))
|
|
else:
|
|
compile_error("[internal error] bad float type")
|
|
|
|
def __getitem__(self, idx: int):
|
|
if 2 * idx <= self.N:
|
|
x1 = self.v1[idx & self.mask]
|
|
x2 = self.v2[idx >> self.shift]
|
|
return self.cmplx(x1.real * x2.real - x1.imag * x2.imag,
|
|
x1.real * x2.imag + x1.imag * x2.real)
|
|
else:
|
|
idx = self.N - idx
|
|
x1 = self.v1[idx & self.mask]
|
|
x2 = self.v2[idx >> self.shift]
|
|
return self.cmplx(x1.real * x2.real - x1.imag * x2.imag,
|
|
-(x1.real * x2.imag + x1.imag * x2.real))
|
|
|
|
def dealloc(self):
|
|
self.v1.dealloc()
|
|
self.v2.dealloc()
|
|
|
|
def _largest_prime_factor(n: int):
|
|
res = 1
|
|
while n & 1 == 0:
|
|
res = 2
|
|
n >>= 1
|
|
|
|
x = 3
|
|
while x * x <= n:
|
|
while n % x == 0:
|
|
res = x
|
|
n //= x
|
|
x += 2
|
|
|
|
if n > 1:
|
|
res = n
|
|
return res
|
|
|
|
def _cost_guess(n: int):
|
|
lfp = 1.1
|
|
ni = n
|
|
result = 0.0
|
|
|
|
while n & 1 == 0:
|
|
result += 2
|
|
n >>= 1
|
|
|
|
x = 3
|
|
while x * x <= n:
|
|
while n % x == 0:
|
|
result += float(x) if x <= 5 else lfp*float(x)
|
|
n //= x
|
|
x += 2
|
|
|
|
if n > 1:
|
|
result += float(n) if n <= 5 else lfp*float(n)
|
|
return result*float(ni)
|
|
|
|
def _good_size_cmplx(n: int):
|
|
if n <= 12:
|
|
return n
|
|
bestfac = 2*n
|
|
f11 = 1
|
|
while f11 < bestfac:
|
|
f117 = f11
|
|
while f117 < bestfac:
|
|
f1175 = f117
|
|
while f1175 < bestfac:
|
|
x = f1175
|
|
while x < n:
|
|
x *= 2
|
|
while True:
|
|
if x < n:
|
|
x *= 3
|
|
elif x > n:
|
|
if x < bestfac:
|
|
bestfac = x
|
|
if x & 1:
|
|
break
|
|
x >>= 1
|
|
else:
|
|
return n
|
|
f1175 *= 5
|
|
f117 *= 7
|
|
f11 *= 11
|
|
return bestfac
|
|
|
|
def _good_size_real(n: int):
|
|
if n <= 6:
|
|
return n
|
|
bestfac = 2*n
|
|
f5 = 1
|
|
while f5 < bestfac:
|
|
x = f5
|
|
while x < n:
|
|
x *= 2
|
|
while True:
|
|
if x < n:
|
|
x *= 3
|
|
elif x > n:
|
|
if x < bestfac:
|
|
bestfac = x
|
|
if x & 1:
|
|
break
|
|
x >>= 1
|
|
else:
|
|
return n
|
|
f5 *= 5
|
|
return bestfac
|
|
|
|
def _prev_good_size_cmplx(n: int):
|
|
if n <= 12:
|
|
return n
|
|
bestfound = 1
|
|
f11 = 1
|
|
while f11 <= n:
|
|
f117 = f11
|
|
while f117 <= n:
|
|
f1175 = f117
|
|
while f1175 <= n:
|
|
x = f1175
|
|
while x*2 <= n:
|
|
x *= 2
|
|
if x > bestfound:
|
|
bestfound = x
|
|
while True:
|
|
if x * 3 <= n:
|
|
x *= 3
|
|
elif x % 2 == 0:
|
|
x //= 2
|
|
else:
|
|
break
|
|
|
|
if x > bestfound:
|
|
bestfound = x
|
|
f1175 *= 5
|
|
f117 *= 7
|
|
f11 *= 11
|
|
return bestfound
|
|
|
|
def _prev_good_size_real(n: int):
|
|
if n <= 6:
|
|
return n
|
|
bestfound = 1
|
|
f5 = 1
|
|
while f5 <= n:
|
|
x = f5
|
|
while x*2 <= n:
|
|
x *= 2
|
|
if x > bestfound:
|
|
bestfound = x
|
|
while True:
|
|
if x * 3 <= n:
|
|
x *= 3
|
|
elif x % 2 == 0:
|
|
x //= 2
|
|
else:
|
|
break
|
|
|
|
if x > bestfound:
|
|
bestfound = x
|
|
f5 *= 5
|
|
return bestfound
|
|
|
|
def PM(a, b):
|
|
return a + b, a - b
|
|
|
|
def MP(a, b):
|
|
return a - b, a + b
|
|
|
|
@tuple
|
|
class fctdata:
|
|
fct: int
|
|
tw: Ptr[T]
|
|
tws: Ptr[T]
|
|
T: type
|
|
|
|
def with_fct(self, fct: int):
|
|
return fctdata[T](fct, self.tw, self.tws)
|
|
|
|
def with_tw(self, tw: Ptr[T]):
|
|
return fctdata[T](self.fct, tw, self.tws)
|
|
|
|
def with_tws(self, tws: Ptr[T]):
|
|
return fctdata[T](self.fct, self.tw, tws)
|
|
|
|
class cfftp:
|
|
length: int
|
|
mem: arr[T]
|
|
fact: List[fctdata[T]]
|
|
T: type # complex datatype
|
|
T0: type # real datatype
|
|
|
|
def add_factor(self, factor: int):
|
|
self.fact.append(fctdata(factor, Ptr[T](), Ptr[T]()))
|
|
|
|
def pass2(ido: int, l1: int, cc: Ptr[T], ch: Ptr[T], wa: Ptr[T], fwd: bool):
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+l1*c))
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+2*c)]
|
|
@inline
|
|
def WA(x: int, i: int):
|
|
return wa[i-1+x*(ido-1)]
|
|
|
|
if ido == 1:
|
|
for k in range(l1):
|
|
CH(0,k,0)[0] = CC(0,0,k)+CC(0,1,k)
|
|
CH(0,k,1)[0] = CC(0,0,k)-CC(0,1,k)
|
|
else:
|
|
for k in range(l1):
|
|
CH(0,k,0)[0] = CC(0,0,k)+CC(0,1,k)
|
|
CH(0,k,1)[0] = CC(0,0,k)-CC(0,1,k)
|
|
for i in range(1, ido):
|
|
CH(i,k,0)[0] = CC(i,0,k)+CC(i,1,k)
|
|
CH(i,k,1)[0] = _special_mul(CC(i,0,k)-CC(i,1,k), WA(0,i), fwd)
|
|
|
|
def pass3(ido: int, l1: int, cc: Ptr[T], ch: Ptr[T], wa: Ptr[T], fwd: bool):
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+l1*c))
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+3*c)]
|
|
@inline
|
|
def WA(x: int, i: int):
|
|
return wa[i-1+x*(ido-1)]
|
|
|
|
tw1r = cast(-0.5, T0)
|
|
tw1i = cast(0.8660254037844386467637231707529362, T0)
|
|
if fwd:
|
|
tw1i = -tw1i
|
|
|
|
if ido == 1:
|
|
for k in range(l1):
|
|
# POCKETFFT_PREP3(0)
|
|
t0 = CC(0,0,k)
|
|
t1, t2 = PM(CC(0,1,k), CC(0,2,k))
|
|
CH(0,k,0)[0] = t0 + t1
|
|
# POCKETFFT_PARTSTEP3a(1,2,tw1r,tw1i)
|
|
ca = t0 + t1*tw1r
|
|
cb = T(-t2.imag*tw1i, t2.real*tw1i)
|
|
CH(0,k,1)[0], CH(0,k,2)[0] = PM(ca, cb)
|
|
else:
|
|
for k in range(l1):
|
|
# POCKETFFT_PREP3(0)
|
|
t0 = CC(0,0,k)
|
|
t1, t2 = PM(CC(0,1,k), CC(0,2,k))
|
|
CH(0,k,0)[0] = t0 + t1
|
|
# POCKETFFT_PARTSTEP3a(1,2,tw1r,tw1i)
|
|
ca = t0 + t1*tw1r
|
|
cb = T(-t2.imag*tw1i, t2.real*tw1i)
|
|
CH(0,k,1)[0], CH(0,k,2)[0] = PM(ca, cb)
|
|
|
|
for i in range(1, ido):
|
|
# POCKETFFT_PREP3(i)
|
|
t0 = CC(i,0,k)
|
|
t1, t2 = PM(CC(i,1,k), CC(i,2,k))
|
|
CH(i,k,0)[0] = t0 + t1
|
|
# POCKETFFT_PARTSTEP3b(1,2,tw1r,tw1i)
|
|
ca = t0 + t1*tw1r
|
|
cb = T(-t2.imag*tw1i, t2.real*tw1i)
|
|
CH(i,k,1)[0] = _special_mul(ca+cb, WA(1-1,i), fwd)
|
|
CH(i,k,2)[0] = _special_mul(ca-cb, WA(2-1,i), fwd)
|
|
|
|
def pass4(ido: int, l1: int, cc: Ptr[T], ch: Ptr[T], wa: Ptr[T], fwd: bool):
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+l1*c))
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+4*c)]
|
|
@inline
|
|
def WA(x: int, i: int):
|
|
return wa[i-1+x*(ido-1)]
|
|
|
|
if ido == 1:
|
|
for k in range(l1):
|
|
t2, t1 = PM(CC(0,0,k), CC(0,2,k))
|
|
t3, t4 = PM(CC(0,1,k), CC(0,3,k))
|
|
t4 = ROTX90(t4, fwd)
|
|
CH(0,k,0)[0], CH(0,k,2)[0] = PM(t2, t3)
|
|
CH(0,k,1)[0], CH(0,k,3)[0] = PM(t1, t4)
|
|
else:
|
|
for k in range(l1):
|
|
t2, t1 = PM(CC(0,0,k), CC(0,2,k))
|
|
t3, t4 = PM(CC(0,1,k), CC(0,3,k))
|
|
t4 = ROTX90(t4, fwd)
|
|
CH(0,k,0)[0], CH(0,k,2)[0] = PM(t2, t3)
|
|
CH(0,k,1)[0], CH(0,k,3)[0] = PM(t1, t4)
|
|
|
|
for i in range(1, ido):
|
|
cc0 = CC(i,0,k)
|
|
cc1 = CC(i,1,k)
|
|
cc2 = CC(i,2,k)
|
|
cc3 = CC(i,3,k)
|
|
t2, t1 = PM(cc0, cc2)
|
|
t3, t4 = PM(cc1, cc3)
|
|
t4 = ROTX90(t4, fwd)
|
|
CH(i,k,0)[0] = t2 + t3
|
|
CH(i,k,1)[0] = _special_mul(t1+t4, WA(0,i), fwd)
|
|
CH(i,k,2)[0] = _special_mul(t2-t3, WA(1,i), fwd)
|
|
CH(i,k,3)[0] = _special_mul(t1-t4, WA(2,i), fwd)
|
|
|
|
def pass5(ido: int, l1: int, cc: Ptr[T], ch: Ptr[T], wa: Ptr[T], fwd: bool):
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+l1*c))
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+5*c)]
|
|
@inline
|
|
def WA(x: int, i: int):
|
|
return wa[i-1+x*(ido-1)]
|
|
|
|
tw1r = cast(0.3090169943749474241022934171828191, T0)
|
|
tw1i = cast(0.9510565162951535721164393333793821, T0)
|
|
if fwd:
|
|
tw1i = -tw1i
|
|
tw2r = cast(-0.8090169943749474241022934171828191, T0)
|
|
tw2i = cast(0.5877852522924731291687059546390728, T0)
|
|
if fwd:
|
|
tw2i = -tw2i
|
|
|
|
if ido == 1:
|
|
for k in range(l1):
|
|
# POCKETFFT_PREP5(0)
|
|
t0 = CC(0,0,k)
|
|
t1, t4 = PM(CC(0,1,k), CC(0,4,k))
|
|
t2, t3 = PM(CC(0,2,k), CC(0,3,k))
|
|
CH(0,k,0)[0] = T(t0.real+t1.real+t2.real, t0.imag+t1.imag+t2.imag)
|
|
# POCKETFFT_PARTSTEP5a(1,4,tw1r,tw2r,+tw1i,+tw2i)
|
|
ca = T(t0.real+tw1r*t1.real+tw2r*t2.real,
|
|
t0.imag+tw1r*t1.imag+tw2r*t2.imag)
|
|
cb = T(-(tw1i*t4.imag+tw2i*t3.imag),
|
|
tw1i*t4.real+tw2i*t3.real)
|
|
CH(0,k,1)[0], CH(0,k,4)[0] = PM(ca, cb)
|
|
# POCKETFFT_PARTSTEP5a(2,3,tw2r,tw1r,+tw2i,-tw1i)
|
|
ca = T(t0.real+tw2r*t1.real+tw1r*t2.real,
|
|
t0.imag+tw2r*t1.imag+tw1r*t2.imag)
|
|
cb = T(-(tw2i*t4.imag-tw1i*t3.imag),
|
|
tw2i*t4.real-tw1i*t3.real)
|
|
CH(0,k,2)[0], CH(0,k,3)[0] = PM(ca, cb)
|
|
else:
|
|
for k in range(l1):
|
|
# POCKETFFT_PREP5(0)
|
|
t0 = CC(0,0,k)
|
|
t1, t4 = PM(CC(0,1,k), CC(0,4,k))
|
|
t2, t3 = PM(CC(0,2,k), CC(0,3,k))
|
|
CH(0,k,0)[0] = T(t0.real+t1.real+t2.real, t0.imag+t1.imag+t2.imag)
|
|
# POCKETFFT_PARTSTEP5a(1,4,tw1r,tw2r,+tw1i,+tw2i)
|
|
ca = T(t0.real+tw1r*t1.real+tw2r*t2.real,
|
|
t0.imag+tw1r*t1.imag+tw2r*t2.imag)
|
|
cb = T(-(tw1i*t4.imag+tw2i*t3.imag),
|
|
tw1i*t4.real+tw2i*t3.real)
|
|
CH(0,k,1)[0], CH(0,k,4)[0] = PM(ca, cb)
|
|
# POCKETFFT_PARTSTEP5a(2,3,tw2r,tw1r,+tw2i,-tw1i)
|
|
ca = T(t0.real+tw2r*t1.real+tw1r*t2.real,
|
|
t0.imag+tw2r*t1.imag+tw1r*t2.imag)
|
|
cb = T(-(tw2i*t4.imag-tw1i*t3.imag),
|
|
tw2i*t4.real-tw1i*t3.real)
|
|
CH(0,k,2)[0], CH(0,k,3)[0] = PM(ca, cb)
|
|
for i in range(1, ido):
|
|
# POCKETFFT_PREP5(i)
|
|
t0 = CC(i,0,k)
|
|
t1, t4 = PM(CC(i,1,k), CC(i,4,k))
|
|
t2, t3 = PM(CC(i,2,k), CC(i,3,k))
|
|
CH(i,k,0)[0] = T(t0.real+t1.real+t2.real, t0.imag+t1.imag+t2.imag)
|
|
# POCKETFFT_PARTSTEP5b(1,4,tw1r,tw2r,+tw1i,+tw2i)
|
|
ca = T(t0.real+tw1r*t1.real+tw2r*t2.real,
|
|
t0.imag+tw1r*t1.imag+tw2r*t2.imag)
|
|
cb = T(-(tw1i*t4.imag+tw2i*t3.imag),
|
|
tw1i*t4.real+tw2i*t3.real)
|
|
CH(i,k,1)[0] = _special_mul(ca+cb, WA(1-1,i), fwd)
|
|
CH(i,k,4)[0] = _special_mul(ca-cb, WA(4-1,i), fwd)
|
|
# POCKETFFT_PARTSTEP5b(2,3,tw2r,tw1r,+tw2i,-tw1i)
|
|
ca = T(t0.real+tw2r*t1.real+tw1r*t2.real,
|
|
t0.imag+tw2r*t1.imag+tw1r*t2.imag)
|
|
cb = T(-(tw2i*t4.imag-tw1i*t3.imag),
|
|
tw2i*t4.real-tw1i*t3.real)
|
|
CH(i,k,2)[0] = _special_mul(ca+cb, WA(2-1,i), fwd)
|
|
CH(i,k,3)[0] = _special_mul(ca-cb, WA(3-1,i), fwd)
|
|
|
|
def pass7(ido: int, l1: int, cc: Ptr[T], ch: Ptr[T], wa: Ptr[T], fwd: bool):
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+l1*c))
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+7*c)]
|
|
@inline
|
|
def WA(x: int, i: int):
|
|
return wa[i-1+x*(ido-1)]
|
|
|
|
tw1r = cast(0.6234898018587335305250048840042398, T0)
|
|
tw1i = cast(0.7818314824680298087084445266740578, T0)
|
|
if fwd:
|
|
tw1i = -tw1i
|
|
tw2r = cast(-0.2225209339563144042889025644967948, T0)
|
|
tw2i = cast(0.9749279121818236070181316829939312, T0)
|
|
if fwd:
|
|
tw2i = -tw2i
|
|
tw3r = cast(-0.9009688679024191262361023195074451, T0)
|
|
tw3i = cast(0.433883739117558120475768332848359, T0)
|
|
if fwd:
|
|
tw3i = -tw3i
|
|
|
|
if ido == 1:
|
|
for k in range(l1):
|
|
# POCKETFFT_PREP7(0)
|
|
t1 = CC(0,0,k)
|
|
t2, t7 = PM(CC(0,1,k), CC(0,6,k))
|
|
t3, t6 = PM(CC(0,2,k), CC(0,5,k))
|
|
t4, t5 = PM(CC(0,3,k), CC(0,4,k))
|
|
CH(0,k,0)[0] = T(t1.real+t2.real+t3.real+t4.real,
|
|
t1.imag+t2.imag+t3.imag+t4.imag)
|
|
# POCKETFFT_PARTSTEP7a(1,6,tw1r,tw2r,tw3r,+tw1i,+tw2i,+tw3i)
|
|
ca = T(t1.real+tw1r*t2.real+tw2r*t3.real+tw3r*t4.real,
|
|
t1.imag+tw1r*t2.imag+tw2r*t3.imag+tw3r*t4.imag)
|
|
cb = T(-(tw1i*t7.imag+tw2i*t6.imag+tw3i*t5.imag),
|
|
tw1i*t7.real+tw2i*t6.real+tw3i*t5.real)
|
|
CH(0,k,1)[0], CH(0,k,6)[0] = PM(ca, cb)
|
|
# POCKETFFT_PARTSTEP7a(2,5,tw2r,tw3r,tw1r,+tw2i,-tw3i,-tw1i)
|
|
ca = T(t1.real+tw2r*t2.real+tw3r*t3.real+tw1r*t4.real,
|
|
t1.imag+tw2r*t2.imag+tw3r*t3.imag+tw1r*t4.imag)
|
|
cb = T(-(tw2i*t7.imag-tw3i*t6.imag-tw1i*t5.imag),
|
|
tw2i*t7.real-tw3i*t6.real-tw1i*t5.real)
|
|
CH(0,k,2)[0], CH(0,k,5)[0] = PM(ca, cb)
|
|
# POCKETFFT_PARTSTEP7a(3,4,tw3r,tw1r,tw2r,+tw3i,-tw1i,+tw2i)
|
|
ca = T(t1.real+tw3r*t2.real+tw1r*t3.real+tw2r*t4.real,
|
|
t1.imag+tw3r*t2.imag+tw1r*t3.imag+tw2r*t4.imag)
|
|
cb = T(-(tw3i*t7.imag-tw1i*t6.imag+tw2i*t5.imag),
|
|
tw3i*t7.real-tw1i*t6.real+tw2i*t5.real)
|
|
CH(0,k,3)[0], CH(0,k,4)[0] = PM(ca, cb)
|
|
else:
|
|
for k in range(l1):
|
|
# POCKETFFT_PREP7(0)
|
|
t1 = CC(0,0,k)
|
|
t2, t7 = PM(CC(0,1,k), CC(0,6,k))
|
|
t3, t6 = PM(CC(0,2,k), CC(0,5,k))
|
|
t4, t5 = PM(CC(0,3,k), CC(0,4,k))
|
|
CH(0,k,0)[0] = T(t1.real+t2.real+t3.real+t4.real,
|
|
t1.imag+t2.imag+t3.imag+t4.imag)
|
|
# POCKETFFT_PARTSTEP7a(1,6,tw1r,tw2r,tw3r,+tw1i,+tw2i,+tw3i)
|
|
ca = T(t1.real+tw1r*t2.real+tw2r*t3.real+tw3r*t4.real,
|
|
t1.imag+tw1r*t2.imag+tw2r*t3.imag+tw3r*t4.imag)
|
|
cb = T(-(tw1i*t7.imag+tw2i*t6.imag+tw3i*t5.imag),
|
|
tw1i*t7.real+tw2i*t6.real+tw3i*t5.real)
|
|
CH(0,k,1)[0], CH(0,k,6)[0] = PM(ca, cb)
|
|
# POCKETFFT_PARTSTEP7a(2,5,tw2r,tw3r,tw1r,+tw2i,-tw3i,-tw1i)
|
|
ca = T(t1.real+tw2r*t2.real+tw3r*t3.real+tw1r*t4.real,
|
|
t1.imag+tw2r*t2.imag+tw3r*t3.imag+tw1r*t4.imag)
|
|
cb = T(-(tw2i*t7.imag-tw3i*t6.imag-tw1i*t5.imag),
|
|
tw2i*t7.real-tw3i*t6.real-tw1i*t5.real)
|
|
CH(0,k,2)[0], CH(0,k,5)[0] = PM(ca, cb)
|
|
# POCKETFFT_PARTSTEP7a(3,4,tw3r,tw1r,tw2r,+tw3i,-tw1i,+tw2i)
|
|
ca = T(t1.real+tw3r*t2.real+tw1r*t3.real+tw2r*t4.real,
|
|
t1.imag+tw3r*t2.imag+tw1r*t3.imag+tw2r*t4.imag)
|
|
cb = T(-(tw3i*t7.imag-tw1i*t6.imag+tw2i*t5.imag),
|
|
tw3i*t7.real-tw1i*t6.real+tw2i*t5.real)
|
|
CH(0,k,3)[0], CH(0,k,4)[0] = PM(ca, cb)
|
|
for i in range(1, ido):
|
|
# POCKETFFT_PREP7(i)
|
|
t1 = CC(i,0,k)
|
|
t2, t7 = PM(CC(i,1,k), CC(i,6,k))
|
|
t3, t6 = PM(CC(i,2,k), CC(i,5,k))
|
|
t4, t5 = PM(CC(i,3,k), CC(i,4,k))
|
|
CH(i,k,0)[0] = T(t1.real+t2.real+t3.real+t4.real,
|
|
t1.imag+t2.imag+t3.imag+t4.imag)
|
|
# POCKETFFT_PARTSTEP7(1,6,tw1r,tw2r,tw3r,+tw1i,+tw2i,+tw3i)
|
|
ca = T(t1.real+tw1r*t2.real+tw2r*t3.real+tw3r*t4.real,
|
|
t1.imag+tw1r*t2.imag+tw2r*t3.imag+tw3r*t4.imag)
|
|
cb = T(-(tw1i*t7.imag+tw2i*t6.imag+tw3i*t5.imag),
|
|
tw1i*t7.real+tw2i*t6.real+tw3i*t5.real)
|
|
da, db = PM(ca, cb)
|
|
CH(i,k,1)[0] = _special_mul(da, WA(1-1,i), fwd)
|
|
CH(i,k,6)[0] = _special_mul(db, WA(6-1,i), fwd)
|
|
# POCKETFFT_PARTSTEP7(2,5,tw2r,tw3r,tw1r,+tw2i,-tw3i,-tw1i)
|
|
ca = T(t1.real+tw2r*t2.real+tw3r*t3.real+tw1r*t4.real,
|
|
t1.imag+tw2r*t2.imag+tw3r*t3.imag+tw1r*t4.imag)
|
|
cb = T(-(tw2i*t7.imag-tw3i*t6.imag-tw1i*t5.imag),
|
|
tw2i*t7.real-tw3i*t6.real-tw1i*t5.real)
|
|
da, db = PM(ca, cb)
|
|
CH(i,k,2)[0] = _special_mul(da, WA(2-1,i), fwd)
|
|
CH(i,k,5)[0] = _special_mul(db, WA(5-1,i), fwd)
|
|
# POCKETFFT_PARTSTEP7(3,4,tw3r,tw1r,tw2r,+tw3i,-tw1i,+tw2i)
|
|
ca = T(t1.real+tw3r*t2.real+tw1r*t3.real+tw2r*t4.real,
|
|
t1.imag+tw3r*t2.imag+tw1r*t3.imag+tw2r*t4.imag)
|
|
cb = T(-(tw3i*t7.imag-tw1i*t6.imag+tw2i*t5.imag),
|
|
tw3i*t7.real-tw1i*t6.real+tw2i*t5.real)
|
|
da, db = PM(ca, cb)
|
|
CH(i,k,3)[0] = _special_mul(da, WA(3-1,i), fwd)
|
|
CH(i,k,4)[0] = _special_mul(db, WA(4-1,i), fwd)
|
|
|
|
def pass8(ido: int, l1: int, cc: Ptr[T], ch: Ptr[T], wa: Ptr[T], fwd: bool):
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+l1*c))
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+8*c)]
|
|
@inline
|
|
def WA(x: int, i: int):
|
|
return wa[i-1+x*(ido-1)]
|
|
|
|
if ido == 1:
|
|
for k in range(l1):
|
|
a1, a5 = PM(CC(0,1,k), CC(0,5,k))
|
|
a3, a7 = PM(CC(0,3,k), CC(0,7,k))
|
|
a1, a3 = PM(a1, a3)
|
|
a3 = ROTX90(a3, fwd)
|
|
|
|
a7 = ROTX90(a7, fwd)
|
|
a5, a7 = PM(a5, a7)
|
|
a5 = ROTX45(a5, fwd)
|
|
a7 = ROTX135(a7, fwd)
|
|
|
|
a0, a4 = PM(CC(0,0,k), CC(0,4,k))
|
|
a2, a6 = PM(CC(0,2,k), CC(0,6,k))
|
|
CH(0,k,0)[0], CH(0,k,4)[0] = PM(a0+a2, a1)
|
|
CH(0,k,2)[0], CH(0,k,6)[0] = PM(a0-a2, a3)
|
|
a6 = ROTX90(a6, fwd)
|
|
CH(0,k,1)[0], CH(0,k,5)[0] = PM(a4+a6, a5)
|
|
CH(0,k,3)[0], CH(0,k,7)[0] = PM(a4-a6, a7)
|
|
else:
|
|
for k in range(l1):
|
|
a1, a5 = PM(CC(0,1,k), CC(0,5,k))
|
|
a3, a7 = PM(CC(0,3,k), CC(0,7,k))
|
|
a1, a3 = PM(a1, a3)
|
|
a3 = ROTX90(a3, fwd)
|
|
|
|
a7 = ROTX90(a7, fwd)
|
|
a5, a7 = PM(a5, a7)
|
|
a5 = ROTX45(a5, fwd)
|
|
a7 = ROTX135(a7, fwd)
|
|
|
|
a0, a4 = PM(CC(0,0,k), CC(0,4,k))
|
|
a2, a6 = PM(CC(0,2,k), CC(0,6,k))
|
|
CH(0,k,0)[0], CH(0,k,4)[0] = PM(a0+a2, a1)
|
|
CH(0,k,2)[0], CH(0,k,6)[0] = PM(a0-a2, a3)
|
|
a6 = ROTX90(a6, fwd)
|
|
CH(0,k,1)[0], CH(0,k,5)[0] = PM(a4+a6, a5)
|
|
CH(0,k,3)[0], CH(0,k,7)[0] = PM(a4-a6, a7)
|
|
|
|
for i in range(1, ido):
|
|
a1, a5 = PM(CC(i,1,k), CC(i,5,k))
|
|
a3, a7 = PM(CC(i,3,k), CC(i,7,k))
|
|
a7 = ROTX90(a7, fwd)
|
|
a1, a3 = PM(a1, a3)
|
|
a3 = ROTX90(a3, fwd)
|
|
a5, a7 = PM(a5, a7)
|
|
a5 = ROTX45(a5, fwd)
|
|
a7 = ROTX135(a7, fwd)
|
|
a0, a4 = PM(CC(i,0,k), CC(i,4,k))
|
|
a2, a6 = PM(CC(i,2,k), CC(i,6,k))
|
|
a0, a2 = PM(a0, a2)
|
|
CH(i,k,0)[0] = a0+a1
|
|
CH(i,k,4)[0] = _special_mul(a0-a1, WA(3,i), fwd)
|
|
CH(i,k,2)[0] = _special_mul(a2+a3, WA(1,i), fwd)
|
|
CH(i,k,6)[0] = _special_mul(a2-a3, WA(5,i), fwd)
|
|
a6 = ROTX90(a6, fwd)
|
|
a4, a6 = PM(a4, a6)
|
|
CH(i,k,1)[0] = _special_mul(a4+a5, WA(0,i), fwd)
|
|
CH(i,k,5)[0] = _special_mul(a4-a5, WA(4,i), fwd)
|
|
CH(i,k,3)[0] = _special_mul(a6+a7, WA(2,i), fwd)
|
|
CH(i,k,7)[0] = _special_mul(a6-a7, WA(6,i), fwd)
|
|
|
|
def pass11(ido: int, l1: int, cc: Ptr[T], ch: Ptr[T], wa: Ptr[T], fwd: bool):
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+l1*c))
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+11*c)]
|
|
@inline
|
|
def WA(x: int, i: int):
|
|
return wa[i-1+x*(ido-1)]
|
|
|
|
tw1r = cast(0.8412535328311811688618116489193677, T0)
|
|
tw1i = cast(0.5406408174555975821076359543186917, T0)
|
|
if fwd:
|
|
tw1i = -tw1i
|
|
tw2r = cast(0.4154150130018864255292741492296232, T0)
|
|
tw2i = cast(0.9096319953545183714117153830790285, T0)
|
|
if fwd:
|
|
tw2i = -tw2i
|
|
tw3r = cast(-0.1423148382732851404437926686163697, T0)
|
|
tw3i = cast(0.9898214418809327323760920377767188, T0)
|
|
if fwd:
|
|
tw3i = -tw3i
|
|
tw4r = cast(-0.6548607339452850640569250724662936, T0)
|
|
tw4i = cast(0.7557495743542582837740358439723444, T0)
|
|
if fwd:
|
|
tw4i = -tw4i
|
|
tw5r = cast(-0.9594929736144973898903680570663277, T0)
|
|
tw5i = cast(0.2817325568414296977114179153466169, T0)
|
|
if fwd:
|
|
tw5i = -tw5i
|
|
|
|
if ido == 1:
|
|
for k in range(l1):
|
|
# POCKETFFT_PREP11(0)
|
|
t1 = CC(0,0,k)
|
|
t2, t11 = PM(CC(0,1,k), CC(0,10,k))
|
|
t3, t10 = PM(CC(0,2,k), CC(0,9,k))
|
|
t4, t9 = PM(CC(0,3,k), CC(0,8,k))
|
|
t5, t8 = PM(CC(0,4,k), CC(0,7,k))
|
|
t6, t7 = PM(CC(0,5,k), CC(0,6,k))
|
|
CH(0,k,0)[0] = T(t1.real+t2.real+t3.real+t4.real+t5.real+t6.real,
|
|
t1.imag+t2.imag+t3.imag+t4.imag+t5.imag+t6.imag)
|
|
# POCKETFFT_PARTSTEP11a(1,10,tw1r,tw2r,tw3r,tw4r,tw5r,+tw1i,+tw2i,+tw3i,+tw4i,+tw5i)
|
|
ca = t1 + t2*tw1r + t3*tw2r + t4*tw3r + t5*tw4r + t6*tw5r
|
|
cb = T(-(tw1i*t11.imag + tw2i*t10.imag + tw3i*t9.imag + tw4i*t8.imag + tw5i*t7.imag),
|
|
tw1i*t11.real + tw2i*t10.real + tw3i*t9.real + tw4i*t8.real + tw5i*t7.real)
|
|
CH(0,k,1)[0], CH(0,k,10)[0] = PM(ca, cb)
|
|
# POCKETFFT_PARTSTEP11a(2, 9,tw2r,tw4r,tw5r,tw3r,tw1r,+tw2i,+tw4i,-tw5i,-tw3i,-tw1i)
|
|
ca = t1 + t2*tw2r + t3*tw4r + t4*tw5r + t5*tw3r + t6*tw1r
|
|
cb = T(-(tw2i*t11.imag + tw4i*t10.imag - tw5i*t9.imag - tw3i*t8.imag - tw1i*t7.imag),
|
|
tw2i*t11.real + tw4i*t10.real - tw5i*t9.real - tw3i*t8.real - tw1i*t7.real)
|
|
CH(0,k,2)[0], CH(0,k,9)[0] = PM(ca, cb)
|
|
# POCKETFFT_PARTSTEP11a(3, 8,tw3r,tw5r,tw2r,tw1r,tw4r,+tw3i,-tw5i,-tw2i,+tw1i,+tw4i)
|
|
ca = t1 + t2*tw3r + t3*tw5r + t4*tw2r + t5*tw1r + t6*tw4r
|
|
cb = T(-(tw3i*t11.imag - tw5i*t10.imag - tw2i*t9.imag + tw1i*t8.imag + tw4i*t7.imag),
|
|
tw3i*t11.real - tw5i*t10.real - tw2i*t9.real + tw1i*t8.real + tw4i*t7.real)
|
|
CH(0,k,3)[0], CH(0,k,8)[0] = PM(ca, cb)
|
|
# POCKETFFT_PARTSTEP11a(4, 7,tw4r,tw3r,tw1r,tw5r,tw2r,+tw4i,-tw3i,+tw1i,+tw5i,-tw2i)
|
|
ca = t1 + t2*tw4r + t3*tw3r + t4*tw1r + t5*tw5r + t6*tw2r
|
|
cb = T(-(tw4i*t11.imag - tw3i*t10.imag + tw1i*t9.imag + tw5i*t8.imag - tw2i*t7.imag),
|
|
tw4i*t11.real - tw3i*t10.real + tw1i*t9.real + tw5i*t8.real - tw2i*t7.real)
|
|
CH(0,k,4)[0], CH(0,k,7)[0] = PM(ca, cb)
|
|
# POCKETFFT_PARTSTEP11a(5, 6,tw5r,tw1r,tw4r,tw2r,tw3r,+tw5i,-tw1i,+tw4i,-tw2i,+tw3i)
|
|
ca = t1 + t2*tw5r + t3*tw1r + t4*tw4r + t5*tw2r + t6*tw3r
|
|
cb = T(-(tw5i*t11.imag - tw1i*t10.imag + tw4i*t9.imag - tw2i*t8.imag + tw3i*t7.imag),
|
|
tw5i*t11.real - tw1i*t10.real + tw4i*t9.real - tw2i*t8.real + tw3i*t7.real)
|
|
CH(0,k,5)[0], CH(0,k,6)[0] = PM(ca, cb)
|
|
else:
|
|
for k in range(l1):
|
|
# POCKETFFT_PREP11(0)
|
|
t1 = CC(0,0,k)
|
|
t2, t11 = PM(CC(0,1,k), CC(0,10,k))
|
|
t3, t10 = PM(CC(0,2,k), CC(0,9,k))
|
|
t4, t9 = PM(CC(0,3,k), CC(0,8,k))
|
|
t5, t8 = PM(CC(0,4,k), CC(0,7,k))
|
|
t6, t7 = PM(CC(0,5,k), CC(0,6,k))
|
|
CH(0,k,0)[0] = T(t1.real+t2.real+t3.real+t4.real+t5.real+t6.real,
|
|
t1.imag+t2.imag+t3.imag+t4.imag+t5.imag+t6.imag)
|
|
# POCKETFFT_PARTSTEP11a(1,10,tw1r,tw2r,tw3r,tw4r,tw5r,+tw1i,+tw2i,+tw3i,+tw4i,+tw5i)
|
|
ca = t1 + t2*tw1r + t3*tw2r + t4*tw3r + t5*tw4r + t6*tw5r
|
|
cb = T(-(tw1i*t11.imag + tw2i*t10.imag + tw3i*t9.imag + tw4i*t8.imag + tw5i*t7.imag),
|
|
tw1i*t11.real + tw2i*t10.real + tw3i*t9.real + tw4i*t8.real + tw5i*t7.real)
|
|
CH(0,k,1)[0], CH(0,k,10)[0] = PM(ca, cb)
|
|
# POCKETFFT_PARTSTEP11a(2, 9,tw2r,tw4r,tw5r,tw3r,tw1r,+tw2i,+tw4i,-tw5i,-tw3i,-tw1i)
|
|
ca = t1 + t2*tw2r + t3*tw4r + t4*tw5r + t5*tw3r + t6*tw1r
|
|
cb = T(-(tw2i*t11.imag + tw4i*t10.imag - tw5i*t9.imag - tw3i*t8.imag - tw1i*t7.imag),
|
|
tw2i*t11.real + tw4i*t10.real - tw5i*t9.real - tw3i*t8.real - tw1i*t7.real)
|
|
CH(0,k,2)[0], CH(0,k,9)[0] = PM(ca, cb)
|
|
# POCKETFFT_PARTSTEP11a(3, 8,tw3r,tw5r,tw2r,tw1r,tw4r,+tw3i,-tw5i,-tw2i,+tw1i,+tw4i)
|
|
ca = t1 + t2*tw3r + t3*tw5r + t4*tw2r + t5*tw1r + t6*tw4r
|
|
cb = T(-(tw3i*t11.imag - tw5i*t10.imag - tw2i*t9.imag + tw1i*t8.imag + tw4i*t7.imag),
|
|
tw3i*t11.real - tw5i*t10.real - tw2i*t9.real + tw1i*t8.real + tw4i*t7.real)
|
|
CH(0,k,3)[0], CH(0,k,8)[0] = PM(ca, cb)
|
|
# POCKETFFT_PARTSTEP11a(4, 7,tw4r,tw3r,tw1r,tw5r,tw2r,+tw4i,-tw3i,+tw1i,+tw5i,-tw2i)
|
|
ca = t1 + t2*tw4r + t3*tw3r + t4*tw1r + t5*tw5r + t6*tw2r
|
|
cb = T(-(tw4i*t11.imag - tw3i*t10.imag + tw1i*t9.imag + tw5i*t8.imag - tw2i*t7.imag),
|
|
tw4i*t11.real - tw3i*t10.real + tw1i*t9.real + tw5i*t8.real - tw2i*t7.real)
|
|
CH(0,k,4)[0], CH(0,k,7)[0] = PM(ca, cb)
|
|
# POCKETFFT_PARTSTEP11a(5, 6,tw5r,tw1r,tw4r,tw2r,tw3r,+tw5i,-tw1i,+tw4i,-tw2i,+tw3i)
|
|
ca = t1 + t2*tw5r + t3*tw1r + t4*tw4r + t5*tw2r + t6*tw3r
|
|
cb = T(-(tw5i*t11.imag - tw1i*t10.imag + tw4i*t9.imag - tw2i*t8.imag + tw3i*t7.imag),
|
|
tw5i*t11.real - tw1i*t10.real + tw4i*t9.real - tw2i*t8.real + tw3i*t7.real)
|
|
CH(0,k,5)[0], CH(0,k,6)[0] = PM(ca, cb)
|
|
for i in range(1, ido):
|
|
# POCKETFFT_PREP11(i)
|
|
t1 = CC(i,0,k)
|
|
t2, t11 = PM(CC(i,1,k), CC(i,10,k))
|
|
t3, t10 = PM(CC(i,2,k), CC(i,9,k))
|
|
t4, t9 = PM(CC(i,3,k), CC(i,8,k))
|
|
t5, t8 = PM(CC(i,4,k), CC(i,7,k))
|
|
t6, t7 = PM(CC(i,5,k), CC(i,6,k))
|
|
CH(i,k,0)[0] = T(t1.real+t2.real+t3.real+t4.real+t5.real+t6.real,
|
|
t1.imag+t2.imag+t3.imag+t4.imag+t5.imag+t6.imag)
|
|
# POCKETFFT_PARTSTEP11(1,10,tw1r,tw2r,tw3r,tw4r,tw5r,+tw1i,+tw2i,+tw3i,+tw4i,+tw5i)
|
|
ca = t1 + t2*tw1r + t3*tw2r + t4*tw3r + t5*tw4r + t6*tw5r
|
|
cb = T(-(tw1i*t11.imag + tw2i*t10.imag + tw3i*t9.imag + tw4i*t8.imag + tw5i*t7.imag),
|
|
tw1i*t11.real + tw2i*t10.real + tw3i*t9.real + tw4i*t8.real + tw5i*t7.real)
|
|
da, db = PM(ca, cb)
|
|
CH(i,k,1)[0] = _special_mul(da, WA(1-1,i), fwd)
|
|
CH(i,k,10)[0] = _special_mul(db, WA(10-1,i), fwd)
|
|
# POCKETFFT_PARTSTEP11(2, 9,tw2r,tw4r,tw5r,tw3r,tw1r,+tw2i,+tw4i,-tw5i,-tw3i,-tw1i)
|
|
ca = t1 + t2*tw2r + t3*tw4r + t4*tw5r + t5*tw3r + t6*tw1r
|
|
cb = T(-(tw2i*t11.imag + tw4i*t10.imag - tw5i*t9.imag - tw3i*t8.imag - tw1i*t7.imag),
|
|
tw2i*t11.real + tw4i*t10.real - tw5i*t9.real - tw3i*t8.real - tw1i*t7.real)
|
|
da, db = PM(ca, cb)
|
|
CH(i,k,2)[0] = _special_mul(da, WA(2-1,i), fwd)
|
|
CH(i,k,9)[0] = _special_mul(db, WA(9-1,i), fwd)
|
|
# POCKETFFT_PARTSTEP11(3, 8,tw3r,tw5r,tw2r,tw1r,tw4r,+tw3i,-tw5i,-tw2i,+tw1i,+tw4i)
|
|
ca = t1 + t2*tw3r + t3*tw5r + t4*tw2r + t5*tw1r + t6*tw4r
|
|
cb = T(-(tw3i*t11.imag - tw5i*t10.imag - tw2i*t9.imag + tw1i*t8.imag + tw4i*t7.imag),
|
|
tw3i*t11.real - tw5i*t10.real - tw2i*t9.real + tw1i*t8.real + tw4i*t7.real)
|
|
da, db = PM(ca, cb)
|
|
CH(i,k,3)[0] = _special_mul(da, WA(3-1,i), fwd)
|
|
CH(i,k,8)[0] = _special_mul(db, WA(8-1,i), fwd)
|
|
# POCKETFFT_PARTSTEP11(4, 7,tw4r,tw3r,tw1r,tw5r,tw2r,+tw4i,-tw3i,+tw1i,+tw5i,-tw2i)
|
|
ca = t1 + t2*tw4r + t3*tw3r + t4*tw1r + t5*tw5r + t6*tw2r
|
|
cb = T(-(tw4i*t11.imag - tw3i*t10.imag + tw1i*t9.imag + tw5i*t8.imag - tw2i*t7.imag),
|
|
tw4i*t11.real - tw3i*t10.real + tw1i*t9.real + tw5i*t8.real - tw2i*t7.real)
|
|
da, db = PM(ca, cb)
|
|
CH(i,k,4)[0] = _special_mul(da, WA(4-1,i), fwd)
|
|
CH(i,k,7)[0] = _special_mul(db, WA(7-1,i), fwd)
|
|
# POCKETFFT_PARTSTEP11(5, 6,tw5r,tw1r,tw4r,tw2r,tw3r,+tw5i,-tw1i,+tw4i,-tw2i,+tw3i)
|
|
ca = t1 + t2*tw5r + t3*tw1r + t4*tw4r + t5*tw2r + t6*tw3r
|
|
cb = T(-(tw5i*t11.imag - tw1i*t10.imag + tw4i*t9.imag - tw2i*t8.imag + tw3i*t7.imag),
|
|
tw5i*t11.real - tw1i*t10.real + tw4i*t9.real - tw2i*t8.real + tw3i*t7.real)
|
|
da, db = PM(ca, cb)
|
|
CH(i,k,5)[0] = _special_mul(da, WA(5-1,i), fwd)
|
|
CH(i,k,6)[0] = _special_mul(db, WA(6-1,i), fwd)
|
|
|
|
def passg(ido: int, ip: int, l1: int, cc: Ptr[T], ch: Ptr[T], wa: Ptr[T], csarr: Ptr[T], fwd: bool):
|
|
cdim = ip
|
|
ipph = (ip + 1) // 2
|
|
idl1 = ido * l1
|
|
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+l1*c))
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+cdim*c)]
|
|
@inline
|
|
def CX(a: int, b: int, c: int):
|
|
return cc + (a+ido*(b+l1*c))
|
|
@inline
|
|
def CX2(a: int, b: int):
|
|
return cc + (a+idl1*b)
|
|
@inline
|
|
def CH2(a: int, b: int):
|
|
return ch[a+idl1*b]
|
|
|
|
wal = arr[T](ip)
|
|
wal[0] = T(1., 0.)
|
|
for i in range(1, ip):
|
|
wal[i] = T(csarr[i].real, -csarr[i].imag if fwd else csarr[i].imag)
|
|
|
|
for k in range(l1):
|
|
for i in range(ido):
|
|
CH(i,k,0)[0] = CC(i,0,k)
|
|
jc = ip - 1
|
|
for j in range(1, ipph):
|
|
for k in range(l1):
|
|
for i in range(ido):
|
|
CH(i,k,j)[0], CH(i,k,jc)[0] = PM(CC(i,j,k), CC(i,jc,k))
|
|
jc -= 1
|
|
|
|
for k in range(l1):
|
|
for i in range(ido):
|
|
tmp = CH(i,k,0)[0]
|
|
for j in range(1, ipph):
|
|
tmp += CH(i,k,j)[0]
|
|
CX(i,k,0)[0] = tmp
|
|
|
|
lc = ip - 1
|
|
for l in range(1, ipph):
|
|
for ik in range(idl1):
|
|
CX2(ik,l)[0] = T(CH2(ik,0).real+wal[l].real*CH2(ik,1).real+wal[2*l].real*CH2(ik,2).real,
|
|
CH2(ik,0).imag+wal[l].real*CH2(ik,1).imag+wal[2*l].real*CH2(ik,2).imag)
|
|
CX2(ik,lc)[0] = T(-wal[l].imag*CH2(ik,ip-1).imag-wal[2*l].imag*CH2(ik,ip-2).imag,
|
|
wal[l].imag*CH2(ik,ip-1).real+wal[2*l].imag*CH2(ik,ip-2).real)
|
|
|
|
iwal = 2*l
|
|
j = 3
|
|
jc = ip - 3
|
|
while j < ipph - 1:
|
|
iwal += l
|
|
if iwal > ip:
|
|
iwal -= ip
|
|
xwal = wal[iwal]
|
|
iwal += l
|
|
if iwal > ip:
|
|
iwal -= ip
|
|
xwal2 = wal[iwal]
|
|
|
|
for ik in range(idl1):
|
|
CX2(ik,l)[0] += T(CH2(ik,j).real*xwal.real+CH2(ik,j+1).real*xwal2.real,
|
|
CH2(ik,j).imag*xwal.real+CH2(ik,j+1).imag*xwal2.real)
|
|
CX2(ik,lc)[0] += T(-(CH2(ik,jc).imag*xwal.imag+CH2(ik,jc-1).imag*xwal2.imag),
|
|
CH2(ik,jc).real*xwal.imag+CH2(ik,jc-1).real*xwal2.imag)
|
|
j += 2
|
|
jc -= 2
|
|
|
|
while j < ipph:
|
|
iwal += l
|
|
if iwal > ip:
|
|
iwal -= ip
|
|
xwal = wal[iwal]
|
|
for ik in range(idl1):
|
|
CX2(ik,l)[0] += T(CH2(ik,j).real*xwal.real,
|
|
CH2(ik,j).imag*xwal.real)
|
|
CX2(ik,lc)[0] += T(-(CH2(ik,jc).imag*xwal.imag),
|
|
CH2(ik,jc).real*xwal.imag)
|
|
j += 1
|
|
jc -= 1
|
|
|
|
lc -= 1
|
|
|
|
if ido == 1:
|
|
jc = ip - 1
|
|
for j in range(1, ipph):
|
|
for ik in range(idl1):
|
|
t1 = CX2(ik,j)[0]
|
|
t2 = CX2(ik,jc)[0]
|
|
CX2(ik,j)[0], CX2(ik,jc)[0] = PM(t1, t2)
|
|
jc -= 1
|
|
else:
|
|
jc = ip - 1
|
|
for j in range(1, ipph):
|
|
for k in range(l1):
|
|
t1 = CX(0,k,j)[0]
|
|
t2 = CX(0,k,jc)[0]
|
|
CX(0,k,j)[0], CX(0,k,jc)[0] = PM(t1, t2)
|
|
|
|
for i in range(1, ido):
|
|
x1, x2 = PM(CX(i,k,j)[0], CX(i,k,jc)[0])
|
|
idij = (j-1)*(ido-1)+i-1
|
|
CX(i,k,j)[0] = _special_mul(x1, wa[idij], fwd)
|
|
idij = (jc-1)*(ido-1)+i-1
|
|
CX(i,k,jc)[0] = _special_mul(x2, wa[idij], fwd)
|
|
jc -= 1
|
|
wal.dealloc()
|
|
|
|
def pass_all(self, c: Ptr[T], fct: T0, fwd: bool):
|
|
length = self.length
|
|
fact = self.fact
|
|
|
|
if length == 1:
|
|
c[0] *= fct
|
|
return
|
|
|
|
l1 = 1
|
|
ch = arr[T](length)
|
|
p1 = c
|
|
p2 = ch.data()
|
|
|
|
for k1 in range(len(fact)):
|
|
ip = fact[k1].fct
|
|
l2 = ip*l1
|
|
ido = length // l2
|
|
|
|
if ip == 4:
|
|
type(self).pass4(ido, l1, p1, p2, fact[k1].tw, fwd)
|
|
elif ip == 8:
|
|
type(self).pass8(ido, l1, p1, p2, fact[k1].tw, fwd)
|
|
elif ip == 2:
|
|
type(self).pass2(ido, l1, p1, p2, fact[k1].tw, fwd)
|
|
elif ip == 3:
|
|
type(self).pass3(ido, l1, p1, p2, fact[k1].tw, fwd)
|
|
elif ip == 5:
|
|
type(self).pass5(ido, l1, p1, p2, fact[k1].tw, fwd)
|
|
elif ip == 7:
|
|
type(self).pass7(ido, l1, p1, p2, fact[k1].tw, fwd)
|
|
elif ip == 11:
|
|
type(self).pass11(ido, l1, p1, p2, fact[k1].tw, fwd)
|
|
else:
|
|
type(self).passg(ido, ip, l1, p1, p2, fact[k1].tw, fact[k1].tws, fwd)
|
|
p1, p2 = p2, p1
|
|
|
|
p1, p2 = p2, p1
|
|
l1 = l2
|
|
|
|
if p1 != c:
|
|
if fct != T0(1):
|
|
for i in range(length):
|
|
c[i] = ch[i] * fct
|
|
else:
|
|
str.memcpy(c.as_byte(), p1.as_byte(), length * sizeof(T))
|
|
else:
|
|
if fct != T0(1):
|
|
for i in range(length):
|
|
c[i] *= fct
|
|
|
|
ch.dealloc()
|
|
|
|
def exec(self, c: Ptr[T], fct: T0, fwd: bool):
|
|
self.pass_all(c, fct, fwd)
|
|
|
|
def factorize(self):
|
|
n = self.length
|
|
fact = self.fact
|
|
|
|
while n & 7 == 0:
|
|
self.add_factor(8)
|
|
n >>= 3
|
|
|
|
while n & 3 == 0:
|
|
self.add_factor(4)
|
|
n >>= 2
|
|
|
|
if n & 1 == 0:
|
|
n >>= 1
|
|
self.add_factor(2)
|
|
fct_front = fact[0].fct
|
|
fct_back = fact[-1].fct
|
|
fact[0] = fact[0].with_fct(fct_back)
|
|
fact[-1] = fact[-1].with_fct(fct_front)
|
|
|
|
divisor = 3
|
|
while divisor * divisor <= n:
|
|
while n % divisor == 0:
|
|
self.add_factor(divisor)
|
|
n //= divisor
|
|
divisor += 2
|
|
|
|
if n > 1:
|
|
self.add_factor(n)
|
|
|
|
def twsize(self):
|
|
twsize = 0
|
|
l1 = 1
|
|
for k in range(len(self.fact)):
|
|
ip = self.fact[k].fct
|
|
ido = self.length // (l1*ip)
|
|
twsize += (ip-1)*(ido-1)
|
|
if ip > 11:
|
|
twsize += ip
|
|
l1 *= ip
|
|
return twsize
|
|
|
|
def comp_twiddle(self):
|
|
twiddle = sincos_2pibyn[T0](self.length)
|
|
l1 = 1
|
|
memofs = 0
|
|
fact = self.fact
|
|
|
|
for k in range(len(self.fact)):
|
|
ip = fact[k].fct
|
|
ido = self.length // (l1*ip)
|
|
fact[k] = fact[k].with_tw(self.mem.data() + memofs)
|
|
memofs += (ip-1)*(ido-1)
|
|
|
|
for j in range(1, ip):
|
|
for i in range(1, ido):
|
|
fact[k].tw[(j-1)*(ido-1)+i-1] = twiddle[j*l1*i]
|
|
if ip > 11:
|
|
fact[k] = fact[k].with_tws(self.mem.data() + memofs)
|
|
memofs += ip
|
|
for j in range(ip):
|
|
fact[k].tws[j] = twiddle[j*l1*ido]
|
|
l1 *= ip
|
|
|
|
twiddle.dealloc()
|
|
|
|
def __init__(self, length: int):
|
|
if length == 0:
|
|
raise ValueError("Invalid number of FFT data points (0) specified.")
|
|
self.length = length
|
|
self.fact = []
|
|
if length != 1:
|
|
self.factorize()
|
|
self.mem = arr[T](self.twsize())
|
|
self.comp_twiddle()
|
|
|
|
def dealloc(self):
|
|
self.mem.dealloc()
|
|
|
|
def MULPM(c, d, e, f):
|
|
return c*e+d*f, c*f-d*e
|
|
|
|
def REARRANGE(rx, ix, ry, iy):
|
|
t1 = rx+ry
|
|
t2 = ry-rx
|
|
t3 = ix+iy
|
|
t4 = ix-iy
|
|
return t1, t3, t4, t2
|
|
|
|
class rfftp:
|
|
length: int
|
|
mem: arr[T0]
|
|
fact: List[fctdata[T0]]
|
|
T: type # complex datatype
|
|
T0: type # real datatype
|
|
|
|
def add_factor(self, factor: int):
|
|
self.fact.append(fctdata[T0](factor, Ptr[T0](), Ptr[T0]()))
|
|
|
|
def radf2(ido: int, l1: int, cc: Ptr[T0], ch: Ptr[T0], wa: Ptr[T0]):
|
|
@inline
|
|
def WA(x: int, i: int):
|
|
return wa[i+x*(ido-1)]
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+l1*c)]
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+2*c))
|
|
|
|
for k in range(l1):
|
|
CH(0,0,k)[0], CH(ido-1,1,k)[0] = PM(CC(0,k,0), CC(0,k,1))
|
|
|
|
if ido & 1 == 0:
|
|
for k in range(l1):
|
|
CH(0,1,k)[0] = -CC(ido-1,k,1)
|
|
CH(ido-1,0,k)[0] = CC(ido-1,k,0)
|
|
|
|
if ido <= 2:
|
|
return
|
|
|
|
for k in range(l1):
|
|
for i in range(2, ido, 2):
|
|
ic = ido-i
|
|
tr2, ti2 = MULPM(WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
|
|
CH(i-1,0,k)[0], CH(ic-1,1,k)[0] = PM(CC(i-1,k,0), tr2)
|
|
CH(i ,0,k)[0], CH(ic ,1,k)[0] = PM(ti2, CC(i,k,0))
|
|
|
|
def radf3(ido: int, l1: int, cc: Ptr[T0], ch: Ptr[T0], wa: Ptr[T0]):
|
|
@inline
|
|
def WA(x: int, i: int):
|
|
return wa[i+x*(ido-1)]
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+l1*c)]
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+3*c))
|
|
|
|
taur = cast(-0.5, T0)
|
|
taui = cast(0.8660254037844386467637231707529362, T0)
|
|
|
|
for k in range(l1):
|
|
cr2 = CC(0,k,1) + CC(0,k,2)
|
|
CH(0,0,k)[0] = CC(0,k,0) + cr2
|
|
CH(0,2,k)[0] = taui*(CC(0,k,2)-CC(0,k,1))
|
|
CH(ido-1,1,k)[0] = CC(0,k,0)+taur*cr2
|
|
|
|
if ido == 1:
|
|
return
|
|
|
|
for k in range(l1):
|
|
for i in range(2, ido, 2):
|
|
ic = ido-i
|
|
dr2, di2 = MULPM(WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
|
|
dr3, di3 = MULPM(WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
|
|
dr2, di2, dr3, di3 = REARRANGE(dr2, di2, dr3, di3)
|
|
CH(i-1,0,k)[0] = CC(i-1,k,0)+dr2
|
|
CH(i ,0,k)[0] = CC(i ,k,0)+di2
|
|
tr2 = CC(i-1,k,0)+taur*dr2
|
|
ti2 = CC(i ,k,0)+taur*di2
|
|
tr3 = taui*dr3
|
|
ti3 = taui*di3
|
|
CH(i-1,2,k)[0], CH(ic-1,1,k)[0] = PM(tr2, tr3)
|
|
CH(i ,2,k)[0], CH(ic ,1,k)[0] = PM(ti3, ti2)
|
|
|
|
def radf4(ido: int, l1: int, cc: Ptr[T0], ch: Ptr[T0], wa: Ptr[T0]):
|
|
@inline
|
|
def WA(x: int, i: int):
|
|
return wa[i+x*(ido-1)]
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+l1*c)]
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+4*c))
|
|
|
|
hsqt2 = cast(0.707106781186547524400844362104849, T0)
|
|
|
|
for k in range(l1):
|
|
tr1, CH(0,2,k)[0] = PM(CC(0,k,3), CC(0,k,1))
|
|
tr2, CH(ido-1,1,k)[0] = PM(CC(0,k,0),CC(0,k,2))
|
|
CH(0,0,k)[0], CH(ido-1,3,k)[0] = PM(tr2, tr1)
|
|
|
|
if ido & 1 == 0:
|
|
for k in range(l1):
|
|
ti1 = -hsqt2*(CC(ido-1,k,1)+CC(ido-1,k,3))
|
|
tr1 = hsqt2*(CC(ido-1,k,1)-CC(ido-1,k,3))
|
|
CH(ido-1,0,k)[0], CH(ido-1,2,k)[0] = PM(CC(ido-1,k,0), tr1)
|
|
CH( 0,3,k)[0], CH( 0,1,k)[0] = PM(ti1, CC(ido-1,k,2))
|
|
|
|
if ido <= 2:
|
|
return
|
|
|
|
for k in range(l1):
|
|
for i in range(2, ido, 2):
|
|
ic = ido - i
|
|
cr2,ci2 = MULPM(WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
|
|
cr3,ci3 = MULPM(WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
|
|
cr4,ci4 = MULPM(WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
|
|
tr1,tr4 = PM(cr4,cr2)
|
|
ti1,ti4 = PM(ci2,ci4)
|
|
tr2,tr3 = PM(CC(i-1,k,0),cr3)
|
|
ti2,ti3 = PM(CC(i ,k,0),ci3)
|
|
CH(i-1,0,k)[0], CH(ic-1,3,k)[0] = PM(tr2,tr1)
|
|
CH(i ,0,k)[0], CH(ic ,3,k)[0] = PM(ti1,ti2)
|
|
CH(i-1,2,k)[0], CH(ic-1,1,k)[0] = PM(tr3,ti4)
|
|
CH(i ,2,k)[0], CH(ic ,1,k)[0] = PM(tr4,ti3)
|
|
|
|
def radf5(ido: int, l1: int, cc: Ptr[T0], ch: Ptr[T0], wa: Ptr[T0]):
|
|
@inline
|
|
def WA(x: int, i: int):
|
|
return wa[i+x*(ido-1)]
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+l1*c)]
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+5*c))
|
|
|
|
tr11 = cast(0.3090169943749474241022934171828191, T0)
|
|
ti11 = cast(0.9510565162951535721164393333793821, T0)
|
|
tr12 = cast(-0.8090169943749474241022934171828191, T0)
|
|
ti12 = cast(0.5877852522924731291687059546390728, T0)
|
|
|
|
for k in range(l1):
|
|
cr2,ci5 = PM(CC(0,k,4),CC(0,k,1))
|
|
cr3,ci4 = PM(CC(0,k,3),CC(0,k,2))
|
|
CH(0,0,k)[0] = CC(0,k,0)+cr2+cr3
|
|
CH(ido-1,1,k)[0] = CC(0,k,0)+tr11*cr2+tr12*cr3
|
|
CH(0,2,k)[0] = ti11*ci5+ti12*ci4
|
|
CH(ido-1,3,k)[0] = CC(0,k,0)+tr12*cr2+tr11*cr3
|
|
CH(0,4,k)[0] = ti12*ci5-ti11*ci4
|
|
|
|
if ido == 1:
|
|
return
|
|
|
|
for k in range(l1):
|
|
ic = ido-2
|
|
for i in range(2, ido, 2):
|
|
dr2,di2 = MULPM(WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
|
|
dr3,di3 = MULPM(WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
|
|
dr4,di4 = MULPM(WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
|
|
dr5,di5 = MULPM(WA(3,i-2),WA(3,i-1),CC(i-1,k,4),CC(i,k,4))
|
|
dr2, di2, dr5, di5 = REARRANGE(dr2, di2, dr5, di5)
|
|
dr3, di3, dr4, di4 = REARRANGE(dr3, di3, dr4, di4)
|
|
CH(i-1,0,k)[0] = CC(i-1,k,0)+dr2+dr3
|
|
CH(i ,0,k)[0] = CC(i ,k,0)+di2+di3
|
|
tr2 = CC(i-1,k,0)+tr11*dr2+tr12*dr3
|
|
ti2 = CC(i ,k,0)+tr11*di2+tr12*di3
|
|
tr3 = CC(i-1,k,0)+tr12*dr2+tr11*dr3
|
|
ti3 = CC(i ,k,0)+tr12*di2+tr11*di3
|
|
tr5 = ti11*dr5 + ti12*dr4
|
|
ti5 = ti11*di5 + ti12*di4
|
|
tr4 = ti12*dr5 - ti11*dr4
|
|
ti4 = ti12*di5 - ti11*di4
|
|
CH(i-1,2,k)[0], CH(ic-1,1,k)[0] = PM(tr2,tr5)
|
|
CH(i ,2,k)[0], CH(ic ,1,k)[0] = PM(ti5,ti2)
|
|
CH(i-1,4,k)[0], CH(ic-1,3,k)[0] = PM(tr3,tr4)
|
|
CH(i ,4,k)[0], CH(ic ,3,k)[0] = PM(ti4,ti3)
|
|
ic -= 2
|
|
|
|
def radfg(ido: int, ip: int, l1: int, cc: Ptr[T0], ch: Ptr[T0], wa: Ptr[T0], csarr: Ptr[T0]):
|
|
cdim = ip
|
|
ipph = (ip + 1) // 2
|
|
idl1 = ido * l1
|
|
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc + a+ido*(b+cdim*c)
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+l1*c))
|
|
@inline
|
|
def C1(a: int, b: int, c: int):
|
|
return cc + (a+ido*(b+l1*c))
|
|
@inline
|
|
def C2(a: int, b: int):
|
|
return cc + (a+idl1*b)
|
|
@inline
|
|
def CH2(a: int, b: int):
|
|
return ch + a+idl1*b
|
|
|
|
if ido > 1:
|
|
jc = ip - 1
|
|
for j in range(1, ipph):
|
|
is_ = (j-1)*(ido-1)
|
|
is2 = (jc-1)*(ido-1)
|
|
for k in range(l1):
|
|
idij = is_
|
|
idij2 = is2
|
|
i = 1
|
|
while i <= ido - 2:
|
|
t1 = C1(i,k,j )[0]
|
|
t2 = C1(i+1,k,j )[0]
|
|
t3 = C1(i,k,jc)[0]
|
|
t4 = C1(i+1,k,jc)[0]
|
|
x1 = wa[idij]*t1 + wa[idij+1]*t2
|
|
x2 = wa[idij]*t2 - wa[idij+1]*t1
|
|
x3 = wa[idij2]*t3 + wa[idij2+1]*t4
|
|
x4 = wa[idij2]*t4 - wa[idij2+1]*t3
|
|
C1(i,k,j)[0], C1(i+1,k,jc)[0] = PM(x3,x1)
|
|
C1(i+1,k,j)[0], C1(i,k,jc)[0] = PM(x2,x4)
|
|
idij += 2
|
|
idij2 += 2
|
|
i += 2
|
|
jc -= 1
|
|
|
|
jc = ip - 1
|
|
for j in range(1, ipph):
|
|
for k in range(l1):
|
|
C1(0,k,jc)[0], C1(0,k,j)[0] = MP(C1(0,k,jc)[0], C1(0,k,j)[0])
|
|
jc -= 1
|
|
|
|
lc = ip-1
|
|
for l in range(1, ipph):
|
|
for ik in range(idl1):
|
|
CH2(ik,l )[0] = C2(ik,0)[0]+csarr[2*l]*C2(ik,1)[0]+csarr[4*l]*C2(ik,2)[0]
|
|
CH2(ik,lc)[0] = csarr[2*l+1]*C2(ik,ip-1)[0]+csarr[4*l+1]*C2(ik,ip-2)[0]
|
|
|
|
iang = 2*l
|
|
j = 3
|
|
jc = ip - 3
|
|
|
|
while j < ipph - 3:
|
|
iang+=l
|
|
if iang>=ip:
|
|
iang-=ip
|
|
ar1 = csarr[2*iang]
|
|
ai1 = csarr[2*iang+1]
|
|
iang+=l
|
|
if iang>=ip:
|
|
iang-=ip
|
|
ar2=csarr[2*iang]
|
|
ai2=csarr[2*iang+1]
|
|
iang+=l
|
|
if iang>=ip:
|
|
iang-=ip
|
|
ar3=csarr[2*iang]
|
|
ai3=csarr[2*iang+1]
|
|
iang+=l
|
|
if iang>=ip:
|
|
iang-=ip
|
|
ar4 = csarr[2*iang]
|
|
ai4 = csarr[2*iang+1]
|
|
|
|
for ik in range(idl1):
|
|
CH2(ik,l )[0] += ar1*C2(ik,j )[0]+ar2*C2(ik,j +1)[0] + ar3*C2(ik,j +2)[0]+ar4*C2(ik,j +3)[0]
|
|
CH2(ik,lc)[0] += ai1*C2(ik,jc)[0]+ai2*C2(ik,jc-1)[0] + ai3*C2(ik,jc-2)[0]+ai4*C2(ik,jc-3)[0]
|
|
|
|
j += 4
|
|
jc -= 4
|
|
|
|
while j < ipph - 1:
|
|
iang += l
|
|
if iang >= ip:
|
|
iang -= ip
|
|
ar1 = csarr[2*iang]
|
|
ai1 = csarr[2*iang+1]
|
|
iang += l
|
|
if iang >= ip:
|
|
iang -= ip
|
|
ar2 = csarr[2*iang]
|
|
ai2 = csarr[2*iang+1]
|
|
|
|
for ik in range(idl1):
|
|
CH2(ik,l )[0] += ar1*C2(ik,j )[0] + ar2*C2(ik,j +1)[0]
|
|
CH2(ik,lc)[0] += ai1*C2(ik,jc)[0] + ai2*C2(ik,jc-1)[0]
|
|
|
|
j += 2
|
|
jc -= 2
|
|
|
|
while j < ipph:
|
|
iang += l
|
|
if iang >= ip:
|
|
iang -= ip
|
|
ar=csarr[2*iang]
|
|
ai=csarr[2*iang+1]
|
|
|
|
for ik in range(idl1):
|
|
CH2(ik,l )[0] += ar*C2(ik,j )[0]
|
|
CH2(ik,lc)[0] += ai*C2(ik,jc)[0]
|
|
|
|
j += 1
|
|
jc -= 1
|
|
|
|
lc -= 1
|
|
|
|
for ik in range(idl1):
|
|
CH2(ik,0)[0] = C2(ik,0)[0]
|
|
|
|
for j in range(1, ipph):
|
|
for ik in range(idl1):
|
|
CH2(ik,0)[0] += C2(ik,j)[0]
|
|
|
|
for k in range(l1):
|
|
for i in range(ido):
|
|
CC(i,0,k)[0] = CH(i,k,0)[0]
|
|
|
|
jc = ip - 1
|
|
for j in range(1, ipph):
|
|
j2 = 2*j-1
|
|
for k in range(l1):
|
|
CC(ido-1,j2,k)[0] = CH(0,k,j)[0]
|
|
CC(0,j2+1,k)[0] = CH(0,k,jc)[0]
|
|
jc -= 1
|
|
|
|
if ido == 1:
|
|
return
|
|
|
|
jc = ip - 1
|
|
for j in range(1, ipph):
|
|
j2 = 2*j - 1
|
|
for k in range(l1):
|
|
i = 1
|
|
ic = ido - i - 2
|
|
while i <= ido - 2:
|
|
CC(i ,j2+1,k)[0] = CH(i ,k,j )[0]+CH(i ,k,jc)[0]
|
|
CC(ic ,j2 ,k)[0] = CH(i ,k,j )[0]-CH(i ,k,jc)[0]
|
|
CC(i+1 ,j2+1,k)[0] = CH(i+1,k,j )[0]+CH(i+1,k,jc)[0]
|
|
CC(ic+1,j2 ,k)[0] = CH(i+1,k,jc)[0]-CH(i+1,k,j )[0]
|
|
i += 2
|
|
ic -= 2
|
|
jc -= 1
|
|
|
|
def radb2(ido: int, l1: int, cc: Ptr[T0], ch: Ptr[T0], wa: Ptr[T0]):
|
|
@inline
|
|
def WA(x: int, i: int):
|
|
return wa[i+x*(ido-1)]
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+2*c)]
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+l1*c))
|
|
|
|
for k in range(l1):
|
|
CH(0,k,0)[0], CH(0,k,1)[0] = PM(CC(0,0,k),CC(ido-1,1,k))
|
|
|
|
if ido & 1 == 0:
|
|
for k in range(l1):
|
|
CH(ido-1,k,0)[0] = T0(2)*CC(ido-1,0,k)
|
|
CH(ido-1,k,1)[0] = -T0(2)*CC(0 ,1,k)
|
|
|
|
if ido <= 2:
|
|
return
|
|
|
|
for k in range(l1):
|
|
for i in range(2, ido, 2):
|
|
ic = ido-i
|
|
CH(i-1,k,0)[0], tr2 = PM(CC(i-1,0,k),CC(ic-1,1,k))
|
|
ti2, CH(i ,k,0)[0] = PM(CC(i ,0,k), CC(ic ,1,k))
|
|
CH(i,k,1)[0], CH(i-1,k,1)[0] = MULPM(WA(0,i-2),WA(0,i-1),ti2,tr2)
|
|
|
|
def radb3(ido: int, l1: int, cc: Ptr[T0], ch: Ptr[T0], wa: Ptr[T0]):
|
|
@inline
|
|
def WA(x: int, i: int):
|
|
return wa[i+x*(ido-1)]
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+3*c)]
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+l1*c))
|
|
|
|
taur = cast(-0.5, T0)
|
|
taui = cast(0.8660254037844386467637231707529362, T0)
|
|
|
|
for k in range(l1):
|
|
tr2=T0(2)*CC(ido-1,1,k)
|
|
cr2=CC(0,0,k)+taur*tr2
|
|
CH(0,k,0)[0]=CC(0,0,k)+tr2
|
|
ci3=T0(2)*taui*CC(0,2,k)
|
|
CH(0,k,2)[0], CH(0,k,1)[0] = PM(cr2,ci3)
|
|
|
|
if ido == 1:
|
|
return
|
|
|
|
for k in range(l1):
|
|
ic = ido - 2
|
|
for i in range(2, ido, 2):
|
|
tr2=CC(i-1,2,k)+CC(ic-1,1,k)
|
|
ti2=CC(i ,2,k)-CC(ic ,1,k)
|
|
cr2=CC(i-1,0,k)+taur*tr2
|
|
ci2=CC(i ,0,k)+taur*ti2
|
|
CH(i-1,k,0)[0]=CC(i-1,0,k)+tr2
|
|
CH(i ,k,0)[0]=CC(i ,0,k)+ti2
|
|
cr3=taui*(CC(i-1,2,k)-CC(ic-1,1,k))
|
|
ci3=taui*(CC(i ,2,k)+CC(ic ,1,k))
|
|
dr3,dr2=PM(cr2,ci3)
|
|
di2,di3=PM(ci2,cr3)
|
|
CH(i,k,1)[0],CH(i-1,k,1)[0]=MULPM(WA(0,i-2),WA(0,i-1),di2,dr2)
|
|
CH(i,k,2)[0],CH(i-1,k,2)[0]=MULPM(WA(1,i-2),WA(1,i-1),di3,dr3)
|
|
ic -= 2
|
|
|
|
def radb4(ido: int, l1: int, cc: Ptr[T0], ch: Ptr[T0], wa: Ptr[T0]):
|
|
@inline
|
|
def WA(x: int, i: int):
|
|
return wa[i+x*(ido-1)]
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+4*c)]
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+l1*c))
|
|
|
|
sqrt2 = cast(1.414213562373095048801688724209698, T0)
|
|
|
|
for k in range(l1):
|
|
tr2,tr1 = PM(CC(0,0,k),CC(ido-1,3,k))
|
|
tr3=T0(2)*CC(ido-1,1,k)
|
|
tr4=T0(2)*CC(0,2,k)
|
|
CH(0,k,0)[0],CH(0,k,2)[0] = PM(tr2,tr3)
|
|
CH(0,k,3)[0],CH(0,k,1)[0] = PM(tr1,tr4)
|
|
|
|
if ido & 1 == 0:
|
|
for k in range(l1):
|
|
ti1,ti2 = PM(CC(0 ,3,k),CC(0 ,1,k))
|
|
tr2,tr1 = PM(CC(ido-1,0,k),CC(ido-1,2,k))
|
|
CH(ido-1,k,0)[0]=tr2+tr2
|
|
CH(ido-1,k,1)[0]=sqrt2*(tr1-ti1)
|
|
CH(ido-1,k,2)[0]=ti2+ti2
|
|
CH(ido-1,k,3)[0]=-sqrt2*(tr1+ti1)
|
|
|
|
if ido <= 2:
|
|
return
|
|
|
|
for k in range(l1):
|
|
for i in range(2, ido, 2):
|
|
ic=ido-i
|
|
tr2,tr1 = PM(CC(i-1,0,k),CC(ic-1,3,k))
|
|
ti1,ti2 = PM(CC(i ,0,k),CC(ic ,3,k))
|
|
tr4,ti3 = PM(CC(i ,2,k),CC(ic ,1,k))
|
|
tr3,ti4 = PM(CC(i-1,2,k),CC(ic-1,1,k))
|
|
CH(i-1,k,0)[0],cr3 = PM(tr2,tr3)
|
|
CH(i ,k,0)[0],ci3 = PM(ti2,ti3)
|
|
cr4,cr2 = PM (tr1,tr4)
|
|
ci2,ci4 = PM (ti1,ti4)
|
|
CH(i,k,1)[0],CH(i-1,k,1)[0] = MULPM(WA(0,i-2),WA(0,i-1),ci2,cr2)
|
|
CH(i,k,2)[0],CH(i-1,k,2)[0] = MULPM(WA(1,i-2),WA(1,i-1),ci3,cr3)
|
|
CH(i,k,3)[0],CH(i-1,k,3)[0] = MULPM(WA(2,i-2),WA(2,i-1),ci4,cr4)
|
|
|
|
def radb5(ido: int, l1: int, cc: Ptr[T0], ch: Ptr[T0], wa: Ptr[T0]):
|
|
@inline
|
|
def WA(x: int, i: int):
|
|
return wa[i+x*(ido-1)]
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+5*c)]
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+l1*c))
|
|
|
|
tr11 = cast(0.3090169943749474241022934171828191, T0)
|
|
ti11 = cast(0.9510565162951535721164393333793821, T0)
|
|
tr12 = cast(-0.8090169943749474241022934171828191, T0)
|
|
ti12 = cast(0.5877852522924731291687059546390728, T0)
|
|
|
|
for k in range(l1):
|
|
ti5=CC(0,2,k)+CC(0,2,k)
|
|
ti4=CC(0,4,k)+CC(0,4,k)
|
|
tr2=CC(ido-1,1,k)+CC(ido-1,1,k)
|
|
tr3=CC(ido-1,3,k)+CC(ido-1,3,k)
|
|
CH(0,k,0)[0]=CC(0,0,k)+tr2+tr3
|
|
cr2=CC(0,0,k)+tr11*tr2+tr12*tr3
|
|
cr3=CC(0,0,k)+tr12*tr2+tr11*tr3
|
|
ci5,ci4=MULPM(ti5,ti4,ti11,ti12)
|
|
CH(0,k,4)[0],CH(0,k,1)[0]=PM(cr2,ci5)
|
|
CH(0,k,3)[0],CH(0,k,2)[0]=PM(cr3,ci4)
|
|
|
|
if ido == 1:
|
|
return
|
|
|
|
for k in range(l1):
|
|
ic = ido-2
|
|
for i in range(2, ido, 2):
|
|
tr2,tr5=PM(CC(i-1,2,k),CC(ic-1,1,k))
|
|
ti5,ti2=PM(CC(i ,2,k),CC(ic ,1,k))
|
|
tr3,tr4=PM(CC(i-1,4,k),CC(ic-1,3,k))
|
|
ti4,ti3=PM(CC(i ,4,k),CC(ic ,3,k))
|
|
CH(i-1,k,0)[0]=CC(i-1,0,k)+tr2+tr3
|
|
CH(i ,k,0)[0]=CC(i ,0,k)+ti2+ti3
|
|
cr2=CC(i-1,0,k)+tr11*tr2+tr12*tr3
|
|
ci2=CC(i ,0,k)+tr11*ti2+tr12*ti3
|
|
cr3=CC(i-1,0,k)+tr12*tr2+tr11*tr3
|
|
ci3=CC(i ,0,k)+tr12*ti2+tr11*ti3
|
|
cr5,cr4=MULPM(tr5,tr4,ti11,ti12)
|
|
ci5,ci4=MULPM(ti5,ti4,ti11,ti12)
|
|
dr4,dr3=PM(cr3,ci4)
|
|
di3,di4=PM(ci3,cr4)
|
|
dr5,dr2=PM(cr2,ci5)
|
|
di2,di5=PM(ci2,cr5)
|
|
CH(i,k,1)[0],CH(i-1,k,1)[0]=MULPM(WA(0,i-2),WA(0,i-1),di2,dr2)
|
|
CH(i,k,2)[0],CH(i-1,k,2)[0]=MULPM(WA(1,i-2),WA(1,i-1),di3,dr3)
|
|
CH(i,k,3)[0],CH(i-1,k,3)[0]=MULPM(WA(2,i-2),WA(2,i-1),di4,dr4)
|
|
CH(i,k,4)[0],CH(i-1,k,4)[0]=MULPM(WA(3,i-2),WA(3,i-1),di5,dr5)
|
|
|
|
ic -= 2
|
|
|
|
def radbg(ido: int, ip: int, l1: int, cc: Ptr[T0], ch: Ptr[T0], wa: Ptr[T0], csarr: Ptr[T0]):
|
|
cdim = ip
|
|
ipph = (ip + 1) // 2
|
|
idl1 = ido * l1
|
|
|
|
@inline
|
|
def CC(a: int, b: int, c: int):
|
|
return cc[a+ido*(b+cdim*c)]
|
|
@inline
|
|
def CH(a: int, b: int, c: int):
|
|
return ch + (a+ido*(b+l1*c))
|
|
@inline
|
|
def C1(a: int, b: int, c: int):
|
|
return cc + (a+ido*(b+l1*c))
|
|
@inline
|
|
def C2(a: int, b: int):
|
|
return cc + (a+idl1*b)
|
|
@inline
|
|
def CH2(a: int, b: int):
|
|
return ch + a+idl1*b
|
|
|
|
for k in range(l1):
|
|
for i in range(ido):
|
|
CH(i,k,0)[0] = CC(i,0,k)
|
|
|
|
jc=ip-1
|
|
for j in range(1, ipph):
|
|
j2 = 2*j - 1
|
|
for k in range(l1):
|
|
CH(0,k,j )[0] = T0(2)*CC(ido-1,j2,k)
|
|
CH(0,k,jc)[0] = T0(2)*CC(0,j2+1,k)
|
|
jc -= 1
|
|
|
|
if ido != -1:
|
|
jc = ip-1
|
|
for j in range(1, ipph):
|
|
j2 = 2*j - 1
|
|
for k in range(l1):
|
|
i=1
|
|
ic=ido-i-2
|
|
while i <= ido - 2:
|
|
CH(i ,k,j )[0] = CC(i ,j2+1,k)+CC(ic ,j2,k)
|
|
CH(i ,k,jc)[0] = CC(i ,j2+1,k)-CC(ic ,j2,k)
|
|
CH(i+1,k,j )[0] = CC(i+1,j2+1,k)-CC(ic+1,j2,k)
|
|
CH(i+1,k,jc)[0] = CC(i+1,j2+1,k)+CC(ic+1,j2,k)
|
|
i += 2
|
|
ic -= 2
|
|
jc -= 1
|
|
|
|
lc = ip-1
|
|
for l in range(1, ipph):
|
|
for ik in range(idl1):
|
|
C2(ik,l )[0] = CH2(ik,0)[0]+csarr[2*l]*CH2(ik,1)[0]+csarr[4*l]*CH2(ik,2)[0]
|
|
C2(ik,lc)[0] = csarr[2*l+1]*CH2(ik,ip-1)[0]+csarr[4*l+1]*CH2(ik,ip-2)[0]
|
|
iang = 2*l
|
|
j=3
|
|
jc=ip-3
|
|
while j < ipph - 3:
|
|
iang+=l
|
|
if iang>ip:
|
|
iang-=ip
|
|
ar1=csarr[2*iang]
|
|
ai1=csarr[2*iang+1]
|
|
iang+=l
|
|
if iang>ip:
|
|
iang-=ip
|
|
ar2=csarr[2*iang]
|
|
ai2=csarr[2*iang+1]
|
|
iang+=l
|
|
if iang>ip:
|
|
iang-=ip
|
|
ar3=csarr[2*iang]
|
|
ai3=csarr[2*iang+1]
|
|
iang+=l
|
|
if iang>ip:
|
|
iang-=ip
|
|
ar4=csarr[2*iang]
|
|
ai4=csarr[2*iang+1]
|
|
for ik in range(idl1):
|
|
C2(ik,l )[0] += ar1*CH2(ik,j )[0]+ar2*CH2(ik,j +1)[0] + ar3*CH2(ik,j +2)[0]+ar4*CH2(ik,j +3)[0]
|
|
C2(ik,lc)[0] += ai1*CH2(ik,jc)[0]+ai2*CH2(ik,jc-1)[0] + ai3*CH2(ik,jc-2)[0]+ai4*CH2(ik,jc-3)[0]
|
|
j += 4
|
|
jc -= 4
|
|
|
|
while j < ipph - 1:
|
|
iang+=l
|
|
if iang>ip:
|
|
iang-=ip
|
|
ar1=csarr[2*iang]
|
|
ai1=csarr[2*iang+1]
|
|
iang+=l
|
|
if iang>ip:
|
|
iang-=ip
|
|
ar2=csarr[2*iang]
|
|
ai2=csarr[2*iang+1]
|
|
for ik in range(idl1):
|
|
C2(ik,l )[0] += ar1*CH2(ik,j )[0]+ar2*CH2(ik,j +1)[0]
|
|
C2(ik,lc)[0] += ai1*CH2(ik,jc)[0]+ai2*CH2(ik,jc-1)[0]
|
|
j += 2
|
|
jc -= 2
|
|
|
|
while j < ipph:
|
|
iang+=l
|
|
if iang>ip:
|
|
iang-=ip
|
|
war=csarr[2*iang]
|
|
wai=csarr[2*iang+1]
|
|
for ik in range(idl1):
|
|
C2(ik,l )[0] += war*CH2(ik,j )[0]
|
|
C2(ik,lc)[0] += wai*CH2(ik,jc)[0]
|
|
j += 1
|
|
jc -= 1
|
|
|
|
lc -= 1
|
|
|
|
for j in range(1, ipph):
|
|
for ik in range(idl1):
|
|
CH2(ik,0)[0] += CH2(ik,j)[0]
|
|
|
|
jc = ip-1
|
|
for j in range(1, ipph):
|
|
for k in range(l1):
|
|
CH(0,k,jc)[0],CH(0,k,j)[0] = PM(C1(0,k,j)[0],C1(0,k,jc)[0])
|
|
jc -= 1
|
|
|
|
if ido == 1:
|
|
return
|
|
|
|
jc = ip - 1
|
|
for j in range(1, ipph):
|
|
for k in range(l1):
|
|
i = 1
|
|
while i <= ido - 2:
|
|
CH(i ,k,j )[0] = C1(i ,k,j)[0]-C1(i+1,k,jc)[0]
|
|
CH(i ,k,jc)[0] = C1(i ,k,j)[0]+C1(i+1,k,jc)[0]
|
|
CH(i+1,k,j )[0] = C1(i+1,k,j)[0]+C1(i ,k,jc)[0]
|
|
CH(i+1,k,jc)[0] = C1(i+1,k,j)[0]-C1(i ,k,jc)[0]
|
|
i += 2
|
|
jc -= 1
|
|
|
|
for j in range(1, ip):
|
|
is_ = (j-1)*(ido-1)
|
|
for k in range(l1):
|
|
idij = is_
|
|
i = 1
|
|
while i <= ido - 2:
|
|
t1=CH(i,k,j)[0]
|
|
t2=CH(i+1,k,j)[0]
|
|
CH(i ,k,j)[0] = wa[idij]*t1-wa[idij+1]*t2
|
|
CH(i+1,k,j)[0] = wa[idij]*t2+wa[idij+1]*t1
|
|
idij+=2
|
|
i += 2
|
|
|
|
def copy_and_norm(self, c: Ptr[T0], p1: Ptr[T0], fct: T0):
|
|
if p1 != c:
|
|
if fct != cast(1, T0):
|
|
for i in range(self.length):
|
|
c[i] = fct * p1[i]
|
|
else:
|
|
str.memcpy(c.as_byte(), p1.as_byte(), self.length * sizeof(T0))
|
|
else:
|
|
if fct != cast(1, T0):
|
|
for i in range(self.length):
|
|
c[i] *= fct
|
|
|
|
def exec(self, c: Ptr[T0], fct: T0, r2hc: bool):
|
|
length = self.length
|
|
|
|
if length == 1:
|
|
c[0] *= fct
|
|
return
|
|
|
|
fact = self.fact
|
|
nf = len(fact)
|
|
ch = arr[T0](length)
|
|
p1 = c
|
|
p2 = ch.data()
|
|
|
|
if r2hc:
|
|
l1 = length
|
|
for k1 in range(nf):
|
|
k = nf-k1-1
|
|
ip = fact[k].fct
|
|
ido = length // l1
|
|
l1 //= ip
|
|
if ip == 4:
|
|
type(self).radf4(ido, l1, p1, p2, fact[k].tw)
|
|
elif ip == 2:
|
|
type(self).radf2(ido, l1, p1, p2, fact[k].tw)
|
|
elif ip == 3:
|
|
type(self).radf3(ido, l1, p1, p2, fact[k].tw)
|
|
elif ip == 5:
|
|
type(self).radf5(ido, l1, p1, p2, fact[k].tw)
|
|
else:
|
|
type(self).radfg(ido, ip, l1, p1, p2, fact[k].tw, fact[k].tws)
|
|
p1, p2 = p2, p1
|
|
p1, p2 = p2, p1
|
|
else:
|
|
l1 = 1
|
|
for k in range(nf):
|
|
ip = fact[k].fct
|
|
ido = length // (ip * l1)
|
|
if ip == 4:
|
|
type(self).radb4(ido, l1, p1, p2, fact[k].tw)
|
|
elif ip == 2:
|
|
type(self).radb2(ido, l1, p1, p2, fact[k].tw)
|
|
elif ip == 3:
|
|
type(self).radb3(ido, l1, p1, p2, fact[k].tw)
|
|
elif ip == 5:
|
|
type(self).radb5(ido, l1, p1, p2, fact[k].tw)
|
|
else:
|
|
type(self).radbg(ido, ip, l1, p1, p2, fact[k].tw, fact[k].tws)
|
|
p1, p2 = p2, p1
|
|
l1 *= ip
|
|
|
|
self.copy_and_norm(c, p1, fct)
|
|
ch.dealloc()
|
|
|
|
def factorize(self):
|
|
n = self.length
|
|
fact = self.fact
|
|
|
|
while n % 4 == 0:
|
|
self.add_factor(4)
|
|
n >>= 2
|
|
|
|
if n % 2 == 0:
|
|
n >>= 1
|
|
self.add_factor(2)
|
|
fct_front = fact[0].fct
|
|
fct_back = fact[-1].fct
|
|
fact[0] = fact[0].with_fct(fct_back)
|
|
fact[-1] = fact[-1].with_fct(fct_front)
|
|
|
|
divisor = 3
|
|
while divisor * divisor <= n:
|
|
while n % divisor == 0:
|
|
self.add_factor(divisor)
|
|
n //= divisor
|
|
divisor += 2
|
|
|
|
if n > 1:
|
|
self.add_factor(n)
|
|
|
|
def twsize(self):
|
|
length = self.length
|
|
fact = self.fact
|
|
twsz = 0
|
|
l1 = 1
|
|
for k in range(len(fact)):
|
|
ip = fact[k].fct
|
|
ido = length // (l1*ip)
|
|
twsz += (ip-1)*(ido-1)
|
|
if ip > 5:
|
|
twsz += 2*ip
|
|
l1 *= ip
|
|
return twsz
|
|
|
|
def comp_twiddle(self):
|
|
length = self.length
|
|
fact = self.fact
|
|
twid = sincos_2pibyn[T0](self.length)
|
|
l1 = 1
|
|
p = self.mem.data()
|
|
|
|
for k in range(len(fact)):
|
|
ip = fact[k].fct
|
|
ido = length // (l1*ip)
|
|
|
|
if k < len(fact) - 1:
|
|
fact[k] = fact[k].with_tw(p)
|
|
p += (ip-1)*(ido-1)
|
|
|
|
for j in range(1, ip):
|
|
i = 1
|
|
while i <= (ido-1)//2:
|
|
fact[k].tw[(j-1)*(ido-1)+2*i-2] = twid[j*l1*i].real
|
|
fact[k].tw[(j-1)*(ido-1)+2*i-1] = twid[j*l1*i].imag
|
|
i += 1
|
|
|
|
if ip > 5:
|
|
fact[k] = fact[k].with_tws(p)
|
|
p += 2*ip
|
|
fact[k].tws[0] = cast(1, T0)
|
|
fact[k].tws[1] = cast(0, T0)
|
|
i = 2
|
|
ic = 2*ip-2
|
|
while i <= ic:
|
|
fact[k].tws[i ] = twid[i//2*(length//ip)].real
|
|
fact[k].tws[i+1] = twid[i//2*(length//ip)].imag
|
|
fact[k].tws[ic] = twid[i//2*(length//ip)].real
|
|
fact[k].tws[ic+1] = -twid[i//2*(length//ip)].imag
|
|
i += 2
|
|
ic -= 2
|
|
|
|
l1 *= ip
|
|
|
|
twid.dealloc()
|
|
|
|
def __init__(self, length: int):
|
|
if length == 0:
|
|
raise ValueError("Invalid number of FFT data points (0) specified.")
|
|
self.length = length
|
|
self.fact = []
|
|
if length != 1:
|
|
self.factorize()
|
|
self.mem = arr[T0](self.twsize())
|
|
self.comp_twiddle()
|
|
|
|
def dealloc(self):
|
|
self.mem.dealloc()
|
|
|
|
class fftblue:
|
|
n: int
|
|
n2: int
|
|
plan: cfftp[T,T0]
|
|
mem: arr[T]
|
|
bk: Ptr[T]
|
|
bkf: Ptr[T]
|
|
T: type
|
|
T0: type
|
|
|
|
def fft(self, c: Ptr[T], fct: T0, fwd: bool):
|
|
n = self.n
|
|
n2 = self.n2
|
|
plan = self.plan
|
|
bk = self.bk
|
|
bkf = self.bkf
|
|
|
|
akf = arr[T](n2)
|
|
|
|
for m in range(n):
|
|
akf[m] = _special_mul(c[m], bk[m], fwd)
|
|
|
|
zero = akf[0] * cast(0, T0)
|
|
for m in range(n, n2):
|
|
akf[m] = zero
|
|
|
|
plan.exec(akf.data(), cast(1, T0), True)
|
|
|
|
akf[0] = _special_mul(akf[0], bkf[0], not fwd)
|
|
for m in range(1, (n2 + 1) // 2):
|
|
akf[m] = _special_mul(akf[m], bkf[m], not fwd)
|
|
akf[n2-m] = _special_mul(akf[n2-m], bkf[m], not fwd)
|
|
|
|
if n2 & 1 == 0:
|
|
akf[n2//2] = _special_mul(akf[n2//2], bkf[n2//2], not fwd)
|
|
|
|
plan.exec(akf.data(), cast(1, T0), False)
|
|
|
|
for m in range(n):
|
|
c[m] = _special_mul(akf[m], bk[m], fwd) * fct
|
|
|
|
akf.dealloc()
|
|
|
|
def __init__(self, length: int):
|
|
n = length
|
|
self.n = n
|
|
self.n2 = _good_size_cmplx(n*2-1)
|
|
n2 = self.n2
|
|
self.plan = cfftp[T,T0](n2)
|
|
self.mem = arr[T](n+n2//2+1)
|
|
self.bk = self.mem.data()
|
|
self.bkf = self.mem.data() + n
|
|
|
|
plan = self.plan
|
|
mem = self.mem
|
|
bk = self.bk
|
|
bkf = self.bkf
|
|
|
|
tmp = sincos_2pibyn[T0](2*n)
|
|
bk[0] = T(cast(1, T0), cast(0, T0))
|
|
|
|
coeff = 0
|
|
for m in range(1, n):
|
|
coeff += 2*m - 1
|
|
if coeff >= 2*n:
|
|
coeff -= 2*n
|
|
bk[m] = tmp[coeff]
|
|
|
|
tbkf = arr[T](n2)
|
|
xn2 = cast(1, T0) / cast(n2, T0)
|
|
tbkf[0] = bk[0]*xn2
|
|
|
|
for m in range(1, n):
|
|
x = bk[m]*xn2
|
|
tbkf[m] = x
|
|
tbkf[n2-m] = x
|
|
|
|
m = n
|
|
while m <= n2-n:
|
|
tbkf[m] = T(cast(0, T0), cast(0, T0))
|
|
m += 1
|
|
|
|
plan.exec(tbkf.data(), cast(1, T0), True)
|
|
|
|
for i in range(n2//2 + 1):
|
|
bkf[i] = tbkf[i]
|
|
|
|
tbkf.dealloc()
|
|
tmp.dealloc()
|
|
|
|
def exec(self, c: Ptr[T], fct: T0, fwd: bool):
|
|
return self.fft(c, fct, fwd)
|
|
|
|
def exec_r(self, c: Ptr[T0], fct: T0, fwd: bool):
|
|
n = self.n
|
|
tmp = arr[T](n)
|
|
if fwd:
|
|
zero = cast(0, T0) * c[0]
|
|
for m in range(n):
|
|
tmp[m] = T(c[m], zero)
|
|
self.fft(tmp.data(), fct, True)
|
|
c[0] = tmp[0].real
|
|
str.memcpy((c + 1).as_byte(), (tmp.data() + 1).as_byte(), (n-1) * sizeof(T0))
|
|
else:
|
|
tmp[0] = T(c[0], c[0] * cast(0, T0))
|
|
str.memcpy((tmp.data() + 1).as_byte(), (c + 1).as_byte(), (n-1) * sizeof(T0))
|
|
if n & 1 == 0:
|
|
tmp[n//2] = T(tmp[n//2].real, cast(0, T0) * c[0])
|
|
m = 1
|
|
while 2*m < n:
|
|
tmp[n-m] = T(tmp[m].real, -tmp[m].imag)
|
|
m += 1
|
|
self.fft(tmp.data(), fct, False)
|
|
for m in range(n):
|
|
c[m] = tmp[m].real
|
|
tmp.dealloc()
|
|
|
|
def dealloc(self):
|
|
self.plan.dealloc()
|
|
self.mem.dealloc()
|
|
|
|
class pocketfft_c:
|
|
packplan: Optional[cfftp[T,T0]]
|
|
blueplan: Optional[fftblue[T,T0]]
|
|
length: int
|
|
T: type
|
|
T0: type
|
|
|
|
def __init__(self, length: int):
|
|
self.packplan = None
|
|
self.blueplan = None
|
|
self.length = length
|
|
|
|
if length == 0:
|
|
raise ValueError("Invalid number of FFT data points (0) specified.")
|
|
|
|
tmp = 0 if length < 50 else _largest_prime_factor(length)
|
|
if tmp * tmp <= length:
|
|
self.packplan = cfftp[T,T0](length)
|
|
return
|
|
|
|
comp1 = _cost_guess(length)
|
|
comp2 = 2*_cost_guess(_good_size_cmplx(2*length - 1))
|
|
comp2 *= 1.5
|
|
|
|
if comp2 < comp1:
|
|
self.blueplan = fftblue[T,T0](length)
|
|
else:
|
|
self.packplan = cfftp[T,T0](length)
|
|
|
|
def exec(self, c: Ptr[T], fct: T0, fwd: bool):
|
|
if self.packplan is not None:
|
|
self.packplan.exec(c, fct, fwd)
|
|
self.packplan.dealloc()
|
|
else:
|
|
self.blueplan.exec(c, fct, fwd)
|
|
self.blueplan.dealloc()
|
|
|
|
class pocketfft_r:
|
|
packplan: Optional[rfftp[T,T0]]
|
|
blueplan: Optional[fftblue[T,T0]]
|
|
length: int
|
|
T: type
|
|
T0: type
|
|
|
|
def __init__(self, length: int):
|
|
self.packplan = None
|
|
self.blueplan = None
|
|
self.length = length
|
|
|
|
if length == 0:
|
|
raise ValueError("Invalid number of FFT data points (0) specified.")
|
|
|
|
tmp = 0 if length < 50 else _largest_prime_factor(length)
|
|
if tmp * tmp <= length:
|
|
self.packplan = rfftp[T,T0](length)
|
|
return
|
|
|
|
comp1 = 0.5*_cost_guess(length)
|
|
comp2 = 2*_cost_guess(_good_size_cmplx(2*length - 1))
|
|
comp2 *= 1.5
|
|
|
|
if comp2 < comp1:
|
|
self.blueplan = fftblue[T,T0](length)
|
|
else:
|
|
self.packplan = rfftp[T,T0](length)
|
|
|
|
def exec(self, c: Ptr[T0], fct: T0, fwd: bool):
|
|
if self.packplan is not None:
|
|
self.packplan.exec(c, fct, fwd)
|
|
self.packplan.dealloc()
|
|
else:
|
|
self.blueplan.exec_r(c, fct, fwd)
|
|
self.blueplan.dealloc()
|
|
|
|
def _bad_norm(norm: str):
|
|
raise ValueError(f'Invalid norm value {repr(norm)}; should be "backward", "ortho" or "forward".')
|
|
|
|
def _bad_length(n: int):
|
|
raise ValueError(f"Invalid number of FFT data points ({n}) specified.")
|
|
|
|
def _swap_direction(norm: Optional[str]) -> Optional[str]:
|
|
if norm is None:
|
|
return "forward"
|
|
if norm == "backward":
|
|
return "forward"
|
|
if norm == "ortho":
|
|
return "ortho"
|
|
if norm == "forward":
|
|
return "backward"
|
|
_bad_norm(norm)
|
|
|
|
def _fft(v: Ptr[T], n: int, is_forward: bool, norm: Optional[str], T: type):
|
|
if n < 1:
|
|
_bad_length(n)
|
|
|
|
if not is_forward:
|
|
norm = _swap_direction(norm)
|
|
|
|
real_dtype = type(T().real)
|
|
fct = cast(0, real_dtype)
|
|
|
|
if norm is None or norm == "backward":
|
|
fct = cast(1, real_dtype)
|
|
elif norm == "ortho":
|
|
fct = cast(1, real_dtype) / sqrt(cast(n, real_dtype))
|
|
elif norm == "forward":
|
|
fct = cast(1, real_dtype) / cast(n, real_dtype)
|
|
else:
|
|
_bad_norm(norm)
|
|
|
|
plan = pocketfft_c(n)
|
|
plan.exec(v, fct, is_forward)
|
|
|
|
def fft(v: Ptr[T], n: int, norm: Optional[str], T: type):
|
|
return _fft(v, n, is_forward=True, norm=norm)
|
|
|
|
def ifft(v: Ptr[T], n: int, norm: Optional[str], T: type):
|
|
return _fft(v, n, is_forward=False, norm=norm)
|
|
|
|
def rfft(v: Ptr[T0], n: int, is_forward: bool, norm: Optional[str], o: Ptr[T], T: type, T0: type):
|
|
if n < 1:
|
|
_bad_length(n)
|
|
|
|
real_dtype = type(T().real)
|
|
fct = cast(0, real_dtype)
|
|
|
|
if norm is None or norm == "backward":
|
|
fct = cast(1, real_dtype)
|
|
elif norm == "ortho":
|
|
fct = cast(1, real_dtype) / sqrt(cast(n, real_dtype))
|
|
elif norm == "forward":
|
|
fct = cast(1, real_dtype) / cast(n, real_dtype)
|
|
else:
|
|
_bad_norm(norm)
|
|
|
|
plan = pocketfft_r[T, T0](n)
|
|
plan.exec(v, fct, True)
|
|
|
|
i = 1
|
|
ii = 1
|
|
o[0] = T(v[0], cast(0, T0))
|
|
|
|
if is_forward:
|
|
while i < n - 1:
|
|
o[ii] = T(v[i], v[i+1])
|
|
i += 2
|
|
ii += 1
|
|
else:
|
|
while i < n - 1:
|
|
o[ii] = T(v[i], -v[i+1])
|
|
i += 2
|
|
ii += 1
|
|
|
|
if i < n:
|
|
o[ii] = T(v[i], cast(0, T0))
|
|
|
|
def irfft(v: Ptr[T], n: int, norm: Optional[str], o: Ptr[T0], T: type, T0: type):
|
|
if n < 1:
|
|
_bad_length(n)
|
|
|
|
real_dtype = type(T().real)
|
|
fct = cast(0, real_dtype)
|
|
norm = _swap_direction(norm)
|
|
|
|
if norm is None or norm == "backward":
|
|
fct = cast(1, real_dtype)
|
|
elif norm == "ortho":
|
|
fct = cast(1, real_dtype) / sqrt(cast(n, real_dtype))
|
|
elif norm == "forward":
|
|
fct = cast(1, real_dtype) / cast(n, real_dtype)
|
|
else:
|
|
_bad_norm(norm)
|
|
|
|
plan = pocketfft_r[T, T0](n)
|
|
|
|
i = 1
|
|
ii = 1
|
|
o[0] = v[0].real
|
|
is_forward = False # hardcoded, but kept for reference
|
|
|
|
if is_forward:
|
|
while i < n - 1:
|
|
o[i] = v[ii].real
|
|
o[i+1] = -v[ii].imag
|
|
i += 2
|
|
ii += 1
|
|
else:
|
|
while i < n - 1:
|
|
o[i] = v[ii].real
|
|
o[i+1] = v[ii].imag
|
|
i += 2
|
|
ii += 1
|
|
|
|
if i < n:
|
|
o[i] = v[ii].real
|
|
|
|
plan.exec(o, fct, False)
|