You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
438 lines
9.7 KiB
438 lines
9.7 KiB
5 years ago
|
#ifdef __AVR__
|
||
|
;-----------------------------------------------------------------------------;
|
||
|
; Fixed-point FFT routines for megaAVRs (C)ChaN, 2005
|
||
|
;-----------------------------------------------------------------------------;
|
||
|
; * This program is opened under license policy of following trems.
|
||
|
;
|
||
|
; Copyright (C) 2005, ChaN, all right reserved.
|
||
|
;
|
||
|
; * This program is a free software and there is NO WARRANTY.
|
||
|
; * No restriction on use. You can use, modify and redistribute it for
|
||
|
; personal, non-profit or commercial use UNDER YOUR RESPONSIBILITY.
|
||
|
; * Redistributions of source code must retain the above copyright notice.
|
||
|
;
|
||
|
;-----------------------------------------------------------------------------;
|
||
|
;
|
||
|
; void fft_input (const int16_t *array_src, complex_t *array_bfly);
|
||
|
; void fft_execute (complex_t *array_bfly);
|
||
|
; void fft_output (complex_t *array_bfly, uint16_t *array_dst);
|
||
|
;
|
||
|
; <array_src>: Wave form to be processed.
|
||
|
; <array_bfly>: Complex array for butterfly operations.
|
||
|
; <array_dst>: Spectrum output buffer.
|
||
|
;
|
||
|
; These functions must be called in sequence to do a DFT in FFT algorithm.
|
||
|
; fft_input() fills the complex array with a wave form to prepare butterfly
|
||
|
; operations. A hamming window is applied at the same time.
|
||
|
; fft_execute() executes the butterfly operations.
|
||
|
; fft_output() re-orders the results, converts the complex spectrum into
|
||
|
; scalar spectrum and output it in linear scale.
|
||
|
;
|
||
|
;----------------------------------------------------------------------------;
|
||
|
; THIS CODE HAS BEEN MODIFIED FROM ITS ORIGINAL FORM - it is pared down
|
||
|
; to handle 64 points only, for the Circuit Playground microphone library.
|
||
|
; Original FFFT code can be found on GitHub.
|
||
|
;----------------------------------------------------------------------------;
|
||
|
|
||
|
.list
|
||
|
|
||
|
#define FFT_N 64
|
||
|
#define FFT_B 6
|
||
|
|
||
|
#define T0L r0
|
||
|
#define T0H r1
|
||
|
#define T2L r2
|
||
|
#define T2H r3
|
||
|
#define T4L r4
|
||
|
#define T4H r5
|
||
|
#define T6L r6
|
||
|
#define T6H r7
|
||
|
#define T8L r8
|
||
|
#define T8H r9
|
||
|
#define T10L r10
|
||
|
#define T10H r11
|
||
|
#define T12L r12
|
||
|
#define T12H r13
|
||
|
#define T14L r14
|
||
|
#define T14H r15
|
||
|
#define AL r16
|
||
|
#define AH r17
|
||
|
#define BL r18
|
||
|
#define BH r19
|
||
|
#define CL r20
|
||
|
#define CH r21
|
||
|
#define DL r22
|
||
|
#define DH r23
|
||
|
#define EL r24
|
||
|
#define EH r25
|
||
|
#define XL r26
|
||
|
#define XH r27
|
||
|
#define YL r28
|
||
|
#define YH r29
|
||
|
#define ZL r30
|
||
|
#define ZH r31
|
||
|
|
||
|
.macro ldiw dh,dl, abs
|
||
|
ldi \dl, lo8(\abs)
|
||
|
ldi \dh, hi8(\abs)
|
||
|
.endm
|
||
|
|
||
|
.macro subiw dh,dl, abs
|
||
|
subi \dl, lo8(\abs)
|
||
|
sbci \dh, hi8(\abs)
|
||
|
.endm
|
||
|
|
||
|
.macro addw dh,dl, sh,sl
|
||
|
add \dl, \sl
|
||
|
adc \dh, \sh
|
||
|
.endm
|
||
|
|
||
|
.macro addd d3,d2,d1,d0, s3,s2,s1,s0
|
||
|
add \d0, \s0
|
||
|
adc \d1, \s1
|
||
|
adc \d2, \s2
|
||
|
adc \d3, \s3
|
||
|
.endm
|
||
|
|
||
|
.macro subw dh,dl, sh,sl
|
||
|
sub \dl, \sl
|
||
|
sbc \dh, \sh
|
||
|
.endm
|
||
|
|
||
|
.macro subd d3,d2,d1,d0, s3,s2,s1,s0
|
||
|
sub \d0, \s0
|
||
|
sbc \d1, \s1
|
||
|
sbc \d2, \s2
|
||
|
sbc \d3, \s3
|
||
|
.endm
|
||
|
|
||
|
.macro lddw dh,dl, src
|
||
|
ldd \dl, \src
|
||
|
ldd \dh, \src+1
|
||
|
.endm
|
||
|
|
||
|
.macro ldw dh,dl, src
|
||
|
ld \dl, \src
|
||
|
ld \dh, \src
|
||
|
.endm
|
||
|
|
||
|
.macro stw dst, sh,sl
|
||
|
st \dst, \sl
|
||
|
st \dst, \sh
|
||
|
.endm
|
||
|
|
||
|
.macro clrw dh, dl
|
||
|
clr \dh
|
||
|
clr \dl
|
||
|
.endm
|
||
|
|
||
|
.macro lsrw dh, dl
|
||
|
lsr \dh
|
||
|
ror \dl
|
||
|
.endm
|
||
|
|
||
|
.macro asrw dh, dl
|
||
|
asr \dh
|
||
|
ror \dl
|
||
|
.endm
|
||
|
|
||
|
.macro lslw dh, dl
|
||
|
lsl \dl
|
||
|
rol \dh
|
||
|
.endm
|
||
|
|
||
|
.macro pushw dh, dl
|
||
|
push \dh
|
||
|
push \dl
|
||
|
.endm
|
||
|
|
||
|
.macro popw dh, dl
|
||
|
pop \dl
|
||
|
pop \dh
|
||
|
.endm
|
||
|
|
||
|
.macro lpmw dh,dl, src
|
||
|
lpm \dl, \src
|
||
|
lpm \dh, \src
|
||
|
.endm
|
||
|
|
||
|
.macro rjne lbl
|
||
|
breq 99f
|
||
|
rjmp \lbl
|
||
|
99:
|
||
|
.endm
|
||
|
|
||
|
.macro FMULS16 d3,d2,d1,d0 ,s1h,s1l, s2h,s2l ;Fractional Multiply (19clk)
|
||
|
fmuls \s1h, \s2h
|
||
|
movw \d2, T0L
|
||
|
fmul \s1l, \s2l
|
||
|
movw \d0, T0L
|
||
|
adc \d2, EH ;EH: zero reg.
|
||
|
fmulsu \s1h, \s2l
|
||
|
sbc \d3, EH
|
||
|
add \d1, T0L
|
||
|
adc \d2, T0H
|
||
|
adc \d3, EH
|
||
|
fmulsu \s2h, \s1l
|
||
|
sbc \d3, EH
|
||
|
add \d1, T0L
|
||
|
adc \d2, T0H
|
||
|
adc \d3, EH
|
||
|
.endm
|
||
|
|
||
|
.macro SQRT32 ; 32bit square root (526..542clk)
|
||
|
clr T6L
|
||
|
clr T6H
|
||
|
clr T8L
|
||
|
clr T8H
|
||
|
ldi BL, 1
|
||
|
ldi BH, 0
|
||
|
clr CL
|
||
|
clr CH
|
||
|
ldi DH, 16
|
||
|
90: lsl T2L
|
||
|
rol T2H
|
||
|
rol T4L
|
||
|
rol T4H
|
||
|
rol T6L
|
||
|
rol T6H
|
||
|
rol T8L
|
||
|
rol T8H
|
||
|
lsl T2L
|
||
|
rol T2H
|
||
|
rol T4L
|
||
|
rol T4H
|
||
|
rol T6L
|
||
|
rol T6H
|
||
|
rol T8L
|
||
|
rol T8H
|
||
|
brpl 91f
|
||
|
add T6L, BL
|
||
|
adc T6H, BH
|
||
|
adc T8L, CL
|
||
|
adc T8H, CH
|
||
|
rjmp 92f
|
||
|
91: sub T6L, BL
|
||
|
sbc T6H, BH
|
||
|
sbc T8L, CL
|
||
|
sbc T8H, CH
|
||
|
92: lsl BL
|
||
|
rol BH
|
||
|
rol CL
|
||
|
andi BL, 0b11111000
|
||
|
ori BL, 0b00000101
|
||
|
sbrc T8H, 7
|
||
|
subi BL, 2
|
||
|
dec DH
|
||
|
brne 90b
|
||
|
lsr CL
|
||
|
ror BH
|
||
|
ror BL
|
||
|
lsr CL
|
||
|
ror BH
|
||
|
ror BL
|
||
|
.endm
|
||
|
|
||
|
;----------------------------------------------------------------------------;
|
||
|
; Constant Tables
|
||
|
|
||
|
.global tbl_window
|
||
|
tbl_window: ; tbl_window[] = ... (This is a Hamming window)
|
||
|
.dc.w 2621, 2693, 2910, 3270, 3768, 4401, 5161, 6042, 7036, 8132, 9320, 10588, 11926, 13318, 14753, 16216
|
||
|
.dc.w 17694, 19171, 20634, 22069, 23462, 24799, 26068, 27256, 28352, 29345, 30226, 30987, 31619, 32117, 32477, 32694
|
||
|
.dc.w 32766, 32694, 32477, 32117, 31619, 30987, 30226, 29345, 28352, 27256, 26068, 24799, 23462, 22069, 20634, 19171
|
||
|
.dc.w 17694, 16216, 14753, 13318, 11926, 10588, 9320, 8132, 7036, 6042, 5161, 4401, 3768, 3270, 2910, 2693
|
||
|
tbl_cos_sin: ; Table of {cos(x),sin(x)}, (0 <= x < pi, in FFT_N/2 steps)
|
||
|
.dc.w 32767, 0, 32609, 3211, 32137, 6392, 31356, 9511, 30272, 12539, 28897, 15446, 27244, 18204, 25329, 20787
|
||
|
.dc.w 23169, 23169, 20787, 25329, 18204, 27244, 15446, 28897, 12539, 30272, 9511, 31356, 6392, 32137, 3211, 32609
|
||
|
.dc.w 0, 32766, -3211, 32609, -6392, 32137, -9511, 31356, -12539, 30272, -15446, 28897, -18204, 27244, -20787, 25329
|
||
|
.dc.w -23169, 23169, -25329, 20787, -27244, 18204, -28897, 15446, -30272, 12539, -31356, 9511, -32137, 6392, -32609, 3211
|
||
|
tbl_bitrev: ; tbl_bitrev[] = ...
|
||
|
.dc.w 0*4, 32*4, 16*4, 48*4, 8*4, 40*4, 24*4, 56*4, 4*4, 36*4, 20*4, 52*4, 12*4, 44*4, 28*4, 60*4
|
||
|
.dc.w 2*4, 34*4, 18*4, 50*4, 10*4, 42*4, 26*4, 58*4, 6*4, 38*4, 22*4, 54*4, 14*4, 46*4, 30*4, 62*4
|
||
|
|
||
|
;----------------------------------------------------------------------------;
|
||
|
.global fft_input
|
||
|
.func fft_input
|
||
|
fft_input:
|
||
|
pushw T2H,T2L
|
||
|
pushw AH,AL
|
||
|
pushw YH,YL
|
||
|
|
||
|
movw XL, EL ;X = array_src;
|
||
|
movw YL, DL ;Y = array_bfly;
|
||
|
clr EH ;Zero
|
||
|
ldiw ZH,ZL, tbl_window ;Z = &tbl_window[0];
|
||
|
ldiw AH,AL, FFT_N ;A = FFT_N;
|
||
|
1: lpmw BH,BL, Z+ ;B = *Z++; (window)
|
||
|
ldw CH,CL, X+ ;C = *X++; (I-axis)
|
||
|
FMULS16 DH,DL,T2H,T2L, BH,BL, CH,CL ;D = B * C;
|
||
|
stw Y+, DH,DL ;*Y++ = D;
|
||
|
stw Y+, DH,DL ;*Y++ = D;
|
||
|
subiw AH,AL, 1 ;while(--A)
|
||
|
brne 1b ;/
|
||
|
|
||
|
popw YH,YL
|
||
|
popw AH,AL
|
||
|
popw T2H,T2L
|
||
|
clr r1
|
||
|
ret
|
||
|
.endfunc
|
||
|
|
||
|
;----------------------------------------------------------------------------;
|
||
|
.global fft_execute
|
||
|
.func fft_execute
|
||
|
fft_execute:
|
||
|
pushw T2H,T2L
|
||
|
pushw T4H,T4L
|
||
|
pushw T6H,T6L
|
||
|
pushw T8H,T8L
|
||
|
pushw T10H,T10L
|
||
|
pushw T12H,T12L
|
||
|
pushw T14H,T14L
|
||
|
pushw AH,AL
|
||
|
pushw YH,YL
|
||
|
|
||
|
movw ZL, EL ;Z = array_bfly;
|
||
|
ldiw EH,EL, 1 ;E = 1;
|
||
|
ldiw XH,XL, FFT_N/2 ;X = FFT_N/2;
|
||
|
1: ldi AL, 4 ;T12 = E; (angular speed)
|
||
|
mul EL, AL ;
|
||
|
movw T12L, T0L ;
|
||
|
mul EH, AL ;
|
||
|
add T12H, T0L ;/
|
||
|
movw T14L, EL ;T14 = E;
|
||
|
pushw EH,EL
|
||
|
movw YL, ZL ;Z = &array_bfly[0];
|
||
|
mul XL, AL ;Y = &array_bfly[X];
|
||
|
addw YH,YL, T0H,T0L ;
|
||
|
mul XH, AL ;
|
||
|
add YH, T0L ;/
|
||
|
pushw ZH,ZL
|
||
|
2: clrw T10H,T10L ;T10 = 0 (angle)
|
||
|
clr EH ;Zero reg.
|
||
|
3: lddw AH,AL, Z+0 ;A = *Z - *Y; *Z++ += *Y;
|
||
|
asrw AH,AL ;
|
||
|
lddw DH,DL, Y+0 ;
|
||
|
asrw DH,DL ;
|
||
|
movw CL, AL ;
|
||
|
subw AH,AL, DH,DL ;
|
||
|
addw CH,CL, DH,DL ;
|
||
|
stw Z+, CH,CL ;/
|
||
|
lddw BH,BL, Z+0 ;B = *Z - *Y; *Z++ += *Y;
|
||
|
asrw BH,BL ;
|
||
|
lddw DH,DL, Y+2 ;
|
||
|
asrw DH,DL ;
|
||
|
movw CL, BL ;
|
||
|
subw BH,BL, DH,DL ;
|
||
|
addw CH,CL, DH,DL ;
|
||
|
stw Z+, CH,CL ;/
|
||
|
movw T0L, ZL
|
||
|
ldiw ZH,ZL, tbl_cos_sin ;C = cos(T10); D = sin(T10);
|
||
|
addw ZH,ZL, T10H,T10L ;
|
||
|
lpmw CH,CL, Z+ ;
|
||
|
lpmw DH,DL, Z+ ;/
|
||
|
movw ZL, T0L
|
||
|
FMULS16 T4H,T4L,T2H,T2L, AH,AL, CH,CL ;*Y++ = A * C + B * D;
|
||
|
FMULS16 T8H,T8L,T6H,T6L, BH,BL, DH,DL ;
|
||
|
addd T4H,T4L,T2H,T2L, T8H,T8L,T6H,T6L;
|
||
|
stw Y+, T4H,T4L ;/
|
||
|
FMULS16 T4H,T4L,T2H,T2L, BH,BL, CH,CL ;*Y++ = B * C - A * D;
|
||
|
FMULS16 T8H,T8L,T6H,T6L, AH,AL, DH,DL ;
|
||
|
subd T4H,T4L,T2H,T2L, T8H,T8L,T6H,T6L;
|
||
|
stw Y+, T4H,T4L ;/
|
||
|
addw T10H,T10L, T12H,T12L ;T10 += T12; (next angle)
|
||
|
#if FFT_N >= 128
|
||
|
sbrs T10H, FFT_B - 7 ;while(T10 < pi)
|
||
|
#else
|
||
|
sbrs T10L, FFT_B + 1
|
||
|
#endif
|
||
|
rjmp 3b ;/
|
||
|
ldi AL, 4 ;Y += X; Z += X; (skip split segment)
|
||
|
mul XL, AL
|
||
|
addw YH,YL, T0H,T0L ;
|
||
|
addw ZH,ZL, T0H,T0L ;
|
||
|
mul XH, AL ;
|
||
|
add YH, T0L ;
|
||
|
add ZH, T0L ;/
|
||
|
ldi EL, 1 ;while(--T14)
|
||
|
subw T14H,T14L, EH,EL ;
|
||
|
rjne 2b ;/
|
||
|
popw ZH,ZL
|
||
|
popw EH,EL
|
||
|
lslw EH,EL ;E *= 2;
|
||
|
lsrw XH,XL ;while(X /= 2)
|
||
|
adiw XL, 0 ;
|
||
|
rjne 1b ;/
|
||
|
|
||
|
popw YH,YL
|
||
|
popw AH,AL
|
||
|
popw T14H,T14L
|
||
|
popw T12H,T12L
|
||
|
popw T10H,T10L
|
||
|
popw T8H,T8L
|
||
|
popw T6H,T6L
|
||
|
popw T4H,T4L
|
||
|
popw T2H,T2L
|
||
|
; clr r1
|
||
|
ret
|
||
|
.endfunc
|
||
|
|
||
|
;----------------------------------------------------------------------------;
|
||
|
.global fft_output
|
||
|
.func fft_output
|
||
|
fft_output:
|
||
|
pushw T2H,T2L
|
||
|
pushw T4H,T4L
|
||
|
pushw T6H,T6L
|
||
|
pushw T8H,T8L
|
||
|
pushw T10H,T10L
|
||
|
pushw AH,AL
|
||
|
pushw YH,YL
|
||
|
|
||
|
movw T10L, EL ;T10 = array_bfly;
|
||
|
movw YL, DL ;Y = array_output;
|
||
|
ldiw ZH,ZL, tbl_bitrev ;Z = tbl_bitrev;
|
||
|
clr EH ;Zero
|
||
|
ldiw AH,AL, FFT_N / 2 ;A = FFT_N / 2; (plus only)
|
||
|
1: lpmw XH,XL, Z+ ;X = *Z++;
|
||
|
addw XH,XL, T10H,T10L ;X += array_bfly;
|
||
|
ldw BH,BL, X+ ;B = *X++;
|
||
|
ldw CH,CL, X+ ;C = *X++;
|
||
|
FMULS16 T4H,T4L,T2H,T2L, BH,BL, BH,BL ;T4:T2 = B * B;
|
||
|
FMULS16 T8H,T8L,T6H,T6L, CH,CL, CH,CL ;T8:T6 = C * C;
|
||
|
addd T4H,T4L,T2H,T2L, T8H,T8L,T6H,T6L;T4:T2 += T8:T6;
|
||
|
SQRT32 ;B = sqrt(T4:T2);
|
||
|
stw Y+, BH,BL ;*Y++ = B;
|
||
|
subiw AH,AL, 1 ;while(--A)
|
||
|
rjne 1b ;/
|
||
|
|
||
|
popw YH,YL
|
||
|
popw AH,AL
|
||
|
popw T10H,T10L
|
||
|
popw T8H,T8L
|
||
|
popw T6H,T6L
|
||
|
popw T4H,T4L
|
||
|
popw T2H,T2L
|
||
|
clr r1
|
||
|
ret
|
||
|
.endfunc
|
||
|
|
||
|
;----------------------------------------------------------------------------;
|
||
|
.global fmuls_f
|
||
|
.func fmuls_f
|
||
|
fmuls_f:
|
||
|
movw CL, EL ;C = E;
|
||
|
clr EH ;Zero
|
||
|
FMULS16 ZH,ZL,XH,XL, CH,CL, DH,DL ;Z:X = C * D;
|
||
|
movw EL, ZL
|
||
|
clr r1
|
||
|
ret
|
||
|
.endfunc
|
||
|
|
||
|
#endif // __AVR__
|
||
|
|
||
|
#ifdef __SAMD21G18A__
|
||
|
.cpu cortex-m0plus
|
||
|
.fpu softvfp
|
||
|
#endif
|