
//void Syn_filt_32(
//     Word16 a[],                           
//     Word16 m,                             
//     Word16 exc[],                         
//     Word16 Qnew,                        
//     Word16 sig_hi[],                      
//     Word16 sig_lo[],                      
//     Word16 lg                             
//)
//***************************************************************
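// Argument locations on entry to the assembly routine below:
// a[]      --- r0
// m        --- r1   (never read by the code; a[1]..a[16] are always used)
// exc[]    --- r2
// Qnew     --- r3
// sig_hi[] --- first stack argument
// sig_lo[] --- second stack argument
// lg       --- never read by the code; the loop always produces 64 samples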

#include "oscl_base_macros.h"
#include "oscl_base.h"

#if (PV_CPU_ARCH_VERSION >= 6 && PV_COMPILER == EPV_ARM_RVCT)
// Syn_filt_32dec_asm
//
// Hand-scheduled ARMv6 (RVCT embedded assembler) synthesis filter that keeps
// the output in split form: sig_hi[] holds bits 16..31 of each result and
// sig_lo[] holds bits 4..15.
//
// What the code actually does, independent of the m / lg arguments:
//   * a[1]..a[16] are loaded and packed two-per-word into a 32-byte stack
//     scratch area; m (r1) is never read, r1 is reused as a scratch register.
//   * The loop runs exactly 32 times and emits two samples per pass, so 64
//     entries of exc[] are consumed and 64 entries of sig_hi[] / sig_lo[] are
//     written; the lg stack argument is never loaded.
//   * sig_hi[-16..-1] and sig_lo[-16..-1] are read as filter history.
//
// Per output sample n (sum_hi / sum_lo are the 16-tap dot products of a[1..16]
// with the previous sig_hi / sig_lo values):
//     L         = ((-sum_lo) >> 11) + (exc[n] << (9 - Qnew)) - (sum_hi << 1)
//     L         = saturate_to_29_bits(L) << 3
//     sig_hi[n] = L >> 16
//     sig_lo[n] = (L >> 4) - (sig_hi[n] << 12)
//
// Register roles inside the loop:
//     r0        sig_hi history pair (also temporarily exc[n])
//     r1        sig_lo history pair
//     r2        exc pointer (post-incremented)
//     r3        9 - Qnew
//     r4, r5    &sig_hi[i], &sig_lo[i] (i = first sample of this pass)
//     r6-r8     rotating packed coefficient pairs (top:bottom)
//     r9,  r11  sum_hi, sum_lo for sample i
//     r10, r12  sum_hi, sum_lo for sample i+1
//     r14       remaining loop passes
//
// NOTE(review): the bottom/top halfword pairing used by the SMLAxy/SMLADX
// instructions presumes little-endian word loads from sig_hi[] / sig_lo[],
// and that those int16 arrays may legally be read with LDR at these
// addresses -- confirm against the callers and target configuration.
__asm     void Syn_filt_32dec_asm(
	    int16 a[],                           /* (i) Q12 : a[m+1] prediction coefficients */
	    int16 m,                             /* (i)     : order of LP filter             */
	    int16 exc[],                         /* (i) Qnew: excitation (exc[i] >> Qnew)    */
	    int16 Qnew,                          /* (i)     : exc scaling = 0(min) to 8(max) */
	    int16 sig_hi[],                      /* (o) /16 : synthesis high                 */
	    int16 sig_lo[],                      /* (o) /16 : synthesis low                  */
	    int16 lg                             /* (i)     : size of filtering              */
	)
{
        PRESERVE8

        STMFD   r13!, {r4 - r12, r14}            // push 10 registers (40 bytes)
        LDR     r4,  [r13, #40]                  // get sig_hi[] address (stack arg above the 40 saved bytes)
        LDR     r5,  [r13, #44]                  // get sig_lo[] address

        SUB     r13, r13, #32                    // reserve 32 bytes of stack for the packed a[16..1] pairs

        RSB     r3,  r3, #9                      // r3 = 9 - Qnew (left shift applied to exc[])

        // Pack a[1..16] two per word (odd index in the bottom half, even index
        // in the top half) and store them so that [r13] holds A16:A15 and
        // [r13, #28] holds A2:A1.
        LDRSH   r6, [r0, #2]                     // load Aq[1]
        LDRSH   r7, [r0, #4]                     // load Aq[2]
        LDRSH   r8, [r0, #6]                     // load Aq[3]
        LDRSH   r9, [r0, #8]                     // load Aq[4]
        PKHBT   r10, r6, r7, LSL #16             // Aq[2] -- Aq[1]
        PKHBT   r11, r8, r9, LSL #16             // Aq[4] -- Aq[3]
        STR     r10, [r13, #28]
        STR     r11, [r13, #24]

        LDRSH   r6, [r0, #10]                    // load Aq[5]
        LDRSH   r7, [r0, #12]                    // load Aq[6]
        LDRSH   r8, [r0, #14]                    // load Aq[7]
        LDRSH   r9, [r0, #16]                    // load Aq[8]
        PKHBT   r10, r6, r7, LSL #16             // Aq[6] -- Aq[5]
        PKHBT   r11, r8, r9, LSL #16             // Aq[8] -- Aq[7]
        STR     r10, [r13, #20]
        STR     r11, [r13, #16]

        LDRSH   r6, [r0, #18]                    // load Aq[9]
        LDRSH   r7, [r0, #20]                    // load Aq[10]
        LDRSH   r8, [r0, #22]                    // load Aq[11]
        LDRSH   r9, [r0, #24]                    // load Aq[12]
        PKHBT   r10, r6, r7, LSL #16             // Aq[10] -- Aq[9]
        PKHBT   r11, r8, r9, LSL #16             // Aq[12] -- Aq[11]
        STR     r10, [r13, #12]
        STR     r11, [r13, #8]

        LDRSH   r6, [r0, #26]                    // load Aq[13]
        LDRSH   r7, [r0, #28]                    // load Aq[14]
        LDRSH   r8, [r0, #30]                    // load Aq[15]
        LDRSH   r9, [r0, #32]                    // load Aq[16]
        PKHBT   r10, r6, r7, LSL #16             // Aq[14] -- Aq[13]
        PKHBT   r11, r8, r9, LSL #16             // Aq[16] -- Aq[15]
        STR     r10, [r13, #4]
        STR     r11, [r13]

        MOV     r14, #32                         // loop count: each pass produces 2 sig_hi and 2 sig_lo samples
        // r9 / r10 = sum_hi for samples i / i+1, r11 / r12 = sum_lo for samples i / i+1
        LDM     r13, {r6-r8}                  //load A16 - A11
        LDR     r0,  [r4, #-32]                  // preload sig_hi history pair at byte offset -32

LOOP
        // history pair at -32 with A16:A15 (first instructions initialise the sums)
        LDR       r1, [r5, #-32]
        SMUADX    r9 , r0, r6
        SMULTT    r10, r0, r6
        LDR       r0,  [r4, #-28]   // preload next sig_hi pair
        SMUADX    r11, r1, r6
        SMULTT    r12, r1, r6
        // history pair at -28 with A14:A13 (plus A15 for sample i+1)
        LDR       r1,  [r5, #-28]
        SMLABB    r10, r0, r6, r10
        SMLADX    r9,  r0, r7, r9
        SMLATT    r10, r0, r7, r10
        LDR       r0,  [r4, #-24]   // preload next sig_hi pair
        SMLABB    r12, r1, r6, r12
        SMLADX    r11, r1, r7, r11
        SMLATT    r12, r1, r7, r12
        // history pair at -24 with A12:A11 (plus A13 for sample i+1)
        LDR       r1,  [r5, #-24]
        SMLABB    r10, r0, r7, r10
        SMLADX    r9,  r0, r8, r9
        SMLATT    r10, r0, r8, r10
        LDR       r6,  [r13, #12]        // load a10 - a9
        LDR       r0,  [r4, #-20]        // preload next sig_hi pair
        SMLABB    r12, r1, r7, r12
        SMLADX    r11, r1, r8, r11
        SMLATT    r12, r1, r8, r12
        // history pair at -20 with A10:A9 (plus A11 for sample i+1)

        LDR       r1,  [r5, #-20]
        SMLABB    r10, r0, r8, r10
        SMLADX    r9,  r0, r6, r9
        SMLATT    r10, r0, r6, r10
        LDR       r7,  [r13, #16]        // load a8 - a7
        LDR       r0,  [r4, #-16]        // preload next sig_hi pair
        SMLABB    r12, r1, r8, r12
        SMLADX    r11, r1, r6, r11
        SMLATT    r12, r1, r6, r12
        // history pair at -16 with A8:A7 (plus A9 for sample i+1)
        LDR       r1,  [r5, #-16]
        SMLABB    r10, r0, r6, r10
        SMLADX    r9,  r0, r7, r9
        SMLATT    r10, r0, r7, r10
        LDR       r8,  [r13, #20]        // load a6 - a5
        LDR       r0,  [r4, #-12]        // preload next sig_hi pair
        SMLABB    r12, r1, r6, r12
        SMLADX    r11, r1, r7, r11
        SMLATT    r12, r1, r7, r12
        // history pair at -12 with A6:A5 (plus A7 for sample i+1)
        LDR       r1,  [r5, #-12]
        SMLABB    r10, r0, r7, r10
        SMLADX    r9,  r0, r8, r9
        SMLATT    r10, r0, r8, r10
        LDR       r6,  [r13, #24]        // load a4 - a3
        LDR       r0,  [r4, #-8]         // preload next sig_hi pair
        SMLABB    r12, r1, r7, r12
        SMLADX    r11, r1, r8, r11
        SMLATT    r12, r1, r8, r12
        // history pair at -8 with A4:A3 (plus A5 for sample i+1)
        LDR       r1,  [r5, #-8]
        SMLABB    r10, r0, r8, r10
        SMLADX    r9,  r0, r6, r9
        SMLATT    r10, r0, r6, r10
        LDR       r7,  [r13, #28]        // load a2 - a1
        LDR       r0,  [r4, #-4]         // preload last sig_hi pair
        SMLABB    r12, r1, r8, r12
        SMLADX    r11, r1, r6, r11
        SMLATT    r12, r1, r6, r12
        // history pair at -4 with A2:A1 (plus A3 for sample i+1)
        LDR       r1,  [r5, #-4]
        SMLABB    r10, r0, r6, r10
        SMLADX    r9,  r0, r7, r9
        SMLATT    r10, r0, r7, r10

        SMLABB    r12, r1, r6, r12
        SMLADX    r11, r1, r7, r11
        SMLATT    r12, r1, r7, r12

        // registers free at this point: r0, r1, r6, r8 (r7 still holds A2:A1)
        // ---- finish sample i ----
        LDRSH     r0, [r2],#2                      // load exc[i]
        RSB       r11, r11, #0                     // -L_tmp
        MOV       r11, r11, ASR #11                // L_tmp >>= 11
        ADD       r11, r11, r0, LSL r3             //L_tmp1 += (int32)exc[i] << a0;
        SUB       r11, r11, r9, LSL #1             //L_tmp1 -= (L_tmp2 << 1);
        SSAT      r11, #29, r11                    // saturate so the following << 3 cannot overflow
        MOV       r11, r11, LSL #3
        LDRSH     r0,  [r2],#2                     // load exc[i+1] (used for the second sample below)
        MOV       r9,  r11, ASR #16                // high part = result >> 16
        STRH      r9,  [r4], #2                    // store sig_hi[i], advance pointer

        MOV       r11, r11, ASR #4
        SUB       r11, r11, r9, LSL #12            // low part = (result >> 4) - (high << 12)
        STRH      r11, [r5], #2                    // store sig_lo[i], advance pointer

        // sample i+1 still needs the a[1] tap, which multiplies the values just produced
        SMLABB    r10, r9,  r7, r10
        SMLABB    r12, r11, r7, r12

        // ---- finish sample i+1 ----
        RSB       r12, r12, #0                     // -L_tmp
        MOV       r12, r12, ASR #11                // L_tmp >>= 11
        ADD       r12, r12, r0, LSL r3             // += (int32)exc[i+1] << a0
        SUB       r12, r12, r10, LSL #1            // -= (sum_hi << 1)
        SSAT      r12, #29, r12                    // saturate so the following << 3 cannot overflow
        MOV       r12, r12, LSL #3
        MOV       r10, r12, ASR #16                // high part = result >> 16
        STRH      r10, [r4], #2                    // store sig_hi[i+1], advance pointer
        MOV       r12, r12, ASR #4
        SUB       r12, r12, r10, LSL #12           // low part = (result >> 4) - (high << 12)
        SUBS      r14, r14, #1                     // decrement pass counter; flags are consumed by BNE below
        LDM       r13, {r6-r8}                     // reload A16 - A11 ahead of the next pass
        LDR       r0, [r4, #-32]                   // preload first sig_hi history pair for the next pass
        STRH      r12, [r5], #2                    // store sig_lo[i+1], advance pointer
        BNE       LOOP

Syn_filt_32_end
        ADD       r13, r13, #32                    // release the 32-byte coefficient scratch area
        LDMFD     r13!, {r4 - r12, r15}            // restore saved registers and return (saved r14 is popped into pc)
}
#endif

