/*
void search_3i40(
    Word16 dn[],         //i : correlation between target and h[] 
    Word16 dn2[],        //i : maximum of corr. in each track.    
    Word16 rr[][L_CODE], //i : matrix of autocorrelation
    Word16 codvec[],      //o : algebraic codebook vector
    Word32 *info         //o : debug trace buffer, written only by the LOG/LOG1 macros
)

*/
#include "oscl_base_macros.h"
#if (PV_CPU_ARCH_VERSION >= 6)
    .text
    .align   5
    .word    0
    .global  search_3i40_asm
    .type    search_3i40_asm, function


/*
{
    Word32 i0;
    Word32 i1;
    Word32 i2;

    Word32 ix; 
    Word16 ps; 

    Word32 i;
    Word32 pos;
    Word32 track1;
    Word32 track2;
    Word32 ipos[NB_PULSE];

    Word16 psk;
    Word16 ps0;
    Word16 ps1;
    Word16 sq;
    Word16 sq1;
    Word16 alpk;
    Word16 alp;
    Word16 alp_16;

    Word16 *p_codvec = &codvec[0];

    Word32 s;
    Word32 alp0;
    Word32 alp1;
*/

/*
 * Stack frame layout: byte offsets from sp after the prologue has pushed
 * ten registers and subtracted LOC_SIZE.
 *
 * PS and IX must stay at offsets 0 and 4 (adjacent, in this order): the
 * search loops read and write the pair with STM/LDM directly on sp.
 *
 * The original table defined SQ1 twice (52 and then 64). The later value
 * was the one in effect, so only that definition is kept and offset 52 is
 * left unassigned. Several slots (I2, POS, PS0, PS1, SQ1, DN, P_CODVEC, S,
 * ALP0, ALP1) are not referenced by name in this file.
 */
.equ    PS,        0
.equ    IX,        4
.equ    I0,        8
.equ    I1,        12
.equ    I2,        16
.equ    I,         20
.equ    POS,       24
.equ    TRACK1,    28
.equ    TRACK2,    32
.equ    SQK,       36
.equ    PS0,       40
.equ    PS1,       44
.equ    DN2,       48
/* offset 52: unassigned (formerly a shadowed duplicate of SQ1) */
.equ    ALPK,      56
.equ    DN,        60
.equ    SQ1,       64
.equ    P_CODVEC,  68
.equ    CODVEC,    72
.equ    S,         76
.equ    RR,        80
.equ    ALP0,      84
.equ    ALP1,      88
.equ    II,        92                   /* 40 x Word32: diagonal rr[i][i] << 14 */
.equ    IPOS,      252                  /* 10 x Word32 reserved, first 3 used */
.equ    LOC_SIZE,  296
.equ    INFO,      (LOC_SIZE+10*4)      /* first stack argument, above the 10 saved registers */

/*
 * Register aliases, grouped by physical register.
 *
 * Several aliases share one register; they are never live at the same
 * time, but instruction order inside the loops matters because of it
 * (for example rr_i0 must be consumed before alp1 is written).
 */

/* r0-r3: pointers */
dn        .req  r0      /* dn[] base, held for the whole function */
p_i0      .req  r1      /* address of row i0 of rr */
p_codvec  .req  r1      /* codvec pointer while storing or halving the result */
p_i1      .req  r2      /* address of row i1 of rr */
p_ii      .req  r3      /* base of the II table in the stack frame */

/* r4, r5: values fixed across one inner loop */
alp0      .req  r4
alpk      .req  r4
ps0       .req  r5
sqk       .req  r5

/* r6 */
rr        .req  r6
dn2       .req  r6
alp1      .req  r6
rr_i0     .req  r6
pos0      .req  r6
tmp1      .req  r6

/* r7 */
sq1       .req  r7
rr_i1     .req  r7
pos1      .req  r7

/* r8 */
ps1       .req  r8
rr_i2     .req  r8
pos2      .req  r8
track1    .req  r8

/* r9 */
dn_i      .req  r9
dn2_i     .req  r9
s         .req  r9
track2    .req  r9

/* r10, r11, r12: loop counters and position indices */
i         .req  r10
i0        .req  r10
i1        .req  r11
sq        .req  r11     /* best-so-far value inside the inner loops */
ii        .req  r12

/* r14 */
alp       .req  r14     /* best-so-far value inside the inner loops */


    /* LOAD rd, slot -- rd = 32-bit word at byte offset slot from sp. */
    .macro LOAD rd, slot
                LDR     \rd, [sp, #\slot]
    .endm

    /* STORE rs, slot -- write the 32-bit register rs at byte offset slot from sp. */
    .macro STORE rs, slot
                STR     \rs, [sp, #\slot]
    .endm

    /* STORELT rs, slot -- same as STORE, executed only when the flags say "signed less than". */
    .macro STORELT rs, slot
                STRLT   \rs, [sp, #\slot]
    .endm

    /*
     * LOG val1..val5 -- debug helper: append five 32-bit registers to the
     * buffer whose current write pointer lives in the INFO slot, then
     * advance that pointer by 20 bytes.
     * Uses i0 as scratch (parked in the I0 slot meanwhile); flags are not
     * modified. The arguments must not be i0 itself.
     */
    .macro LOG  val1, val2, val3, val4, val5
                STORE     i0, I0
                LOAD      i0, INFO
                STR       \val1, [i0, #0]
                STR       \val2, [i0, #4]
                STR       \val3, [i0, #8]
                STR       \val4, [i0, #12]
                STR       \val5, [i0, #16]
                ADD       i0, i0, #20
                STORE     i0, INFO
                LOAD      i0, I0
    .endm

    /*
     * LOG1 val -- debug helper: append one 32-bit register to the buffer
     * whose current write pointer lives in the INFO slot, then advance
     * that pointer by 4 bytes.
     * Uses i0 as scratch (parked in the I0 slot meanwhile); flags are not
     * modified. The argument must not be i0 itself.
     */
    .macro LOG1  val
                STORE     i0, I0
                LOAD      i0, INFO
                STR       \val, [i0]
                ADD       i0, i0, #4
                STORE     i0, INFO
                LOAD      i0, I0
    .endm


    /*
     * Copy8 -- read eight signed halfwords through r2, stepping 82 bytes
     * (80+2) after each read, shift each value left by 14 and store the
     * eight 32-bit results through r4.
     * In:  r2 = source pointer, r4 = destination pointer
     * Out: r2 advanced by 8*82 bytes, r4 advanced by 32 bytes
     * Clobbers r5-r12.
     */
    .macro Copy8
            LDRSH r5, [r2], #80+2
            MOV   r5, r5, LSL #14
            LDRSH r6, [r2], #80+2
            MOV   r6, r6, LSL #14
            LDRSH r7, [r2], #80+2
            MOV   r7, r7, LSL #14
            LDRSH r8, [r2], #80+2
            MOV   r8, r8, LSL #14
            LDRSH r9, [r2], #80+2
            MOV   r9, r9, LSL #14
            LDRSH r10, [r2], #80+2
            MOV   r10, r10, LSL #14
            LDRSH r11, [r2], #80+2
            MOV   r11, r11, LSL #14
            LDRSH r12, [r2], #80+2
            MOV   r12, r12, LSL #14

            STMIA r4!, {r5-r12}
    .endm


/*
 * search_3i40_asm -- ARMv6 implementation of the 3-pulse codebook search.
 *
 * Arguments as consumed below:
 *   r0         dn      Word16 array, read with LDRSH, kept in r0 throughout
 *   r1         dn2     Word16 array, saved in the DN2 slot
 *   r2         rr      Word16 matrix with 80-byte rows, saved in the RR slot
 *   r3         codvec  Word16 result, three entries, saved in the CODVEC slot
 *   [sp,#INFO] info    first stack argument; only the LOG/LOG1 macros use it
 *
 * Conventions:
 *   - Every position index (i0, i1, i2, ipos[], track1, track2) is kept
 *     multiplied by 2 so that it is directly a byte offset into a Word16
 *     array. Loop steps and bounds are therefore 10 and 80, and the
 *     result is halved once at END before returning.
 *   - The II table on the stack caches rr[i][i] << 14 as 32-bit words.
 *   - The comparisons use SMMULR/SMMLSR, i.e. the rounded top 32 bits of
 *     the 64-bit products, instead of the 16-bit values of the C reference
 *     quoted in the comments.
 *   - PS and IX are frame offsets 0 and 4 and are accessed as a pair with
 *     STM/LDM on sp.
 *
 * All callee-saved registers used (r4-r12, lr) are saved and restored.
 */
search_3i40_asm:
    STMFD   sp!, {r4-r12, lr}
    SUB     sp, sp, #LOC_SIZE           @ reserve the local frame

    @ Save the pointer arguments that get clobbered later.
    STORE   r1, DN2
    STORE   r2, RR
    STORE   r3, CODVEC

//  psk  = -0x10000;
//  alpk =  0x10000;
    MOV     r4, #0
    SUB     r4, r4, #0x10000
    MOV     r5, #0x10000
    STORE   r4, SQK
    STORE   r5, ALPK

    @ Fill II[0..39] with rr[i][i] << 14 (five groups of eight entries).
    ADD     r4, sp, #II
    Copy8
    Copy8
    Copy8
    Copy8
    Copy8

//  for (i = 0; i < NB_PULSE; i++)
//  {
//      *(p_codvec++) = i;
//  }
    @ Default result. Positions are stored doubled here (0, 2, 4) because
    @ END halves every entry; this yields the reference default 0, 1, 2.
    @ Three halfword stores are used so that exactly three entries are
    @ written and no word alignment of codvec is required.
    MOV     r4, #0
    MOV     r5, #2
    MOV     r6, #4
    STRH    r4, [r3]
    STRH    r5, [r3, #2]
    STRH    r6, [r3, #4]

    ADD     p_ii, sp, #II               @ r3 now addresses the II table

//  for (track1 = 1; track1 < 4; track1 += 2)      (doubled: 2, <8, +=4)
//  {
    MOV     track1, #2
    STORE   track1, TRACK1
LOOP1:

//      for (track2 = 2; track2 < 5; track2 += 2)  (doubled: 4, <10, +=4)
//      {
    MOV     track2, #4
    STORE   track2, TRACK2
LOOP2:

//          ipos[0] = 0;
//          ipos[1] = track1;
//          ipos[2] = track2;
    LOAD    track1, TRACK1
    MOV     tmp1, #0
    ADD     r7, sp, #IPOS
    STM     r7, {tmp1, track1, track2}  @ r6 < r8 < r9, so the order is ipos[0..2]

//          for (i = 0; i < NB_PULSE; i++)
//          {
    MOV     i, #3                       @ NB_PULSE iterations, counted down
    STORE   i, I
LOOP3:

//              for (i0 = ipos[0]; i0 < L_CODE; i0 += STEP)
//              {
    LOAD    i0, IPOS                    @ overwrites i (same register, saved in I)
LOOP4:

//                  if (dn2[i0] >= 0)
//                  {
    LOAD    dn2, DN2
    LDRSH   dn2_i, [dn2, i0]            @ i0 is already a byte offset
    CMP     dn2_i, #0
    BLT     LOOP4_END

//                      alp0 = (Word32) rr[i0][i0] << 14;
    LDR     alp0, [p_ii, i0, LSL #1]    @ pre-shifted value from the II table

//                      ps0 = dn[i0];
    LDRSH   ps0, [dn, i0]

//                      ix  = ipos[1];
//                      ps  = 0;
//                      sq  = -0x10000;
//                      alp = 0x10000;
    LDR     ii, [sp, #IPOS+4]
    MOV     r8, #0
    SUB     sq, r8, #0x10000
    LOAD    rr, RR
    MOV     alp, #0x10000
    STM     sp, {r8, ii}                @ PS = 0, IX = ipos[1]

    @ p_i0 = rr + i0*40 bytes (i0 doubled, so this is row i0 of 80 bytes).
    ADD     p_i0, rr, i0, LSL #5
    ADD     p_i0, p_i0, i0, LSL #3

//                      for (i1 = ipos[1]; i1 < L_CODE; i1 += STEP)
//                      {
LOOP5_1:
//                          ps1  = ps0 + dn[i1];
//                          alp1 = alp0 + ((Word32) rr[i1][i1] << 14);
//                          alp1 += (Word32) rr[i0][i1] << 15;
    LDRSH   dn_i, [dn, ii]
    LDR     rr_i1, [p_ii, ii, LSL #1]   @ already shifted left by 14
    LDRSH   rr_i0, [p_i0, ii]

    ADD     ps1, ps0, dn_i
    ADD     alp1, alp0, rr_i0, LSL #15  @ alp1 reuses the rr_i0 register
    ADD     alp1, alp1, rr_i1

//                          sq1 = ((Word32) ps1 * ps1) << 1;
    MUL     sq1, ps1, ps1               @ sq1 reuses the rr_i1 register

//                          s  = sq * alp1;
//                          s -= alp * sq1;
    SMMULR  s, sq, alp1                 @ s reuses the dn_i register
    LSL     sq1, sq1, #1
    SMMLSR  s, alp, sq1, s

//                          if (s < 0)
//                          {
//                              alp = alp1;
//                              sq  = sq1;
//                              ps  = ps1;
//                              ix  = i1;
//                          }
    CMP     s, #0
    STMLT   sp, {ps1, ii}               @ PS = ps1, IX = i1
    MOVLT   alp, alp1
    MOVLT   sq, sq1

//                      }
LOOP5_1_END:
    ADD     ii, ii, #10
    CMP     ii, #80
    BLT     LOOP5_1

//                      i1   = ix;
//                      ps0  = ps;
//                      alp0 = (Word32) alp >> 2;
    LDM     sp, {ps0, i1}               @ ps0 = PS, i1 = IX (i1 overwrites sq)
    LOAD    rr, RR
    ASR     alp0, alp, #2
    STORE   i1, I1                      @ i1 shares a register with sq, keep a copy

    ADD     p_i0, rr, i0, LSL #5
    ADD     p_i0, p_i0, i0, LSL #3      @ row i0

    ADD     p_i1, rr, i1, LSL #5
    ADD     p_i1, p_i1, i1, LSL #3      @ row i1

//                      ix  = ipos[2];
//                      alp = 0x10000;
//                      ps  = 0;
//                      sq  = -0x10000;
    LDR     ii, [sp, #IPOS+8]
    MOV     r8, #0
    SUB     sq, r8, #0x10000
    MOV     alp, #0x10000
    STM     sp, {r8, ii}                @ PS = 0, IX = ipos[2]

//                      for (i2 = ipos[2]; i2 < L_CODE; i2 += STEP)
//                      {
LOOP5_2:
//                          ps1  = ps0 + dn[i2];
//                          alp1 = alp0 + ((Word32) rr[i2][i2] << 12);
//                          alp1 += (Word32) rr[i1][i2] << 13;
//                          alp1 += (Word32) rr[i0][i2] << 13;
    LDRSH   rr_i0, [p_i0, ii]
    LDRSH   rr_i1, [p_i1, ii]
    LDR     rr_i2, [p_ii, ii, LSL #1]   @ already shifted left by 14
    LDRSH   dn_i, [dn, ii]

    ADD     alp1, alp0, rr_i0, LSL #13  @ alp1 reuses the rr_i0 register
    ADD     alp1, alp1, rr_i1, LSL #13
    ADD     alp1, alp1, rr_i2, ASR #2   @ (x << 14) >> 2 gives x << 12

    ADD     ps1, ps0, dn_i              @ ps1 reuses rr_i2, which is consumed above

//                          sq1 = ((Word32) ps1 * ps1) << 1;
    MUL     sq1, ps1, ps1

//                          s  = sq * alp1;
//                          s -= alp * sq1;
    SMMULR  s, sq, alp1
    LSL     sq1, sq1, #1
    SMMLSR  s, alp, sq1, s

//                          if (s < 0)
//                          {
//                              sq  = sq1;
//                              ps  = ps1;
//                              alp = alp1;
//                              ix  = i2;
//                          }
    CMP     s, #0
    STMLT   sp, {ps1, ii}               @ PS = ps1, IX = i2
    MOVLT   alp, alp1
    MOVLT   sq, sq1

//                      }
LOOP5_2_END:
    ADD     ii, ii, #10
    CMP     ii, #80
    BLT     LOOP5_2

//                      s  = alpk * sq;
//                      s -= psk * alp;
    LOAD    alpk, ALPK
    LOAD    sqk, SQK

    SMMULR  s, alpk, sq
    SMMLSR  s, sqk, alp, s

//                      if (s > 0)
//                      {
//                          psk  = sq;
//                          alpk = alp;
//                          p_codvec = &codvec[0];
//                          i2 = ix;
//                          *(p_codvec++) = (Word16) i0;
//                          *(p_codvec++) = (Word16) i1;
//                          *(p_codvec)   = (Word16) i2;
//                      }
    CMP     s, #0
    BLE     LOOP4_END

    LOAD    p_codvec, CODVEC            @ overwrites p_i0, recomputed for the next i0
    STORE   sq, SQK                     @ must precede the reload of i1 (same register)
    STORE   alp, ALPK
    LOAD    ii, IX                      @ i2 = ix
    LOAD    i1, I1

    STRH    i0, [p_codvec], #2          @ stored doubled, halved at END
    STRH    i1, [p_codvec], #2
    STRH    ii, [p_codvec]

//                  } // if (dn2[i0] >= 0)

//              } // for (i0 = ipos[0]; i0 < L_CODE; i0 += STEP)
LOOP4_END:
    ADD     i0, i0, #10
    CMP     i0, #80
    BLT     LOOP4

//              pos     = ipos[2];
//              ipos[2] = ipos[1];
//              ipos[1] = ipos[0];
//              ipos[0] = pos;
    LDR     pos0, [sp, #IPOS]
    LDR     pos1, [sp, #IPOS+4]
    LDR     pos2, [sp, #IPOS+8]

    LOAD    i, I                        @ reload the LOOP3 counter (register was i0)

    STR     pos0, [sp, #IPOS+4]
    STR     pos1, [sp, #IPOS+8]
    STR     pos2, [sp, #IPOS]

//          } // for (i = 0; i < NB_PULSE; i++)
LOOP3_END:
    SUBS    i, i, #1
    STORE   i, I                        @ STR leaves the flags from SUBS intact
    BNE     LOOP3

//      } // for (track2 ...), doubled bounds: 4, <10, +=4
LOOP2_END:
    LOAD    track2, TRACK2
    ADD     track2, track2, #4
    CMP     track2, #10
    STORELT track2, TRACK2
    BLT     LOOP2

//  } // for (track1 ...), doubled bounds: 2, <8, +=4
LOOP1_END:
    LOAD    track1, TRACK1
    ADD     track1, track1, #4
    CMP     track1, #8
    STORELT track1, TRACK1
    BLT     LOOP1

//  return;
END:
    @ Convert the three stored byte offsets back to element indices.
    LOAD    p_codvec, CODVEC

    LDRSH   i0, [p_codvec], #2
    LDRSH   i1, [p_codvec], #2
    LDRSH   ii, [p_codvec]

    ASR     i0, i0, #1
    ASR     i1, i1, #1
    ASR     ii, ii, #1

    SUB     p_codvec, p_codvec, #4      @ back to codvec[0]

    STRH    i0, [p_codvec], #2
    STRH    i1, [p_codvec], #2
    STRH    ii, [p_codvec]

    ADD     sp, sp, #LOC_SIZE           @ release the local frame
    LDMFD   sp!, {r4-r12, pc}
    #endif
