blob: 883b4faad72fbe0046331159e239691fc5f54235 [file] [log] [blame]
/********************************************************************************************************
* @file div_mod.S
*
* @brief for TLSR chips
*
* @author public@telink-semi.com;
* @date Sep. 30, 2010
*
* @attention
*
* Copyright (C) 2019-2020 Telink Semiconductor (Shanghai) Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*******************************************************************************************************/
// Operation selectors. Each entry point below loads one of these into r2
// and jumps to div, which writes r2 as a byte to 0x800660 and uses
// "r2 <= 1" to choose which result word to read back.
#define UDIV #0
#define SDIV #1
#define UMOD #2
#define SMOD #3
// Stride immediate used by mz_mul2: its main loop subtracts this from the
// remaining word count and handles this many words per pass.
#define MUL2_STEP 8
.code 16
.text
//-----------------------------------------------------------------------
// __modsi3 (libgcc helper name for signed 32-bit remainder)
// In:   r0, r1 = operands, passed through untouched
// Does: r2 = SMOD, then jumps to div, which returns to the original
//       caller through lr.
//-----------------------------------------------------------------------
	.section	.ram_code,"ax"		// in ram code
	.align		2
	.code		16
	.thumb_func
	.global		__modsi3
	.type		__modsi3, %function
__modsi3:
	tmov		r2, SMOD		// operation selector for div
	tj		div
	.size		__modsi3, .-__modsi3
//-----------------------------------------------------------------------
// __divsi3 (libgcc helper name for signed 32-bit division)
// In:   r0, r1 = operands, passed through untouched
// Does: r2 = SDIV, then jumps to div, which returns to the original
//       caller through lr.
//-----------------------------------------------------------------------
	.section	.ram_code,"ax"		// in ram code
	.align		2
	.code		16
	.thumb_func
	.global		__divsi3
	.type		__divsi3, %function
__divsi3:
	tmov		r2, SDIV		// operation selector for div
	tj		div
	.size		__divsi3, .-__divsi3
//-----------------------------------------------------------------------
// __umodsi3 (libgcc helper name for unsigned 32-bit remainder)
// In:   r0, r1 = operands, passed through untouched
// Does: r2 = UMOD, then jumps to div, which returns to the original
//       caller through lr.
//-----------------------------------------------------------------------
	.section	.ram_code,"ax"		// in ram code
	.align		2
	.code		16
	.thumb_func
	.global		__umodsi3
	.type		__umodsi3, %function
__umodsi3:
	tmov		r2, UMOD		// operation selector for div
	tj		div
	.size		__umodsi3, .-__umodsi3
//-----------------------------------------------------------------------
// __udivsi3 (libgcc helper name for unsigned 32-bit division)
// In:   r0, r1 = operands, passed through untouched
// Does: r2 = UDIV, then jumps to div, which returns to the original
//       caller through lr.
//-----------------------------------------------------------------------
	.section	.ram_code,"ax"		// in ram code
	.align		2
	.code		16
	.thumb_func
	.global		__udivsi3
	.type		__udivsi3, %function
__udivsi3:
	tmov		r2, UDIV		// operation selector for div
	tj		div
	.size		__udivsi3, .-__udivsi3
//-----------------------------------------------------------------------
// div: shared back end of __divsi3 / __udivsi3 / __modsi3 / __umodsi3.
// In:   r0 = first operand  (stored to the word at 0x800664)
//       r1 = second operand (stored to the word at 0x800668)
//       r2 = operation selector (UDIV / SDIV / UMOD / SMOD)
// Out:  r0 = word read back from 0x800664 when r2 <= 1 (the two DIV
//            selectors), otherwise the word read back from 0x800668
// Saved/restored: r3, r4 and the status register.
// NOTE(review): the three addresses presumably belong to a hardware
// divider (0x800660 control/busy, 0x800664 dividend/quotient,
// 0x800668 divisor/remainder) - confirm against the chip documentation.
// NOTE(review): a zero divisor is not checked here; the result then
// depends on the hardware.
//-----------------------------------------------------------------------
.section .ram_code,"ax" //in ram code
.align 2
.global div
.code 16
.thumb_func
.type div, %function
div:
// Save the status register together with r4, then set bit 0x80 in it.
// NOTE(review): bit 0x80 is presumably the interrupt-disable bit, keeping
// the store/poll/load sequence below from being interleaved - confirm.
tmrcs r3
tpush {r3, r4}
tmov r4, #0x80
tor r3, r4
tmcsr r3
// r3 = 0x800664 (first word of the literal pool at .L11)
tloadr r3, .L11
tstorer r0, [r3]
// r3 = 0x800668
tadd r3, r3, #4
tstorer r1, [r3]
// r3 = 0x800660; writing the selector byte comes last, after both operands
tsub r3, r3, #8
tstorerb r2, [r3]
// Poll the byte at 0x800660 until it reads back as zero.
.L2:
tloadrb r0, [r3]
tcmp r0, #0
tjne .L2
// Selectors 0 and 1 (UDIV, SDIV) take the .L4 path, 2 and 3 fall through.
tcmp r2, #1
tjls .L4
// MOD selectors: result comes from 0x800668
tadd r3, r3, #8
tloadr r0, [r3]
tj .L6
.L4:
// DIV selectors: result comes from 0x800664
tadd r3, r3, #4
tloadr r0, [r3]
.L6:
// The saved status register comes back in r3 and is written back,
// undoing the bit set on entry.
tpop {r3, r4}
tmcsr r3
tjex lr
.align 4
// Literal pool. Only the first word is loaded by the code above; the other
// two addresses are reached by adding to / subtracting from r3.
.L11:
.word(0x800664)
.word(0x800660)
.word(0x800668)
.size div, .-div
//removed
// mul32x32_64 is compiled out by the #if 0 below.
// As written: r0 = r0 * r1 via tmul, then r1 is loaded from the word at
// 0x008006fc and the routine returns, leaving a result in r0 and r1.
// NOTE(review): the word at 0x008006fc is presumably the high half of the
// tmul product, which would make r1:r0 the 64-bit result - confirm.
#if 0
//.section .ram_code,"ax" //in ram code
.align 4
.global mul32x32_64
.thumb_func
.type mul32x32_64, %function
mul32x32_64:
tmul r0, r1
// r1 = address literal stored after the tjex below
tloadr r1, [pc, #4]
// r1 = word at that address
tloadr r1, [r1, #0]
tjex lr
.word(0x008006fc)
#endif
#if 0
// mz_mul1 is compiled out by the enclosing #if 0.
// One-word-per-pass form of the multiply-accumulate that mz_mul2 performs:
// r0 = y (read, added to, written back), r1 = a (source words),
// r2 = word count, r3 = b (multiplier). r5 carries between words and is
// stored to the next y slot after the last word.
// NOTE(review): the word at 0x008006fc is treated by the comments as the
// high half of the tmul product - confirm against the chip documentation.
//.section .ram_code,"ax" //in ram code
.align 4
.global mz_mul1
.thumb_func
.type mz_mul1, %function
mz_mul1:
tpush {r4, r5, r6, r7}
tmov r4, r8
tpush {r4} // save r8 through r4
tmov r4, #1
tmov r8, r4 //r8 = 1
tloadr r6, [pc, #4] //r6 REG_ADDR32(0x6fc), literal follows the tj below
tmovs r5, #0 //clear carry
tj MZ_MUL1_END // enter at the load/count test
.word(0x008006fc)
MZ_MUL1_START:
tmul r4, r3 // l0 = a0 * b
tloadr r7, [r0, #0] // y0
tadd r4, r5 // l0 + c => c0
tsubc r5, r5 // c0 - 1
tadd r4, r7 // l0 + c + y0 => c1
tloadr r7, [r6, #0] // r7 = h0
tadd r5, r8 // c0 - 1 + 1 = c0 (nc)
tstorem r0!, {r4} // store y0
taddc r5, r7 // cn = c0 + h0 + c1
MZ_MUL1_END:
tloadm r1!, {r4} // load *a
tsub r2, #1 // r2--
tcmp r2, #0
tjge MZ_MUL1_START // another word left to process
tstorem r0!, {r5} // store the final carry word
tpop {r4}
tmov r8, r4 // restore r8
tpop {r4, r5, r6, r7}
tjex lr
#endif
//.section .ram_code,"ax" //in ram code
//-----------------------------------------------------------------------
// mz_mul2: multiply a word array by one 32-bit word and accumulate.
// In:   r0 = y, words that are read, added to and written back
//       r1 = a, source words
//       r2 = n, number of words
//       r3 = b, multiplier
// For each of the n words:
//       y[i] = low(a[i] * b) + carry + y[i]
//       carry = c0 + h + c1, where c0/c1 are the carries out of the two
//               additions and h is the word read from 0x008006fc after
//               the tmul.
// After the last word the carry is stored to the next y slot (a plain
// store, not an addition).
// Main loop: MUL2_STEP (8) words per pass, fully unrolled as four pairs.
// Remainder loop (MZ_MUL2_START2): one word per pass.
// Register roles inside the loops:
//       r2 = running carry        r6 = 0x008006fc
//       r8 = words remaining      r9 = -MUL2_STEP
//       r10 = 1                   r11 = 0
// r4-r11 are saved on entry and restored on exit.
// NOTE(review): the existing comments treat the word at 0x008006fc as the
// high half of the tmul product - confirm against the chip documentation.
// NOTE(review): both loops load from a before testing the count, so up to
// two words past the last source word are read (never used) - confirm
// callers tolerate that.
//-----------------------------------------------------------------------
.align 4
.global mz_mul2
.thumb_func
.type mz_mul2, %function
mz_mul2:
tpush {r4, r5, r6, r7}
// r8-r11 are copied into r4-r7 and pushed from there
tmov r4, r8
tmov r5, r9
tmov r6, r10
tmov r7, r11
tpush {r4, r5, r6, r7}
tmov r8, r2 //r8 = n, loop number
tmov r2, #1
tmov r10, r2 // r10 = 1
tsub r2, #(MUL2_STEP + 1) // r2 = 1 - (MUL2_STEP + 1)
tmov r9, r2 //r9 = -MUL2_STEP
tmov r2, #0 // r2 = carry = 0
// NOTE(review): the repeated move below changes no value. It appears to be
// a 2-byte pad that puts the literal word after the tj at the address the
// hard-coded [pc, #4] load reads - recheck the layout before removing it.
tmov r2, #0
tloadr r6, [pc, #4] //r6 REG_ADDR32(0x6fc)
tmov r11,r2 //r11 = 0
tj MZ_MUL2_LOOP // enter at the load/count test
//tj MZ_MUL2_LOOP2
.word(0x008006fc)
MZ_MUL2_START:
// Pair 1 of 4. r4/r5 were loaded at MZ_MUL2_LOOP.
// Per-word pattern, repeated for every word below:
//   the add of r2 sets the carry flag to c0, tsubc turns that into c0 - 1,
//   the add of y sets the carry flag to c1, the add of r10 (marked nc by
//   the original author) rebuilds c0, and taddc folds in h and c1.
//a0
tmul r4, r3 // l0 = a0 * b
tloadr r7, [r0, #0] // y0
tadd r4, r2 // l0 + c => c0
tsubc r2, r2 // c0 - 1
tadd r4, r7 // l0 + c + y0 => c1
tloadr r7, [r6, #0] // r7 = h0
tadd r2, r10 // c0 - 1 + 1 = c0 (nc)
tstorem r0!, {r4} // store y0
taddc r2, r7 // cn = c0 + h0 + c1
tmul r5, r3 // l1 = a1 * b
tloadr r7, [r0, #0] // y1
tadd r5, r2 // l1 + cn => c2
tsubc r2, r2 // c2 - 1
tadd r5, r7 // l1 + cn + y1 => c3
tloadr r7, [r6, #0] // r7 = h1
tadd r2, r10 // c2 - 1 + 1 = c2 (nc)
tstorem r0!, {r5} // store y1
tloadm r1!, {r4, r5} // load next pair of *a
taddc r2, r7 // cn2 = c2 + h1 + c3
// Pair 2 of 4
//a0
tmul r4, r3 // l0 = a0 * b
tloadr r7, [r0, #0] // y0
tadd r4, r2 // l0 + c => c0
tsubc r2, r2 // c0 - 1
tadd r4, r7 // l0 + c + y0 => c1
tloadr r7, [r6, #0] // r7 = h0
tadd r2, r10 // c0 - 1 + 1 = c0 (nc)
tstorem r0!, {r4} // store y0
taddc r2, r7 // cn = c0 + h0 + c1
tmul r5, r3 // l1 = a1 * b
tloadr r7, [r0, #0] // y1
tadd r5, r2 // l1 + cn => c2
tsubc r2, r2 // c2 - 1
tadd r5, r7 // l1 + cn + y1 => c3
tloadr r7, [r6, #0] // r7 = h1
tadd r2, r10 // c2 - 1 + 1 = c2 (nc)
tstorem r0!, {r5} // store y1
tloadm r1!, {r4, r5} // load next pair of *a
taddc r2, r7 // cn2 = c2 + h1 + c3
// Pair 3 of 4
//a0
tmul r4, r3 // l0 = a0 * b
tloadr r7, [r0, #0] // y0
tadd r4, r2 // l0 + c => c0
tsubc r2, r2 // c0 - 1
tadd r4, r7 // l0 + c + y0 => c1
tloadr r7, [r6, #0] // r7 = h0
tadd r2, r10 // c0 - 1 + 1 = c0 (nc)
tstorem r0!, {r4} // store y0
taddc r2, r7 // cn = c0 + h0 + c1
tmul r5, r3 // l1 = a1 * b
tloadr r7, [r0, #0] // y1
tadd r5, r2 // l1 + cn => c2
tsubc r2, r2 // c2 - 1
tadd r5, r7 // l1 + cn + y1 => c3
tloadr r7, [r6, #0] // r7 = h1
tadd r2, r10 // c2 - 1 + 1 = c2 (nc)
tstorem r0!, {r5} // store y1
tloadm r1!, {r4, r5} // load next pair of *a
taddc r2, r7 // cn2 = c2 + h1 + c3
// Pair 4 of 4. No load at the end: MZ_MUL2_LOOP loads the next pair.
///// next 2
tmul r4, r3 // l0 = a0 * b
tloadr r7, [r0, #0] // y0
tadd r4, r2 // l0 + c => c0
tsubc r2, r2 // c0 - 1
tadd r4, r7 // l0 + c + y0 => c1
tloadr r7, [r6, #0] // r7 = h0
tadd r2, r10 // c0 - 1 + 1 = c0 (nc)
tstorem r0!, {r4} // store y0
taddc r2, r7 // cn = c0 + h0 + c1
tmul r5, r3 // l1 = a1 * b
tloadr r7, [r0, #0] // y1
tadd r5, r2 // l1 + cn => c2
tsubc r2, r2 // c2 - 1
tadd r5, r7 // l1 + cn + y1 => c3
tloadr r7, [r6, #0] // r7 = h1
tadd r2, r10 // c2 - 1 + 1 = c2 (nc)
tstorem r0!, {r5} // store y1
taddc r2, r7 // cn2 = c2 + h1 + c3
MZ_MUL2_LOOP:
tloadm r1!, {r4, r5} // load *a (first pair of the next pass)
tadd r8, r9 // r8 -= MUL2_STEP
tcmp r8, r11 // const 0
tjge MZ_MUL2_START // at least MUL2_STEP words were left: run a full pass
// Fewer than MUL2_STEP words remain: r5 = that remainder (0..MUL2_STEP-1)
tmov r5, r8
tadd r5, #MUL2_STEP
tsub r1, #8 // undo the pair load above so the word loop re-reads from there
tj MZ_MUL2_LOOP2
MZ_MUL2_START2:
tmul r4, r3 // l0 = a0 * b
tloadr r7, [r0, #0] // y0
tadd r4, r2 // l0 + c => c0
tsubc r2, r2 // c0 - 1
tadd r4, r7 // l0 + c + y0 => c1
tloadr r7, [r6, #0] // r7 = h0
tadd r2, r10 // c0 - 1 + 1 = c0 (nc)
tstorem r0!, {r4} // store y0
taddc r2, r7 // cn = c0 + h0 + c1
MZ_MUL2_LOOP2:
tloadm r1!, {r4} // load *a
tsub r5, #1 // r5--
tcmp r5, #0
tjge MZ_MUL2_START2 // another remainder word left to process
MZ_MUL2_END:
//tmov r2, #13
tstorem r0!, {r2} // store the final carry word after the last y word
// restore r8-r11 through r4-r7, then r4-r7 themselves
tpop {r4, r5, r6, r7}
tmov r8, r4
tmov r9, r5
tmov r10, r6
tmov r11, r7
tpop {r4, r5, r6, r7}
tjex lr
tnop
///////// asm crc24 function 2
//-----------------------------------------------------------------------
// blt_packet_crc24_opt: table-driven CRC-24, one nibble per table lookup.
// C usage (see the note at the end of this file):
//   crc = blt_packet_crc24_opt (dat, length, crc_init, Crc24Lookup);
// In:   r0 = dat       pointer to the input bytes
//       r1 = length    number of bytes
//       r2 = crc_init  starting CRC value
//       r3 = table     16 words, indexed by nibble
// Out:  r0 = updated CRC
// Saved/restored: r3-r7 and r8. Returns by popping pc.
//
// The buffer is processed in three phases:
//   1. head bytes, (-dat) & 3 of them (clamped to length), one at a time,
//      so that the pointer reaches a word boundary;
//   2. whole words, four bytes per load;
//   3. tail bytes (0..3), through the same byte loop as phase 1.
// r8 holds the byte count still to do after phase 1 and is cleared once
// phase 2 starts, which is how the second visit to CRC24_BYTE_END knows
// to finish.
//
// Per byte: x = byte ^ crc; crc = (crc >> 4) ^ table[x & 15];
//           crc = (crc >> 4) ^ table[(x >> 4) & 15].
// r7 = 60 masks a nibble already scaled by 4 into a byte offset in table.
//-----------------------------------------------------------------------
	.section .ram_code,"ax" //in ram code
	.align 2
	.global blt_packet_crc24_opt
	.code 16
	.thumb_func
	.type blt_packet_crc24_opt, %function
blt_packet_crc24_opt:
	tpush {r3, r4, r5, r6, r7, lr}
	tmov r5, r8
	tpush {r5}			// save r8 through r5
	tmov r5, r1			// r5 = length
	tneg r1, r0
	tmov r4, #3
	tand r1, r4			// r1 = (-dat) & 3 = head bytes needed to reach a word boundary
	tsub r5, r1			// r5 = length - head
	tjge CRC24_SAVE_WORD_NUM
	tadd r1, r5			// buffer shorter than the head: head = length
	tmov r5, #0			// and nothing is left afterwards
CRC24_SAVE_WORD_NUM:
	tmov r8, r5			// r8 = bytes left after the head
	//tloadr r3, CRC24_DAT
	tadd r4, r0, #0			// r4 = src
	tmov r0, #0			// r0 = byte index
	tmov r7, #60			// r7 = 15 * 4
CRC24_BYTE_LOOP:			// r4: src; r6: dat; r2: crc; r5: tmp; r1: byte count
	tcmp r0, r1
	tjeq CRC24_BYTE_END		// the only exit from this loop
	tloadrb r6, [r4, r0]		// r6 = src[r0]
	txor r6, r2			// r6 = crc ^ dat
	tshftl r5, r6, #2		// r5 = r6 << 2
	tand r5, r7			// r5 = low nibble * 4
	tloadr r5, [r5, r3]		// table entry for the low nibble
	tasr r2, r2, #4			// crc >> 4
	tshftr r6, r6, #2		// r6 = r6 >> 2
	txor r2, r5			// fold in the low-nibble entry
	tand r6, r7			// r6 = high nibble * 4
	tloadr r6, [r6, r3]		// table entry for the high nibble
	tasr r2, r2, #4			// crc >> 4
	tadd r0, #1
	txor r2, r6			// fold in the high-nibble entry
	// Unconditional on purpose. A conditional branch here would test the
	// flags left by the instructions just above; if txor updates Z (as
	// the Thumb EOR it mirrors does), the loop would drop out early
	// whenever the running CRC became zero, skipping the remaining bytes.
	tj CRC24_BYTE_LOOP
CRC24_BYTE_END:
	tmov r1, r8			// bytes still to do (0 on the second visit)
	tcmp r1, #0
	tjeq CRC24_END
	tmov r5, #0
	tmov r8, r5			// mark that the word phase has been entered
	tadd r4, r0			// advance src past the head bytes
	tmov r0, #0
CRC24_WORD_LOOP:
	tsub r1, #4
	tjlt CRC24_WORD_END		// fewer than 4 bytes left
	tloadr r0, [r4, #0]		// r0 = next 4 input bytes
	tadd r4, #4
	tshftr r6, r0, #0		// r6 = r0 >> 0 (byte 0 in the low bits)
CRC24_WORD_nib0:			// byte 0, low nibble
	txor r6, r2			// r6 = crc ^ dat
	tshftl r5, r6, #2		// r5 = r6 << 2
	tand r5, r7			// r5 = low nibble * 4
	tloadr r5, [r5, r3]		// load table
	tasr r2, r2, #4			// crc >> 4
	tshftr r6, r6, #2		// r6 = r6 >> 2
	tand r6, r7			// r6 = high nibble * 4
	tloadr r6, [r6, r3]
	txor r2, r5			// r2 = r5 ^ r2
CRC24_WORD_nib1:			// byte 0, high nibble
	tasr r2, r2, #4			// crc >> 4
	tshftr r5, r0, #8		// r5 = dat >> 8 (byte 1 in the low bits)
	txor r2, r6			// r2 = r6 ^ r2
CRC24_WORD_nib2:			// byte 1, low nibble (r5 and r6 swap roles)
	txor r5, r2			// r5 = crc ^ dat
	tshftl r6, r5, #2		// r6 = r5 << 2
	tand r6, r7			// r6 = low nibble * 4
	tloadr r6, [r6, r3]
	tasr r2, r2, #4			// crc >> 4
	tshftr r5, r5, #2
	tand r5, r7			// r5 = high nibble * 4
	tloadr r5, [r5, r3]
	txor r2, r6			// r2 = r6 ^ r2
CRC24_WORD_nib3:			// byte 1, high nibble
	tasr r2, r2, #4			// crc >> 4
	tshftr r6, r0, #16		// r6 = dat >> 16 (byte 2 in the low bits)
	txor r2, r5			// r2 = r5 ^ r2
CRC24_WORD_nib4:			// byte 2, low nibble
	txor r6, r2			// r6 = crc ^ dat
	tshftl r5, r6, #2		// r5 = r6 << 2
	tand r5, r7			// r5 = low nibble * 4
	tloadr r5, [r5, r3]		// load table
	tasr r2, r2, #4			// crc >> 4
	tshftr r6, r6, #2		// r6 = r6 >> 2
	tand r6, r7			// r6 = high nibble * 4
	tloadr r6, [r6, r3]
	txor r2, r5			// r2 = r5 ^ r2
CRC24_WORD_nib5:			// byte 2, high nibble
	tasr r2, r2, #4			// crc >> 4
	tshftr r5, r0, #24		// r5 = dat >> 24 (byte 3)
	txor r2, r6			// r2 = r6 ^ r2
CRC24_WORD_nib6:			// byte 3, low nibble
	txor r5, r2			// r5 = crc ^ dat
	tshftl r6, r5, #2		// r6 = r5 << 2
	tand r6, r7			// r6 = low nibble * 4
	tloadr r6, [r6, r3]
	tasr r2, r2, #4			// crc >> 4
	tshftr r5, r5, #2
	tand r5, r7			// r5 = high nibble * 4
	tloadr r5, [r5, r3]
	txor r2, r6			// r2 = r6 ^ r2
CRC24_WORD_nib7:			// byte 3, high nibble
	tasr r2, r2, #4			// crc >> 4
	tmov r0, #0			// r0 doubles as the byte index for the tail phase
	txor r2, r5			// r2 = r5 ^ r2
	tj CRC24_WORD_LOOP
CRC24_WORD_END:
	tadd r1, #4			// undo the last subtraction: r1 = tail bytes (0..3)
	tj CRC24_BYTE_LOOP		// r0 = 0 and r4 = first tail byte at this point
CRC24_END:
	tadd r0, r2, #0			// return the CRC in r0
	tpop {r5}
	tmov r8, r5			// restore r8
	tpop {r3, r4, r5, r6, r7, pc}
	tnop
// static int Crc24Lookup[16] = {
// 0x0000000,0x01b4c00,0x0369800,0x02dd400,
// 0x06d3000,0x0767c00,0x05ba800,0x040e400,
// 0x0da6000,0x0c12c00,0x0ecf800,0x0f7b400,
// 0x0b75000,0x0ac1c00,0x081c800,0x09a8400,
// };
// //usage
// //crc = blt_packet_crc24_opt (dat, length, crc_init, Crc24Lookup);