Blame - src/kernel/linux/v4.19/arch/arm64/lib/strlen.S - T800

blob: 8e0b14205dcb419e5752209294922ddff072bd52 [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2013 ARM Ltd.
				3	* Copyright (C) 2013 Linaro.
				4	*
				5	* This code is based on glibc cortex strings work originally authored by Linaro
				6	* and re-licensed under GPLv2 for the Linux kernel. The original code can
				7	* be found @
				8	*
				9	* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
				10	* files/head:/src/aarch64/
				11	*
				12	* This program is free software; you can redistribute it and/or modify
				13	* it under the terms of the GNU General Public License version 2 as
				14	* published by the Free Software Foundation.
				15	*
				16	* This program is distributed in the hope that it will be useful,
				17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				19	* GNU General Public License for more details.
				20	*
				21	* You should have received a copy of the GNU General Public License
				22	* along with this program. If not, see <http://www.gnu.org/licenses/>.
				23	*/
				24
				25	#include <linux/linkage.h>
				26	#include <asm/assembler.h>
				27
				28	/*
				29	* calculate the length of a string
				30	*
				31	* Parameters:
				32	* x0 - const string pointer
				33	* Returns:
				34	* x0 - length of the string in bytes, not counting the terminating NUL
				35	*/
				36
				37	/* Arguments and results. */
				38	srcin .req x0 /* In: pointer to the string. */
				39	len .req x0 /* Out: computed length; shares x0 with srcin. */
				40
				41	/* Locals and temporaries. */
				42	src .req x1 /* 16-byte-aligned read cursor, advanced by 16 per load. */
				43	data1 .req x2 /* First Dword of the current 16-byte block. */
				44	data2 .req x3 /* Second Dword of the current 16-byte block. */
				45	data2a .req x4 /* data2 with the leading-byte mask ORed in (misaligned entry). */
				46	has_nul1 .req x5 /* NUL syndrome computed from data1. */
				47	has_nul2 .req x6 /* NUL syndrome computed from data2. */
				48	tmp1 .req x7 /* Scratch. */
				49	tmp2 .req x8 /* Scratch. */
				50	tmp3 .req x9 /* Scratch. */
				51	tmp4 .req x10 /* Scratch. */
				52	zeroones .req x11 /* Holds REP8_01 for the whole routine. */
				53	pos .req x12 /* clz result: bit position of the first NUL marker. */
				54
				55	#define REP8_01 0x0101010101010101 /* 0x01 replicated into every byte. */
				56	#define REP8_7f 0x7f7f7f7f7f7f7f7f /* 0x7f replicated into every byte. */
				57	#define REP8_80 0x8080808080808080 /* 0x80 replicated into every byte. */
				58
				59	WEAK(strlen)
				60	mov zeroones, #REP8_01 /* 0x01 in every byte lane, used by the NUL test. */
				61	bic src, srcin, #15 /* src = srcin rounded down to a 16-byte boundary. */
				62	ands tmp1, srcin, #15 /* tmp1 = offset of srcin inside that 16-byte block. */
				63	b.ne .Lmisaligned /* Non-zero offset: neutralise the bytes before srcin first. */
				64	/*
				65	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
				66	* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
				67	* can be done in parallel across the entire word.
				68	*/
				69	/*
				70	* The inner loop deals with two Dwords at a time. This has a
				71	* slightly higher start-up cost, but we should win quite quickly,
				72	* especially on cores with a high number of issue slots per
				73	* cycle, as we get much better parallelism out of the operations.
				74	*/
				75	.Lloop:
				76	ldp data1, data2, [src], #16 /* Load 16 bytes, then advance src past them. */
				77	.Lrealigned:
				78	sub tmp1, data1, zeroones
				79	orr tmp2, data1, #REP8_7f
				80	sub tmp3, data2, zeroones
				81	orr tmp4, data2, #REP8_7f
				82	bic has_nul1, tmp1, tmp2 /* (data1 - 0x01..) & ~(data1 | 0x7f..) */
				83	bics has_nul2, tmp3, tmp4 /* Same for data2; Z is set when it has no NUL. */
				84	ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
				85	b.eq .Lloop /* Both syndromes zero: no NUL in this block. */
				86
				87	sub len, src, srcin /* src is already past the block; srcin (x0) is dead from here. */
				88	cbz has_nul1, .Lnul_in_data2
				89	CPU_BE( mov data2, data1 ) /* prepare data to re-calculate the syndrome */
				90	sub len, len, #8 /* NUL is in data1: step back over data2. */
				91	mov has_nul2, has_nul1
				92	.Lnul_in_data2:
				93	/*
				94	* For big-endian, carry propagation (if the final byte in the
				95	* string is 0x01) means we cannot use has_nul directly. The
				96	* easiest way to get the correct byte is to byte-swap the data
				97	* and calculate the syndrome a second time.
				98	*/
				99	CPU_BE( rev data2, data2 )
				100	CPU_BE( sub tmp1, data2, zeroones )
				101	CPU_BE( orr tmp2, data2, #REP8_7f )
				102	CPU_BE( bic has_nul2, tmp1, tmp2 )
				103
				104	sub len, len, #8 /* Step back to the start of the Dword holding the NUL. */
				105	rev has_nul2, has_nul2 /* Byte-reverse so clz lands on the first flagged byte. */
				106	clz pos, has_nul2
				107	add len, len, pos, lsr #3 /* Bits to bytes. */
				108	ret
				109
				110	.Lmisaligned:
				111	cmp tmp1, #8 /* Flags stay live until csinv/csel below: le <=> offset <= 8. */
				112	neg tmp1, tmp1
				113	ldp data1, data2, [src], #16 /* Load the aligned block that contains srcin. */
				114	lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
				115	mov tmp2, #~0
				116	/* Big-endian. Early bytes are at MSB. */
				117	CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
				118	/* Little-endian. Early bytes are at LSB. */
				119	CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
				120
				121	orr data1, data1, tmp2 /* Force the bytes before srcin to 0xff so they never look like NUL. */
				122	orr data2a, data2, tmp2
				123	csinv data1, data1, xzr, le /* offset > 8: all of data1 precedes srcin, make it all ones. */
				124	csel data2, data2, data2a, le /* offset > 8: use masked data2; otherwise data2 unchanged. */
				125	b .Lrealigned
				126	ENDPIPROC(strlen)