Blame - ap/libc/glibc/glibc-2.23/string/strcoll_l.c - T106_DC

blob: 4d1e3ab15ea5d27a2870abc8cbe2d6f51203b949 [file] [log] [blame]

xf.li	bdd93d5	2023-05-12 07:10:14 -0700	[diff] [blame]	1	/* Copyright (C) 1995-2016 Free Software Foundation, Inc.
				2	This file is part of the GNU C Library.
				3	Written by Ulrich Drepper <drepper@gnu.org>, 1995.
				4
				5	The GNU C Library is free software; you can redistribute it and/or
				6	modify it under the terms of the GNU Lesser General Public
				7	License as published by the Free Software Foundation; either
				8	version 2.1 of the License, or (at your option) any later version.
				9
				10	The GNU C Library is distributed in the hope that it will be useful,
				11	but WITHOUT ANY WARRANTY; without even the implied warranty of
				12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				13	Lesser General Public License for more details.
				14
				15	You should have received a copy of the GNU Lesser General Public
				16	License along with the GNU C Library; if not, see
				17	<http://www.gnu.org/licenses/>. */
				18
				19
				20	#include <assert.h>
				21	#include <langinfo.h>
				22	#include <locale.h>
				23	#include <stddef.h>
				24	#include <stdint.h>
				25	#include <string.h>
				26	#include <sys/param.h>
				27
				28	#ifndef STRING_TYPE
				29	# define STRING_TYPE char
				30	# define USTRING_TYPE unsigned char
				31	# define STRCOLL __strcoll_l
				32	# define STRCMP strcmp
				33	# define WEIGHT_H "../locale/weight.h"
				34	# define SUFFIX MB
				35	# define L(arg) arg
				36	#endif
				37
				38	#define CONCAT(a,b) CONCAT1(a,b)
				39	#define CONCAT1(a,b) a##b
				40
				41	#include "../locale/localeinfo.h"
				42	#include WEIGHT_H
				43
				44	/* Track status while looking for sequences in a string. */
				45	typedef struct
				46	{
				47	int len; /* Length of the current sequence. */
				48	size_t val; /* Position of the sequence relative to the
				49	previous non-ignored sequence. */
				50	size_t idxmax; /* Maximum index in sequences. */
				51	size_t idxcnt; /* Current count of indices. */
				52	size_t backw; /* Current Backward sequence index. */
				53	size_t backw_stop; /* Index where the backward sequences stop. */
				54	const USTRING_TYPE us; / The string. */
				55	unsigned char rule; /* Saved rule for the first sequence. */
				56	int32_t idx; /* Index to weight of the current sequence. */
				57	int32_t save_idx; /* Save looked up index of a forward
				58	sequence after the last backward
				59	sequence. */
				60	const USTRING_TYPE back_us; / Beginning of the backward sequence. */
				61	} coll_seq;
				62
				63	/* Get next sequence. Traverse the string as required. */
				64	static __always_inline void
				65	get_next_seq (coll_seq seq, int nrules, const unsigned char rulesets,
				66	const USTRING_TYPE weights, const int32_t table,
				67	const USTRING_TYPE extra, const int32_t indirect,
				68	int pass)
				69	{
				70	size_t val = seq->val = 0;
				71	int len = seq->len;
				72	size_t backw_stop = seq->backw_stop;
				73	size_t backw = seq->backw;
				74	size_t idxcnt = seq->idxcnt;
				75	size_t idxmax = seq->idxmax;
				76	int32_t idx = seq->idx;
				77	const USTRING_TYPE *us = seq->us;
				78
				79	while (len == 0)
				80	{
				81	++val;
				82	if (backw_stop != ~0ul)
				83	{
				84	/* There is something pushed. */
				85	if (backw == backw_stop)
				86	{
				87	/* The last pushed character was handled. Continue
				88	with forward characters. */
				89	if (idxcnt < idxmax)
				90	{
				91	idx = seq->save_idx;
				92	backw_stop = ~0ul;
				93	}
				94	else
				95	{
				96	/* Nothing anymore. The backward sequence ended with
				97	the last sequence in the string. Note that len is
				98	still zero. */
				99	idx = 0;
				100	break;
				101	}
				102	}
				103	else
				104	{
				105	/* XXX Traverse BACKW sequences from the beginning of
				106	BACKW_STOP to get the next sequence. Is ther a quicker way
				107	to do this? */
				108	size_t i = backw_stop;
				109	us = seq->back_us;
				110	while (i < backw)
				111	{
				112	int32_t tmp = findidx (table, indirect, extra, &us, -1);
				113	idx = tmp & 0xffffff;
				114	i++;
				115	}
				116	--backw;
				117	us = seq->us;
				118	}
				119	}
				120	else
				121	{
				122	backw_stop = idxmax;
				123	int32_t prev_idx = idx;
				124
				125	while (*us != L('\0'))
				126	{
				127	int32_t tmp = findidx (table, indirect, extra, &us, -1);
				128	unsigned char rule = tmp >> 24;
				129	prev_idx = idx;
				130	idx = tmp & 0xffffff;
				131	idxcnt = idxmax++;
				132
				133	/* Save the rule for the first sequence. */
				134	if (__glibc_unlikely (idxcnt == 0))
				135	seq->rule = rule;
				136
				137	if ((rulesets[rule * nrules + pass]
				138	& sort_backward) == 0)
				139	/* No more backward characters to push. */
				140	break;
				141	++idxcnt;
				142	}
				143
				144	if (backw_stop >= idxcnt)
				145	{
				146	/* No sequence at all or just one. */
				147	if (idxcnt == idxmax \|\| backw_stop > idxcnt)
				148	/* Note that len is still zero. */
				149	break;
				150
				151	backw_stop = ~0ul;
				152	}
				153	else
				154	{
				155	/* We pushed backward sequences. If the stream ended with the
				156	backward sequence, then we process the last sequence we
				157	found. Otherwise we process the sequence before the last
				158	one since the last one was a forward sequence. */
				159	seq->back_us = seq->us;
				160	seq->us = us;
				161	backw = idxcnt;
				162	if (idxmax > idxcnt)
				163	{
				164	backw--;
				165	seq->save_idx = idx;
				166	idx = prev_idx;
				167	}
				168	if (backw > backw_stop)
				169	backw--;
				170	}
				171	}
				172
				173	len = weights[idx++];
				174	/* Skip over indices of previous levels. */
				175	for (int i = 0; i < pass; i++)
				176	{
				177	idx += len;
				178	len = weights[idx];
				179	idx++;
				180	}
				181	}
				182
				183	/* Update the structure. */
				184	seq->val = val;
				185	seq->len = len;
				186	seq->backw_stop = backw_stop;
				187	seq->backw = backw;
				188	seq->idxcnt = idxcnt;
				189	seq->idxmax = idxmax;
				190	seq->us = us;
				191	seq->idx = idx;
				192	}
				193
				194	/* Compare two sequences. */
				195	static __always_inline int
				196	do_compare (coll_seq seq1, coll_seq seq2, int position,
				197	const USTRING_TYPE *weights)
				198	{
				199	int seq1len = seq1->len;
				200	int seq2len = seq2->len;
				201	size_t val1 = seq1->val;
				202	size_t val2 = seq2->val;
				203	int idx1 = seq1->idx;
				204	int idx2 = seq2->idx;
				205	int result = 0;
				206
				207	/* Test for position if necessary. */
				208	if (position && val1 != val2)
				209	{
				210	result = val1 > val2 ? 1 : -1;
				211	goto out;
				212	}
				213
				214	/* Compare the two sequences. */
				215	do
				216	{
				217	if (weights[idx1] != weights[idx2])
				218	{
				219	/* The sequences differ. */
				220	result = weights[idx1] - weights[idx2];
				221	goto out;
				222	}
				223
				224	/* Increment the offsets. */
				225	++idx1;
				226	++idx2;
				227
				228	--seq1len;
				229	--seq2len;
				230	}
				231	while (seq1len > 0 && seq2len > 0);
				232
				233	if (position && seq1len != seq2len)
				234	result = seq1len - seq2len;
				235
				236	out:
				237	seq1->len = seq1len;
				238	seq2->len = seq2len;
				239	seq1->idx = idx1;
				240	seq2->idx = idx2;
				241	return result;
				242	}
				243
				244	int
				245	STRCOLL (const STRING_TYPE s1, const STRING_TYPE s2, __locale_t l)
				246	{
				247	struct __locale_data *current = l->__locales[LC_COLLATE];
				248	uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
				249	/* We don't assign the following values right away since it might be
				250	unnecessary in case there are no rules. */
				251	const unsigned char *rulesets;
				252	const int32_t *table;
				253	const USTRING_TYPE *weights;
				254	const USTRING_TYPE *extra;
				255	const int32_t *indirect;
				256
				257	if (nrules == 0)
				258	return STRCMP (s1, s2);
				259
				260	/* Catch empty strings. */
				261	if (__glibc_unlikely (s1 == '\0') \|\| __glibc_unlikely (s2 == '\0'))
				262	return (s1 != '\0') - (s2 != '\0');
				263
				264	rulesets = (const unsigned char *)
				265	current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
				266	table = (const int32_t *)
				267	current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
				268	weights = (const USTRING_TYPE *)
				269	current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
				270	extra = (const USTRING_TYPE *)
				271	current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
				272	indirect = (const int32_t *)
				273	current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
				274
				275	assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
				276	assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
				277	assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
				278	assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
				279
				280	int result = 0, rule = 0;
				281
				282	coll_seq seq1, seq2;
				283	seq1.len = 0;
				284	seq1.idxmax = 0;
				285	seq1.rule = 0;
				286	seq2.len = 0;
				287	seq2.idxmax = 0;
				288
				289	for (int pass = 0; pass < nrules; ++pass)
				290	{
				291	seq1.idxcnt = 0;
				292	seq1.idx = 0;
				293	seq2.idx = 0;
				294	seq1.backw_stop = ~0ul;
				295	seq1.backw = ~0ul;
				296	seq2.idxcnt = 0;
				297	seq2.backw_stop = ~0ul;
				298	seq2.backw = ~0ul;
				299
				300	/* We need the elements of the strings as unsigned values since they
				301	are used as indices. */
				302	seq1.us = (const USTRING_TYPE *) s1;
				303	seq2.us = (const USTRING_TYPE *) s2;
				304
				305	/* We assume that if a rule has defined `position' in one section
				306	this is true for all of them. Please note that the localedef programs
				307	makes sure that `position' is not used at the first level. */
				308
				309	int position = rulesets[rule * nrules + pass] & sort_position;
				310
				311	while (1)
				312	{
				313	get_next_seq (&seq1, nrules, rulesets, weights, table,
				314	extra, indirect, pass);
				315	get_next_seq (&seq2, nrules, rulesets, weights, table,
				316	extra, indirect, pass);
				317	/* See whether any or both strings are empty. */
				318	if (seq1.len == 0 \|\| seq2.len == 0)
				319	{
				320	if (seq1.len == seq2.len)
				321	{
				322	/* Both strings ended and are equal at this level. Do a
				323	byte-level comparison to ensure that we don't waste time
				324	going through multiple passes for totally equal strings
				325	before proceeding to subsequent passes. */
				326	if (pass == 0 && STRCMP (s1, s2) == 0)
				327	return result;
				328	else
				329	break;
				330	}
				331
				332	/* This means one string is shorter than the other. Find out
				333	which one and return an appropriate value. */
				334	return seq1.len == 0 ? -1 : 1;
				335	}
				336
				337	result = do_compare (&seq1, &seq2, position, weights);
				338	if (result != 0)
				339	return result;
				340	}
				341
				342	rule = seq1.rule;
				343	}
				344
				345	return result;
				346	}
				347	libc_hidden_def (STRCOLL)
				348
				349	#ifndef WIDE_CHAR_VERSION
				350	weak_alias (__strcoll_l, strcoll_l)
				351	#endif