Blame - ap/build/uClibc/libc/misc/regex/regcomp.c - T106_DC

blob: b25850ccd68fb27c82ad9063201e01ee2c625d08 [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/* Extended regular expression matching and search library.
				2	Copyright (C) 2002,2003,2004,2005,2006 Free Software Foundation, Inc.
				3	This file is part of the GNU C Library.
				4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
				5
				6	The GNU C Library is free software; you can redistribute it and/or
				7	modify it under the terms of the GNU Lesser General Public
				8	License as published by the Free Software Foundation; either
				9	version 2.1 of the License, or (at your option) any later version.
				10
				11	The GNU C Library is distributed in the hope that it will be useful,
				12	but WITHOUT ANY WARRANTY; without even the implied warranty of
				13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				14	Lesser General Public License for more details.
				15
				16	You should have received a copy of the GNU Lesser General Public
				17	License along with the GNU C Library; if not, write to the Free
				18	Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
				19	02111-1307 USA. */
				20
				21	static reg_errcode_t re_compile_internal (regex_t preg, const char pattern,
				22	size_t length, reg_syntax_t syntax);
				23	static void re_compile_fastmap_iter (regex_t *bufp,
				24	const re_dfastate_t *init_state,
				25	char *fastmap);
				26	static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
				27	#ifdef RE_ENABLE_I18N
				28	static void free_charset (re_charset_t *cset);
				29	#endif
				30	static void free_workarea_compile (regex_t *preg);
				31	static reg_errcode_t create_initial_state (re_dfa_t *dfa);
				32	#ifdef RE_ENABLE_I18N
				33	static void optimize_utf8 (re_dfa_t *dfa);
				34	#endif
				35	static reg_errcode_t analyze (regex_t *preg);
				36	static reg_errcode_t preorder (bin_tree_t *root,
				37	reg_errcode_t (fn (void , bin_tree_t )),
				38	void *extra);
				39	static reg_errcode_t postorder (bin_tree_t *root,
				40	reg_errcode_t (fn (void , bin_tree_t )),
				41	void *extra);
				42	static reg_errcode_t optimize_subexps (void extra, bin_tree_t node);
				43	static reg_errcode_t lower_subexps (void extra, bin_tree_t node);
				44	static bin_tree_t lower_subexp (reg_errcode_t err, regex_t *preg,
				45	bin_tree_t *node);
				46	static reg_errcode_t calc_first (void extra, bin_tree_t node);
				47	static reg_errcode_t calc_next (void extra, bin_tree_t node);
				48	static reg_errcode_t link_nfa_nodes (void extra, bin_tree_t node);
				49	static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
				50	static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
				51	unsigned int constraint);
				52	static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
				53	static reg_errcode_t calc_eclosure_iter (re_node_set new_set, re_dfa_t dfa,
				54	int node, int root);
				55	static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
				56	static int fetch_number (re_string_t input, re_token_t token,
				57	reg_syntax_t syntax);
				58	static int peek_token (re_token_t token, re_string_t input,
				59	reg_syntax_t syntax) internal_function;
				60	static bin_tree_t parse (re_string_t regexp, regex_t *preg,
				61	reg_syntax_t syntax, reg_errcode_t *err);
				62	static bin_tree_t parse_reg_exp (re_string_t regexp, regex_t *preg,
				63	re_token_t *token, reg_syntax_t syntax,
				64	int nest, reg_errcode_t *err);
				65	static bin_tree_t parse_branch (re_string_t regexp, regex_t *preg,
				66	re_token_t *token, reg_syntax_t syntax,
				67	int nest, reg_errcode_t *err);
				68	static bin_tree_t parse_expression (re_string_t regexp, regex_t *preg,
				69	re_token_t *token, reg_syntax_t syntax,
				70	int nest, reg_errcode_t *err);
				71	static bin_tree_t parse_sub_exp (re_string_t regexp, regex_t *preg,
				72	re_token_t *token, reg_syntax_t syntax,
				73	int nest, reg_errcode_t *err);
				74	static bin_tree_t parse_dup_op (bin_tree_t dup_elem, re_string_t *regexp,
				75	re_dfa_t dfa, re_token_t token,
				76	reg_syntax_t syntax, reg_errcode_t *err);
				77	static bin_tree_t parse_bracket_exp (re_string_t regexp, re_dfa_t *dfa,
				78	re_token_t *token, reg_syntax_t syntax,
				79	reg_errcode_t *err);
				80	static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
				81	re_string_t *regexp,
				82	re_token_t *token, int token_len,
				83	re_dfa_t *dfa,
				84	reg_syntax_t syntax,
				85	int accept_hyphen);
				86	static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
				87	re_string_t *regexp,
				88	re_token_t *token);
				89	#ifdef RE_ENABLE_I18N
				90	static reg_errcode_t build_equiv_class (bitset_t sbcset,
				91	re_charset_t *mbcset,
				92	int *equiv_class_alloc,
				93	const unsigned char *name);
				94	static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
				95	bitset_t sbcset,
				96	re_charset_t *mbcset,
				97	int *char_class_alloc,
				98	const unsigned char *class_name,
				99	reg_syntax_t syntax);
				100	#else /* not RE_ENABLE_I18N */
				101	static reg_errcode_t build_equiv_class (bitset_t sbcset,
				102	const unsigned char *name);
				103	static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
				104	bitset_t sbcset,
				105	const unsigned char *class_name,
				106	reg_syntax_t syntax);
				107	#endif /* not RE_ENABLE_I18N */
				108	static bin_tree_t build_charclass_op (re_dfa_t dfa,
				109	RE_TRANSLATE_TYPE trans,
				110	const unsigned char *class_name,
				111	const unsigned char *extra,
				112	int non_match, reg_errcode_t *err);
				113	static bin_tree_t create_tree (re_dfa_t dfa,
				114	bin_tree_t left, bin_tree_t right,
				115	re_token_type_t type);
				116	static bin_tree_t create_token_tree (re_dfa_t dfa,
				117	bin_tree_t left, bin_tree_t right,
				118	const re_token_t *token);
				119	static bin_tree_t duplicate_tree (const bin_tree_t src, re_dfa_t *dfa);
				120	static void free_token (re_token_t *node);
				121	static reg_errcode_t free_tree (void extra, bin_tree_t node);
				122	static reg_errcode_t mark_opt_subexp (void extra, bin_tree_t node);
				123
				124	/* This table gives an error message for each of the error codes listed
				125	in regex.h. Obviously the order here has to be same as there.
				126	POSIX doesn't require that we do anything for REG_NOERROR,
				127	but why not be nice? */
				128
				129	static const char __re_error_msgid[] =
				130	{
				131	#define REG_NOERROR_IDX 0
				132	gettext_noop ("Success") /* REG_NOERROR */
				133	"\0"
				134	#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
				135	gettext_noop ("No match") /* REG_NOMATCH */
				136	"\0"
				137	#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
				138	gettext_noop ("Invalid regular expression") /* REG_BADPAT */
				139	"\0"
				140	#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
				141	gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
				142	"\0"
				143	#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
				144	gettext_noop ("Invalid character class name") /* REG_ECTYPE */
				145	"\0"
				146	#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
				147	gettext_noop ("Trailing backslash") /* REG_EESCAPE */
				148	"\0"
				149	#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
				150	gettext_noop ("Invalid back reference") /* REG_ESUBREG */
				151	"\0"
				152	#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
				153	gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
				154	"\0"
				155	#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
				156	gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
				157	"\0"
				158	#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
				159	gettext_noop ("Unmatched \\{") /* REG_EBRACE */
				160	"\0"
				161	#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
				162	gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
				163	"\0"
				164	#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
				165	gettext_noop ("Invalid range end") /* REG_ERANGE */
				166	"\0"
				167	#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
				168	gettext_noop ("Memory exhausted") /* REG_ESPACE */
				169	"\0"
				170	#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
				171	gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
				172	"\0"
				173	#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
				174	gettext_noop ("Premature end of regular expression") /* REG_EEND */
				175	"\0"
				176	#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
				177	gettext_noop ("Regular expression too big") /* REG_ESIZE */
				178	"\0"
				179	#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
				180	gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
				181	};
				182
				183	static const uint16_t __re_error_msgid_idx[] =
				184	{
				185	REG_NOERROR_IDX,
				186	REG_NOMATCH_IDX,
				187	REG_BADPAT_IDX,
				188	REG_ECOLLATE_IDX,
				189	REG_ECTYPE_IDX,
				190	REG_EESCAPE_IDX,
				191	REG_ESUBREG_IDX,
				192	REG_EBRACK_IDX,
				193	REG_EPAREN_IDX,
				194	REG_EBRACE_IDX,
				195	REG_BADBR_IDX,
				196	REG_ERANGE_IDX,
				197	REG_ESPACE_IDX,
				198	REG_BADRPT_IDX,
				199	REG_EEND_IDX,
				200	REG_ESIZE_IDX,
				201	REG_ERPAREN_IDX
				202	};
				203
				204	/* Entry points for GNU code. */
				205
				206	/* re_compile_pattern is the GNU regular expression compiler: it
				207	compiles PATTERN (of length LENGTH) and puts the result in BUFP.
				208	Returns 0 if the pattern was valid, otherwise an error string.
				209
				210	Assumes the `allocated' (and perhaps `buffer') and `translate' fields
				211	are set in BUFP on entry. */
				212
				213	const char *
				214	re_compile_pattern (const char *pattern,
				215	size_t length,
				216	struct re_pattern_buffer *bufp)
				217	{
				218	reg_errcode_t ret;
				219
				220	/* And GNU code determines whether or not to get register information
				221	by passing null for the REGS argument to re_match, etc., not by
				222	setting no_sub, unless RE_NO_SUB is set. */
				223	bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
				224
				225	/* Match anchors at newline. */
				226	bufp->newline_anchor = 1;
				227
				228	ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
				229
				230	if (!ret)
				231	return NULL;
				232	return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
				233	}
				234
				235	/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
				236	also be assigned to arbitrarily: each pattern buffer stores its own
				237	syntax, so it can be changed between regex compilations. */
				238	/* This has no initializer because initialized variables in Emacs
				239	become read-only after dumping. */
				240	reg_syntax_t re_syntax_options;
				241
				242
				243	/* Specify the precise syntax of regexps for compilation. This provides
				244	for compatibility for various utilities which historically have
				245	different, incompatible syntaxes.
				246
				247	The argument SYNTAX is a bit mask comprised of the various bits
				248	defined in regex.h. We return the old syntax. */
				249
				250	reg_syntax_t
				251	re_set_syntax (reg_syntax_t syntax)
				252	{
				253	reg_syntax_t ret = re_syntax_options;
				254
				255	re_syntax_options = syntax;
				256	return ret;
				257	}
				258
				259	int
				260	re_compile_fastmap (struct re_pattern_buffer *bufp)
				261	{
				262	re_dfa_t dfa = (re_dfa_t ) bufp->buffer;
				263	char *fastmap = bufp->fastmap;
				264
				265	memset (fastmap, '\0', sizeof (char) * SBC_MAX);
				266	re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
				267	if (dfa->init_state != dfa->init_state_word)
				268	re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
				269	if (dfa->init_state != dfa->init_state_nl)
				270	re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
				271	if (dfa->init_state != dfa->init_state_begbuf)
				272	re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
				273	bufp->fastmap_accurate = 1;
				274	return 0;
				275	}
				276	libc_hidden_def(re_compile_fastmap)
				277
				278	static __inline__ void
				279	__attribute ((always_inline))
				280	re_set_fastmap (char *fastmap, int icase, int ch)
				281	{
				282	fastmap[ch] = 1;
				283	if (icase)
				284	fastmap[tolower (ch)] = 1;
				285	}
				286
				287	/* Helper function for re_compile_fastmap.
				288	Compile fastmap for the initial_state INIT_STATE. */
				289
				290	static void
				291	re_compile_fastmap_iter (regex_t bufp, const re_dfastate_t init_state,
				292	char *fastmap)
				293	{
				294	re_dfa_t dfa = (re_dfa_t ) bufp->buffer;
				295	int node_cnt;
				296	int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
				297	for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
				298	{
				299	int node = init_state->nodes.elems[node_cnt];
				300	re_token_type_t type = dfa->nodes[node].type;
				301
				302	if (type == CHARACTER)
				303	{
				304	re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
				305	#ifdef RE_ENABLE_I18N
				306	if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
				307	{
				308	unsigned char buf = alloca (dfa->mb_cur_max), p;
				309	wchar_t wc;
				310	mbstate_t state;
				311
				312	p = buf;
				313	*p++ = dfa->nodes[node].opr.c;
				314	while (++node < dfa->nodes_len
				315	&& dfa->nodes[node].type == CHARACTER
				316	&& dfa->nodes[node].mb_partial)
				317	*p++ = dfa->nodes[node].opr.c;
				318	memset (&state, '\0', sizeof (state));
				319	if (mbrtowc (&wc, (const char *) buf, p - buf,
				320	&state) == p - buf
				321	&& (__wcrtomb ((char *) buf, towlower (wc), &state)
				322	!= (size_t) -1))
				323	re_set_fastmap (fastmap, 0, buf[0]);
				324	}
				325	#endif
				326	}
				327	else if (type == SIMPLE_BRACKET)
				328	{
				329	int i, ch;
				330	for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
				331	{
				332	int j;
				333	bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
				334	for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
				335	if (w & ((bitset_word_t) 1 << j))
				336	re_set_fastmap (fastmap, icase, ch);
				337	}
				338	}
				339	#ifdef RE_ENABLE_I18N
				340	else if (type == COMPLEX_BRACKET)
				341	{
				342	int i;
				343	re_charset_t *cset = dfa->nodes[node].opr.mbcset;
				344	if (cset->non_match \|\| cset->ncoll_syms \|\| cset->nequiv_classes
				345	\|\| cset->nranges \|\| cset->nchar_classes)
				346	{
				347	# if 0
				348	if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
				349	{
				350	/* In this case we want to catch the bytes which are
				351	the first byte of any collation elements.
				352	e.g. In da_DK, we want to catch 'a' since "aa"
				353	is a valid collation element, and don't catch
				354	'b' since 'b' is the only collation element
				355	which starts from 'b'. */
				356	const int32_t table = (const int32_t )
				357	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
				358	for (i = 0; i < SBC_MAX; ++i)
				359	if (table[i] < 0)
				360	re_set_fastmap (fastmap, icase, i);
				361	}
				362	# else
				363	if (dfa->mb_cur_max > 1)
				364	for (i = 0; i < SBC_MAX; ++i)
				365	if (__btowc (i) == WEOF)
				366	re_set_fastmap (fastmap, icase, i);
				367	# endif
				368	}
				369	for (i = 0; i < cset->nmbchars; ++i)
				370	{
				371	char buf[256];
				372	mbstate_t state;
				373	memset (&state, '\0', sizeof (state));
				374	if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
				375	re_set_fastmap (fastmap, icase, (unsigned char ) buf);
				376	if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
				377	{
				378	if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
				379	!= (size_t) -1)
				380	re_set_fastmap (fastmap, 0, (unsigned char ) buf);
				381	}
				382	}
				383	}
				384	#endif /* RE_ENABLE_I18N */
				385	else if (type == OP_PERIOD
				386	#ifdef RE_ENABLE_I18N
				387	\|\| type == OP_UTF8_PERIOD
				388	#endif
				389	\|\| type == END_OF_RE)
				390	{
				391	memset (fastmap, '\1', sizeof (char) * SBC_MAX);
				392	if (type == END_OF_RE)
				393	bufp->can_be_null = 1;
				394	return;
				395	}
				396	}
				397	}
				398
				399	/* Entry point for POSIX code. */
				400	/* regcomp takes a regular expression as a string and compiles it.
				401
				402	PREG is a regex_t *. We do not expect any fields to be initialized,
				403	since POSIX says we shouldn't. Thus, we set
				404
				405	`buffer' to the compiled pattern;
				406	`used' to the length of the compiled pattern;
				407	`syntax' to RE_SYNTAX_POSIX_EXTENDED if the
				408	REG_EXTENDED bit in CFLAGS is set; otherwise, to
				409	RE_SYNTAX_POSIX_BASIC;
				410	`newline_anchor' to REG_NEWLINE being set in CFLAGS;
				411	`fastmap' to an allocated space for the fastmap;
				412	`fastmap_accurate' to zero;
				413	`re_nsub' to the number of subexpressions in PATTERN.
				414
				415	PATTERN is the address of the pattern string.
				416
				417	CFLAGS is a series of bits which affect compilation.
				418
				419	If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
				420	use POSIX basic syntax.
				421
				422	If REG_NEWLINE is set, then . and [^...] don't match newline.
				423	Also, regexec will try a match beginning after every newline.
				424
				425	If REG_ICASE is set, then we considers upper- and lowercase
				426	versions of letters to be equivalent when matching.
				427
				428	If REG_NOSUB is set, then when PREG is passed to regexec, that
				429	routine will report only success or failure, and nothing about the
				430	registers.
				431
				432	It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
				433	the return codes and their meanings.) */
				434
				435	int
				436	regcomp (regex_t *__restrict preg,
				437	const char *__restrict pattern,
				438	int cflags)
				439	{
				440	reg_errcode_t ret;
				441	reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
				442	: RE_SYNTAX_POSIX_BASIC);
				443
				444	preg->buffer = NULL;
				445	preg->allocated = 0;
				446	preg->used = 0;
				447
				448	/* Try to allocate space for the fastmap. */
				449	preg->fastmap = re_malloc (char, SBC_MAX);
				450	if (BE (preg->fastmap == NULL, 0))
				451	return REG_ESPACE;
				452
				453	syntax \|= (cflags & REG_ICASE) ? RE_ICASE : 0;
				454
				455	/* If REG_NEWLINE is set, newlines are treated differently. */
				456	if (cflags & REG_NEWLINE)
				457	{ /* REG_NEWLINE implies neither . nor [^...] match newline. */
				458	syntax &= ~RE_DOT_NEWLINE;
				459	syntax \|= RE_HAT_LISTS_NOT_NEWLINE;
				460	/* It also changes the matching behavior. */
				461	preg->newline_anchor = 1;
				462	}
				463	else
				464	preg->newline_anchor = 0;
				465	preg->no_sub = !!(cflags & REG_NOSUB);
				466	preg->translate = NULL;
				467
				468	ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
				469
				470	/* POSIX doesn't distinguish between an unmatched open-group and an
				471	unmatched close-group: both are REG_EPAREN. */
				472	if (ret == REG_ERPAREN)
				473	ret = REG_EPAREN;
				474
				475	/* We have already checked preg->fastmap != NULL. */
				476	if (BE (ret == REG_NOERROR, 1))
				477	/* Compute the fastmap now, since regexec cannot modify the pattern
				478	buffer. This function never fails in this implementation. */
				479	(void) re_compile_fastmap (preg);
				480	else
				481	{
				482	/* Some error occurred while compiling the expression. */
				483	re_free (preg->fastmap);
				484	preg->fastmap = NULL;
				485	}
				486
				487	return (int) ret;
				488	}
				489
				490	/* Returns a message corresponding to an error code, ERRCODE, returned
				491	from either regcomp or regexec. We don't use PREG here. */
				492
				493	size_t
				494	regerror (int errcode,
				495	const regex_t *__restrict preg,
				496	char *__restrict errbuf,
				497	size_t errbuf_size)
				498	{
				499	const char *msg;
				500	size_t msg_size;
				501
				502	if (BE (errcode < 0
				503	\|\| errcode >= (int) (sizeof (__re_error_msgid_idx)
				504	/ sizeof (__re_error_msgid_idx[0])), 0))
				505	/* Only error codes returned by the rest of the code should be passed
				506	to this routine. If we are given anything else, or if other regex
				507	code generates an invalid error code, then the program has a bug.
				508	Dump core so we can fix it. */
				509	abort ();
				510
				511	msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
				512
				513	msg_size = strlen (msg) + 1; /* Includes the null. */
				514
				515	if (BE (errbuf_size != 0, 1))
				516	{
				517	if (BE (msg_size > errbuf_size, 0))
				518	{
				519	memcpy (errbuf, msg, errbuf_size - 1);
				520	errbuf[errbuf_size - 1] = 0;
				521	}
				522	else
				523	memcpy (errbuf, msg, msg_size);
				524	}
				525
				526	return msg_size;
				527	}
				528
				529
				530	#ifdef RE_ENABLE_I18N
				531	/* This static array is used for the map to single-byte characters when
				532	UTF-8 is used. Otherwise we would allocate memory just to initialize
				533	it the same all the time. UTF-8 is the preferred encoding so this is
				534	a worthwhile optimization. */
				535	static const bitset_t utf8_sb_map =
				536	{
				537	/* Set the first 128 bits. */
				538	[0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
				539	};
				540	#endif
				541
				542
				543	static void
				544	free_dfa_content (re_dfa_t *dfa)
				545	{
				546	int i, j;
				547
				548	if (dfa->nodes)
				549	for (i = 0; i < dfa->nodes_len; ++i)
				550	free_token (dfa->nodes + i);
				551	re_free (dfa->nexts);
				552	for (i = 0; i < dfa->nodes_len; ++i)
				553	{
				554	if (dfa->eclosures != NULL)
				555	re_node_set_free (dfa->eclosures + i);
				556	if (dfa->inveclosures != NULL)
				557	re_node_set_free (dfa->inveclosures + i);
				558	if (dfa->edests != NULL)
				559	re_node_set_free (dfa->edests + i);
				560	}
				561	re_free (dfa->edests);
				562	re_free (dfa->eclosures);
				563	re_free (dfa->inveclosures);
				564	re_free (dfa->nodes);
				565
				566	if (dfa->state_table)
				567	for (i = 0; i <= dfa->state_hash_mask; ++i)
				568	{
				569	struct re_state_table_entry *entry = dfa->state_table + i;
				570	for (j = 0; j < entry->num; ++j)
				571	{
				572	re_dfastate_t *state = entry->array[j];
				573	free_state (state);
				574	}
				575	re_free (entry->array);
				576	}
				577	re_free (dfa->state_table);
				578	#ifdef RE_ENABLE_I18N
				579	if (dfa->sb_char != utf8_sb_map)
				580	re_free (dfa->sb_char);
				581	#endif
				582	re_free (dfa->subexp_map);
				583	#ifdef DEBUG
				584	re_free (dfa->re_str);
				585	#endif
				586
				587	re_free (dfa);
				588	}
				589
				590
				591	/* Free dynamically allocated space used by PREG. */
				592
				593	void
				594	regfree (regex_t *preg)
				595	{
				596	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
				597	if (BE (dfa != NULL, 1))
				598	free_dfa_content (dfa);
				599	preg->buffer = NULL;
				600	preg->allocated = 0;
				601
				602	re_free (preg->fastmap);
				603	preg->fastmap = NULL;
				604
				605	re_free (preg->translate);
				606	preg->translate = NULL;
				607	}
				608	libc_hidden_def(regfree)
				609
				610	/* Entry points compatible with 4.2 BSD regex library. We don't define
				611	them unless specifically requested. */
				612
				613	#if defined _REGEX_RE_COMP \|\| defined __UCLIBC__
				614
				615	/* BSD has one and only one pattern buffer. */
				616	static struct re_pattern_buffer *re_comp_buf;
				617
				618	char *
				619	/* Make BCD definitions weak in libc, so POSIX programs can redefine
				620	these names if they don't use our functions, and still use
				621	regcomp/regexec above without link errors. */
				622	weak_function
				623	re_comp (const char *s)
				624	{
				625	reg_errcode_t ret;
				626
				627	/* "If re_comp() is passed NULL or a null string, it returns
				628	* without changing the currently compiled regular expression." */
				629	if (!s \|\| !s[0])
				630	{
				631	if (!re_comp_buf)
				632	return gettext ("No previous regular expression");
				633	return NULL;
				634	}
				635
				636	if (!re_comp_buf)
				637	{
				638	re_comp_buf = calloc (1, sizeof(*re_comp_buf));
				639	if (!re_comp_buf)
				640	{
				641	ret = REG_ESPACE;
				642	goto err;
				643	}
				644	}
				645
				646	if (re_comp_buf->buffer)
				647	{
				648	regfree (re_comp_buf);
				649	memset (re_comp_buf, '\0', sizeof(*re_comp_buf));
				650	}
				651
				652	if (re_comp_buf->fastmap == NULL)
				653	{
				654	re_comp_buf->fastmap = malloc (SBC_MAX);
				655	if (re_comp_buf->fastmap == NULL)
				656	{
				657	ret = REG_ESPACE;
				658	goto err;
				659	}
				660	}
				661
				662	/* Since `re_exec' always passes NULL for the `regs' argument, we
				663	don't need to initialize the pattern buffer fields which affect it. */
				664
				665	/* Match anchors at newlines. */
				666	re_comp_buf->newline_anchor = 1;
				667
				668	ret = re_compile_internal (re_comp_buf, s, strlen (s), re_syntax_options);
				669
				670	if (!ret)
				671	return NULL;
				672	free (re_comp_buf);
				673	re_comp_buf = NULL;
				674
				675	err:
				676	/* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
				677	return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
				678	}
				679
				680	#if 0
				681	libc_freeres_fn (free_mem)
				682	{
				683	regfree (re_comp_buf);
				684	free (re_comp_buf);
				685	re_comp_buf = NULL;
				686	}
				687	#endif
				688
				689	#endif /* _REGEX_RE_COMP */
				690
				691	/* Internal entry point.
				692	Compile the regular expression PATTERN, whose length is LENGTH.
				693	SYNTAX indicate regular expression's syntax. */
				694
				695	static reg_errcode_t
				696	re_compile_internal (regex_t preg, const char pattern, size_t length,
				697	reg_syntax_t syntax)
				698	{
				699	reg_errcode_t err = REG_NOERROR;
				700	re_dfa_t *dfa;
				701	re_string_t regexp;
				702
				703	/* Initialize the pattern buffer. */
				704	preg->fastmap_accurate = 0;
				705	preg->syntax = syntax;
				706	preg->not_bol = preg->not_eol = 0;
				707	preg->used = 0;
				708	preg->re_nsub = 0;
				709	preg->can_be_null = 0;
				710	preg->regs_allocated = REGS_UNALLOCATED;
				711
				712	/* Initialize the dfa. */
				713	dfa = (re_dfa_t *) preg->buffer;
				714	if (BE (preg->allocated < sizeof (re_dfa_t), 0))
				715	{
				716	/* If zero allocated, but buffer is non-null, try to realloc
				717	enough space. This loses if buffer's address is bogus, but
				718	that is the user's responsibility. If ->buffer is NULL this
				719	is a simple allocation. */
				720	dfa = re_realloc (preg->buffer, re_dfa_t, 1);
				721	if (dfa == NULL)
				722	return REG_ESPACE;
				723	preg->allocated = sizeof (re_dfa_t);
				724	preg->buffer = (unsigned char *) dfa;
				725	}
				726	preg->used = sizeof (re_dfa_t);
				727
				728	err = init_dfa (dfa, length);
				729	if (BE (err != REG_NOERROR, 0))
				730	{
				731	free_dfa_content (dfa);
				732	preg->buffer = NULL;
				733	preg->allocated = 0;
				734	return err;
				735	}
				736	#ifdef DEBUG
				737	/* Note: length+1 will not overflow since it is checked in init_dfa. */
				738	dfa->re_str = re_malloc (char, length + 1);
				739	strncpy (dfa->re_str, pattern, length + 1);
				740	#endif
				741
				742	__libc_lock_init (dfa->lock);
				743
				744	err = re_string_construct (&regexp, pattern, length, preg->translate,
				745	syntax & RE_ICASE, dfa);
				746	if (BE (err != REG_NOERROR, 0))
				747	{
				748	re_compile_internal_free_return:
				749	free_workarea_compile (preg);
				750	re_string_destruct (&regexp);
				751	free_dfa_content (dfa);
				752	preg->buffer = NULL;
				753	preg->allocated = 0;
				754	return err;
				755	}
				756
				757	/* Parse the regular expression, and build a structure tree. */
				758	preg->re_nsub = 0;
				759	dfa->str_tree = parse (&regexp, preg, syntax, &err);
				760	if (BE (dfa->str_tree == NULL, 0))
				761	goto re_compile_internal_free_return;
				762
				763	/* Analyze the tree and create the nfa. */
				764	err = analyze (preg);
				765	if (BE (err != REG_NOERROR, 0))
				766	goto re_compile_internal_free_return;
				767
				768	#ifdef RE_ENABLE_I18N
				769	/* If possible, do searching in single byte encoding to speed things up. */
				770	if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
				771	optimize_utf8 (dfa);
				772	#endif
				773
				774	/* Then create the initial state of the dfa. */
				775	err = create_initial_state (dfa);
				776
				777	/* Release work areas. */
				778	free_workarea_compile (preg);
				779	re_string_destruct (&regexp);
				780
				781	if (BE (err != REG_NOERROR, 0))
				782	{
				783	free_dfa_content (dfa);
				784	preg->buffer = NULL;
				785	preg->allocated = 0;
				786	}
				787
				788	return err;
				789	}
				790
				791	/* Initialize DFA. We use the length of the regular expression PAT_LEN
				792	as the initial length of some arrays. */
				793
				794	static reg_errcode_t
				795	init_dfa (re_dfa_t *dfa, size_t pat_len)
				796	{
				797	unsigned int table_size;
				798	#if 1
				799	char *codeset_name;
				800	#endif
				801
				802	memset (dfa, '\0', sizeof (re_dfa_t));
				803
				804	/* Force allocation of str_tree_storage the first time. */
				805	dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
				806
				807	/* Avoid overflows. */
				808	if (pat_len == SIZE_MAX)
				809	return REG_ESPACE;
				810
				811	dfa->nodes_alloc = pat_len + 1;
				812	dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
				813
				814	/* table_size = 2 ^ ceil(log pat_len) */
				815	for (table_size = 1; ; table_size <<= 1)
				816	if (table_size > pat_len)
				817	break;
				818
				819	dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
				820	dfa->state_hash_mask = table_size - 1;
				821
				822	dfa->mb_cur_max = MB_CUR_MAX;
				823	#if 0
				824	if (dfa->mb_cur_max == 6
				825	&& strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
				826	dfa->is_utf8 = 1;
				827	dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
				828	!= 0);
				829	#else
				830	# ifdef HAVE_LANGINFO_CODESET
				831	codeset_name = nl_langinfo (CODESET);
				832	# else
				833	codeset_name = getenv ("LC_ALL");
				834	if (codeset_name == NULL \|\| codeset_name[0] == '\0')
				835	codeset_name = getenv ("LC_CTYPE");
				836	if (codeset_name == NULL \|\| codeset_name[0] == '\0')
				837	codeset_name = getenv ("LANG");
				838	if (codeset_name == NULL)
				839	codeset_name = "";
				840	else if (strchr (codeset_name, '.') != NULL)
				841	codeset_name = strchr (codeset_name, '.') + 1;
				842	# endif
				843
				844	if (strcasecmp (codeset_name, "UTF-8") == 0
				845	\|\| strcasecmp (codeset_name, "UTF8") == 0)
				846	dfa->is_utf8 = 1;
				847
				848	/* We check exhaustively in the loop below if this charset is a
				849	superset of ASCII. */
				850	dfa->map_notascii = 0;
				851	#endif
				852
				853	#ifdef RE_ENABLE_I18N
				854	if (dfa->mb_cur_max > 1)
				855	{
				856	if (dfa->is_utf8)
				857	dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
				858	else
				859	{
				860	int i, j, ch;
				861
				862	dfa->sb_char = calloc (sizeof (bitset_t), 1);
				863	if (BE (dfa->sb_char == NULL, 0))
				864	return REG_ESPACE;
				865
				866	/* Set the bits corresponding to single byte chars. */
				867	for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
				868	for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
				869	{
				870	wint_t wch = __btowc (ch);
				871	if (wch != WEOF)
				872	dfa->sb_char[i] \|= (bitset_word_t) 1 << j;
				873	# if 1
				874	if (isascii (ch) && wch != ch)
				875	dfa->map_notascii = 1;
				876	# endif
				877	}
				878	}
				879	}
				880	#endif
				881
				882	if (BE (dfa->nodes == NULL \|\| dfa->state_table == NULL, 0))
				883	return REG_ESPACE;
				884	return REG_NOERROR;
				885	}
				886
				887	/* Initialize WORD_CHAR table, which indicate which character is
				888	"word". In this case "word" means that it is the word construction
				889	character used by some operators like "\<", "\>", etc. */
				890
				891	static void
				892	internal_function
				893	init_word_char (re_dfa_t *dfa)
				894	{
				895	int i, j, ch;
				896	dfa->word_ops_used = 1;
				897	for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
				898	for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
				899	if (isalnum (ch) \|\| ch == '_')
				900	dfa->word_char[i] \|= (bitset_word_t) 1 << j;
				901	}
				902
				903	/* Free the work area which are only used while compiling. */
				904
				905	static void
				906	free_workarea_compile (regex_t *preg)
				907	{
				908	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
				909	bin_tree_storage_t storage, next;
				910	for (storage = dfa->str_tree_storage; storage; storage = next)
				911	{
				912	next = storage->next;
				913	re_free (storage);
				914	}
				915	dfa->str_tree_storage = NULL;
				916	dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
				917	dfa->str_tree = NULL;
				918	re_free (dfa->org_indices);
				919	dfa->org_indices = NULL;
				920	}
				921
				922	/* Create initial states for all contexts. */
				923
				924	static reg_errcode_t
				925	create_initial_state (re_dfa_t *dfa)
				926	{
				927	int first, i;
				928	reg_errcode_t err;
				929	re_node_set init_nodes;
				930
				931	/* Initial states have the epsilon closure of the node which is
				932	the first node of the regular expression. */
				933	first = dfa->str_tree->first->node_idx;
				934	dfa->init_node = first;
				935	err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
				936	if (BE (err != REG_NOERROR, 0))
				937	return err;
				938
				939	/* The back-references which are in initial states can epsilon transit,
				940	since in this case all of the subexpressions can be null.
				941	Then we add epsilon closures of the nodes which are the next nodes of
				942	the back-references. */
				943	if (dfa->nbackref > 0)
				944	for (i = 0; i < init_nodes.nelem; ++i)
				945	{
				946	int node_idx = init_nodes.elems[i];
				947	re_token_type_t type = dfa->nodes[node_idx].type;
				948
				949	int clexp_idx;
				950	if (type != OP_BACK_REF)
				951	continue;
				952	for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
				953	{
				954	re_token_t *clexp_node;
				955	clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
				956	if (clexp_node->type == OP_CLOSE_SUBEXP
				957	&& clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
				958	break;
				959	}
				960	if (clexp_idx == init_nodes.nelem)
				961	continue;
				962
				963	if (type == OP_BACK_REF)
				964	{
				965	int dest_idx = dfa->edests[node_idx].elems[0];
				966	if (!re_node_set_contains (&init_nodes, dest_idx))
				967	{
				968	re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
				969	i = 0;
				970	}
				971	}
				972	}
				973
				974	/* It must be the first time to invoke acquire_state. */
				975	dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
				976	/* We don't check ERR here, since the initial state must not be NULL. */
				977	if (BE (dfa->init_state == NULL, 0))
				978	return err;
				979	if (dfa->init_state->has_constraint)
				980	{
				981	dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
				982	CONTEXT_WORD);
				983	dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
				984	CONTEXT_NEWLINE);
				985	dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
				986	&init_nodes,
				987	CONTEXT_NEWLINE
				988	\| CONTEXT_BEGBUF);
				989	if (BE (dfa->init_state_word == NULL \|\| dfa->init_state_nl == NULL
				990	\|\| dfa->init_state_begbuf == NULL, 0))
				991	return err;
				992	}
				993	else
				994	dfa->init_state_word = dfa->init_state_nl
				995	= dfa->init_state_begbuf = dfa->init_state;
				996
				997	re_node_set_free (&init_nodes);
				998	return REG_NOERROR;
				999	}
				1000
				1001	#ifdef RE_ENABLE_I18N
				1002	/* If it is possible to do searching in single byte encoding instead of UTF-8
				1003	to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
				1004	DFA nodes where needed. */
				1005
				1006	static void
				1007	optimize_utf8 (re_dfa_t *dfa)
				1008	{
				1009	int node, i, mb_chars = 0, has_period = 0;
				1010
				1011	for (node = 0; node < dfa->nodes_len; ++node)
				1012	switch (dfa->nodes[node].type)
				1013	{
				1014	case CHARACTER:
				1015	if (dfa->nodes[node].opr.c >= 0x80)
				1016	mb_chars = 1;
				1017	break;
				1018	case ANCHOR:
				1019	switch (dfa->nodes[node].opr.idx)
				1020	{
				1021	case LINE_FIRST:
				1022	case LINE_LAST:
				1023	case BUF_FIRST:
				1024	case BUF_LAST:
				1025	break;
				1026	default:
				1027	/* Word anchors etc. cannot be handled. */
				1028	return;
				1029	}
				1030	break;
				1031	case OP_PERIOD:
				1032	has_period = 1;
				1033	break;
				1034	case OP_BACK_REF:
				1035	case OP_ALT:
				1036	case END_OF_RE:
				1037	case OP_DUP_ASTERISK:
				1038	case OP_OPEN_SUBEXP:
				1039	case OP_CLOSE_SUBEXP:
				1040	break;
				1041	case COMPLEX_BRACKET:
				1042	return;
				1043	case SIMPLE_BRACKET:
				1044	/* Just double check. The non-ASCII range starts at 0x80. */
				1045	assert (0x80 % BITSET_WORD_BITS == 0);
				1046	for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
				1047	if (dfa->nodes[node].opr.sbcset[i])
				1048	return;
				1049	break;
				1050	default:
				1051	abort ();
				1052	}
				1053
				1054	if (mb_chars \|\| has_period)
				1055	for (node = 0; node < dfa->nodes_len; ++node)
				1056	{
				1057	if (dfa->nodes[node].type == CHARACTER
				1058	&& dfa->nodes[node].opr.c >= 0x80)
				1059	dfa->nodes[node].mb_partial = 0;
				1060	else if (dfa->nodes[node].type == OP_PERIOD)
				1061	dfa->nodes[node].type = OP_UTF8_PERIOD;
				1062	}
				1063
				1064	/* The search can be in single byte locale. */
				1065	dfa->mb_cur_max = 1;
				1066	dfa->is_utf8 = 0;
				1067	dfa->has_mb_node = dfa->nbackref > 0 \|\| has_period;
				1068	}
				1069	#endif
				1070
				1071	/* Analyze the structure tree, and calculate "first", "next", "edest",
				1072	"eclosure", and "inveclosure". */
				1073
				1074	static reg_errcode_t
				1075	analyze (regex_t *preg)
				1076	{
				1077	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
				1078	reg_errcode_t ret;
				1079
				1080	/* Allocate arrays. */
				1081	dfa->nexts = re_malloc (int, dfa->nodes_alloc);
				1082	dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
				1083	dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
				1084	dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
				1085	if (BE (dfa->nexts == NULL \|\| dfa->org_indices == NULL \|\| dfa->edests == NULL
				1086	\|\| dfa->eclosures == NULL, 0))
				1087	return REG_ESPACE;
				1088
				1089	dfa->subexp_map = re_malloc (int, preg->re_nsub);
				1090	if (dfa->subexp_map != NULL)
				1091	{
				1092	int i;
				1093	for (i = 0; i < preg->re_nsub; i++)
				1094	dfa->subexp_map[i] = i;
				1095	preorder (dfa->str_tree, optimize_subexps, dfa);
				1096	for (i = 0; i < preg->re_nsub; i++)
				1097	if (dfa->subexp_map[i] != i)
				1098	break;
				1099	if (i == preg->re_nsub)
				1100	{
				1101	free (dfa->subexp_map);
				1102	dfa->subexp_map = NULL;
				1103	}
				1104	}
				1105
				1106	ret = postorder (dfa->str_tree, lower_subexps, preg);
				1107	if (BE (ret != REG_NOERROR, 0))
				1108	return ret;
				1109	ret = postorder (dfa->str_tree, calc_first, dfa);
				1110	if (BE (ret != REG_NOERROR, 0))
				1111	return ret;
				1112	preorder (dfa->str_tree, calc_next, dfa);
				1113	ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
				1114	if (BE (ret != REG_NOERROR, 0))
				1115	return ret;
				1116	ret = calc_eclosure (dfa);
				1117	if (BE (ret != REG_NOERROR, 0))
				1118	return ret;
				1119
				1120	/* We only need this during the prune_impossible_nodes pass in regexec.c;
				1121	skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
				1122	if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
				1123	\|\| dfa->nbackref)
				1124	{
				1125	dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
				1126	if (BE (dfa->inveclosures == NULL, 0))
				1127	return REG_ESPACE;
				1128	ret = calc_inveclosure (dfa);
				1129	}
				1130
				1131	return ret;
				1132	}
				1133
				1134	/* Our parse trees are very unbalanced, so we cannot use a stack to
				1135	implement parse tree visits. Instead, we use parent pointers and
				1136	some hairy code in these two functions. */
				1137	static reg_errcode_t
				1138	postorder (bin_tree_t root, reg_errcode_t (fn (void , bin_tree_t *)),
				1139	void *extra)
				1140	{
				1141	bin_tree_t node, prev;
				1142
				1143	for (node = root; ; )
				1144	{
				1145	/* Descend down the tree, preferably to the left (or to the right
				1146	if that's the only child). */
				1147	while (node->left \|\| node->right)
				1148	if (node->left)
				1149	node = node->left;
				1150	else
				1151	node = node->right;
				1152
				1153	do
				1154	{
				1155	reg_errcode_t err = fn (extra, node);
				1156	if (BE (err != REG_NOERROR, 0))
				1157	return err;
				1158	if (node->parent == NULL)
				1159	return REG_NOERROR;
				1160	prev = node;
				1161	node = node->parent;
				1162	}
				1163	/* Go up while we have a node that is reached from the right. */
				1164	while (node->right == prev \|\| node->right == NULL);
				1165	node = node->right;
				1166	}
				1167	}
				1168
				1169	static reg_errcode_t
				1170	preorder (bin_tree_t root, reg_errcode_t (fn (void , bin_tree_t *)),
				1171	void *extra)
				1172	{
				1173	bin_tree_t *node;
				1174
				1175	for (node = root; ; )
				1176	{
				1177	reg_errcode_t err = fn (extra, node);
				1178	if (BE (err != REG_NOERROR, 0))
				1179	return err;
				1180
				1181	/* Go to the left node, or up and to the right. */
				1182	if (node->left)
				1183	node = node->left;
				1184	else
				1185	{
				1186	bin_tree_t *prev = NULL;
				1187	while (node->right == prev \|\| node->right == NULL)
				1188	{
				1189	prev = node;
				1190	node = node->parent;
				1191	if (!node)
				1192	return REG_NOERROR;
				1193	}
				1194	node = node->right;
				1195	}
				1196	}
				1197	}
				1198
				1199	/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
				1200	re_search_internal to map the inner one's opr.idx to this one's. Adjust
				1201	backreferences as well. Requires a preorder visit. */
				1202	static reg_errcode_t
				1203	optimize_subexps (void extra, bin_tree_t node)
				1204	{
				1205	re_dfa_t dfa = (re_dfa_t ) extra;
				1206
				1207	if (node->token.type == OP_BACK_REF && dfa->subexp_map)
				1208	{
				1209	int idx = node->token.opr.idx;
				1210	node->token.opr.idx = dfa->subexp_map[idx];
				1211	dfa->used_bkref_map \|= 1 << node->token.opr.idx;
				1212	}
				1213
				1214	else if (node->token.type == SUBEXP
				1215	&& node->left && node->left->token.type == SUBEXP)
				1216	{
				1217	int other_idx = node->left->token.opr.idx;
				1218
				1219	node->left = node->left->left;
				1220	if (node->left)
				1221	node->left->parent = node;
				1222
				1223	dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
				1224	if (other_idx < BITSET_WORD_BITS)
				1225	dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
				1226	}
				1227
				1228	return REG_NOERROR;
				1229	}
				1230
				1231	/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
				1232	of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
				1233	static reg_errcode_t
				1234	lower_subexps (void extra, bin_tree_t node)
				1235	{
				1236	regex_t preg = (regex_t ) extra;
				1237	reg_errcode_t err = REG_NOERROR;
				1238
				1239	if (node->left && node->left->token.type == SUBEXP)
				1240	{
				1241	node->left = lower_subexp (&err, preg, node->left);
				1242	if (node->left)
				1243	node->left->parent = node;
				1244	}
				1245	if (node->right && node->right->token.type == SUBEXP)
				1246	{
				1247	node->right = lower_subexp (&err, preg, node->right);
				1248	if (node->right)
				1249	node->right->parent = node;
				1250	}
				1251
				1252	return err;
				1253	}
				1254
				1255	static bin_tree_t *
				1256	lower_subexp (reg_errcode_t err, regex_t preg, bin_tree_t *node)
				1257	{
				1258	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
				1259	bin_tree_t *body = node->left;
				1260	bin_tree_t op, cls, tree1, tree;
				1261
				1262	if (preg->no_sub
				1263	/* We do not optimize empty subexpressions, because otherwise we may
				1264	have bad CONCAT nodes with NULL children. This is obviously not
				1265	very common, so we do not lose much. An example that triggers
				1266	this case is the sed "script" //x. */
				1267	&& node->left != NULL
				1268	&& (node->token.opr.idx >= BITSET_WORD_BITS
				1269	\|\| !(dfa->used_bkref_map
				1270	& ((bitset_word_t) 1 << node->token.opr.idx))))
				1271	return node->left;
				1272
				1273	/* Convert the SUBEXP node to the concatenation of an
				1274	OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
				1275	op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
				1276	cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
				1277	tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
				1278	tree = create_tree (dfa, op, tree1, CONCAT);
				1279	if (BE (tree == NULL \|\| tree1 == NULL \|\| op == NULL \|\| cls == NULL, 0))
				1280	{
				1281	*err = REG_ESPACE;
				1282	return NULL;
				1283	}
				1284
				1285	op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
				1286	op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
				1287	return tree;
				1288	}
				1289
				1290	/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
				1291	nodes. Requires a postorder visit. */
				1292	static reg_errcode_t
				1293	calc_first (void extra, bin_tree_t node)
				1294	{
				1295	re_dfa_t dfa = (re_dfa_t ) extra;
				1296	if (node->token.type == CONCAT)
				1297	{
				1298	node->first = node->left->first;
				1299	node->node_idx = node->left->node_idx;
				1300	}
				1301	else
				1302	{
				1303	node->first = node;
				1304	node->node_idx = re_dfa_add_node (dfa, node->token);
				1305	if (BE (node->node_idx == -1, 0))
				1306	return REG_ESPACE;
				1307	}
				1308	return REG_NOERROR;
				1309	}
				1310
				1311	/* Pass 2: compute NEXT on the tree. Preorder visit. */
				1312	static reg_errcode_t
				1313	calc_next (void extra, bin_tree_t node)
				1314	{
				1315	switch (node->token.type)
				1316	{
				1317	case OP_DUP_ASTERISK:
				1318	node->left->next = node;
				1319	break;
				1320	case CONCAT:
				1321	node->left->next = node->right->first;
				1322	node->right->next = node->next;
				1323	break;
				1324	default:
				1325	if (node->left)
				1326	node->left->next = node->next;
				1327	if (node->right)
				1328	node->right->next = node->next;
				1329	break;
				1330	}
				1331	return REG_NOERROR;
				1332	}
				1333
				1334	/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
				1335	static reg_errcode_t
				1336	link_nfa_nodes (void extra, bin_tree_t node)
				1337	{
				1338	re_dfa_t dfa = (re_dfa_t ) extra;
				1339	int idx = node->node_idx;
				1340	reg_errcode_t err = REG_NOERROR;
				1341
				1342	switch (node->token.type)
				1343	{
				1344	case CONCAT:
				1345	break;
				1346
				1347	case END_OF_RE:
				1348	assert (node->next == NULL);
				1349	break;
				1350
				1351	case OP_DUP_ASTERISK:
				1352	case OP_ALT:
				1353	{
				1354	int left, right;
				1355	dfa->has_plural_match = 1;
				1356	if (node->left != NULL)
				1357	left = node->left->first->node_idx;
				1358	else
				1359	left = node->next->node_idx;
				1360	if (node->right != NULL)
				1361	right = node->right->first->node_idx;
				1362	else
				1363	right = node->next->node_idx;
				1364	assert (left > -1);
				1365	assert (right > -1);
				1366	err = re_node_set_init_2 (dfa->edests + idx, left, right);
				1367	}
				1368	break;
				1369
				1370	case ANCHOR:
				1371	case OP_OPEN_SUBEXP:
				1372	case OP_CLOSE_SUBEXP:
				1373	err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
				1374	break;
				1375
				1376	case OP_BACK_REF:
				1377	dfa->nexts[idx] = node->next->node_idx;
				1378	if (node->token.type == OP_BACK_REF)
				1379	re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
				1380	break;
				1381
				1382	default:
				1383	assert (!IS_EPSILON_NODE (node->token.type));
				1384	dfa->nexts[idx] = node->next->node_idx;
				1385	break;
				1386	}
				1387
				1388	return err;
				1389	}
				1390
				1391	/* Duplicate the epsilon closure of the node ROOT_NODE.
				1392	Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
				1393	to their own constraint. */
				1394
				1395	static reg_errcode_t
				1396	internal_function
				1397	duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
				1398	int root_node, unsigned int init_constraint)
				1399	{
				1400	int org_node, clone_node, ret;
				1401	unsigned int constraint = init_constraint;
				1402	for (org_node = top_org_node, clone_node = top_clone_node;;)
				1403	{
				1404	int org_dest, clone_dest;
				1405	if (dfa->nodes[org_node].type == OP_BACK_REF)
				1406	{
				1407	/* If the back reference epsilon-transit, its destination must
				1408	also have the constraint. Then duplicate the epsilon closure
				1409	of the destination of the back reference, and store it in
				1410	edests of the back reference. */
				1411	org_dest = dfa->nexts[org_node];
				1412	re_node_set_empty (dfa->edests + clone_node);
				1413	clone_dest = duplicate_node (dfa, org_dest, constraint);
				1414	if (BE (clone_dest == -1, 0))
				1415	return REG_ESPACE;
				1416	dfa->nexts[clone_node] = dfa->nexts[org_node];
				1417	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
				1418	if (BE (ret < 0, 0))
				1419	return REG_ESPACE;
				1420	}
				1421	else if (dfa->edests[org_node].nelem == 0)
				1422	{
				1423	/* In case of the node can't epsilon-transit, don't duplicate the
				1424	destination and store the original destination as the
				1425	destination of the node. */
				1426	dfa->nexts[clone_node] = dfa->nexts[org_node];
				1427	break;
				1428	}
				1429	else if (dfa->edests[org_node].nelem == 1)
				1430	{
				1431	/* In case of the node can epsilon-transit, and it has only one
				1432	destination. */
				1433	org_dest = dfa->edests[org_node].elems[0];
				1434	re_node_set_empty (dfa->edests + clone_node);
				1435	if (dfa->nodes[org_node].type == ANCHOR)
				1436	{
				1437	/* In case of the node has another constraint, append it. */
				1438	if (org_node == root_node && clone_node != org_node)
				1439	{
				1440	/* ...but if the node is root_node itself, it means the
				1441	epsilon closure have a loop, then tie it to the
				1442	destination of the root_node. */
				1443	ret = re_node_set_insert (dfa->edests + clone_node,
				1444	org_dest);
				1445	if (BE (ret < 0, 0))
				1446	return REG_ESPACE;
				1447	break;
				1448	}
				1449	constraint \|= dfa->nodes[org_node].opr.ctx_type;
				1450	}
				1451	clone_dest = duplicate_node (dfa, org_dest, constraint);
				1452	if (BE (clone_dest == -1, 0))
				1453	return REG_ESPACE;
				1454	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
				1455	if (BE (ret < 0, 0))
				1456	return REG_ESPACE;
				1457	}
				1458	else /* dfa->edests[org_node].nelem == 2 */
				1459	{
				1460	/* In case of the node can epsilon-transit, and it has two
				1461	destinations. In the bin_tree_t and DFA, that's '\|' and ''. /
				1462	org_dest = dfa->edests[org_node].elems[0];
				1463	re_node_set_empty (dfa->edests + clone_node);
				1464	/* Search for a duplicated node which satisfies the constraint. */
				1465	clone_dest = search_duplicated_node (dfa, org_dest, constraint);
				1466	if (clone_dest == -1)
				1467	{
				1468	/* There are no such a duplicated node, create a new one. */
				1469	reg_errcode_t err;
				1470	clone_dest = duplicate_node (dfa, org_dest, constraint);
				1471	if (BE (clone_dest == -1, 0))
				1472	return REG_ESPACE;
				1473	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
				1474	if (BE (ret < 0, 0))
				1475	return REG_ESPACE;
				1476	err = duplicate_node_closure (dfa, org_dest, clone_dest,
				1477	root_node, constraint);
				1478	if (BE (err != REG_NOERROR, 0))
				1479	return err;
				1480	}
				1481	else
				1482	{
				1483	/* There are a duplicated node which satisfy the constraint,
				1484	use it to avoid infinite loop. */
				1485	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
				1486	if (BE (ret < 0, 0))
				1487	return REG_ESPACE;
				1488	}
				1489
				1490	org_dest = dfa->edests[org_node].elems[1];
				1491	clone_dest = duplicate_node (dfa, org_dest, constraint);
				1492	if (BE (clone_dest == -1, 0))
				1493	return REG_ESPACE;
				1494	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
				1495	if (BE (ret < 0, 0))
				1496	return REG_ESPACE;
				1497	}
				1498	org_node = org_dest;
				1499	clone_node = clone_dest;
				1500	}
				1501	return REG_NOERROR;
				1502	}
				1503
				1504	/* Search for a node which is duplicated from the node ORG_NODE, and
				1505	satisfies the constraint CONSTRAINT. */
				1506
				1507	static int
				1508	search_duplicated_node (const re_dfa_t *dfa, int org_node,
				1509	unsigned int constraint)
				1510	{
				1511	int idx;
				1512	for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
				1513	{
				1514	if (org_node == dfa->org_indices[idx]
				1515	&& constraint == dfa->nodes[idx].constraint)
				1516	return idx; /* Found. */
				1517	}
				1518	return -1; /* Not found. */
				1519	}
				1520
				1521	/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
				1522	Return the index of the new node, or -1 if insufficient storage is
				1523	available. */
				1524
				1525	static int
				1526	duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
				1527	{
				1528	int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
				1529	if (BE (dup_idx != -1, 1))
				1530	{
				1531	dfa->nodes[dup_idx].constraint = constraint;
				1532	if (dfa->nodes[org_idx].type == ANCHOR)
				1533	dfa->nodes[dup_idx].constraint \|= dfa->nodes[org_idx].opr.ctx_type;
				1534	dfa->nodes[dup_idx].duplicated = 1;
				1535
				1536	/* Store the index of the original node. */
				1537	dfa->org_indices[dup_idx] = org_idx;
				1538	}
				1539	return dup_idx;
				1540	}
				1541
				1542	static reg_errcode_t
				1543	calc_inveclosure (re_dfa_t *dfa)
				1544	{
				1545	int src, idx, ret;
				1546	for (idx = 0; idx < dfa->nodes_len; ++idx)
				1547	re_node_set_init_empty (dfa->inveclosures + idx);
				1548
				1549	for (src = 0; src < dfa->nodes_len; ++src)
				1550	{
				1551	int *elems = dfa->eclosures[src].elems;
				1552	for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
				1553	{
				1554	ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
				1555	if (BE (ret == -1, 0))
				1556	return REG_ESPACE;
				1557	}
				1558	}
				1559
				1560	return REG_NOERROR;
				1561	}
				1562
				1563	/* Calculate "eclosure" for all the node in DFA. */
				1564
				1565	static reg_errcode_t
				1566	calc_eclosure (re_dfa_t *dfa)
				1567	{
				1568	int node_idx, incomplete;
				1569	#ifdef DEBUG
				1570	assert (dfa->nodes_len > 0);
				1571	#endif
				1572	incomplete = 0;
				1573	/* For each nodes, calculate epsilon closure. */
				1574	for (node_idx = 0; ; ++node_idx)
				1575	{
				1576	reg_errcode_t err;
				1577	re_node_set eclosure_elem;
				1578	if (node_idx == dfa->nodes_len)
				1579	{
				1580	if (!incomplete)
				1581	break;
				1582	incomplete = 0;
				1583	node_idx = 0;
				1584	}
				1585
				1586	#ifdef DEBUG
				1587	assert (dfa->eclosures[node_idx].nelem != -1);
				1588	#endif
				1589
				1590	/* If we have already calculated, skip it. */
				1591	if (dfa->eclosures[node_idx].nelem != 0)
				1592	continue;
				1593	/* Calculate epsilon closure of `node_idx'. */
				1594	err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
				1595	if (BE (err != REG_NOERROR, 0))
				1596	return err;
				1597
				1598	if (dfa->eclosures[node_idx].nelem == 0)
				1599	{
				1600	incomplete = 1;
				1601	re_node_set_free (&eclosure_elem);
				1602	}
				1603	}
				1604	return REG_NOERROR;
				1605	}
				1606
				1607	/* Calculate epsilon closure of NODE. */
				1608
				1609	static reg_errcode_t
				1610	calc_eclosure_iter (re_node_set new_set, re_dfa_t dfa, int node, int root)
				1611	{
				1612	reg_errcode_t err;
				1613	unsigned int constraint;
				1614	int i, incomplete;
				1615	re_node_set eclosure;
				1616	incomplete = 0;
				1617	err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
				1618	if (BE (err != REG_NOERROR, 0))
				1619	return err;
				1620
				1621	/* This indicates that we are calculating this node now.
				1622	We reference this value to avoid infinite loop. */
				1623	dfa->eclosures[node].nelem = -1;
				1624
				1625	constraint = ((dfa->nodes[node].type == ANCHOR)
				1626	? dfa->nodes[node].opr.ctx_type : 0);
				1627	/* If the current node has constraints, duplicate all nodes.
				1628	Since they must inherit the constraints. */
				1629	if (constraint
				1630	&& dfa->edests[node].nelem
				1631	&& !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
				1632	{
				1633	err = duplicate_node_closure (dfa, node, node, node, constraint);
				1634	if (BE (err != REG_NOERROR, 0))
				1635	return err;
				1636	}
				1637
				1638	/* Expand each epsilon destination nodes. */
				1639	if (IS_EPSILON_NODE(dfa->nodes[node].type))
				1640	for (i = 0; i < dfa->edests[node].nelem; ++i)
				1641	{
				1642	re_node_set eclosure_elem;
				1643	int edest = dfa->edests[node].elems[i];
				1644	/* If calculating the epsilon closure of `edest' is in progress,
				1645	return intermediate result. */
				1646	if (dfa->eclosures[edest].nelem == -1)
				1647	{
				1648	incomplete = 1;
				1649	continue;
				1650	}
				1651	/* If we haven't calculated the epsilon closure of `edest' yet,
				1652	calculate now. Otherwise use calculated epsilon closure. */
				1653	if (dfa->eclosures[edest].nelem == 0)
				1654	{
				1655	err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
				1656	if (BE (err != REG_NOERROR, 0))
				1657	return err;
				1658	}
				1659	else
				1660	eclosure_elem = dfa->eclosures[edest];
				1661	/* Merge the epsilon closure of `edest'. */
				1662	re_node_set_merge (&eclosure, &eclosure_elem);
				1663	/* If the epsilon closure of `edest' is incomplete,
				1664	the epsilon closure of this node is also incomplete. */
				1665	if (dfa->eclosures[edest].nelem == 0)
				1666	{
				1667	incomplete = 1;
				1668	re_node_set_free (&eclosure_elem);
				1669	}
				1670	}
				1671
				1672	/* Epsilon closures include itself. */
				1673	re_node_set_insert (&eclosure, node);
				1674	if (incomplete && !root)
				1675	dfa->eclosures[node].nelem = 0;
				1676	else
				1677	dfa->eclosures[node] = eclosure;
				1678	*new_set = eclosure;
				1679	return REG_NOERROR;
				1680	}
				1681
				1682	/* Functions for token which are used in the parser. */
				1683
				1684	/* Fetch a token from INPUT.
				1685	We must not use this function inside bracket expressions. */
				1686
				1687	static void
				1688	internal_function
				1689	fetch_token (re_token_t result, re_string_t input, reg_syntax_t syntax)
				1690	{
				1691	re_string_skip_bytes (input, peek_token (result, input, syntax));
				1692	}
				1693
				1694	/* Peek a token from INPUT, and return the length of the token.
				1695	We must not use this function inside bracket expressions. */
				1696
				1697	static int
				1698	internal_function
				1699	peek_token (re_token_t token, re_string_t input, reg_syntax_t syntax)
				1700	{
				1701	unsigned char c;
				1702
				1703	if (re_string_eoi (input))
				1704	{
				1705	token->type = END_OF_RE;
				1706	return 0;
				1707	}
				1708
				1709	c = re_string_peek_byte (input, 0);
				1710	token->opr.c = c;
				1711
				1712	token->word_char = 0;
				1713	#ifdef RE_ENABLE_I18N
				1714	token->mb_partial = 0;
				1715	if (input->mb_cur_max > 1 &&
				1716	!re_string_first_byte (input, re_string_cur_idx (input)))
				1717	{
				1718	token->type = CHARACTER;
				1719	token->mb_partial = 1;
				1720	return 1;
				1721	}
				1722	#endif
				1723	if (c == '\\')
				1724	{
				1725	unsigned char c2;
				1726	if (re_string_cur_idx (input) + 1 >= re_string_length (input))
				1727	{
				1728	token->type = BACK_SLASH;
				1729	return 1;
				1730	}
				1731
				1732	c2 = re_string_peek_byte_case (input, 1);
				1733	token->opr.c = c2;
				1734	token->type = CHARACTER;
				1735	#ifdef RE_ENABLE_I18N
				1736	if (input->mb_cur_max > 1)
				1737	{
				1738	wint_t wc = re_string_wchar_at (input,
				1739	re_string_cur_idx (input) + 1);
				1740	token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
				1741	}
				1742	else
				1743	#endif
				1744	token->word_char = IS_WORD_CHAR (c2) != 0;
				1745
				1746	switch (c2)
				1747	{
				1748	case '\|':
				1749	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
				1750	token->type = OP_ALT;
				1751	break;
				1752	case '1': case '2': case '3': case '4': case '5':
				1753	case '6': case '7': case '8': case '9':
				1754	if (!(syntax & RE_NO_BK_REFS))
				1755	{
				1756	token->type = OP_BACK_REF;
				1757	token->opr.idx = c2 - '1';
				1758	}
				1759	break;
				1760	case '<':
				1761	if (!(syntax & RE_NO_GNU_OPS))
				1762	{
				1763	token->type = ANCHOR;
				1764	token->opr.ctx_type = WORD_FIRST;
				1765	}
				1766	break;
				1767	case '>':
				1768	if (!(syntax & RE_NO_GNU_OPS))
				1769	{
				1770	token->type = ANCHOR;
				1771	token->opr.ctx_type = WORD_LAST;
				1772	}
				1773	break;
				1774	case 'b':
				1775	if (!(syntax & RE_NO_GNU_OPS))
				1776	{
				1777	token->type = ANCHOR;
				1778	token->opr.ctx_type = WORD_DELIM;
				1779	}
				1780	break;
				1781	case 'B':
				1782	if (!(syntax & RE_NO_GNU_OPS))
				1783	{
				1784	token->type = ANCHOR;
				1785	token->opr.ctx_type = NOT_WORD_DELIM;
				1786	}
				1787	break;
				1788	case 'w':
				1789	if (!(syntax & RE_NO_GNU_OPS))
				1790	token->type = OP_WORD;
				1791	break;
				1792	case 'W':
				1793	if (!(syntax & RE_NO_GNU_OPS))
				1794	token->type = OP_NOTWORD;
				1795	break;
				1796	case 's':
				1797	if (!(syntax & RE_NO_GNU_OPS))
				1798	token->type = OP_SPACE;
				1799	break;
				1800	case 'S':
				1801	if (!(syntax & RE_NO_GNU_OPS))
				1802	token->type = OP_NOTSPACE;
				1803	break;
				1804	case '`':
				1805	if (!(syntax & RE_NO_GNU_OPS))
				1806	{
				1807	token->type = ANCHOR;
				1808	token->opr.ctx_type = BUF_FIRST;
				1809	}
				1810	break;
				1811	case '\'':
				1812	if (!(syntax & RE_NO_GNU_OPS))
				1813	{
				1814	token->type = ANCHOR;
				1815	token->opr.ctx_type = BUF_LAST;
				1816	}
				1817	break;
				1818	case '(':
				1819	if (!(syntax & RE_NO_BK_PARENS))
				1820	token->type = OP_OPEN_SUBEXP;
				1821	break;
				1822	case ')':
				1823	if (!(syntax & RE_NO_BK_PARENS))
				1824	token->type = OP_CLOSE_SUBEXP;
				1825	break;
				1826	case '+':
				1827	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
				1828	token->type = OP_DUP_PLUS;
				1829	break;
				1830	case '?':
				1831	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
				1832	token->type = OP_DUP_QUESTION;
				1833	break;
				1834	case '{':
				1835	if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
				1836	token->type = OP_OPEN_DUP_NUM;
				1837	break;
				1838	case '}':
				1839	if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
				1840	token->type = OP_CLOSE_DUP_NUM;
				1841	break;
				1842	default:
				1843	break;
				1844	}
				1845	return 2;
				1846	}
				1847
				1848	token->type = CHARACTER;
				1849	#ifdef RE_ENABLE_I18N
				1850	if (input->mb_cur_max > 1)
				1851	{
				1852	wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
				1853	token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
				1854	}
				1855	else
				1856	#endif
				1857	token->word_char = IS_WORD_CHAR (token->opr.c);
				1858
				1859	switch (c)
				1860	{
				1861	case '\n':
				1862	if (syntax & RE_NEWLINE_ALT)
				1863	token->type = OP_ALT;
				1864	break;
				1865	case '\|':
				1866	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
				1867	token->type = OP_ALT;
				1868	break;
				1869	case '*':
				1870	token->type = OP_DUP_ASTERISK;
				1871	break;
				1872	case '+':
				1873	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
				1874	token->type = OP_DUP_PLUS;
				1875	break;
				1876	case '?':
				1877	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
				1878	token->type = OP_DUP_QUESTION;
				1879	break;
				1880	case '{':
				1881	if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
				1882	token->type = OP_OPEN_DUP_NUM;
				1883	break;
				1884	case '}':
				1885	if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
				1886	token->type = OP_CLOSE_DUP_NUM;
				1887	break;
				1888	case '(':
				1889	if (syntax & RE_NO_BK_PARENS)
				1890	token->type = OP_OPEN_SUBEXP;
				1891	break;
				1892	case ')':
				1893	if (syntax & RE_NO_BK_PARENS)
				1894	token->type = OP_CLOSE_SUBEXP;
				1895	break;
				1896	case '[':
				1897	token->type = OP_OPEN_BRACKET;
				1898	break;
				1899	case '.':
				1900	token->type = OP_PERIOD;
				1901	break;
				1902	case '^':
				1903	if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS \| RE_CARET_ANCHORS_HERE)) &&
				1904	re_string_cur_idx (input) != 0)
				1905	{
				1906	char prev = re_string_peek_byte (input, -1);
				1907	if (!(syntax & RE_NEWLINE_ALT) \|\| prev != '\n')
				1908	break;
				1909	}
				1910	token->type = ANCHOR;
				1911	token->opr.ctx_type = LINE_FIRST;
				1912	break;
				1913	case '$':
				1914	if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
				1915	re_string_cur_idx (input) + 1 != re_string_length (input))
				1916	{
				1917	re_token_t next;
				1918	re_string_skip_bytes (input, 1);
				1919	peek_token (&next, input, syntax);
				1920	re_string_skip_bytes (input, -1);
				1921	if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
				1922	break;
				1923	}
				1924	token->type = ANCHOR;
				1925	token->opr.ctx_type = LINE_LAST;
				1926	break;
				1927	default:
				1928	break;
				1929	}
				1930	return 1;
				1931	}
				1932
				1933	/* Peek a token from INPUT, and return the length of the token.
				1934	We must not use this function out of bracket expressions. */
				1935
				1936	static int
				1937	internal_function
				1938	peek_token_bracket (re_token_t token, re_string_t input, reg_syntax_t syntax)
				1939	{
				1940	unsigned char c;
				1941	if (re_string_eoi (input))
				1942	{
				1943	token->type = END_OF_RE;
				1944	return 0;
				1945	}
				1946	c = re_string_peek_byte (input, 0);
				1947	token->opr.c = c;
				1948
				1949	#ifdef RE_ENABLE_I18N
				1950	if (input->mb_cur_max > 1 &&
				1951	!re_string_first_byte (input, re_string_cur_idx (input)))
				1952	{
				1953	token->type = CHARACTER;
				1954	return 1;
				1955	}
				1956	#endif
				1957
				1958	if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
				1959	&& re_string_cur_idx (input) + 1 < re_string_length (input))
				1960	{
				1961	/* In this case, '\' escape a character. */
				1962	unsigned char c2;
				1963	re_string_skip_bytes (input, 1);
				1964	c2 = re_string_peek_byte (input, 0);
				1965	token->opr.c = c2;
				1966	token->type = CHARACTER;
				1967	return 1;
				1968	}
				1969	if (c == '[') /* '[' is a special char in a bracket exps. */
				1970	{
				1971	unsigned char c2;
				1972	int token_len;
				1973	if (re_string_cur_idx (input) + 1 < re_string_length (input))
				1974	c2 = re_string_peek_byte (input, 1);
				1975	else
				1976	c2 = 0;
				1977	token->opr.c = c2;
				1978	token_len = 2;
				1979	switch (c2)
				1980	{
				1981	case '.':
				1982	token->type = OP_OPEN_COLL_ELEM;
				1983	break;
				1984	case '=':
				1985	token->type = OP_OPEN_EQUIV_CLASS;
				1986	break;
				1987	case ':':
				1988	if (syntax & RE_CHAR_CLASSES)
				1989	{
				1990	token->type = OP_OPEN_CHAR_CLASS;
				1991	break;
				1992	}
				1993	/* else fall through. */
				1994	default:
				1995	token->type = CHARACTER;
				1996	token->opr.c = c;
				1997	token_len = 1;
				1998	break;
				1999	}
				2000	return token_len;
				2001	}
				2002	switch (c)
				2003	{
				2004	case '-':
				2005	token->type = OP_CHARSET_RANGE;
				2006	break;
				2007	case ']':
				2008	token->type = OP_CLOSE_BRACKET;
				2009	break;
				2010	case '^':
				2011	token->type = OP_NON_MATCH_LIST;
				2012	break;
				2013	default:
				2014	token->type = CHARACTER;
				2015	}
				2016	return 1;
				2017	}
				2018
				2019	/* Functions for parser. */
				2020
				2021	/* Entry point of the parser.
				2022	Parse the regular expression REGEXP and return the structure tree.
				2023	If an error is occured, ERR is set by error code, and return NULL.
				2024	This function build the following tree, from regular expression <reg_exp>:
				2025	CAT
				2026	/ \
				2027	/ \
				2028	<reg_exp> EOR
				2029
				2030	CAT means concatenation.
				2031	EOR means end of regular expression. */
				2032
				2033	static bin_tree_t *
				2034	parse (re_string_t regexp, regex_t preg, reg_syntax_t syntax,
				2035	reg_errcode_t *err)
				2036	{
				2037	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
				2038	bin_tree_t tree, eor, *root;
				2039	re_token_t current_token;
				2040	dfa->syntax = syntax;
				2041	fetch_token (&current_token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
				2042	tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
				2043	if (BE (*err != REG_NOERROR && tree == NULL, 0))
				2044	return NULL;
				2045	eor = create_tree (dfa, NULL, NULL, END_OF_RE);
				2046	if (tree != NULL)
				2047	root = create_tree (dfa, tree, eor, CONCAT);
				2048	else
				2049	root = eor;
				2050	if (BE (eor == NULL \|\| root == NULL, 0))
				2051	{
				2052	*err = REG_ESPACE;
				2053	return NULL;
				2054	}
				2055	return root;
				2056	}
				2057
				2058	/* This function build the following tree, from regular expression
				2059	<branch1>\|<branch2>:
				2060	ALT
				2061	/ \
				2062	/ \
				2063	<branch1> <branch2>
				2064
				2065	ALT means alternative, which represents the operator `\|'. */
				2066
				2067	static bin_tree_t *
				2068	parse_reg_exp (re_string_t regexp, regex_t preg, re_token_t *token,
				2069	reg_syntax_t syntax, int nest, reg_errcode_t *err)
				2070	{
				2071	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
				2072	bin_tree_t tree, branch = NULL;
				2073	tree = parse_branch (regexp, preg, token, syntax, nest, err);
				2074	if (BE (*err != REG_NOERROR && tree == NULL, 0))
				2075	return NULL;
				2076
				2077	while (token->type == OP_ALT)
				2078	{
				2079	fetch_token (token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
				2080	if (token->type != OP_ALT && token->type != END_OF_RE
				2081	&& (nest == 0 \|\| token->type != OP_CLOSE_SUBEXP))
				2082	{
				2083	branch = parse_branch (regexp, preg, token, syntax, nest, err);
				2084	if (BE (*err != REG_NOERROR && branch == NULL, 0))
				2085	return NULL;
				2086	}
				2087	else
				2088	branch = NULL;
				2089	tree = create_tree (dfa, tree, branch, OP_ALT);
				2090	if (BE (tree == NULL, 0))
				2091	{
				2092	*err = REG_ESPACE;
				2093	return NULL;
				2094	}
				2095	}
				2096	return tree;
				2097	}
				2098
				2099	/* This function build the following tree, from regular expression
				2100	<exp1><exp2>:
				2101	CAT
				2102	/ \
				2103	/ \
				2104	<exp1> <exp2>
				2105
				2106	CAT means concatenation. */
				2107
				2108	static bin_tree_t *
				2109	parse_branch (re_string_t regexp, regex_t preg, re_token_t *token,
				2110	reg_syntax_t syntax, int nest, reg_errcode_t *err)
				2111	{
				2112	bin_tree_t tree, exp;
				2113	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
				2114	tree = parse_expression (regexp, preg, token, syntax, nest, err);
				2115	if (BE (*err != REG_NOERROR && tree == NULL, 0))
				2116	return NULL;
				2117
				2118	while (token->type != OP_ALT && token->type != END_OF_RE
				2119	&& (nest == 0 \|\| token->type != OP_CLOSE_SUBEXP))
				2120	{
				2121	exp = parse_expression (regexp, preg, token, syntax, nest, err);
				2122	if (BE (*err != REG_NOERROR && exp == NULL, 0))
				2123	{
				2124	return NULL;
				2125	}
				2126	if (tree != NULL && exp != NULL)
				2127	{
				2128	tree = create_tree (dfa, tree, exp, CONCAT);
				2129	if (tree == NULL)
				2130	{
				2131	*err = REG_ESPACE;
				2132	return NULL;
				2133	}
				2134	}
				2135	else if (tree == NULL)
				2136	tree = exp;
				2137	/* Otherwise exp == NULL, we don't need to create new tree. */
				2138	}
				2139	return tree;
				2140	}
				2141
				2142	/* This function build the following tree, from regular expression a*:
				2143	*
				2144	\|
				2145	a
				2146	*/
				2147
				2148	static bin_tree_t *
				2149	parse_expression (re_string_t regexp, regex_t preg, re_token_t *token,
				2150	reg_syntax_t syntax, int nest, reg_errcode_t *err)
				2151	{
				2152	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
				2153	bin_tree_t *tree;
				2154	switch (token->type)
				2155	{
				2156	case CHARACTER:
				2157	tree = create_token_tree (dfa, NULL, NULL, token);
				2158	if (BE (tree == NULL, 0))
				2159	{
				2160	*err = REG_ESPACE;
				2161	return NULL;
				2162	}
				2163	#ifdef RE_ENABLE_I18N
				2164	if (dfa->mb_cur_max > 1)
				2165	{
				2166	while (!re_string_eoi (regexp)
				2167	&& !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
				2168	{
				2169	bin_tree_t *mbc_remain;
				2170	fetch_token (token, regexp, syntax);
				2171	mbc_remain = create_token_tree (dfa, NULL, NULL, token);
				2172	tree = create_tree (dfa, tree, mbc_remain, CONCAT);
				2173	if (BE (mbc_remain == NULL \|\| tree == NULL, 0))
				2174	{
				2175	*err = REG_ESPACE;
				2176	return NULL;
				2177	}
				2178	}
				2179	}
				2180	#endif
				2181	break;
				2182	case OP_OPEN_SUBEXP:
				2183	tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
				2184	if (BE (*err != REG_NOERROR && tree == NULL, 0))
				2185	return NULL;
				2186	break;
				2187	case OP_OPEN_BRACKET:
				2188	tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
				2189	if (BE (*err != REG_NOERROR && tree == NULL, 0))
				2190	return NULL;
				2191	break;
				2192	case OP_BACK_REF:
				2193	if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
				2194	{
				2195	*err = REG_ESUBREG;
				2196	return NULL;
				2197	}
				2198	dfa->used_bkref_map \|= 1 << token->opr.idx;
				2199	tree = create_token_tree (dfa, NULL, NULL, token);
				2200	if (BE (tree == NULL, 0))
				2201	{
				2202	*err = REG_ESPACE;
				2203	return NULL;
				2204	}
				2205	++dfa->nbackref;
				2206	dfa->has_mb_node = 1;
				2207	break;
				2208	case OP_OPEN_DUP_NUM:
				2209	if (syntax & RE_CONTEXT_INVALID_DUP)
				2210	{
				2211	*err = REG_BADRPT;
				2212	return NULL;
				2213	}
				2214	/* FALLTHROUGH */
				2215	case OP_DUP_ASTERISK:
				2216	case OP_DUP_PLUS:
				2217	case OP_DUP_QUESTION:
				2218	if (syntax & RE_CONTEXT_INVALID_OPS)
				2219	{
				2220	*err = REG_BADRPT;
				2221	return NULL;
				2222	}
				2223	else if (syntax & RE_CONTEXT_INDEP_OPS)
				2224	{
				2225	fetch_token (token, regexp, syntax);
				2226	return parse_expression (regexp, preg, token, syntax, nest, err);
				2227	}
				2228	/* else fall through */
				2229	case OP_CLOSE_SUBEXP:
				2230	if ((token->type == OP_CLOSE_SUBEXP) &&
				2231	!(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
				2232	{
				2233	*err = REG_ERPAREN;
				2234	return NULL;
				2235	}
				2236	/* else fall through */
				2237	case OP_CLOSE_DUP_NUM:
				2238	/* We treat it as a normal character. */
				2239
				2240	/* Then we can these characters as normal characters. */
				2241	token->type = CHARACTER;
				2242	/* mb_partial and word_char bits should be initialized already
				2243	by peek_token. */
				2244	tree = create_token_tree (dfa, NULL, NULL, token);
				2245	if (BE (tree == NULL, 0))
				2246	{
				2247	*err = REG_ESPACE;
				2248	return NULL;
				2249	}
				2250	break;
				2251	case ANCHOR:
				2252	if ((token->opr.ctx_type
				2253	& (WORD_DELIM \| NOT_WORD_DELIM \| WORD_FIRST \| WORD_LAST))
				2254	&& dfa->word_ops_used == 0)
				2255	init_word_char (dfa);
				2256	if (token->opr.ctx_type == WORD_DELIM
				2257	\|\| token->opr.ctx_type == NOT_WORD_DELIM)
				2258	{
				2259	bin_tree_t tree_first, tree_last;
				2260	if (token->opr.ctx_type == WORD_DELIM)
				2261	{
				2262	token->opr.ctx_type = WORD_FIRST;
				2263	tree_first = create_token_tree (dfa, NULL, NULL, token);
				2264	token->opr.ctx_type = WORD_LAST;
				2265	}
				2266	else
				2267	{
				2268	token->opr.ctx_type = INSIDE_WORD;
				2269	tree_first = create_token_tree (dfa, NULL, NULL, token);
				2270	token->opr.ctx_type = INSIDE_NOTWORD;
				2271	}
				2272	tree_last = create_token_tree (dfa, NULL, NULL, token);
				2273	tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
				2274	if (BE (tree_first == NULL \|\| tree_last == NULL \|\| tree == NULL, 0))
				2275	{
				2276	*err = REG_ESPACE;
				2277	return NULL;
				2278	}
				2279	}
				2280	else
				2281	{
				2282	tree = create_token_tree (dfa, NULL, NULL, token);
				2283	if (BE (tree == NULL, 0))
				2284	{
				2285	*err = REG_ESPACE;
				2286	return NULL;
				2287	}
				2288	}
				2289	/* We must return here, since ANCHORs can't be followed
				2290	by repetition operators.
				2291	eg. RE"^" is invalid or "<ANCHOR(^)><CHAR()>",
				2292	it must not be "<ANCHOR(^)><REPEAT()>". /
				2293	fetch_token (token, regexp, syntax);
				2294	return tree;
				2295	case OP_PERIOD:
				2296	tree = create_token_tree (dfa, NULL, NULL, token);
				2297	if (BE (tree == NULL, 0))
				2298	{
				2299	*err = REG_ESPACE;
				2300	return NULL;
				2301	}
				2302	if (dfa->mb_cur_max > 1)
				2303	dfa->has_mb_node = 1;
				2304	break;
				2305	case OP_WORD:
				2306	case OP_NOTWORD:
				2307	tree = build_charclass_op (dfa, regexp->trans,
				2308	(const unsigned char *) "alnum",
				2309	(const unsigned char *) "_",
				2310	token->type == OP_NOTWORD, err);
				2311	if (BE (*err != REG_NOERROR && tree == NULL, 0))
				2312	return NULL;
				2313	break;
				2314	case OP_SPACE:
				2315	case OP_NOTSPACE:
				2316	tree = build_charclass_op (dfa, regexp->trans,
				2317	(const unsigned char *) "space",
				2318	(const unsigned char *) "",
				2319	token->type == OP_NOTSPACE, err);
				2320	if (BE (*err != REG_NOERROR && tree == NULL, 0))
				2321	return NULL;
				2322	break;
				2323	case OP_ALT:
				2324	case END_OF_RE:
				2325	return NULL;
				2326	case BACK_SLASH:
				2327	*err = REG_EESCAPE;
				2328	return NULL;
				2329	default:
				2330	/* Must not happen? */
				2331	#ifdef DEBUG
				2332	assert (0);
				2333	#endif
				2334	return NULL;
				2335	}
				2336	fetch_token (token, regexp, syntax);
				2337
				2338	while (token->type == OP_DUP_ASTERISK \|\| token->type == OP_DUP_PLUS
				2339	\|\| token->type == OP_DUP_QUESTION \|\| token->type == OP_OPEN_DUP_NUM)
				2340	{
				2341	tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
				2342	if (BE (*err != REG_NOERROR && tree == NULL, 0))
				2343	return NULL;
				2344	/* In BRE consecutive duplications are not allowed. */
				2345	if ((syntax & RE_CONTEXT_INVALID_DUP)
				2346	&& (token->type == OP_DUP_ASTERISK
				2347	\|\| token->type == OP_OPEN_DUP_NUM))
				2348	{
				2349	*err = REG_BADRPT;
				2350	return NULL;
				2351	}
				2352	}
				2353
				2354	return tree;
				2355	}
				2356
				2357	/* This function build the following tree, from regular expression
				2358	(<reg_exp>):
				2359	SUBEXP
				2360	\|
				2361	<reg_exp>
				2362	*/
				2363
				2364	static bin_tree_t *
				2365	parse_sub_exp (re_string_t regexp, regex_t preg, re_token_t *token,
				2366	reg_syntax_t syntax, int nest, reg_errcode_t *err)
				2367	{
				2368	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
				2369	bin_tree_t *tree;
				2370	size_t cur_nsub;
				2371	cur_nsub = preg->re_nsub++;
				2372
				2373	fetch_token (token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
				2374
				2375	/* The subexpression may be a null string. */
				2376	if (token->type == OP_CLOSE_SUBEXP)
				2377	tree = NULL;
				2378	else
				2379	{
				2380	tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
				2381	if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
				2382	*err = REG_EPAREN;
				2383	if (BE (*err != REG_NOERROR, 0))
				2384	return NULL;
				2385	}
				2386
				2387	if (cur_nsub <= '9' - '1')
				2388	dfa->completed_bkref_map \|= 1 << cur_nsub;
				2389
				2390	tree = create_tree (dfa, tree, NULL, SUBEXP);
				2391	if (BE (tree == NULL, 0))
				2392	{
				2393	*err = REG_ESPACE;
				2394	return NULL;
				2395	}
				2396	tree->token.opr.idx = cur_nsub;
				2397	return tree;
				2398	}
				2399
				2400	/* This function parse repetition operators like "", "+", "{1,3}" etc. /
				2401
				2402	static bin_tree_t *
				2403	parse_dup_op (bin_tree_t elem, re_string_t regexp, re_dfa_t *dfa,
				2404	re_token_t token, reg_syntax_t syntax, reg_errcode_t err)
				2405	{
				2406	bin_tree_t tree = NULL, old_tree = NULL;
				2407	int i, start, end, start_idx = re_string_cur_idx (regexp);
				2408	re_token_t start_token = *token;
				2409
				2410	if (token->type == OP_OPEN_DUP_NUM)
				2411	{
				2412	end = 0;
				2413	start = fetch_number (regexp, token, syntax);
				2414	if (start == -1)
				2415	{
				2416	if (token->type == CHARACTER && token->opr.c == ',')
				2417	start = 0; /* We treat "{,m}" as "{0,m}". */
				2418	else
				2419	{
				2420	err = REG_BADBR; / <re>{} is invalid. */
				2421	return NULL;
				2422	}
				2423	}
				2424	if (BE (start != -2, 1))
				2425	{
				2426	/* We treat "{n}" as "{n,n}". */
				2427	end = ((token->type == OP_CLOSE_DUP_NUM) ? start
				2428	: ((token->type == CHARACTER && token->opr.c == ',')
				2429	? fetch_number (regexp, token, syntax) : -2));
				2430	}
				2431	if (BE (start == -2 \|\| end == -2, 0))
				2432	{
				2433	/* Invalid sequence. */
				2434	if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
				2435	{
				2436	if (token->type == END_OF_RE)
				2437	*err = REG_EBRACE;
				2438	else
				2439	*err = REG_BADBR;
				2440
				2441	return NULL;
				2442	}
				2443
				2444	/* If the syntax bit is set, rollback. */
				2445	re_string_set_index (regexp, start_idx);
				2446	*token = start_token;
				2447	token->type = CHARACTER;
				2448	/* mb_partial and word_char bits should be already initialized by
				2449	peek_token. */
				2450	return elem;
				2451	}
				2452
				2453	if (BE (end != -1 && start > end, 0))
				2454	{
				2455	/* First number greater than second. */
				2456	*err = REG_BADBR;
				2457	return NULL;
				2458	}
				2459	}
				2460	else
				2461	{
				2462	start = (token->type == OP_DUP_PLUS) ? 1 : 0;
				2463	end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
				2464	}
				2465
				2466	fetch_token (token, regexp, syntax);
				2467
				2468	if (BE (elem == NULL, 0))
				2469	return NULL;
				2470	if (BE (start == 0 && end == 0, 0))
				2471	{
				2472	postorder (elem, free_tree, NULL);
				2473	return NULL;
				2474	}
				2475
				2476	/* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
				2477	if (BE (start > 0, 0))
				2478	{
				2479	tree = elem;
				2480	for (i = 2; i <= start; ++i)
				2481	{
				2482	elem = duplicate_tree (elem, dfa);
				2483	tree = create_tree (dfa, tree, elem, CONCAT);
				2484	if (BE (elem == NULL \|\| tree == NULL, 0))
				2485	goto parse_dup_op_espace;
				2486	}
				2487
				2488	if (start == end)
				2489	return tree;
				2490
				2491	/* Duplicate ELEM before it is marked optional. */
				2492	elem = duplicate_tree (elem, dfa);
				2493	old_tree = tree;
				2494	}
				2495	else
				2496	old_tree = NULL;
				2497
				2498	if (elem->token.type == SUBEXP)
				2499	postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
				2500
				2501	tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
				2502	if (BE (tree == NULL, 0))
				2503	goto parse_dup_op_espace;
				2504
				2505	/* This loop is actually executed only when end != -1,
				2506	to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
				2507	already created the start+1-th copy. */
				2508	for (i = start + 2; i <= end; ++i)
				2509	{
				2510	elem = duplicate_tree (elem, dfa);
				2511	tree = create_tree (dfa, tree, elem, CONCAT);
				2512	if (BE (elem == NULL \|\| tree == NULL, 0))
				2513	goto parse_dup_op_espace;
				2514
				2515	tree = create_tree (dfa, tree, NULL, OP_ALT);
				2516	if (BE (tree == NULL, 0))
				2517	goto parse_dup_op_espace;
				2518	}
				2519
				2520	if (old_tree)
				2521	tree = create_tree (dfa, old_tree, tree, CONCAT);
				2522
				2523	return tree;
				2524
				2525	parse_dup_op_espace:
				2526	*err = REG_ESPACE;
				2527	return NULL;
				2528	}
				2529
				2530	/* Size of the names for collating symbol/equivalence_class/character_class.
				2531	I'm not sure, but maybe enough. */
				2532	#define BRACKET_NAME_BUF_SIZE 32
				2533
				2534	#if 1
				2535	/* Local function for parse_bracket_exp only used in case of NOT glibc.
				2536	Build the range expression which starts from START_ELEM, and ends
				2537	at END_ELEM. The result are written to MBCSET and SBCSET.
				2538	RANGE_ALLOC is the allocated size of mbcset->range_starts, and
				2539	mbcset->range_ends, is a pointer argument sinse we may
				2540	update it. */
				2541
				2542	static reg_errcode_t
				2543	internal_function
				2544	# ifdef RE_ENABLE_I18N
				2545	build_range_exp (bitset_t sbcset, re_charset_t mbcset, int range_alloc,
				2546	bracket_elem_t start_elem, bracket_elem_t end_elem)
				2547	# else
				2548	build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
				2549	bracket_elem_t *end_elem)
				2550	# endif
				2551	{
				2552	unsigned int start_ch, end_ch;
				2553	/* Equivalence Classes and Character Classes can't be a range start/end. */
				2554	if (BE (start_elem->type == EQUIV_CLASS \|\| start_elem->type == CHAR_CLASS
				2555	\|\| end_elem->type == EQUIV_CLASS \|\| end_elem->type == CHAR_CLASS,
				2556	0))
				2557	return REG_ERANGE;
				2558
				2559	/* We can handle no multi character collating elements without libc
				2560	support. */
				2561	if (BE ((start_elem->type == COLL_SYM
				2562	&& strlen ((char *) start_elem->opr.name) > 1)
				2563	\|\| (end_elem->type == COLL_SYM
				2564	&& strlen ((char *) end_elem->opr.name) > 1), 0))
				2565	return REG_ECOLLATE;
				2566
				2567	# ifdef RE_ENABLE_I18N
				2568	{
				2569	wchar_t wc;
				2570	wint_t start_wc;
				2571	wint_t end_wc;
				2572	wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
				2573
				2574	start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
				2575	: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
				2576	: 0));
				2577	end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
				2578	: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
				2579	: 0));
				2580	start_wc = ((start_elem->type == SB_CHAR \|\| start_elem->type == COLL_SYM)
				2581	? __btowc (start_ch) : start_elem->opr.wch);
				2582	end_wc = ((end_elem->type == SB_CHAR \|\| end_elem->type == COLL_SYM)
				2583	? __btowc (end_ch) : end_elem->opr.wch);
				2584	if (start_wc == WEOF \|\| end_wc == WEOF)
				2585	return REG_ECOLLATE;
				2586	cmp_buf[0] = start_wc;
				2587	cmp_buf[4] = end_wc;
				2588	if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
				2589	return REG_ERANGE;
				2590
				2591	/* Got valid collation sequence values, add them as a new entry.
				2592	However, for !glibc we have no collation elements: if the
				2593	character set is single byte, the single byte character set
				2594	that we build below suffices. parse_bracket_exp passes
				2595	no MBCSET if dfa->mb_cur_max == 1. */
				2596	if (mbcset)
				2597	{
				2598	/* Check the space of the arrays. */
				2599	if (BE (*range_alloc == mbcset->nranges, 0))
				2600	{
				2601	/* There is not enough space, need realloc. */
				2602	wchar_t new_array_start, new_array_end;
				2603	int new_nranges;
				2604
				2605	/* +1 in case of mbcset->nranges is 0. */
				2606	new_nranges = 2 * mbcset->nranges + 1;
				2607	/* Use realloc since mbcset->range_starts and mbcset->range_ends
				2608	are NULL if range_alloc == 0. /
				2609	new_array_start = re_realloc (mbcset->range_starts, wchar_t,
				2610	new_nranges);
				2611	new_array_end = re_realloc (mbcset->range_ends, wchar_t,
				2612	new_nranges);
				2613
				2614	if (BE (new_array_start == NULL \|\| new_array_end == NULL, 0))
				2615	return REG_ESPACE;
				2616
				2617	mbcset->range_starts = new_array_start;
				2618	mbcset->range_ends = new_array_end;
				2619	*range_alloc = new_nranges;
				2620	}
				2621
				2622	mbcset->range_starts[mbcset->nranges] = start_wc;
				2623	mbcset->range_ends[mbcset->nranges++] = end_wc;
				2624	}
				2625
				2626	/* Build the table for single byte characters. */
				2627	for (wc = 0; wc < SBC_MAX; ++wc)
				2628	{
				2629	cmp_buf[2] = wc;
				2630	if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
				2631	&& wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
				2632	bitset_set (sbcset, wc);
				2633	}
				2634	}
				2635	# else /* not RE_ENABLE_I18N */
				2636	{
				2637	unsigned int ch;
				2638	start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
				2639	: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
				2640	: 0));
				2641	end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
				2642	: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
				2643	: 0));
				2644	if (start_ch > end_ch)
				2645	return REG_ERANGE;
				2646	/* Build the table for single byte characters. */
				2647	for (ch = 0; ch < SBC_MAX; ++ch)
				2648	if (start_ch <= ch && ch <= end_ch)
				2649	bitset_set (sbcset, ch);
				2650	}
				2651	# endif /* not RE_ENABLE_I18N */
				2652	return REG_NOERROR;
				2653	}
				2654	#endif
				2655
				2656	#if 1
				2657	/* Helper function for parse_bracket_exp only used in case of NOT glibc.
				2658	Build the collating element which is represented by NAME.
				2659	The result are written to MBCSET and SBCSET.
				2660	COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
				2661	pointer argument since we may update it. */
				2662
				2663	static reg_errcode_t
				2664	internal_function
				2665	# ifdef RE_ENABLE_I18N
				2666	build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
				2667	int coll_sym_alloc, const unsigned char name)
				2668	# else
				2669	build_collating_symbol (bitset_t sbcset, const unsigned char *name)
				2670	# endif
				2671	{
				2672	size_t name_len = strlen ((const char *) name);
				2673	if (BE (name_len != 1, 0))
				2674	return REG_ECOLLATE;
				2675	bitset_set (sbcset, name[0]);
				2676	return REG_NOERROR;
				2677	}
				2678	#endif
				2679
				2680	/* This function parse bracket expression like "[abc]", "[a-c]",
				2681	"[[.a-a.]]" etc. */
				2682
				2683	static bin_tree_t *
				2684	parse_bracket_exp (re_string_t regexp, re_dfa_t dfa, re_token_t *token,
				2685	reg_syntax_t syntax, reg_errcode_t *err)
				2686	{
				2687	#if 0
				2688	const unsigned char *collseqmb;
				2689	const char *collseqwc;
				2690	uint32_t nrules;
				2691	int32_t table_size;
				2692	const int32_t *symb_table;
				2693	const unsigned char *extra;
				2694
				2695	/* Local function for parse_bracket_exp used in glibc.
				2696	Seek the collating symbol entry correspondings to NAME.
				2697	Return the index of the symbol in the SYMB_TABLE. */
				2698
				2699	auto __inline__ int32_t
				2700	__attribute ((always_inline))
				2701	seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
				2702	{
				2703	int32_t hash = elem_hash ((const char *) name, name_len);
				2704	int32_t elem = hash % table_size;
				2705	if (symb_table[2 * elem] != 0)
				2706	{
				2707	int32_t second = hash % (table_size - 2) + 1;
				2708
				2709	do
				2710	{
				2711	/* First compare the hashing value. */
				2712	if (symb_table[2 * elem] == hash
				2713	/* Compare the length of the name. */
				2714	&& name_len == extra[symb_table[2 * elem + 1]]
				2715	/* Compare the name. */
				2716	&& memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
				2717	name_len) == 0)
				2718	{
				2719	/* Yep, this is the entry. */
				2720	break;
				2721	}
				2722
				2723	/* Next entry. */
				2724	elem += second;
				2725	}
				2726	while (symb_table[2 * elem] != 0);
				2727	}
				2728	return elem;
				2729	}
				2730
				2731	/* Local function for parse_bracket_exp used in glibc.
				2732	Look up the collation sequence value of BR_ELEM.
				2733	Return the value if succeeded, UINT_MAX otherwise. */
				2734
				2735	auto __inline__ unsigned int
				2736	__attribute ((always_inline))
				2737	lookup_collation_sequence_value (bracket_elem_t *br_elem)
				2738	{
				2739	if (br_elem->type == SB_CHAR)
				2740	{
				2741	/*
				2742	if (MB_CUR_MAX == 1)
				2743	*/
				2744	if (nrules == 0)
				2745	return collseqmb[br_elem->opr.ch];
				2746	else
				2747	{
				2748	wint_t wc = __btowc (br_elem->opr.ch);
				2749	return __collseq_table_lookup (collseqwc, wc);
				2750	}
				2751	}
				2752	else if (br_elem->type == MB_CHAR)
				2753	{
				2754	return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
				2755	}
				2756	else if (br_elem->type == COLL_SYM)
				2757	{
				2758	size_t sym_name_len = strlen ((char *) br_elem->opr.name);
				2759	if (nrules != 0)
				2760	{
				2761	int32_t elem, idx;
				2762	elem = seek_collating_symbol_entry (br_elem->opr.name,
				2763	sym_name_len);
				2764	if (symb_table[2 * elem] != 0)
				2765	{
				2766	/* We found the entry. */
				2767	idx = symb_table[2 * elem + 1];
				2768	/* Skip the name of collating element name. */
				2769	idx += 1 + extra[idx];
				2770	/* Skip the byte sequence of the collating element. */
				2771	idx += 1 + extra[idx];
				2772	/* Adjust for the alignment. */
				2773	idx = (idx + 3) & ~3;
				2774	/* Skip the multibyte collation sequence value. */
				2775	idx += sizeof (unsigned int);
				2776	/* Skip the wide char sequence of the collating element. */
				2777	idx += sizeof (unsigned int) *
				2778	(1 + (unsigned int ) (extra + idx));
				2779	/* Return the collation sequence value. */
				2780	return (unsigned int ) (extra + idx);
				2781	}
				2782	else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
				2783	{
				2784	/* No valid character. Match it as a single byte
				2785	character. */
				2786	return collseqmb[br_elem->opr.name[0]];
				2787	}
				2788	}
				2789	else if (sym_name_len == 1)
				2790	return collseqmb[br_elem->opr.name[0]];
				2791	}
				2792	return UINT_MAX;
				2793	}
				2794
				2795	/* Local function for parse_bracket_exp used in glibc.
				2796	Build the range expression which starts from START_ELEM, and ends
				2797	at END_ELEM. The result are written to MBCSET and SBCSET.
				2798	RANGE_ALLOC is the allocated size of mbcset->range_starts, and
				2799	mbcset->range_ends, is a pointer argument sinse we may
				2800	update it. */
				2801
				2802	auto __inline__ reg_errcode_t
				2803	__attribute ((always_inline))
				2804	build_range_exp (re_charset_t *mbcset,
				2805	int *range_alloc,
				2806	bitset_t sbcset,
				2807	bracket_elem_t *start_elem,
				2808	bracket_elem_t *end_elem)
				2809	{
				2810	unsigned int ch;
				2811	uint32_t start_collseq;
				2812	uint32_t end_collseq;
				2813
				2814	/* Equivalence Classes and Character Classes can't be a range
				2815	start/end. */
				2816	if (BE (start_elem->type == EQUIV_CLASS \|\| start_elem->type == CHAR_CLASS
				2817	\|\| end_elem->type == EQUIV_CLASS \|\| end_elem->type == CHAR_CLASS,
				2818	0))
				2819	return REG_ERANGE;
				2820
				2821	start_collseq = lookup_collation_sequence_value (start_elem);
				2822	end_collseq = lookup_collation_sequence_value (end_elem);
				2823	/* Check start/end collation sequence values. */
				2824	if (BE (start_collseq == UINT_MAX \|\| end_collseq == UINT_MAX, 0))
				2825	return REG_ECOLLATE;
				2826	if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
				2827	return REG_ERANGE;
				2828
				2829	/* Got valid collation sequence values, add them as a new entry.
				2830	However, if we have no collation elements, and the character set
				2831	is single byte, the single byte character set that we
				2832	build below suffices. */
				2833	if (nrules > 0 \|\| dfa->mb_cur_max > 1)
				2834	{
				2835	/* Check the space of the arrays. */
				2836	if (BE (*range_alloc == mbcset->nranges, 0))
				2837	{
				2838	/* There is not enough space, need realloc. */
				2839	uint32_t *new_array_start;
				2840	uint32_t *new_array_end;
				2841	int new_nranges;
				2842
				2843	/* +1 in case of mbcset->nranges is 0. */
				2844	new_nranges = 2 * mbcset->nranges + 1;
				2845	new_array_start = re_realloc (mbcset->range_starts, uint32_t,
				2846	new_nranges);
				2847	new_array_end = re_realloc (mbcset->range_ends, uint32_t,
				2848	new_nranges);
				2849
				2850	if (BE (new_array_start == NULL \|\| new_array_end == NULL, 0))
				2851	return REG_ESPACE;
				2852
				2853	mbcset->range_starts = new_array_start;
				2854	mbcset->range_ends = new_array_end;
				2855	*range_alloc = new_nranges;
				2856	}
				2857
				2858	mbcset->range_starts[mbcset->nranges] = start_collseq;
				2859	mbcset->range_ends[mbcset->nranges++] = end_collseq;
				2860	}
				2861
				2862	/* Build the table for single byte characters. */
				2863	for (ch = 0; ch < SBC_MAX; ch++)
				2864	{
				2865	uint32_t ch_collseq;
				2866	/*
				2867	if (MB_CUR_MAX == 1)
				2868	*/
				2869	if (nrules == 0)
				2870	ch_collseq = collseqmb[ch];
				2871	else
				2872	ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
				2873	if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
				2874	bitset_set (sbcset, ch);
				2875	}
				2876	return REG_NOERROR;
				2877	}
				2878
				2879	/* Local function for parse_bracket_exp used in glibc.
				2880	Build the collating element which is represented by NAME.
				2881	The result are written to MBCSET and SBCSET.
				2882	COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
				2883	pointer argument sinse we may update it. */
				2884
				2885	auto __inline__ reg_errcode_t
				2886	__attribute ((always_inline))
				2887	build_collating_symbol (re_charset_t *mbcset,
				2888	int *coll_sym_alloc,
				2889	bitset_t sbcset,
				2890	const unsigned char *name)
				2891	{
				2892	int32_t elem, idx;
				2893	size_t name_len = strlen ((const char *) name);
				2894	if (nrules != 0)
				2895	{
				2896	elem = seek_collating_symbol_entry (name, name_len);
				2897	if (symb_table[2 * elem] != 0)
				2898	{
				2899	/* We found the entry. */
				2900	idx = symb_table[2 * elem + 1];
				2901	/* Skip the name of collating element name. */
				2902	idx += 1 + extra[idx];
				2903	}
				2904	else if (symb_table[2 * elem] == 0 && name_len == 1)
				2905	{
				2906	/* No valid character, treat it as a normal
				2907	character. */
				2908	bitset_set (sbcset, name[0]);
				2909	return REG_NOERROR;
				2910	}
				2911	else
				2912	return REG_ECOLLATE;
				2913
				2914	/* Got valid collation sequence, add it as a new entry. */
				2915	/* Check the space of the arrays. */
				2916	if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
				2917	{
				2918	/* Not enough, realloc it. */
				2919	/* +1 in case of mbcset->ncoll_syms is 0. */
				2920	int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
				2921	/* Use realloc since mbcset->coll_syms is NULL
				2922	if alloc == 0. /
				2923	int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
				2924	new_coll_sym_alloc);
				2925	if (BE (new_coll_syms == NULL, 0))
				2926	return REG_ESPACE;
				2927	mbcset->coll_syms = new_coll_syms;
				2928	*coll_sym_alloc = new_coll_sym_alloc;
				2929	}
				2930	mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
				2931	return REG_NOERROR;
				2932	}
				2933	else
				2934	{
				2935	if (BE (name_len != 1, 0))
				2936	return REG_ECOLLATE;
				2937	else
				2938	{
				2939	bitset_set (sbcset, name[0]);
				2940	return REG_NOERROR;
				2941	}
				2942	}
				2943	}
				2944	#endif
				2945
				2946	re_token_t br_token;
				2947	re_bitset_ptr_t sbcset;
				2948	#ifdef RE_ENABLE_I18N
				2949	re_charset_t *mbcset;
				2950	int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
				2951	int equiv_class_alloc = 0, char_class_alloc = 0;
				2952	#endif
				2953	int non_match = 0;
				2954	bin_tree_t *work_tree;
				2955	int token_len;
				2956	int first_round = 1;
				2957	#if 0
				2958	collseqmb = (const unsigned char *)
				2959	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
				2960	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
				2961	if (nrules)
				2962	{
				2963	/*
				2964	if (MB_CUR_MAX > 1)
				2965	*/
				2966	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
				2967	table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
				2968	symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
				2969	_NL_COLLATE_SYMB_TABLEMB);
				2970	extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
				2971	_NL_COLLATE_SYMB_EXTRAMB);
				2972	}
				2973	#endif
				2974	sbcset = calloc (sizeof (bitset_t), 1);
				2975	#ifdef RE_ENABLE_I18N
				2976	mbcset = calloc (sizeof (re_charset_t), 1);
				2977	#endif
				2978	#ifdef RE_ENABLE_I18N
				2979	if (BE (sbcset == NULL \|\| mbcset == NULL, 0))
				2980	#else
				2981	if (BE (sbcset == NULL, 0))
				2982	#endif
				2983	{
				2984	*err = REG_ESPACE;
				2985	return NULL;
				2986	}
				2987
				2988	token_len = peek_token_bracket (token, regexp, syntax);
				2989	if (BE (token->type == END_OF_RE, 0))
				2990	{
				2991	*err = REG_BADPAT;
				2992	goto parse_bracket_exp_free_return;
				2993	}
				2994	if (token->type == OP_NON_MATCH_LIST)
				2995	{
				2996	#ifdef RE_ENABLE_I18N
				2997	mbcset->non_match = 1;
				2998	#endif
				2999	non_match = 1;
				3000	if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
				3001	bitset_set (sbcset, '\0');
				3002	re_string_skip_bytes (regexp, token_len); /* Skip a token. */
				3003	token_len = peek_token_bracket (token, regexp, syntax);
				3004	if (BE (token->type == END_OF_RE, 0))
				3005	{
				3006	*err = REG_BADPAT;
				3007	goto parse_bracket_exp_free_return;
				3008	}
				3009	}
				3010
				3011	/* We treat the first ']' as a normal character. */
				3012	if (token->type == OP_CLOSE_BRACKET)
				3013	token->type = CHARACTER;
				3014
				3015	while (1)
				3016	{
				3017	bracket_elem_t start_elem, end_elem;
				3018	unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
				3019	unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
				3020	reg_errcode_t ret;
				3021	int token_len2 = 0, is_range_exp = 0;
				3022	re_token_t token2;
				3023
				3024	start_elem.opr.name = start_name_buf;
				3025	ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
				3026	syntax, first_round);
				3027	if (BE (ret != REG_NOERROR, 0))
				3028	{
				3029	*err = ret;
				3030	goto parse_bracket_exp_free_return;
				3031	}
				3032	first_round = 0;
				3033
				3034	/* Get information about the next token. We need it in any case. */
				3035	token_len = peek_token_bracket (token, regexp, syntax);
				3036
				3037	/* Do not check for ranges if we know they are not allowed. */
				3038	if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
				3039	{
				3040	if (BE (token->type == END_OF_RE, 0))
				3041	{
				3042	*err = REG_EBRACK;
				3043	goto parse_bracket_exp_free_return;
				3044	}
				3045	if (token->type == OP_CHARSET_RANGE)
				3046	{
				3047	re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
				3048	token_len2 = peek_token_bracket (&token2, regexp, syntax);
				3049	if (BE (token2.type == END_OF_RE, 0))
				3050	{
				3051	*err = REG_EBRACK;
				3052	goto parse_bracket_exp_free_return;
				3053	}
				3054	if (token2.type == OP_CLOSE_BRACKET)
				3055	{
				3056	/* We treat the last '-' as a normal character. */
				3057	re_string_skip_bytes (regexp, -token_len);
				3058	token->type = CHARACTER;
				3059	}
				3060	else
				3061	is_range_exp = 1;
				3062	}
				3063	}
				3064
				3065	if (is_range_exp == 1)
				3066	{
				3067	end_elem.opr.name = end_name_buf;
				3068	ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
				3069	dfa, syntax, 1);
				3070	if (BE (ret != REG_NOERROR, 0))
				3071	{
				3072	*err = ret;
				3073	goto parse_bracket_exp_free_return;
				3074	}
				3075
				3076	token_len = peek_token_bracket (token, regexp, syntax);
				3077
				3078	#if 0
				3079	*err = build_range_exp (sbcset, mbcset, &range_alloc,
				3080	&start_elem, &end_elem);
				3081	#else
				3082	# ifdef RE_ENABLE_I18N
				3083	*err = build_range_exp (sbcset,
				3084	dfa->mb_cur_max > 1 ? mbcset : NULL,
				3085	&range_alloc, &start_elem, &end_elem);
				3086	# else
				3087	*err = build_range_exp (sbcset, &start_elem, &end_elem);
				3088	# endif
				3089	#endif
				3090	if (BE (*err != REG_NOERROR, 0))
				3091	goto parse_bracket_exp_free_return;
				3092	}
				3093	else
				3094	{
				3095	switch (start_elem.type)
				3096	{
				3097	case SB_CHAR:
				3098	bitset_set (sbcset, start_elem.opr.ch);
				3099	break;
				3100	#ifdef RE_ENABLE_I18N
				3101	case MB_CHAR:
				3102	/* Check whether the array has enough space. */
				3103	if (BE (mbchar_alloc == mbcset->nmbchars, 0))
				3104	{
				3105	wchar_t *new_mbchars;
				3106	/* Not enough, realloc it. */
				3107	/* +1 in case of mbcset->nmbchars is 0. */
				3108	mbchar_alloc = 2 * mbcset->nmbchars + 1;
				3109	/* Use realloc since array is NULL if alloc == 0. /
				3110	new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
				3111	mbchar_alloc);
				3112	if (BE (new_mbchars == NULL, 0))
				3113	goto parse_bracket_exp_espace;
				3114	mbcset->mbchars = new_mbchars;
				3115	}
				3116	mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
				3117	break;
				3118	#endif /* RE_ENABLE_I18N */
				3119	case EQUIV_CLASS:
				3120	*err = build_equiv_class (sbcset,
				3121	#ifdef RE_ENABLE_I18N
				3122	mbcset, &equiv_class_alloc,
				3123	#endif
				3124	start_elem.opr.name);
				3125	if (BE (*err != REG_NOERROR, 0))
				3126	goto parse_bracket_exp_free_return;
				3127	break;
				3128	case COLL_SYM:
				3129	*err = build_collating_symbol (sbcset,
				3130	#ifdef RE_ENABLE_I18N
				3131	mbcset, &coll_sym_alloc,
				3132	#endif
				3133	start_elem.opr.name);
				3134	if (BE (*err != REG_NOERROR, 0))
				3135	goto parse_bracket_exp_free_return;
				3136	break;
				3137	case CHAR_CLASS:
				3138	*err = build_charclass (regexp->trans, sbcset,
				3139	#ifdef RE_ENABLE_I18N
				3140	mbcset, &char_class_alloc,
				3141	#endif
				3142	start_elem.opr.name, syntax);
				3143	if (BE (*err != REG_NOERROR, 0))
				3144	goto parse_bracket_exp_free_return;
				3145	break;
				3146	default:
				3147	assert (0);
				3148	break;
				3149	}
				3150	}
				3151	if (BE (token->type == END_OF_RE, 0))
				3152	{
				3153	*err = REG_EBRACK;
				3154	goto parse_bracket_exp_free_return;
				3155	}
				3156	if (token->type == OP_CLOSE_BRACKET)
				3157	break;
				3158	}
				3159
				3160	re_string_skip_bytes (regexp, token_len); /* Skip a token. */
				3161
				3162	/* If it is non-matching list. */
				3163	if (non_match)
				3164	bitset_not (sbcset);
				3165
				3166	#ifdef RE_ENABLE_I18N
				3167	/* Ensure only single byte characters are set. */
				3168	if (dfa->mb_cur_max > 1)
				3169	bitset_mask (sbcset, dfa->sb_char);
				3170
				3171	if (mbcset->nmbchars \|\| mbcset->ncoll_syms \|\| mbcset->nequiv_classes
				3172	\|\| mbcset->nranges \|\| (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
				3173	\|\| mbcset->non_match)))
				3174	{
				3175	bin_tree_t *mbc_tree;
				3176	int sbc_idx;
				3177	/* Build a tree for complex bracket. */
				3178	dfa->has_mb_node = 1;
				3179	br_token.type = COMPLEX_BRACKET;
				3180	br_token.opr.mbcset = mbcset;
				3181	mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
				3182	if (BE (mbc_tree == NULL, 0))
				3183	goto parse_bracket_exp_espace;
				3184	for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
				3185	if (sbcset[sbc_idx])
				3186	break;
				3187	/* If there are no bits set in sbcset, there is no point
				3188	of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
				3189	if (sbc_idx < BITSET_WORDS)
				3190	{
				3191	/* Build a tree for simple bracket. */
				3192	br_token.type = SIMPLE_BRACKET;
				3193	br_token.opr.sbcset = sbcset;
				3194	work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
				3195	if (BE (work_tree == NULL, 0))
				3196	goto parse_bracket_exp_espace;
				3197
				3198	/* Then join them by ALT node. */
				3199	work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
				3200	if (BE (work_tree == NULL, 0))
				3201	goto parse_bracket_exp_espace;
				3202	}
				3203	else
				3204	{
				3205	re_free (sbcset);
				3206	work_tree = mbc_tree;
				3207	}
				3208	}
				3209	else
				3210	#endif /* not RE_ENABLE_I18N */
				3211	{
				3212	#ifdef RE_ENABLE_I18N
				3213	free_charset (mbcset);
				3214	#endif
				3215	/* Build a tree for simple bracket. */
				3216	br_token.type = SIMPLE_BRACKET;
				3217	br_token.opr.sbcset = sbcset;
				3218	work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
				3219	if (BE (work_tree == NULL, 0))
				3220	goto parse_bracket_exp_espace;
				3221	}
				3222	return work_tree;
				3223
				3224	parse_bracket_exp_espace:
				3225	*err = REG_ESPACE;
				3226	parse_bracket_exp_free_return:
				3227	re_free (sbcset);
				3228	#ifdef RE_ENABLE_I18N
				3229	free_charset (mbcset);
				3230	#endif
				3231	return NULL;
				3232	}
				3233
				3234	/* Parse an element in the bracket expression. */
				3235
				3236	static reg_errcode_t
				3237	parse_bracket_element (bracket_elem_t elem, re_string_t regexp,
				3238	re_token_t token, int token_len, re_dfa_t dfa,
				3239	reg_syntax_t syntax, int accept_hyphen)
				3240	{
				3241	#ifdef RE_ENABLE_I18N
				3242	int cur_char_size;
				3243	cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
				3244	if (cur_char_size > 1)
				3245	{
				3246	elem->type = MB_CHAR;
				3247	elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
				3248	re_string_skip_bytes (regexp, cur_char_size);
				3249	return REG_NOERROR;
				3250	}
				3251	#endif /* RE_ENABLE_I18N */
				3252	re_string_skip_bytes (regexp, token_len); /* Skip a token. */
				3253	if (token->type == OP_OPEN_COLL_ELEM \|\| token->type == OP_OPEN_CHAR_CLASS
				3254	\|\| token->type == OP_OPEN_EQUIV_CLASS)
				3255	return parse_bracket_symbol (elem, regexp, token);
				3256	if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
				3257	{
				3258	/* A '-' must only appear as anything but a range indicator before
				3259	the closing bracket. Everything else is an error. */
				3260	re_token_t token2;
				3261	(void) peek_token_bracket (&token2, regexp, syntax);
				3262	if (token2.type != OP_CLOSE_BRACKET)
				3263	/* The actual error value is not standardized since this whole
				3264	case is undefined. But ERANGE makes good sense. */
				3265	return REG_ERANGE;
				3266	}
				3267	elem->type = SB_CHAR;
				3268	elem->opr.ch = token->opr.c;
				3269	return REG_NOERROR;
				3270	}
				3271
				3272	/* Parse a bracket symbol in the bracket expression. Bracket symbols are
				3273	such as [:<character_class>:], [.<collating_element>.], and
				3274	[=<equivalent_class>=]. */
				3275
				3276	static reg_errcode_t
				3277	parse_bracket_symbol (bracket_elem_t elem, re_string_t regexp,
				3278	re_token_t *token)
				3279	{
				3280	unsigned char ch, delim = token->opr.c;
				3281	int i = 0;
				3282	if (re_string_eoi(regexp))
				3283	return REG_EBRACK;
				3284	for (;; ++i)
				3285	{
				3286	if (i >= BRACKET_NAME_BUF_SIZE)
				3287	return REG_EBRACK;
				3288	if (token->type == OP_OPEN_CHAR_CLASS)
				3289	ch = re_string_fetch_byte_case (regexp);
				3290	else
				3291	ch = re_string_fetch_byte (regexp);
				3292	if (re_string_eoi(regexp))
				3293	return REG_EBRACK;
				3294	if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
				3295	break;
				3296	elem->opr.name[i] = ch;
				3297	}
				3298	re_string_skip_bytes (regexp, 1);
				3299	elem->opr.name[i] = '\0';
				3300	switch (token->type)
				3301	{
				3302	case OP_OPEN_COLL_ELEM:
				3303	elem->type = COLL_SYM;
				3304	break;
				3305	case OP_OPEN_EQUIV_CLASS:
				3306	elem->type = EQUIV_CLASS;
				3307	break;
				3308	case OP_OPEN_CHAR_CLASS:
				3309	elem->type = CHAR_CLASS;
				3310	break;
				3311	default:
				3312	break;
				3313	}
				3314	return REG_NOERROR;
				3315	}
				3316
				3317	/* Helper function for parse_bracket_exp.
				3318	Build the equivalence class which is represented by NAME.
				3319	The result are written to MBCSET and SBCSET.
				3320	EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
				3321	is a pointer argument sinse we may update it. */
				3322
				3323	static reg_errcode_t
				3324	#ifdef RE_ENABLE_I18N
				3325	build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
				3326	int equiv_class_alloc, const unsigned char name)
				3327	#else
				3328	build_equiv_class (bitset_t sbcset, const unsigned char *name)
				3329	#endif
				3330	{
				3331	#if 0
				3332	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
				3333	if (nrules != 0)
				3334	{
				3335	const int32_t table, indirect;
				3336	const unsigned char weights, extra, *cp;
				3337	unsigned char char_buf[2];
				3338	int32_t idx1, idx2;
				3339	unsigned int ch;
				3340	size_t len;
				3341	/* This #include defines a local function! */
				3342	# include <locale/weight.h>
				3343	/* Calculate the index for equivalence class. */
				3344	cp = name;
				3345	table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
				3346	weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
				3347	_NL_COLLATE_WEIGHTMB);
				3348	extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
				3349	_NL_COLLATE_EXTRAMB);
				3350	indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
				3351	_NL_COLLATE_INDIRECTMB);
				3352	idx1 = findidx (&cp);
				3353	if (BE (idx1 == 0 \|\| cp < name + strlen ((const char *) name), 0))
				3354	/* This isn't a valid character. */
				3355	return REG_ECOLLATE;
				3356
				3357	/* Build single byte matcing table for this equivalence class. */
				3358	char_buf[1] = (unsigned char) '\0';
				3359	len = weights[idx1];
				3360	for (ch = 0; ch < SBC_MAX; ++ch)
				3361	{
				3362	char_buf[0] = ch;
				3363	cp = char_buf;
				3364	idx2 = findidx (&cp);
				3365	/*
				3366	idx2 = table[ch];
				3367	*/
				3368	if (idx2 == 0)
				3369	/* This isn't a valid character. */
				3370	continue;
				3371	if (len == weights[idx2])
				3372	{
				3373	int cnt = 0;
				3374	while (cnt <= len &&
				3375	weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
				3376	++cnt;
				3377
				3378	if (cnt > len)
				3379	bitset_set (sbcset, ch);
				3380	}
				3381	}
				3382	/* Check whether the array has enough space. */
				3383	if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
				3384	{
				3385	/* Not enough, realloc it. */
				3386	/* +1 in case of mbcset->nequiv_classes is 0. */
				3387	int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
				3388	/* Use realloc since the array is NULL if alloc == 0. /
				3389	int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
				3390	int32_t,
				3391	new_equiv_class_alloc);
				3392	if (BE (new_equiv_classes == NULL, 0))
				3393	return REG_ESPACE;
				3394	mbcset->equiv_classes = new_equiv_classes;
				3395	*equiv_class_alloc = new_equiv_class_alloc;
				3396	}
				3397	mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
				3398	}
				3399	else
				3400	#endif
				3401	{
				3402	if (BE (strlen ((const char *) name) != 1, 0))
				3403	return REG_ECOLLATE;
				3404	bitset_set (sbcset, *name);
				3405	}
				3406	return REG_NOERROR;
				3407	}
				3408
				3409	/* Helper function for parse_bracket_exp.
				3410	Build the character class which is represented by NAME.
				3411	The result are written to MBCSET and SBCSET.
				3412	CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
				3413	is a pointer argument sinse we may update it. */
				3414
				3415	static reg_errcode_t
				3416	#ifdef RE_ENABLE_I18N
				3417	build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
				3418	re_charset_t mbcset, int char_class_alloc,
				3419	const unsigned char *class_name, reg_syntax_t syntax)
				3420	#else
				3421	build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
				3422	const unsigned char *class_name, reg_syntax_t syntax)
				3423	#endif
				3424	{
				3425	int i;
				3426	const char name = (const char ) class_name;
				3427
				3428	/* In case of REG_ICASE "upper" and "lower" match the both of
				3429	upper and lower cases. */
				3430	if ((syntax & RE_ICASE)
				3431	&& (strcmp (name, "upper") == 0 \|\| strcmp (name, "lower") == 0))
				3432	name = "alpha";
				3433
				3434	#ifdef RE_ENABLE_I18N
				3435	/* Check the space of the arrays. */
				3436	if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
				3437	{
				3438	/* Not enough, realloc it. */
				3439	/* +1 in case of mbcset->nchar_classes is 0. */
				3440	int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
				3441	/* Use realloc since array is NULL if alloc == 0. /
				3442	wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
				3443	new_char_class_alloc);
				3444	if (BE (new_char_classes == NULL, 0))
				3445	return REG_ESPACE;
				3446	mbcset->char_classes = new_char_classes;
				3447	*char_class_alloc = new_char_class_alloc;
				3448	}
				3449	mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
				3450	#endif /* RE_ENABLE_I18N */
				3451
				3452	#define BUILD_CHARCLASS_LOOP(ctype_func) \
				3453	do { \
				3454	if (BE (trans != NULL, 0)) \
				3455	{ \
				3456	for (i = 0; i < SBC_MAX; ++i) \
				3457	if (ctype_func (i)) \
				3458	bitset_set (sbcset, trans[i]); \
				3459	} \
				3460	else \
				3461	{ \
				3462	for (i = 0; i < SBC_MAX; ++i) \
				3463	if (ctype_func (i)) \
				3464	bitset_set (sbcset, i); \
				3465	} \
				3466	} while (0)
				3467
				3468	if (strcmp (name, "alnum") == 0)
				3469	BUILD_CHARCLASS_LOOP (isalnum);
				3470	else if (strcmp (name, "cntrl") == 0)
				3471	BUILD_CHARCLASS_LOOP (iscntrl);
				3472	else if (strcmp (name, "lower") == 0)
				3473	BUILD_CHARCLASS_LOOP (islower);
				3474	else if (strcmp (name, "space") == 0)
				3475	BUILD_CHARCLASS_LOOP (isspace);
				3476	else if (strcmp (name, "alpha") == 0)
				3477	BUILD_CHARCLASS_LOOP (isalpha);
				3478	else if (strcmp (name, "digit") == 0)
				3479	BUILD_CHARCLASS_LOOP (isdigit);
				3480	else if (strcmp (name, "print") == 0)
				3481	BUILD_CHARCLASS_LOOP (isprint);
				3482	else if (strcmp (name, "upper") == 0)
				3483	BUILD_CHARCLASS_LOOP (isupper);
				3484	else if (strcmp (name, "blank") == 0)
				3485	BUILD_CHARCLASS_LOOP (isblank);
				3486	else if (strcmp (name, "graph") == 0)
				3487	BUILD_CHARCLASS_LOOP (isgraph);
				3488	else if (strcmp (name, "punct") == 0)
				3489	BUILD_CHARCLASS_LOOP (ispunct);
				3490	else if (strcmp (name, "xdigit") == 0)
				3491	BUILD_CHARCLASS_LOOP (isxdigit);
				3492	else
				3493	return REG_ECTYPE;
				3494
				3495	return REG_NOERROR;
				3496	}
				3497
				3498	static bin_tree_t *
				3499	build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
				3500	const unsigned char *class_name,
				3501	const unsigned char *extra, int non_match,
				3502	reg_errcode_t *err)
				3503	{
				3504	re_bitset_ptr_t sbcset;
				3505	#ifdef RE_ENABLE_I18N
				3506	re_charset_t *mbcset;
				3507	int alloc = 0;
				3508	#endif
				3509	reg_errcode_t ret;
				3510	re_token_t br_token;
				3511	bin_tree_t *tree;
				3512
				3513	sbcset = calloc (sizeof (bitset_t), 1);
				3514	#ifdef RE_ENABLE_I18N
				3515	mbcset = calloc (sizeof (re_charset_t), 1);
				3516	#endif
				3517
				3518	#ifdef RE_ENABLE_I18N
				3519	if (BE (sbcset == NULL \|\| mbcset == NULL, 0))
				3520	#else
				3521	if (BE (sbcset == NULL, 0))
				3522	#endif
				3523	{
				3524	*err = REG_ESPACE;
				3525	return NULL;
				3526	}
				3527
				3528	if (non_match)
				3529	{
				3530	#ifdef RE_ENABLE_I18N
				3531	/*
				3532	if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
				3533	bitset_set(cset->sbcset, '\0');
				3534	*/
				3535	mbcset->non_match = 1;
				3536	#endif
				3537	}
				3538
				3539	/* We don't care the syntax in this case. */
				3540	ret = build_charclass (trans, sbcset,
				3541	#ifdef RE_ENABLE_I18N
				3542	mbcset, &alloc,
				3543	#endif
				3544	class_name, 0);
				3545
				3546	if (BE (ret != REG_NOERROR, 0))
				3547	{
				3548	re_free (sbcset);
				3549	#ifdef RE_ENABLE_I18N
				3550	free_charset (mbcset);
				3551	#endif
				3552	*err = ret;
				3553	return NULL;
				3554	}
				3555	/* \w match '_' also. */
				3556	for (; *extra; extra++)
				3557	bitset_set (sbcset, *extra);
				3558
				3559	/* If it is non-matching list. */
				3560	if (non_match)
				3561	bitset_not (sbcset);
				3562
				3563	#ifdef RE_ENABLE_I18N
				3564	/* Ensure only single byte characters are set. */
				3565	if (dfa->mb_cur_max > 1)
				3566	bitset_mask (sbcset, dfa->sb_char);
				3567	#endif
				3568
				3569	/* Build a tree for simple bracket. */
				3570	br_token.type = SIMPLE_BRACKET;
				3571	br_token.opr.sbcset = sbcset;
				3572	tree = create_token_tree (dfa, NULL, NULL, &br_token);
				3573	if (BE (tree == NULL, 0))
				3574	goto build_word_op_espace;
				3575
				3576	#ifdef RE_ENABLE_I18N
				3577	if (dfa->mb_cur_max > 1)
				3578	{
				3579	bin_tree_t *mbc_tree;
				3580	/* Build a tree for complex bracket. */
				3581	br_token.type = COMPLEX_BRACKET;
				3582	br_token.opr.mbcset = mbcset;
				3583	dfa->has_mb_node = 1;
				3584	mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
				3585	if (BE (mbc_tree == NULL, 0))
				3586	goto build_word_op_espace;
				3587	/* Then join them by ALT node. */
				3588	tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
				3589	if (BE (mbc_tree != NULL, 1))
				3590	return tree;
				3591	}
				3592	else
				3593	{
				3594	free_charset (mbcset);
				3595	return tree;
				3596	}
				3597	#else /* not RE_ENABLE_I18N */
				3598	return tree;
				3599	#endif
				3600
				3601	build_word_op_espace:
				3602	re_free (sbcset);
				3603	#ifdef RE_ENABLE_I18N
				3604	free_charset (mbcset);
				3605	#endif
				3606	*err = REG_ESPACE;
				3607	return NULL;
				3608	}
				3609
				3610	/* This is intended for the expressions like "a{1,3}".
				3611	Fetch a number from `input', and return the number.
				3612	Return -1, if the number field is empty like "{,1}".
				3613	Return -2, If an error is occured. */
				3614
				3615	static int
				3616	fetch_number (re_string_t input, re_token_t token, reg_syntax_t syntax)
				3617	{
				3618	int num = -1;
				3619	unsigned char c;
				3620	while (1)
				3621	{
				3622	fetch_token (token, input, syntax);
				3623	c = token->opr.c;
				3624	if (BE (token->type == END_OF_RE, 0))
				3625	return -2;
				3626	if (token->type == OP_CLOSE_DUP_NUM \|\| c == ',')
				3627	break;
				3628	num = ((token->type != CHARACTER \|\| c < '0' \|\| '9' < c \|\| num == -2)
				3629	? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));
				3630	num = (num > RE_DUP_MAX) ? -2 : num;
				3631	}
				3632	return num;
				3633	}
				3634
				3635	#ifdef RE_ENABLE_I18N
				3636	static void
				3637	free_charset (re_charset_t *cset)
				3638	{
				3639	re_free (cset->mbchars);
				3640	# if 0
				3641	re_free (cset->coll_syms);
				3642	re_free (cset->equiv_classes);
				3643	re_free (cset->range_starts);
				3644	re_free (cset->range_ends);
				3645	# endif
				3646	re_free (cset->char_classes);
				3647	re_free (cset);
				3648	}
				3649	#endif /* RE_ENABLE_I18N */
				3650
				3651	/* Functions for binary tree operation. */
				3652
				3653	/* Create a tree node. */
				3654
				3655	static bin_tree_t *
				3656	create_tree (re_dfa_t dfa, bin_tree_t left, bin_tree_t *right,
				3657	re_token_type_t type)
				3658	{
				3659	re_token_t t;
				3660	t.type = type;
				3661	return create_token_tree (dfa, left, right, &t);
				3662	}
				3663
				3664	static bin_tree_t *
				3665	create_token_tree (re_dfa_t dfa, bin_tree_t left, bin_tree_t *right,
				3666	const re_token_t *token)
				3667	{
				3668	bin_tree_t *tree;
				3669	if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
				3670	{
				3671	bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
				3672
				3673	if (storage == NULL)
				3674	return NULL;
				3675	storage->next = dfa->str_tree_storage;
				3676	dfa->str_tree_storage = storage;
				3677	dfa->str_tree_storage_idx = 0;
				3678	}
				3679	tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
				3680
				3681	tree->parent = NULL;
				3682	tree->left = left;
				3683	tree->right = right;
				3684	tree->token = *token;
				3685	tree->token.duplicated = 0;
				3686	tree->token.opt_subexp = 0;
				3687	tree->first = NULL;
				3688	tree->next = NULL;
				3689	tree->node_idx = -1;
				3690
				3691	if (left != NULL)
				3692	left->parent = tree;
				3693	if (right != NULL)
				3694	right->parent = tree;
				3695	return tree;
				3696	}
				3697
				3698	/* Mark the tree SRC as an optional subexpression.
				3699	To be called from preorder or postorder. */
				3700
				3701	static reg_errcode_t
				3702	mark_opt_subexp (void extra, bin_tree_t node)
				3703	{
				3704	int idx = (int) (long) extra;
				3705	if (node->token.type == SUBEXP && node->token.opr.idx == idx)
				3706	node->token.opt_subexp = 1;
				3707
				3708	return REG_NOERROR;
				3709	}
				3710
				3711	/* Free the allocated memory inside NODE. */
				3712
				3713	static void
				3714	free_token (re_token_t *node)
				3715	{
				3716	#ifdef RE_ENABLE_I18N
				3717	if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
				3718	free_charset (node->opr.mbcset);
				3719	else
				3720	#endif
				3721	if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
				3722	re_free (node->opr.sbcset);
				3723	}
				3724
				3725	/* Worker function for tree walking. Free the allocated memory inside NODE
				3726	and its children. */
				3727
				3728	static reg_errcode_t
				3729	free_tree (void extra, bin_tree_t node)
				3730	{
				3731	free_token (&node->token);
				3732	return REG_NOERROR;
				3733	}
				3734
				3735
				3736	/* Duplicate the node SRC, and return new node. This is a preorder
				3737	visit similar to the one implemented by the generic visitor, but
				3738	we need more infrastructure to maintain two parallel trees --- so,
				3739	it's easier to duplicate. */
				3740
				3741	static bin_tree_t *
				3742	duplicate_tree (const bin_tree_t root, re_dfa_t dfa)
				3743	{
				3744	const bin_tree_t *node;
				3745	bin_tree_t *dup_root;
				3746	bin_tree_t *p_new = &dup_root, dup_node = root->parent;
				3747
				3748	for (node = root; ; )
				3749	{
				3750	/* Create a new tree and link it back to the current parent. */
				3751	*p_new = create_token_tree (dfa, NULL, NULL, &node->token);
				3752	if (*p_new == NULL)
				3753	return NULL;
				3754	(*p_new)->parent = dup_node;
				3755	(*p_new)->token.duplicated = 1;
				3756	dup_node = *p_new;
				3757
				3758	/* Go to the left node, or up and to the right. */
				3759	if (node->left)
				3760	{
				3761	node = node->left;
				3762	p_new = &dup_node->left;
				3763	}
				3764	else
				3765	{
				3766	const bin_tree_t *prev = NULL;
				3767	while (node->right == prev \|\| node->right == NULL)
				3768	{
				3769	prev = node;
				3770	node = node->parent;
				3771	dup_node = dup_node->parent;
				3772	if (!node)
				3773	return dup_root;
				3774	}
				3775	node = node->right;
				3776	p_new = &dup_node->right;
				3777	}
				3778	}
				3779	}