Blame - ap/build/uClibc/libc/misc/regex/regexec.c - T106_DC

blob: c13c64ef485e97c21ead9cdc134374a2bbca52bc [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/* Extended regular expression matching and search library.
				2	Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
				3	This file is part of the GNU C Library.
				4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
				5
				6	The GNU C Library is free software; you can redistribute it and/or
				7	modify it under the terms of the GNU Lesser General Public
				8	License as published by the Free Software Foundation; either
				9	version 2.1 of the License, or (at your option) any later version.
				10
				11	The GNU C Library is distributed in the hope that it will be useful,
				12	but WITHOUT ANY WARRANTY; without even the implied warranty of
				13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				14	Lesser General Public License for more details.
				15
				16	You should have received a copy of the GNU Lesser General Public
				17	License along with the GNU C Library; if not, write to the Free
				18	Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
				19	02111-1307 USA. */
				20
				21	static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
				22	int n) internal_function;
				23	static void match_ctx_clean (re_match_context_t *mctx) internal_function;
				24	static void match_ctx_free (re_match_context_t *cache) internal_function;
				25	static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
				26	int str_idx, int from, int to)
				27	internal_function;
				28	static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
				29	internal_function;
				30	static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
				31	int str_idx) internal_function;
				32	static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
				33	int node, int str_idx)
				34	internal_function;
				35	static void sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
				36	re_dfastate_t **limited_sts, int last_node,
				37	int last_str_idx)
				38	internal_function;
				39	static reg_errcode_t re_search_internal (const regex_t *preg,
				40	const char *string, int length,
				41	int start, int range, int stop,
				42	size_t nmatch, regmatch_t pmatch[],
				43	int eflags) internal_function;
				44	static int re_search_2_stub (struct re_pattern_buffer *bufp,
				45	const char *string1, int length1,
				46	const char *string2, int length2,
				47	int start, int range, struct re_registers *regs,
				48	int stop, int ret_len) internal_function;
				49	static int re_search_stub (struct re_pattern_buffer *bufp,
				50	const char *string, int length, int start,
				51	int range, int stop, struct re_registers *regs,
				52	int ret_len) internal_function;
				53	static unsigned re_copy_regs (struct re_registers regs, regmatch_t pmatch,
				54	int nregs, int regs_allocated) internal_function;
				55	static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
				56	internal_function;
				57	static int check_matching (re_match_context_t *mctx, int fl_longest_match,
				58	int *p_match_first) internal_function;
				59	static int check_halt_state_context (const re_match_context_t *mctx,
				60	const re_dfastate_t *state, int idx)
				61	internal_function;
				62	static void update_regs (const re_dfa_t dfa, regmatch_t pmatch,
				63	regmatch_t *prev_idx_match, int cur_node,
				64	int cur_idx, int nmatch) internal_function;
				65	static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
				66	int str_idx, int dest_node, int nregs,
				67	regmatch_t *regs,
				68	re_node_set *eps_via_nodes)
				69	internal_function;
				70	static reg_errcode_t set_regs (const regex_t *preg,
				71	const re_match_context_t *mctx,
				72	size_t nmatch, regmatch_t *pmatch,
				73	int fl_backtrack) internal_function;
				74	static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
				75	internal_function;
				76
				77	#ifdef RE_ENABLE_I18N
				78	static int sift_states_iter_mb (const re_match_context_t *mctx,
				79	re_sift_context_t *sctx,
				80	int node_idx, int str_idx, int max_str_idx)
				81	internal_function;
				82	#endif
				83	static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
				84	re_sift_context_t *sctx)
				85	internal_function;
				86	static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
				87	re_sift_context_t *sctx, int str_idx,
				88	re_node_set *cur_dest)
				89	internal_function;
				90	static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
				91	re_sift_context_t *sctx,
				92	int str_idx,
				93	re_node_set *dest_nodes)
				94	internal_function;
				95	static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
				96	re_node_set *dest_nodes,
				97	const re_node_set *candidates)
				98	internal_function;
				99	static int check_dst_limits (const re_match_context_t *mctx,
				100	re_node_set *limits,
				101	int dst_node, int dst_idx, int src_node,
				102	int src_idx) internal_function;
				103	static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
				104	int boundaries, int subexp_idx,
				105	int from_node, int bkref_idx)
				106	internal_function;
				107	static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
				108	int limit, int subexp_idx,
				109	int node, int str_idx,
				110	int bkref_idx) internal_function;
				111	static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
				112	re_node_set *dest_nodes,
				113	const re_node_set *candidates,
				114	re_node_set *limits,
				115	struct re_backref_cache_entry *bkref_ents,
				116	int str_idx) internal_function;
				117	static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
				118	re_sift_context_t *sctx,
				119	int str_idx, const re_node_set *candidates)
				120	internal_function;
				121	static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
				122	re_dfastate_t **dst,
				123	re_dfastate_t **src, int num)
				124	internal_function;
				125	static re_dfastate_t find_recover_state (reg_errcode_t err,
				126	re_match_context_t *mctx) internal_function;
				127	static re_dfastate_t transit_state (reg_errcode_t err,
				128	re_match_context_t *mctx,
				129	re_dfastate_t *state) internal_function;
				130	static re_dfastate_t merge_state_with_log (reg_errcode_t err,
				131	re_match_context_t *mctx,
				132	re_dfastate_t *next_state)
				133	internal_function;
				134	static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
				135	re_node_set *cur_nodes,
				136	int str_idx) internal_function;
				137	#if 0
				138	static re_dfastate_t transit_state_sb (reg_errcode_t err,
				139	re_match_context_t *mctx,
				140	re_dfastate_t *pstate)
				141	internal_function;
				142	#endif
				143	#ifdef RE_ENABLE_I18N
				144	static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
				145	re_dfastate_t *pstate)
				146	internal_function;
				147	#endif
				148	static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
				149	const re_node_set *nodes)
				150	internal_function;
				151	static reg_errcode_t get_subexp (re_match_context_t *mctx,
				152	int bkref_node, int bkref_str_idx)
				153	internal_function;
				154	static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
				155	const re_sub_match_top_t *sub_top,
				156	re_sub_match_last_t *sub_last,
				157	int bkref_node, int bkref_str)
				158	internal_function;
				159	static int find_subexp_node (const re_dfa_t dfa, const re_node_set nodes,
				160	int subexp_idx, int type) internal_function;
				161	static reg_errcode_t check_arrival (re_match_context_t *mctx,
				162	state_array_t *path, int top_node,
				163	int top_str, int last_node, int last_str,
				164	int type) internal_function;
				165	static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
				166	int str_idx,
				167	re_node_set *cur_nodes,
				168	re_node_set *next_nodes)
				169	internal_function;
				170	static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
				171	re_node_set *cur_nodes,
				172	int ex_subexp, int type)
				173	internal_function;
				174	static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
				175	re_node_set *dst_nodes,
				176	int target, int ex_subexp,
				177	int type) internal_function;
				178	static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
				179	re_node_set *cur_nodes, int cur_str,
				180	int subexp_num, int type)
				181	internal_function;
				182	static int build_trtable (const re_dfa_t *dfa,
				183	re_dfastate_t *state) internal_function;
				184	#ifdef RE_ENABLE_I18N
				185	static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
				186	const re_string_t *input, int idx)
				187	internal_function;
				188	#endif
				189	static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
				190	const re_dfastate_t *state,
				191	re_node_set *states_node,
				192	bitset_t *states_ch) internal_function;
				193	static int check_node_accept (const re_match_context_t *mctx,
				194	const re_token_t *node, int idx)
				195	internal_function;
				196	static reg_errcode_t extend_buffers (re_match_context_t *mctx)
				197	internal_function;
				198
				199	/* Entry point for POSIX code. */
				200
				201	/* regexec searches for a given pattern, specified by PREG, in the
				202	string STRING.
				203
				204	If NMATCH is zero or REG_NOSUB was set in the cflags argument to
				205	`regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
				206	least NMATCH elements, and we set them to the offsets of the
				207	corresponding matched substrings.
				208
				209	EFLAGS specifies `execution flags' which affect matching: if
				210	REG_NOTBOL is set, then ^ does not match at the beginning of the
				211	string; if REG_NOTEOL is set, then $ does not match at the end.
				212
				213	We return 0 if we find a match and REG_NOMATCH if not. */
				214
				215	int
				216	regexec (const regex_t __restrict preg, const char __restrict string,
				217	size_t nmatch, regmatch_t pmatch[], int eflags)
				218	{
				219	reg_errcode_t err;
				220	int start, length;
				221	#ifdef __UCLIBC_HAS_THREADS__
				222	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
				223	#endif
				224
				225	if (eflags & ~(REG_NOTBOL \| REG_NOTEOL \| REG_STARTEND))
				226	return REG_BADPAT;
				227
				228	if (eflags & REG_STARTEND)
				229	{
				230	start = pmatch[0].rm_so;
				231	length = pmatch[0].rm_eo;
				232	}
				233	else
				234	{
				235	start = 0;
				236	length = strlen (string);
				237	}
				238
				239	__libc_lock_lock (dfa->lock);
				240	if (preg->no_sub)
				241	err = re_search_internal (preg, string, length, start, length - start,
				242	length, 0, NULL, eflags);
				243	else
				244	err = re_search_internal (preg, string, length, start, length - start,
				245	length, nmatch, pmatch, eflags);
				246	__libc_lock_unlock (dfa->lock);
				247	return err != REG_NOERROR;
				248	}
				249	libc_hidden_def(regexec)
				250
				251	/* Entry points for GNU code. */
				252
				253	/* re_match, re_search, re_match_2, re_search_2
				254
				255	The former two functions operate on STRING with length LENGTH,
				256	while the later two operate on concatenation of STRING1 and STRING2
				257	with lengths LENGTH1 and LENGTH2, respectively.
				258
				259	re_match() matches the compiled pattern in BUFP against the string,
				260	starting at index START.
				261
				262	re_search() first tries matching at index START, then it tries to match
				263	starting from index START + 1, and so on. The last start position tried
				264	is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
				265	way as re_match().)
				266
				267	The parameter STOP of re_{match,search}_2 specifies that no match exceeding
				268	the first STOP characters of the concatenation of the strings should be
				269	concerned.
				270
				271	If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
				272	and all groups is stroed in REGS. (For the "_2" variants, the offsets are
				273	computed relative to the concatenation, not relative to the individual
				274	strings.)
				275
				276	On success, re_match* functions return the length of the match, re_search*
				277	return the position of the start of the match. Return value -1 means no
				278	match was found and -2 indicates an internal error. */
				279
				280	int
				281	re_match (struct re_pattern_buffer bufp, const char string, int length,
				282	int start, struct re_registers *regs)
				283	{
				284	return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
				285	}
				286
				287	int
				288	re_search (struct re_pattern_buffer bufp, const char string, int length,
				289	int start, int range, struct re_registers *regs)
				290	{
				291	return re_search_stub (bufp, string, length, start, range, length, regs, 0);
				292	}
				293	libc_hidden_def(re_search)
				294
				295	int
				296	re_match_2 (struct re_pattern_buffer bufp, const char string1, int length1,
				297	const char *string2, int length2, int start,
				298	struct re_registers *regs, int stop)
				299	{
				300	return re_search_2_stub (bufp, string1, length1, string2, length2,
				301	start, 0, regs, stop, 1);
				302	}
				303
				304	int
				305	re_search_2 (struct re_pattern_buffer bufp, const char string1, int lenght1,
				306	const char *string2, int length2, int start, int range,
				307	struct re_registers *regs, int stop)
				308	{
				309	return re_search_2_stub (bufp, string1, lenght1, string2, length2,
				310	start, range, regs, stop, 0);
				311	}
				312	libc_hidden_def(re_search_2)
				313
				314	static int internal_function
				315	re_search_2_stub (struct re_pattern_buffer bufp, const char string1,
				316	int length1, const char *string2, int length2, int start,
				317	int range, struct re_registers *regs, int stop, int ret_len)
				318	{
				319	const char *str;
				320	int rval;
				321	int len = length1 + length2;
				322	int free_str = 0;
				323
				324	if (BE (length1 < 0 \|\| length2 < 0 \|\| stop < 0, 0))
				325	return -2;
				326
				327	/* Concatenate the strings. */
				328	if (length2 > 0)
				329	if (length1 > 0)
				330	{
				331	char *s = re_malloc (char, len);
				332
				333	if (BE (s == NULL, 0))
				334	return -2;
				335	memcpy (s, string1, length1);
				336	memcpy (s + length1, string2, length2);
				337	str = s;
				338	free_str = 1;
				339	}
				340	else
				341	str = string2;
				342	else
				343	str = string1;
				344
				345	rval = re_search_stub (bufp, str, len, start, range, stop, regs,
				346	ret_len);
				347	if (free_str)
				348	re_free ((char *) str);
				349	return rval;
				350	}
				351
				352	/* The parameters have the same meaning as those of re_search.
				353	Additional parameters:
				354	If RET_LEN is nonzero the length of the match is returned (re_match style);
				355	otherwise the position of the match is returned. */
				356
				357	static int internal_function
				358	re_search_stub (struct re_pattern_buffer bufp, const char string, int length,
				359	int start, int range, int stop, struct re_registers *regs,
				360	int ret_len)
				361	{
				362	reg_errcode_t result;
				363	regmatch_t *pmatch;
				364	int nregs, rval;
				365	int eflags = 0;
				366	#ifdef __UCLIBC_HAS_THREADS__
				367	re_dfa_t dfa = (re_dfa_t ) bufp->buffer;
				368	#endif
				369	/* Check for out-of-range. */
				370	if (BE (start < 0 \|\| start > length, 0))
				371	return -1;
				372	if (BE (start + range > length, 0))
				373	range = length - start;
				374	else if (BE (start + range < 0, 0))
				375	range = -start;
				376
				377	__libc_lock_lock (dfa->lock);
				378
				379	eflags \|= (bufp->not_bol) ? REG_NOTBOL : 0;
				380	eflags \|= (bufp->not_eol) ? REG_NOTEOL : 0;
				381
				382	/* Compile fastmap if we haven't yet. */
				383	if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
				384	re_compile_fastmap (bufp);
				385
				386	if (BE (bufp->no_sub, 0))
				387	regs = NULL;
				388
				389	/* We need at least 1 register. */
				390	if (regs == NULL)
				391	nregs = 1;
				392	else if (BE (bufp->regs_allocated == REGS_FIXED &&
				393	regs->num_regs < bufp->re_nsub + 1, 0))
				394	{
				395	nregs = regs->num_regs;
				396	if (BE (nregs < 1, 0))
				397	{
				398	/* Nothing can be copied to regs. */
				399	regs = NULL;
				400	nregs = 1;
				401	}
				402	}
				403	else
				404	nregs = bufp->re_nsub + 1;
				405	pmatch = re_malloc (regmatch_t, nregs);
				406	if (BE (pmatch == NULL, 0))
				407	{
				408	rval = -2;
				409	goto out;
				410	}
				411
				412	result = re_search_internal (bufp, string, length, start, range, stop,
				413	nregs, pmatch, eflags);
				414
				415	rval = 0;
				416
				417	/* I hope we needn't fill ther regs with -1's when no match was found. */
				418	if (result != REG_NOERROR)
				419	rval = -1;
				420	else if (regs != NULL)
				421	{
				422	/* If caller wants register contents data back, copy them. */
				423	bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
				424	bufp->regs_allocated);
				425	if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
				426	rval = -2;
				427	}
				428
				429	if (BE (rval == 0, 1))
				430	{
				431	if (ret_len)
				432	{
				433	assert (pmatch[0].rm_so == start);
				434	rval = pmatch[0].rm_eo - start;
				435	}
				436	else
				437	rval = pmatch[0].rm_so;
				438	}
				439	re_free (pmatch);
				440	out:
				441	__libc_lock_unlock (dfa->lock);
				442	return rval;
				443	}
				444
				445	static unsigned internal_function
				446	re_copy_regs (struct re_registers regs, regmatch_t pmatch, int nregs,
				447	int regs_allocated)
				448	{
				449	int rval = REGS_REALLOCATE;
				450	int i;
				451	int need_regs = nregs + 1;
				452	/* We need one extra element beyond `num_regs' for the `-1' marker GNU code
				453	uses. */
				454
				455	/* Have the register data arrays been allocated? */
				456	if (regs_allocated == REGS_UNALLOCATED)
				457	{ /* No. So allocate them with malloc. */
				458	regs->start = re_malloc (regoff_t, need_regs);
				459	regs->end = re_malloc (regoff_t, need_regs);
				460	if (BE (regs->start == NULL, 0) \|\| BE (regs->end == NULL, 0))
				461	return REGS_UNALLOCATED;
				462	regs->num_regs = need_regs;
				463	}
				464	else if (regs_allocated == REGS_REALLOCATE)
				465	{ /* Yes. If we need more elements than were already
				466	allocated, reallocate them. If we need fewer, just
				467	leave it alone. */
				468	if (BE (need_regs > regs->num_regs, 0))
				469	{
				470	regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
				471	regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
				472	if (BE (new_start == NULL, 0) \|\| BE (new_end == NULL, 0))
				473	return REGS_UNALLOCATED;
				474	regs->start = new_start;
				475	regs->end = new_end;
				476	regs->num_regs = need_regs;
				477	}
				478	}
				479	else
				480	{
				481	assert (regs_allocated == REGS_FIXED);
				482	/* This function may not be called with REGS_FIXED and nregs too big. */
				483	assert (regs->num_regs >= nregs);
				484	rval = REGS_FIXED;
				485	}
				486
				487	/* Copy the regs. */
				488	for (i = 0; i < nregs; ++i)
				489	{
				490	regs->start[i] = pmatch[i].rm_so;
				491	regs->end[i] = pmatch[i].rm_eo;
				492	}
				493	for ( ; i < regs->num_regs; ++i)
				494	regs->start[i] = regs->end[i] = -1;
				495
				496	return rval;
				497	}
				498
				499	/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
				500	ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
				501	this memory for recording register information. STARTS and ENDS
				502	must be allocated using the malloc library routine, and must each
				503	be at least NUM_REGS * sizeof (regoff_t) bytes long.
				504
				505	If NUM_REGS == 0, then subsequent matches should allocate their own
				506	register data.
				507
				508	Unless this function is called, the first search or match using
				509	PATTERN_BUFFER will allocate its own register data, without
				510	freeing the old data. */
				511
				512	void
				513	re_set_registers (struct re_pattern_buffer bufp, struct re_registers regs,
				514	unsigned num_regs, regoff_t starts, regoff_t ends)
				515	{
				516	if (num_regs)
				517	{
				518	bufp->regs_allocated = REGS_REALLOCATE;
				519	regs->num_regs = num_regs;
				520	regs->start = starts;
				521	regs->end = ends;
				522	}
				523	else
				524	{
				525	bufp->regs_allocated = REGS_UNALLOCATED;
				526	regs->num_regs = 0;
				527	regs->start = regs->end = (regoff_t *) 0;
				528	}
				529	}
				530
				531	/* Entry points compatible with 4.2 BSD regex library. We don't define
				532	them unless specifically requested. */
				533
				534	#if defined _REGEX_RE_COMP \|\| defined __UCLIBC__
				535	int
				536	weak_function
				537	re_exec (const char *s)
				538	{
				539	return 0 == regexec (re_comp_buf, s, 0, NULL, 0);
				540	}
				541	#endif
				542
				543	/* Internal entry point. */
				544
				545	/* Searches for a compiled pattern PREG in the string STRING, whose
				546	length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
				547	mingings with regexec. START, and RANGE have the same meanings
				548	with re_search.
				549	Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
				550	otherwise return the error code.
				551	Note: We assume front end functions already check ranges.
				552	(START + RANGE >= 0 && START + RANGE <= LENGTH) */
				553	static reg_errcode_t internal_function
				554	re_search_internal (const regex_t preg, const char string, int length,
				555	int start, int range, int stop, size_t nmatch,
				556	regmatch_t pmatch[], int eflags)
				557	{
				558	reg_errcode_t err;
				559	const re_dfa_t dfa = (const re_dfa_t ) preg->buffer;
				560	int left_lim, right_lim, incr;
				561	int fl_longest_match, match_first, match_kind, match_last = -1;
				562	int extra_nmatch;
				563	int sb, ch;
				564	re_match_context_t mctx;
				565	char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
				566	&& range && !preg->can_be_null) ? preg->fastmap : NULL;
				567	RE_TRANSLATE_TYPE t = preg->translate;
				568
				569	memset (&mctx, '\0', sizeof (re_match_context_t));
				570	mctx.dfa = dfa;
				571
				572	extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
				573	nmatch -= extra_nmatch;
				574
				575	/* Check if the DFA haven't been compiled. */
				576	if (BE (preg->used == 0 \|\| dfa->init_state == NULL
				577	\|\| dfa->init_state_word == NULL \|\| dfa->init_state_nl == NULL
				578	\|\| dfa->init_state_begbuf == NULL, 0))
				579	return REG_NOMATCH;
				580
				581	#ifdef DEBUG
				582	/* We assume front-end functions already check them. */
				583	assert (start + range >= 0 && start + range <= length);
				584	#endif
				585
				586	/* If initial states with non-begbuf contexts have no elements,
				587	the regex must be anchored. If preg->newline_anchor is set,
				588	we'll never use init_state_nl, so do not check it. */
				589	if (dfa->init_state->nodes.nelem == 0
				590	&& dfa->init_state_word->nodes.nelem == 0
				591	&& (dfa->init_state_nl->nodes.nelem == 0
				592	\|\| !preg->newline_anchor))
				593	{
				594	if (start != 0 && start + range != 0)
				595	return REG_NOMATCH;
				596	start = range = 0;
				597	}
				598
				599	/* We must check the longest matching, if nmatch > 0. */
				600	fl_longest_match = (nmatch != 0 \|\| dfa->nbackref);
				601
				602	err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
				603	preg->translate, preg->syntax & RE_ICASE, dfa);
				604	if (BE (err != REG_NOERROR, 0))
				605	goto free_return;
				606	mctx.input.stop = stop;
				607	mctx.input.raw_stop = stop;
				608	mctx.input.newline_anchor = preg->newline_anchor;
				609
				610	err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
				611	if (BE (err != REG_NOERROR, 0))
				612	goto free_return;
				613
				614	/* We will log all the DFA states through which the dfa pass,
				615	if nmatch > 1, or this dfa has "multibyte node", which is a
				616	back-reference or a node which can accept multibyte character or
				617	multi character collating element. */
				618	if (nmatch > 1 \|\| dfa->has_mb_node)
				619	{
				620	mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
				621	if (BE (mctx.state_log == NULL, 0))
				622	{
				623	err = REG_ESPACE;
				624	goto free_return;
				625	}
				626	}
				627	else
				628	mctx.state_log = NULL;
				629
				630	match_first = start;
				631	mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
				632	: CONTEXT_NEWLINE \| CONTEXT_BEGBUF;
				633
				634	/* Check incrementally whether of not the input string match. */
				635	incr = (range < 0) ? -1 : 1;
				636	left_lim = (range < 0) ? start + range : start;
				637	right_lim = (range < 0) ? start : start + range;
				638	sb = dfa->mb_cur_max == 1;
				639	match_kind =
				640	(fastmap
				641	? ((sb \|\| !(preg->syntax & RE_ICASE \|\| t) ? 4 : 0)
				642	\| (range >= 0 ? 2 : 0)
				643	\| (t != NULL ? 1 : 0))
				644	: 8);
				645
				646	for (;; match_first += incr)
				647	{
				648	err = REG_NOMATCH;
				649	if (match_first < left_lim \|\| right_lim < match_first)
				650	goto free_return;
				651
				652	/* Advance as rapidly as possible through the string, until we
				653	find a plausible place to start matching. This may be done
				654	with varying efficiency, so there are various possibilities:
				655	only the most common of them are specialized, in order to
				656	save on code size. We use a switch statement for speed. */
				657	switch (match_kind)
				658	{
				659	case 8:
				660	/* No fastmap. */
				661	break;
				662
				663	case 7:
				664	/* Fastmap with single-byte translation, match forward. */
				665	while (BE (match_first < right_lim, 1)
				666	&& !fastmap[t[(unsigned char) string[match_first]]])
				667	++match_first;
				668	goto forward_match_found_start_or_reached_end;
				669
				670	case 6:
				671	/* Fastmap without translation, match forward. */
				672	while (BE (match_first < right_lim, 1)
				673	&& !fastmap[(unsigned char) string[match_first]])
				674	++match_first;
				675
				676	forward_match_found_start_or_reached_end:
				677	if (BE (match_first == right_lim, 0))
				678	{
				679	ch = match_first >= length
				680	? 0 : (unsigned char) string[match_first];
				681	if (!fastmap[t ? t[ch] : ch])
				682	goto free_return;
				683	}
				684	break;
				685
				686	case 4:
				687	case 5:
				688	/* Fastmap without multi-byte translation, match backwards. */
				689	while (match_first >= left_lim)
				690	{
				691	ch = match_first >= length
				692	? 0 : (unsigned char) string[match_first];
				693	if (fastmap[t ? t[ch] : ch])
				694	break;
				695	--match_first;
				696	}
				697	if (match_first < left_lim)
				698	goto free_return;
				699	break;
				700
				701	default:
				702	/* In this case, we can't determine easily the current byte,
				703	since it might be a component byte of a multibyte
				704	character. Then we use the constructed buffer instead. */
				705	for (;;)
				706	{
				707	/* If MATCH_FIRST is out of the valid range, reconstruct the
				708	buffers. */
				709	unsigned int offset = match_first - mctx.input.raw_mbs_idx;
				710	if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
				711	{
				712	err = re_string_reconstruct (&mctx.input, match_first,
				713	eflags);
				714	if (BE (err != REG_NOERROR, 0))
				715	goto free_return;
				716
				717	offset = match_first - mctx.input.raw_mbs_idx;
				718	}
				719	/* If MATCH_FIRST is out of the buffer, leave it as '\0'.
				720	Note that MATCH_FIRST must not be smaller than 0. */
				721	ch = (match_first >= length
				722	? 0 : re_string_byte_at (&mctx.input, offset));
				723	if (fastmap[ch])
				724	break;
				725	match_first += incr;
				726	if (match_first < left_lim \|\| match_first > right_lim)
				727	{
				728	err = REG_NOMATCH;
				729	goto free_return;
				730	}
				731	}
				732	break;
				733	}
				734
				735	/* Reconstruct the buffers so that the matcher can assume that
				736	the matching starts from the beginning of the buffer. */
				737	err = re_string_reconstruct (&mctx.input, match_first, eflags);
				738	if (BE (err != REG_NOERROR, 0))
				739	goto free_return;
				740
				741	#ifdef RE_ENABLE_I18N
				742	/* Don't consider this char as a possible match start if it part,
				743	yet isn't the head, of a multibyte character. */
				744	if (!sb && !re_string_first_byte (&mctx.input, 0))
				745	continue;
				746	#endif
				747
				748	/* It seems to be appropriate one, then use the matcher. */
				749	/* We assume that the matching starts from 0. */
				750	mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
				751	match_last = check_matching (&mctx, fl_longest_match,
				752	range >= 0 ? &match_first : NULL);
				753	if (match_last != -1)
				754	{
				755	if (BE (match_last == -2, 0))
				756	{
				757	err = REG_ESPACE;
				758	goto free_return;
				759	}
				760	else
				761	{
				762	mctx.match_last = match_last;
				763	if ((!preg->no_sub && nmatch > 1) \|\| dfa->nbackref)
				764	{
				765	re_dfastate_t *pstate = mctx.state_log[match_last];
				766	mctx.last_node = check_halt_state_context (&mctx, pstate,
				767	match_last);
				768	}
				769	if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
				770	\|\| dfa->nbackref)
				771	{
				772	err = prune_impossible_nodes (&mctx);
				773	if (err == REG_NOERROR)
				774	break;
				775	if (BE (err != REG_NOMATCH, 0))
				776	goto free_return;
				777	match_last = -1;
				778	}
				779	else
				780	break; /* We found a match. */
				781	}
				782	}
				783
				784	match_ctx_clean (&mctx);
				785	}
				786
				787	#ifdef DEBUG
				788	assert (match_last != -1);
				789	assert (err == REG_NOERROR);
				790	#endif
				791
				792	/* Set pmatch[] if we need. */
				793	if (nmatch > 0)
				794	{
				795	int reg_idx;
				796
				797	/* Initialize registers. */
				798	for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
				799	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
				800
				801	/* Set the points where matching start/end. */
				802	pmatch[0].rm_so = 0;
				803	pmatch[0].rm_eo = mctx.match_last;
				804
				805	if (!preg->no_sub && nmatch > 1)
				806	{
				807	err = set_regs (preg, &mctx, nmatch, pmatch,
				808	dfa->has_plural_match && dfa->nbackref > 0);
				809	if (BE (err != REG_NOERROR, 0))
				810	goto free_return;
				811	}
				812
				813	/* At last, add the offset to the each registers, since we slided
				814	the buffers so that we could assume that the matching starts
				815	from 0. */
				816	for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
				817	if (pmatch[reg_idx].rm_so != -1)
				818	{
				819	#ifdef RE_ENABLE_I18N
				820	if (BE (mctx.input.offsets_needed != 0, 0))
				821	{
				822	pmatch[reg_idx].rm_so =
				823	(pmatch[reg_idx].rm_so == mctx.input.valid_len
				824	? mctx.input.valid_raw_len
				825	: mctx.input.offsets[pmatch[reg_idx].rm_so]);
				826	pmatch[reg_idx].rm_eo =
				827	(pmatch[reg_idx].rm_eo == mctx.input.valid_len
				828	? mctx.input.valid_raw_len
				829	: mctx.input.offsets[pmatch[reg_idx].rm_eo]);
				830	}
				831	#else
				832	assert (mctx.input.offsets_needed == 0);
				833	#endif
				834	pmatch[reg_idx].rm_so += match_first;
				835	pmatch[reg_idx].rm_eo += match_first;
				836	}
				837	for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
				838	{
				839	pmatch[nmatch + reg_idx].rm_so = -1;
				840	pmatch[nmatch + reg_idx].rm_eo = -1;
				841	}
				842
				843	if (dfa->subexp_map)
				844	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
				845	if (dfa->subexp_map[reg_idx] != reg_idx)
				846	{
				847	pmatch[reg_idx + 1].rm_so
				848	= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
				849	pmatch[reg_idx + 1].rm_eo
				850	= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
				851	}
				852	}
				853
				854	free_return:
				855	re_free (mctx.state_log);
				856	if (dfa->nbackref)
				857	match_ctx_free (&mctx);
				858	re_string_destruct (&mctx.input);
				859	return err;
				860	}
				861
				862	static reg_errcode_t internal_function
				863	prune_impossible_nodes (re_match_context_t *mctx)
				864	{
				865	const re_dfa_t *const dfa = mctx->dfa;
				866	int halt_node, match_last;
				867	reg_errcode_t ret;
				868	re_dfastate_t **sifted_states;
				869	re_dfastate_t **lim_states = NULL;
				870	re_sift_context_t sctx;
				871	#ifdef DEBUG
				872	assert (mctx->state_log != NULL);
				873	#endif
				874	match_last = mctx->match_last;
				875	halt_node = mctx->last_node;
				876	sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
				877	if (BE (sifted_states == NULL, 0))
				878	{
				879	ret = REG_ESPACE;
				880	goto free_return;
				881	}
				882	if (dfa->nbackref)
				883	{
				884	lim_states = re_malloc (re_dfastate_t *, match_last + 1);
				885	if (BE (lim_states == NULL, 0))
				886	{
				887	ret = REG_ESPACE;
				888	goto free_return;
				889	}
				890	while (1)
				891	{
				892	memset (lim_states, '\0',
				893	sizeof (re_dfastate_t ) (match_last + 1));
				894	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
				895	match_last);
				896	ret = sift_states_backward (mctx, &sctx);
				897	re_node_set_free (&sctx.limits);
				898	if (BE (ret != REG_NOERROR, 0))
				899	goto free_return;
				900	if (sifted_states[0] != NULL \|\| lim_states[0] != NULL)
				901	break;
				902	do
				903	{
				904	--match_last;
				905	if (match_last < 0)
				906	{
				907	ret = REG_NOMATCH;
				908	goto free_return;
				909	}
				910	} while (mctx->state_log[match_last] == NULL
				911	\|\| !mctx->state_log[match_last]->halt);
				912	halt_node = check_halt_state_context (mctx,
				913	mctx->state_log[match_last],
				914	match_last);
				915	}
				916	ret = merge_state_array (dfa, sifted_states, lim_states,
				917	match_last + 1);
				918	re_free (lim_states);
				919	lim_states = NULL;
				920	if (BE (ret != REG_NOERROR, 0))
				921	goto free_return;
				922	}
				923	else
				924	{
				925	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
				926	ret = sift_states_backward (mctx, &sctx);
				927	re_node_set_free (&sctx.limits);
				928	if (BE (ret != REG_NOERROR, 0))
				929	goto free_return;
				930	}
				931	re_free (mctx->state_log);
				932	mctx->state_log = sifted_states;
				933	sifted_states = NULL;
				934	mctx->last_node = halt_node;
				935	mctx->match_last = match_last;
				936	ret = REG_NOERROR;
				937	free_return:
				938	re_free (sifted_states);
				939	re_free (lim_states);
				940	return ret;
				941	}
				942
				943	/* Acquire an initial state and return it.
				944	We must select appropriate initial state depending on the context,
				945	since initial states may have constraints like "\<", "^", etc.. */
				946
				947	static __inline__ re_dfastate_t *
				948	__attribute ((always_inline)) internal_function
				949	acquire_init_state_context (reg_errcode_t err, const re_match_context_t mctx,
				950	int idx)
				951	{
				952	const re_dfa_t *const dfa = mctx->dfa;
				953	if (dfa->init_state->has_constraint)
				954	{
				955	unsigned int context;
				956	context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
				957	if (IS_WORD_CONTEXT (context))
				958	return dfa->init_state_word;
				959	else if (IS_ORDINARY_CONTEXT (context))
				960	return dfa->init_state;
				961	else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
				962	return dfa->init_state_begbuf;
				963	else if (IS_NEWLINE_CONTEXT (context))
				964	return dfa->init_state_nl;
				965	else if (IS_BEGBUF_CONTEXT (context))
				966	{
				967	/* It is relatively rare case, then calculate on demand. */
				968	return re_acquire_state_context (err, dfa,
				969	dfa->init_state->entrance_nodes,
				970	context);
				971	}
				972	else
				973	/* Must not happen? */
				974	return dfa->init_state;
				975	}
				976	else
				977	return dfa->init_state;
				978	}
				979
				980	/* Check whether the regular expression match input string INPUT or not,
				981	and return the index where the matching end, return -1 if not match,
				982	or return -2 in case of an error.
				983	FL_LONGEST_MATCH means we want the POSIX longest matching.
				984	If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
				985	next place where we may want to try matching.
				986	Note that the matcher assume that the maching starts from the current
				987	index of the buffer. */
				988
				989	static int
				990	internal_function
				991	check_matching (re_match_context_t *mctx, int fl_longest_match,
				992	int *p_match_first)
				993	{
				994	const re_dfa_t *const dfa = mctx->dfa;
				995	reg_errcode_t err;
				996	int match = 0;
				997	int match_last = -1;
				998	int cur_str_idx = re_string_cur_idx (&mctx->input);
				999	re_dfastate_t *cur_state;
				1000	int at_init_state = p_match_first != NULL;
				1001	int next_start_idx = cur_str_idx;
				1002
				1003	err = REG_NOERROR;
				1004	cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
				1005	/* An initial state must not be NULL (invalid). */
				1006	if (BE (cur_state == NULL, 0))
				1007	{
				1008	assert (err == REG_ESPACE);
				1009	return -2;
				1010	}
				1011
				1012	if (mctx->state_log != NULL)
				1013	{
				1014	mctx->state_log[cur_str_idx] = cur_state;
				1015
				1016	/* Check OP_OPEN_SUBEXP in the initial state in case that we use them
				1017	later. E.g. Processing back references. */
				1018	if (BE (dfa->nbackref, 0))
				1019	{
				1020	at_init_state = 0;
				1021	err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
				1022	if (BE (err != REG_NOERROR, 0))
				1023	return err;
				1024
				1025	if (cur_state->has_backref)
				1026	{
				1027	err = transit_state_bkref (mctx, &cur_state->nodes);
				1028	if (BE (err != REG_NOERROR, 0))
				1029	return err;
				1030	}
				1031	}
				1032	}
				1033
				1034	/* If the RE accepts NULL string. */
				1035	if (BE (cur_state->halt, 0))
				1036	{
				1037	if (!cur_state->has_constraint
				1038	\|\| check_halt_state_context (mctx, cur_state, cur_str_idx))
				1039	{
				1040	if (!fl_longest_match)
				1041	return cur_str_idx;
				1042	else
				1043	{
				1044	match_last = cur_str_idx;
				1045	match = 1;
				1046	}
				1047	}
				1048	}
				1049
				1050	while (!re_string_eoi (&mctx->input))
				1051	{
				1052	re_dfastate_t *old_state = cur_state;
				1053	int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
				1054
				1055	if (BE (next_char_idx >= mctx->input.bufs_len, 0)
				1056	\|\| (BE (next_char_idx >= mctx->input.valid_len, 0)
				1057	&& mctx->input.valid_len < mctx->input.len))
				1058	{
				1059	err = extend_buffers (mctx);
				1060	if (BE (err != REG_NOERROR, 0))
				1061	{
				1062	assert (err == REG_ESPACE);
				1063	return -2;
				1064	}
				1065	}
				1066
				1067	cur_state = transit_state (&err, mctx, cur_state);
				1068	if (mctx->state_log != NULL)
				1069	cur_state = merge_state_with_log (&err, mctx, cur_state);
				1070
				1071	if (cur_state == NULL)
				1072	{
				1073	/* Reached the invalid state or an error. Try to recover a valid
				1074	state using the state log, if available and if we have not
				1075	already found a valid (even if not the longest) match. */
				1076	if (BE (err != REG_NOERROR, 0))
				1077	return -2;
				1078
				1079	if (mctx->state_log == NULL
				1080	\|\| (match && !fl_longest_match)
				1081	\|\| (cur_state = find_recover_state (&err, mctx)) == NULL)
				1082	break;
				1083	}
				1084
				1085	if (BE (at_init_state, 0))
				1086	{
				1087	if (old_state == cur_state)
				1088	next_start_idx = next_char_idx;
				1089	else
				1090	at_init_state = 0;
				1091	}
				1092
				1093	if (cur_state->halt)
				1094	{
				1095	/* Reached a halt state.
				1096	Check the halt state can satisfy the current context. */
				1097	if (!cur_state->has_constraint
				1098	\|\| check_halt_state_context (mctx, cur_state,
				1099	re_string_cur_idx (&mctx->input)))
				1100	{
				1101	/* We found an appropriate halt state. */
				1102	match_last = re_string_cur_idx (&mctx->input);
				1103	match = 1;
				1104
				1105	/* We found a match, do not modify match_first below. */
				1106	p_match_first = NULL;
				1107	if (!fl_longest_match)
				1108	break;
				1109	}
				1110	}
				1111	}
				1112
				1113	if (p_match_first)
				1114	*p_match_first += next_start_idx;
				1115
				1116	return match_last;
				1117	}
				1118
				1119	/* Check NODE match the current context. */
				1120
				1121	static int
				1122	internal_function
				1123	check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
				1124	{
				1125	re_token_type_t type = dfa->nodes[node].type;
				1126	unsigned int constraint = dfa->nodes[node].constraint;
				1127	if (type != END_OF_RE)
				1128	return 0;
				1129	if (!constraint)
				1130	return 1;
				1131	if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
				1132	return 0;
				1133	return 1;
				1134	}
				1135
				1136	/* Check the halt state STATE match the current context.
				1137	Return 0 if not match, if the node, STATE has, is a halt node and
				1138	match the context, return the node. */
				1139
				1140	static int
				1141	internal_function
				1142	check_halt_state_context (const re_match_context_t *mctx,
				1143	const re_dfastate_t *state, int idx)
				1144	{
				1145	int i;
				1146	unsigned int context;
				1147	#ifdef DEBUG
				1148	assert (state->halt);
				1149	#endif
				1150	context = re_string_context_at (&mctx->input, idx, mctx->eflags);
				1151	for (i = 0; i < state->nodes.nelem; ++i)
				1152	if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
				1153	return state->nodes.elems[i];
				1154	return 0;
				1155	}
				1156
				1157	/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
				1158	corresponding to the DFA).
				1159	Return the destination node, and update EPS_VIA_NODES, return -1 in case
				1160	of errors. */
				1161
				1162	static int
				1163	internal_function
				1164	proceed_next_node (const re_match_context_t mctx, int nregs, regmatch_t regs,
				1165	int pidx, int node, re_node_set eps_via_nodes,
				1166	struct re_fail_stack_t *fs)
				1167	{
				1168	const re_dfa_t *const dfa = mctx->dfa;
				1169	int i, err;
				1170	if (IS_EPSILON_NODE (dfa->nodes[node].type))
				1171	{
				1172	re_node_set cur_nodes = &mctx->state_log[pidx]->nodes;
				1173	re_node_set *edests = &dfa->edests[node];
				1174	int dest_node;
				1175	err = re_node_set_insert (eps_via_nodes, node);
				1176	if (BE (err < 0, 0))
				1177	return -2;
				1178	/* Pick up a valid destination, or return -1 if none is found. */
				1179	for (dest_node = -1, i = 0; i < edests->nelem; ++i)
				1180	{
				1181	int candidate = edests->elems[i];
				1182	if (!re_node_set_contains (cur_nodes, candidate))
				1183	continue;
				1184	if (dest_node == -1)
				1185	dest_node = candidate;
				1186
				1187	else
				1188	{
				1189	/* In order to avoid infinite loop like "(a)", return the second
				1190	epsilon-transition if the first was already considered. */
				1191	if (re_node_set_contains (eps_via_nodes, dest_node))
				1192	return candidate;
				1193
				1194	/* Otherwise, push the second epsilon-transition on the fail stack. */
				1195	else if (fs != NULL
				1196	&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
				1197	eps_via_nodes))
				1198	return -2;
				1199
				1200	/* We know we are going to exit. */
				1201	break;
				1202	}
				1203	}
				1204	return dest_node;
				1205	}
				1206	else
				1207	{
				1208	int naccepted = 0;
				1209	re_token_type_t type = dfa->nodes[node].type;
				1210
				1211	#ifdef RE_ENABLE_I18N
				1212	if (dfa->nodes[node].accept_mb)
				1213	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
				1214	else
				1215	#endif /* RE_ENABLE_I18N */
				1216	if (type == OP_BACK_REF)
				1217	{
				1218	int subexp_idx = dfa->nodes[node].opr.idx + 1;
				1219	naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
				1220	if (fs != NULL)
				1221	{
				1222	if (regs[subexp_idx].rm_so == -1 \|\| regs[subexp_idx].rm_eo == -1)
				1223	return -1;
				1224	else if (naccepted)
				1225	{
				1226	char buf = (char ) re_string_get_buffer (&mctx->input);
				1227	if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
				1228	naccepted) != 0)
				1229	return -1;
				1230	}
				1231	}
				1232
				1233	if (naccepted == 0)
				1234	{
				1235	int dest_node;
				1236	err = re_node_set_insert (eps_via_nodes, node);
				1237	if (BE (err < 0, 0))
				1238	return -2;
				1239	dest_node = dfa->edests[node].elems[0];
				1240	if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
				1241	dest_node))
				1242	return dest_node;
				1243	}
				1244	}
				1245
				1246	if (naccepted != 0
				1247	\|\| check_node_accept (mctx, dfa->nodes + node, *pidx))
				1248	{
				1249	int dest_node = dfa->nexts[node];
				1250	pidx = (naccepted == 0) ? pidx + 1 : *pidx + naccepted;
				1251	if (fs && (pidx > mctx->match_last \|\| mctx->state_log[pidx] == NULL
				1252	\|\| !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
				1253	dest_node)))
				1254	return -1;
				1255	re_node_set_empty (eps_via_nodes);
				1256	return dest_node;
				1257	}
				1258	}
				1259	return -1;
				1260	}
				1261
				1262	static reg_errcode_t
				1263	internal_function
				1264	push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
				1265	int nregs, regmatch_t regs, re_node_set eps_via_nodes)
				1266	{
				1267	reg_errcode_t err;
				1268	int num = fs->num++;
				1269	if (fs->num == fs->alloc)
				1270	{
				1271	struct re_fail_stack_ent_t *new_array;
				1272	new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
				1273	* fs->alloc * 2));
				1274	if (new_array == NULL)
				1275	return REG_ESPACE;
				1276	fs->alloc *= 2;
				1277	fs->stack = new_array;
				1278	}
				1279	fs->stack[num].idx = str_idx;
				1280	fs->stack[num].node = dest_node;
				1281	fs->stack[num].regs = re_malloc (regmatch_t, nregs);
				1282	if (fs->stack[num].regs == NULL)
				1283	return REG_ESPACE;
				1284	memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
				1285	err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
				1286	return err;
				1287	}
				1288
				1289	static int
				1290	internal_function
				1291	pop_fail_stack (struct re_fail_stack_t fs, int pidx, int nregs,
				1292	regmatch_t regs, re_node_set eps_via_nodes)
				1293	{
				1294	int num = --fs->num;
				1295	assert (num >= 0);
				1296	*pidx = fs->stack[num].idx;
				1297	memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
				1298	re_node_set_free (eps_via_nodes);
				1299	re_free (fs->stack[num].regs);
				1300	*eps_via_nodes = fs->stack[num].eps_via_nodes;
				1301	return fs->stack[num].node;
				1302	}
				1303
				1304	/* Set the positions where the subexpressions are starts/ends to registers
				1305	PMATCH.
				1306	Note: We assume that pmatch[0] is already set, and
				1307	pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
				1308
				1309	static reg_errcode_t
				1310	internal_function
				1311	set_regs (const regex_t preg, const re_match_context_t mctx, size_t nmatch,
				1312	regmatch_t *pmatch, int fl_backtrack)
				1313	{
				1314	const re_dfa_t dfa = (const re_dfa_t ) preg->buffer;
				1315	int idx, cur_node;
				1316	re_node_set eps_via_nodes;
				1317	struct re_fail_stack_t *fs;
				1318	struct re_fail_stack_t fs_body = { 0, 2, NULL };
				1319	regmatch_t *prev_idx_match;
				1320	int prev_idx_match_malloced = 0;
				1321
				1322	#ifdef DEBUG
				1323	assert (nmatch > 1);
				1324	assert (mctx->state_log != NULL);
				1325	#endif
				1326	if (fl_backtrack)
				1327	{
				1328	fs = &fs_body;
				1329	fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
				1330	if (fs->stack == NULL)
				1331	return REG_ESPACE;
				1332	}
				1333	else
				1334	fs = NULL;
				1335
				1336	cur_node = dfa->init_node;
				1337	re_node_set_init_empty (&eps_via_nodes);
				1338
				1339	if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
				1340	prev_idx_match = (regmatch_t ) alloca (nmatch sizeof (regmatch_t));
				1341	else
				1342	{
				1343	prev_idx_match = re_malloc (regmatch_t, nmatch);
				1344	if (prev_idx_match == NULL)
				1345	{
				1346	free_fail_stack_return (fs);
				1347	return REG_ESPACE;
				1348	}
				1349	prev_idx_match_malloced = 1;
				1350	}
				1351	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
				1352
				1353	for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
				1354	{
				1355	update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
				1356
				1357	if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
				1358	{
				1359	int reg_idx;
				1360	if (fs)
				1361	{
				1362	for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
				1363	if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
				1364	break;
				1365	if (reg_idx == nmatch)
				1366	{
				1367	re_node_set_free (&eps_via_nodes);
				1368	if (prev_idx_match_malloced)
				1369	re_free (prev_idx_match);
				1370	return free_fail_stack_return (fs);
				1371	}
				1372	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
				1373	&eps_via_nodes);
				1374	}
				1375	else
				1376	{
				1377	re_node_set_free (&eps_via_nodes);
				1378	if (prev_idx_match_malloced)
				1379	re_free (prev_idx_match);
				1380	return REG_NOERROR;
				1381	}
				1382	}
				1383
				1384	/* Proceed to next node. */
				1385	cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
				1386	&eps_via_nodes, fs);
				1387
				1388	if (BE (cur_node < 0, 0))
				1389	{
				1390	if (BE (cur_node == -2, 0))
				1391	{
				1392	re_node_set_free (&eps_via_nodes);
				1393	if (prev_idx_match_malloced)
				1394	re_free (prev_idx_match);
				1395	free_fail_stack_return (fs);
				1396	return REG_ESPACE;
				1397	}
				1398	if (fs)
				1399	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
				1400	&eps_via_nodes);
				1401	else
				1402	{
				1403	re_node_set_free (&eps_via_nodes);
				1404	if (prev_idx_match_malloced)
				1405	re_free (prev_idx_match);
				1406	return REG_NOMATCH;
				1407	}
				1408	}
				1409	}
				1410	re_node_set_free (&eps_via_nodes);
				1411	if (prev_idx_match_malloced)
				1412	re_free (prev_idx_match);
				1413	return free_fail_stack_return (fs);
				1414	}
				1415
				1416	static reg_errcode_t
				1417	internal_function
				1418	free_fail_stack_return (struct re_fail_stack_t *fs)
				1419	{
				1420	if (fs)
				1421	{
				1422	int fs_idx;
				1423	for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
				1424	{
				1425	re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
				1426	re_free (fs->stack[fs_idx].regs);
				1427	}
				1428	re_free (fs->stack);
				1429	}
				1430	return REG_NOERROR;
				1431	}
				1432
				1433	static void
				1434	internal_function
				1435	update_regs (const re_dfa_t dfa, regmatch_t pmatch,
				1436	regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
				1437	{
				1438	int type = dfa->nodes[cur_node].type;
				1439	if (type == OP_OPEN_SUBEXP)
				1440	{
				1441	int reg_num = dfa->nodes[cur_node].opr.idx + 1;
				1442
				1443	/* We are at the first node of this sub expression. */
				1444	if (reg_num < nmatch)
				1445	{
				1446	pmatch[reg_num].rm_so = cur_idx;
				1447	pmatch[reg_num].rm_eo = -1;
				1448	}
				1449	}
				1450	else if (type == OP_CLOSE_SUBEXP)
				1451	{
				1452	int reg_num = dfa->nodes[cur_node].opr.idx + 1;
				1453	if (reg_num < nmatch)
				1454	{
				1455	/* We are at the last node of this sub expression. */
				1456	if (pmatch[reg_num].rm_so < cur_idx)
				1457	{
				1458	pmatch[reg_num].rm_eo = cur_idx;
				1459	/* This is a non-empty match or we are not inside an optional
				1460	subexpression. Accept this right away. */
				1461	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
				1462	}
				1463	else
				1464	{
				1465	if (dfa->nodes[cur_node].opt_subexp
				1466	&& prev_idx_match[reg_num].rm_so != -1)
				1467	/* We transited through an empty match for an optional
				1468	subexpression, like (a?)*, and this is not the subexp's
				1469	first match. Copy back the old content of the registers
				1470	so that matches of an inner subexpression are undone as
				1471	well, like in ((a?)). /
				1472	memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
				1473	else
				1474	/* We completed a subexpression, but it may be part of
				1475	an optional one, so do not update PREV_IDX_MATCH. */
				1476	pmatch[reg_num].rm_eo = cur_idx;
				1477	}
				1478	}
				1479	}
				1480	}
				1481
				1482	/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
				1483	and sift the nodes in each states according to the following rules.
				1484	Updated state_log will be wrote to STATE_LOG.
				1485
				1486	Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
				1487	1. When STR_IDX == MATCH_LAST(the last index in the state_log):
				1488	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
				1489	the LAST_NODE, we throw away the node `a'.
				1490	2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
				1491	string `s' and transit to `b':
				1492	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
				1493	away the node `a'.
				1494	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
				1495	thrown away, we throw away the node `a'.
				1496	3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
				1497	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
				1498	node `a'.
				1499	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
				1500	we throw away the node `a'. */
				1501
				1502	#define STATE_NODE_CONTAINS(state,node) \
				1503	((state) != NULL && re_node_set_contains (&(state)->nodes, node))
				1504
				1505	static reg_errcode_t
				1506	internal_function
				1507	sift_states_backward (const re_match_context_t mctx, re_sift_context_t sctx)
				1508	{
				1509	reg_errcode_t err;
				1510	int null_cnt = 0;
				1511	int str_idx = sctx->last_str_idx;
				1512	re_node_set cur_dest;
				1513
				1514	#ifdef DEBUG
				1515	assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
				1516	#endif
				1517
				1518	/* Build sifted state_log[str_idx]. It has the nodes which can epsilon
				1519	transit to the last_node and the last_node itself. */
				1520	err = re_node_set_init_1 (&cur_dest, sctx->last_node);
				1521	if (BE (err != REG_NOERROR, 0))
				1522	return err;
				1523	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
				1524	if (BE (err != REG_NOERROR, 0))
				1525	goto free_return;
				1526
				1527	/* Then check each states in the state_log. */
				1528	while (str_idx > 0)
				1529	{
				1530	/* Update counters. */
				1531	null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
				1532	if (null_cnt > mctx->max_mb_elem_len)
				1533	{
				1534	memset (sctx->sifted_states, '\0',
				1535	sizeof (re_dfastate_t ) str_idx);
				1536	re_node_set_free (&cur_dest);
				1537	return REG_NOERROR;
				1538	}
				1539	re_node_set_empty (&cur_dest);
				1540	--str_idx;
				1541
				1542	if (mctx->state_log[str_idx])
				1543	{
				1544	err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
				1545	if (BE (err != REG_NOERROR, 0))
				1546	goto free_return;
				1547	}
				1548
				1549	/* Add all the nodes which satisfy the following conditions:
				1550	- It can epsilon transit to a node in CUR_DEST.
				1551	- It is in CUR_SRC.
				1552	And update state_log. */
				1553	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
				1554	if (BE (err != REG_NOERROR, 0))
				1555	goto free_return;
				1556	}
				1557	err = REG_NOERROR;
				1558	free_return:
				1559	re_node_set_free (&cur_dest);
				1560	return err;
				1561	}
				1562
				1563	static reg_errcode_t
				1564	internal_function
				1565	build_sifted_states (const re_match_context_t mctx, re_sift_context_t sctx,
				1566	int str_idx, re_node_set *cur_dest)
				1567	{
				1568	const re_dfa_t *const dfa = mctx->dfa;
				1569	const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
				1570	int i;
				1571
				1572	/* Then build the next sifted state.
				1573	We build the next sifted state on `cur_dest', and update
				1574	`sifted_states[str_idx]' with `cur_dest'.
				1575	Note:
				1576	`cur_dest' is the sifted state from `state_log[str_idx + 1]'.
				1577	`cur_src' points the node_set of the old `state_log[str_idx]'
				1578	(with the epsilon nodes pre-filtered out). */
				1579	for (i = 0; i < cur_src->nelem; i++)
				1580	{
				1581	int prev_node = cur_src->elems[i];
				1582	int naccepted = 0;
				1583	int ret;
				1584
				1585	#ifdef DEBUG
				1586	re_token_type_t type = dfa->nodes[prev_node].type;
				1587	assert (!IS_EPSILON_NODE (type));
				1588	#endif
				1589	#ifdef RE_ENABLE_I18N
				1590	/* If the node may accept `multi byte'. */
				1591	if (dfa->nodes[prev_node].accept_mb)
				1592	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
				1593	str_idx, sctx->last_str_idx);
				1594	#endif /* RE_ENABLE_I18N */
				1595
				1596	/* We don't check backreferences here.
				1597	See update_cur_sifted_state(). */
				1598	if (!naccepted
				1599	&& check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
				1600	&& STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
				1601	dfa->nexts[prev_node]))
				1602	naccepted = 1;
				1603
				1604	if (naccepted == 0)
				1605	continue;
				1606
				1607	if (sctx->limits.nelem)
				1608	{
				1609	int to_idx = str_idx + naccepted;
				1610	if (check_dst_limits (mctx, &sctx->limits,
				1611	dfa->nexts[prev_node], to_idx,
				1612	prev_node, str_idx))
				1613	continue;
				1614	}
				1615	ret = re_node_set_insert (cur_dest, prev_node);
				1616	if (BE (ret == -1, 0))
				1617	return REG_ESPACE;
				1618	}
				1619
				1620	return REG_NOERROR;
				1621	}
				1622
				1623	/* Helper functions. */
				1624
				1625	static reg_errcode_t
				1626	internal_function
				1627	clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
				1628	{
				1629	int top = mctx->state_log_top;
				1630
				1631	if (next_state_log_idx >= mctx->input.bufs_len
				1632	\|\| (next_state_log_idx >= mctx->input.valid_len
				1633	&& mctx->input.valid_len < mctx->input.len))
				1634	{
				1635	reg_errcode_t err;
				1636	err = extend_buffers (mctx);
				1637	if (BE (err != REG_NOERROR, 0))
				1638	return err;
				1639	}
				1640
				1641	if (top < next_state_log_idx)
				1642	{
				1643	memset (mctx->state_log + top + 1, '\0',
				1644	sizeof (re_dfastate_t ) (next_state_log_idx - top));
				1645	mctx->state_log_top = next_state_log_idx;
				1646	}
				1647	return REG_NOERROR;
				1648	}
				1649
				1650	static reg_errcode_t
				1651	internal_function
				1652	merge_state_array (const re_dfa_t dfa, re_dfastate_t *dst,
				1653	re_dfastate_t **src, int num)
				1654	{
				1655	int st_idx;
				1656	reg_errcode_t err;
				1657	for (st_idx = 0; st_idx < num; ++st_idx)
				1658	{
				1659	if (dst[st_idx] == NULL)
				1660	dst[st_idx] = src[st_idx];
				1661	else if (src[st_idx] != NULL)
				1662	{
				1663	re_node_set merged_set;
				1664	err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
				1665	&src[st_idx]->nodes);
				1666	if (BE (err != REG_NOERROR, 0))
				1667	return err;
				1668	dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
				1669	re_node_set_free (&merged_set);
				1670	if (BE (err != REG_NOERROR, 0))
				1671	return err;
				1672	}
				1673	}
				1674	return REG_NOERROR;
				1675	}
				1676
				1677	static reg_errcode_t
				1678	internal_function
				1679	update_cur_sifted_state (const re_match_context_t *mctx,
				1680	re_sift_context_t *sctx, int str_idx,
				1681	re_node_set *dest_nodes)
				1682	{
				1683	const re_dfa_t *const dfa = mctx->dfa;
				1684	reg_errcode_t err = REG_NOERROR;
				1685	const re_node_set *candidates;
				1686	candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
				1687	: &mctx->state_log[str_idx]->nodes);
				1688
				1689	if (dest_nodes->nelem == 0)
				1690	sctx->sifted_states[str_idx] = NULL;
				1691	else
				1692	{
				1693	if (candidates)
				1694	{
				1695	/* At first, add the nodes which can epsilon transit to a node in
				1696	DEST_NODE. */
				1697	err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
				1698	if (BE (err != REG_NOERROR, 0))
				1699	return err;
				1700
				1701	/* Then, check the limitations in the current sift_context. */
				1702	if (sctx->limits.nelem)
				1703	{
				1704	err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
				1705	mctx->bkref_ents, str_idx);
				1706	if (BE (err != REG_NOERROR, 0))
				1707	return err;
				1708	}
				1709	}
				1710
				1711	sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
				1712	if (BE (err != REG_NOERROR, 0))
				1713	return err;
				1714	}
				1715
				1716	if (candidates && mctx->state_log[str_idx]->has_backref)
				1717	{
				1718	err = sift_states_bkref (mctx, sctx, str_idx, candidates);
				1719	if (BE (err != REG_NOERROR, 0))
				1720	return err;
				1721	}
				1722	return REG_NOERROR;
				1723	}
				1724
				1725	static reg_errcode_t
				1726	internal_function
				1727	add_epsilon_src_nodes (const re_dfa_t dfa, re_node_set dest_nodes,
				1728	const re_node_set *candidates)
				1729	{
				1730	reg_errcode_t err = REG_NOERROR;
				1731	int i;
				1732
				1733	re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
				1734	if (BE (err != REG_NOERROR, 0))
				1735	return err;
				1736
				1737	if (!state->inveclosure.alloc)
				1738	{
				1739	err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
				1740	if (BE (err != REG_NOERROR, 0))
				1741	return REG_ESPACE;
				1742	for (i = 0; i < dest_nodes->nelem; i++)
				1743	re_node_set_merge (&state->inveclosure,
				1744	dfa->inveclosures + dest_nodes->elems[i]);
				1745	}
				1746	return re_node_set_add_intersect (dest_nodes, candidates,
				1747	&state->inveclosure);
				1748	}
				1749
				1750	static reg_errcode_t
				1751	internal_function
				1752	sub_epsilon_src_nodes (const re_dfa_t dfa, int node, re_node_set dest_nodes,
				1753	const re_node_set *candidates)
				1754	{
				1755	int ecl_idx;
				1756	reg_errcode_t err;
				1757	re_node_set *inv_eclosure = dfa->inveclosures + node;
				1758	re_node_set except_nodes;
				1759	re_node_set_init_empty (&except_nodes);
				1760	for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
				1761	{
				1762	int cur_node = inv_eclosure->elems[ecl_idx];
				1763	if (cur_node == node)
				1764	continue;
				1765	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
				1766	{
				1767	int edst1 = dfa->edests[cur_node].elems[0];
				1768	int edst2 = ((dfa->edests[cur_node].nelem > 1)
				1769	? dfa->edests[cur_node].elems[1] : -1);
				1770	if ((!re_node_set_contains (inv_eclosure, edst1)
				1771	&& re_node_set_contains (dest_nodes, edst1))
				1772	\|\| (edst2 > 0
				1773	&& !re_node_set_contains (inv_eclosure, edst2)
				1774	&& re_node_set_contains (dest_nodes, edst2)))
				1775	{
				1776	err = re_node_set_add_intersect (&except_nodes, candidates,
				1777	dfa->inveclosures + cur_node);
				1778	if (BE (err != REG_NOERROR, 0))
				1779	{
				1780	re_node_set_free (&except_nodes);
				1781	return err;
				1782	}
				1783	}
				1784	}
				1785	}
				1786	for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
				1787	{
				1788	int cur_node = inv_eclosure->elems[ecl_idx];
				1789	if (!re_node_set_contains (&except_nodes, cur_node))
				1790	{
				1791	int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
				1792	re_node_set_remove_at (dest_nodes, idx);
				1793	}
				1794	}
				1795	re_node_set_free (&except_nodes);
				1796	return REG_NOERROR;
				1797	}
				1798
				1799	static int
				1800	internal_function
				1801	check_dst_limits (const re_match_context_t mctx, re_node_set limits,
				1802	int dst_node, int dst_idx, int src_node, int src_idx)
				1803	{
				1804	const re_dfa_t *const dfa = mctx->dfa;
				1805	int lim_idx, src_pos, dst_pos;
				1806
				1807	int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
				1808	int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
				1809	for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
				1810	{
				1811	int subexp_idx;
				1812	struct re_backref_cache_entry *ent;
				1813	ent = mctx->bkref_ents + limits->elems[lim_idx];
				1814	subexp_idx = dfa->nodes[ent->node].opr.idx;
				1815
				1816	dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
				1817	subexp_idx, dst_node, dst_idx,
				1818	dst_bkref_idx);
				1819	src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
				1820	subexp_idx, src_node, src_idx,
				1821	src_bkref_idx);
				1822
				1823	/* In case of:
				1824	<src> <dst> ( <subexp> )
				1825	( <subexp> ) <src> <dst>
				1826	( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
				1827	if (src_pos == dst_pos)
				1828	continue; /* This is unrelated limitation. */
				1829	else
				1830	return 1;
				1831	}
				1832	return 0;
				1833	}
				1834
				1835	static int
				1836	internal_function
				1837	check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
				1838	int subexp_idx, int from_node, int bkref_idx)
				1839	{
				1840	const re_dfa_t *const dfa = mctx->dfa;
				1841	const re_node_set *eclosures = dfa->eclosures + from_node;
				1842	int node_idx;
				1843
				1844	/* Else, we are on the boundary: examine the nodes on the epsilon
				1845	closure. */
				1846	for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
				1847	{
				1848	int node = eclosures->elems[node_idx];
				1849	switch (dfa->nodes[node].type)
				1850	{
				1851	case OP_BACK_REF:
				1852	if (bkref_idx != -1)
				1853	{
				1854	struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
				1855	do
				1856	{
				1857	int dst, cpos;
				1858
				1859	if (ent->node != node)
				1860	continue;
				1861
				1862	if (subexp_idx < BITSET_WORD_BITS
				1863	&& !(ent->eps_reachable_subexps_map
				1864	& ((bitset_word_t) 1 << subexp_idx)))
				1865	continue;
				1866
				1867	/* Recurse trying to reach the OP_OPEN_SUBEXP and
				1868	OP_CLOSE_SUBEXP cases below. But, if the
				1869	destination node is the same node as the source
				1870	node, don't recurse because it would cause an
				1871	infinite loop: a regex that exhibits this behavior
				1872	is ()\1\1 */
				1873	dst = dfa->edests[node].elems[0];
				1874	if (dst == from_node)
				1875	{
				1876	if (boundaries & 1)
				1877	return -1;
				1878	else /* if (boundaries & 2) */
				1879	return 0;
				1880	}
				1881
				1882	cpos =
				1883	check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
				1884	dst, bkref_idx);
				1885	if (cpos == -1 /* && (boundaries & 1) */)
				1886	return -1;
				1887	if (cpos == 0 && (boundaries & 2))
				1888	return 0;
				1889
				1890	if (subexp_idx < BITSET_WORD_BITS)
				1891	ent->eps_reachable_subexps_map
				1892	&= ~((bitset_word_t) 1 << subexp_idx);
				1893	}
				1894	while (ent++->more);
				1895	}
				1896	break;
				1897
				1898	case OP_OPEN_SUBEXP:
				1899	if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
				1900	return -1;
				1901	break;
				1902
				1903	case OP_CLOSE_SUBEXP:
				1904	if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
				1905	return 0;
				1906	break;
				1907
				1908	default:
				1909	break;
				1910	}
				1911	}
				1912
				1913	return (boundaries & 2) ? 1 : 0;
				1914	}
				1915
				1916	static int
				1917	internal_function
				1918	check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
				1919	int subexp_idx, int from_node, int str_idx,
				1920	int bkref_idx)
				1921	{
				1922	struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
				1923	int boundaries;
				1924
				1925	/* If we are outside the range of the subexpression, return -1 or 1. */
				1926	if (str_idx < lim->subexp_from)
				1927	return -1;
				1928
				1929	if (lim->subexp_to < str_idx)
				1930	return 1;
				1931
				1932	/* If we are within the subexpression, return 0. */
				1933	boundaries = (str_idx == lim->subexp_from);
				1934	boundaries \|= (str_idx == lim->subexp_to) << 1;
				1935	if (boundaries == 0)
				1936	return 0;
				1937
				1938	/* Else, examine epsilon closure. */
				1939	return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
				1940	from_node, bkref_idx);
				1941	}
				1942
				1943	/* Check the limitations of sub expressions LIMITS, and remove the nodes
				1944	which are against limitations from DEST_NODES. */
				1945
				1946	static reg_errcode_t
				1947	internal_function
				1948	check_subexp_limits (const re_dfa_t dfa, re_node_set dest_nodes,
				1949	const re_node_set candidates, re_node_set limits,
				1950	struct re_backref_cache_entry *bkref_ents, int str_idx)
				1951	{
				1952	reg_errcode_t err;
				1953	int node_idx, lim_idx;
				1954
				1955	for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
				1956	{
				1957	int subexp_idx;
				1958	struct re_backref_cache_entry *ent;
				1959	ent = bkref_ents + limits->elems[lim_idx];
				1960
				1961	if (str_idx <= ent->subexp_from \|\| ent->str_idx < str_idx)
				1962	continue; /* This is unrelated limitation. */
				1963
				1964	subexp_idx = dfa->nodes[ent->node].opr.idx;
				1965	if (ent->subexp_to == str_idx)
				1966	{
				1967	int ops_node = -1;
				1968	int cls_node = -1;
				1969	for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
				1970	{
				1971	int node = dest_nodes->elems[node_idx];
				1972	re_token_type_t type = dfa->nodes[node].type;
				1973	if (type == OP_OPEN_SUBEXP
				1974	&& subexp_idx == dfa->nodes[node].opr.idx)
				1975	ops_node = node;
				1976	else if (type == OP_CLOSE_SUBEXP
				1977	&& subexp_idx == dfa->nodes[node].opr.idx)
				1978	cls_node = node;
				1979	}
				1980
				1981	/* Check the limitation of the open subexpression. */
				1982	/* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
				1983	if (ops_node >= 0)
				1984	{
				1985	err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
				1986	candidates);
				1987	if (BE (err != REG_NOERROR, 0))
				1988	return err;
				1989	}
				1990
				1991	/* Check the limitation of the close subexpression. */
				1992	if (cls_node >= 0)
				1993	for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
				1994	{
				1995	int node = dest_nodes->elems[node_idx];
				1996	if (!re_node_set_contains (dfa->inveclosures + node,
				1997	cls_node)
				1998	&& !re_node_set_contains (dfa->eclosures + node,
				1999	cls_node))
				2000	{
				2001	/* It is against this limitation.
				2002	Remove it form the current sifted state. */
				2003	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
				2004	candidates);
				2005	if (BE (err != REG_NOERROR, 0))
				2006	return err;
				2007	--node_idx;
				2008	}
				2009	}
				2010	}
				2011	else /* (ent->subexp_to != str_idx) */
				2012	{
				2013	for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
				2014	{
				2015	int node = dest_nodes->elems[node_idx];
				2016	re_token_type_t type = dfa->nodes[node].type;
				2017	if (type == OP_CLOSE_SUBEXP \|\| type == OP_OPEN_SUBEXP)
				2018	{
				2019	if (subexp_idx != dfa->nodes[node].opr.idx)
				2020	continue;
				2021	/* It is against this limitation.
				2022	Remove it form the current sifted state. */
				2023	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
				2024	candidates);
				2025	if (BE (err != REG_NOERROR, 0))
				2026	return err;
				2027	}
				2028	}
				2029	}
				2030	}
				2031	return REG_NOERROR;
				2032	}
				2033
				2034	static reg_errcode_t
				2035	internal_function
				2036	sift_states_bkref (const re_match_context_t mctx, re_sift_context_t sctx,
				2037	int str_idx, const re_node_set *candidates)
				2038	{
				2039	const re_dfa_t *const dfa = mctx->dfa;
				2040	reg_errcode_t err;
				2041	int node_idx, node;
				2042	re_sift_context_t local_sctx;
				2043	int first_idx = search_cur_bkref_entry (mctx, str_idx);
				2044
				2045	if (first_idx == -1)
				2046	return REG_NOERROR;
				2047
				2048	local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
				2049
				2050	for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
				2051	{
				2052	int enabled_idx;
				2053	re_token_type_t type;
				2054	struct re_backref_cache_entry *entry;
				2055	node = candidates->elems[node_idx];
				2056	type = dfa->nodes[node].type;
				2057	/* Avoid infinite loop for the REs like "()\1+". */
				2058	if (node == sctx->last_node && str_idx == sctx->last_str_idx)
				2059	continue;
				2060	if (type != OP_BACK_REF)
				2061	continue;
				2062
				2063	entry = mctx->bkref_ents + first_idx;
				2064	enabled_idx = first_idx;
				2065	do
				2066	{
				2067	int subexp_len;
				2068	int to_idx;
				2069	int dst_node;
				2070	int ret;
				2071	re_dfastate_t *cur_state;
				2072
				2073	if (entry->node != node)
				2074	continue;
				2075	subexp_len = entry->subexp_to - entry->subexp_from;
				2076	to_idx = str_idx + subexp_len;
				2077	dst_node = (subexp_len ? dfa->nexts[node]
				2078	: dfa->edests[node].elems[0]);
				2079
				2080	if (to_idx > sctx->last_str_idx
				2081	\|\| sctx->sifted_states[to_idx] == NULL
				2082	\|\| !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
				2083	\|\| check_dst_limits (mctx, &sctx->limits, node,
				2084	str_idx, dst_node, to_idx))
				2085	continue;
				2086
				2087	if (local_sctx.sifted_states == NULL)
				2088	{
				2089	local_sctx = *sctx;
				2090	err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
				2091	if (BE (err != REG_NOERROR, 0))
				2092	goto free_return;
				2093	}
				2094	local_sctx.last_node = node;
				2095	local_sctx.last_str_idx = str_idx;
				2096	ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
				2097	if (BE (ret < 0, 0))
				2098	{
				2099	err = REG_ESPACE;
				2100	goto free_return;
				2101	}
				2102	cur_state = local_sctx.sifted_states[str_idx];
				2103	err = sift_states_backward (mctx, &local_sctx);
				2104	if (BE (err != REG_NOERROR, 0))
				2105	goto free_return;
				2106	if (sctx->limited_states != NULL)
				2107	{
				2108	err = merge_state_array (dfa, sctx->limited_states,
				2109	local_sctx.sifted_states,
				2110	str_idx + 1);
				2111	if (BE (err != REG_NOERROR, 0))
				2112	goto free_return;
				2113	}
				2114	local_sctx.sifted_states[str_idx] = cur_state;
				2115	re_node_set_remove (&local_sctx.limits, enabled_idx);
				2116
				2117	/* mctx->bkref_ents may have changed, reload the pointer. */
				2118	entry = mctx->bkref_ents + enabled_idx;
				2119	}
				2120	while (enabled_idx++, entry++->more);
				2121	}
				2122	err = REG_NOERROR;
				2123	free_return:
				2124	if (local_sctx.sifted_states != NULL)
				2125	{
				2126	re_node_set_free (&local_sctx.limits);
				2127	}
				2128
				2129	return err;
				2130	}
				2131
				2132
				2133	#ifdef RE_ENABLE_I18N
				2134	static int
				2135	internal_function
				2136	sift_states_iter_mb (const re_match_context_t mctx, re_sift_context_t sctx,
				2137	int node_idx, int str_idx, int max_str_idx)
				2138	{
				2139	const re_dfa_t *const dfa = mctx->dfa;
				2140	int naccepted;
				2141	/* Check the node can accept `multi byte'. */
				2142	naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
				2143	if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
				2144	!STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
				2145	dfa->nexts[node_idx]))
				2146	/* The node can't accept the `multi byte', or the
				2147	destination was already thrown away, then the node
				2148	could't accept the current input `multi byte'. */
				2149	naccepted = 0;
				2150	/* Otherwise, it is sure that the node could accept
				2151	`naccepted' bytes input. */
				2152	return naccepted;
				2153	}
				2154	#endif /* RE_ENABLE_I18N */
				2155
				2156
				2157	/* Functions for state transition. */
				2158
				2159	/* Return the next state to which the current state STATE will transit by
				2160	accepting the current input byte, and update STATE_LOG if necessary.
				2161	If STATE can accept a multibyte char/collating element/back reference
				2162	update the destination of STATE_LOG. */
				2163
				2164	static re_dfastate_t *
				2165	internal_function
				2166	transit_state (reg_errcode_t err, re_match_context_t mctx,
				2167	re_dfastate_t *state)
				2168	{
				2169	re_dfastate_t **trtable;
				2170	unsigned char ch;
				2171
				2172	#ifdef RE_ENABLE_I18N
				2173	/* If the current state can accept multibyte. */
				2174	if (BE (state->accept_mb, 0))
				2175	{
				2176	*err = transit_state_mb (mctx, state);
				2177	if (BE (*err != REG_NOERROR, 0))
				2178	return NULL;
				2179	}
				2180	#endif /* RE_ENABLE_I18N */
				2181
				2182	/* Then decide the next state with the single byte. */
				2183	#if 0
				2184	if (0)
				2185	/* don't use transition table */
				2186	return transit_state_sb (err, mctx, state);
				2187	#endif
				2188
				2189	/* Use transition table */
				2190	ch = re_string_fetch_byte (&mctx->input);
				2191	for (;;)
				2192	{
				2193	trtable = state->trtable;
				2194	if (BE (trtable != NULL, 1))
				2195	return trtable[ch];
				2196
				2197	trtable = state->word_trtable;
				2198	if (BE (trtable != NULL, 1))
				2199	{
				2200	unsigned int context;
				2201	context
				2202	= re_string_context_at (&mctx->input,
				2203	re_string_cur_idx (&mctx->input) - 1,
				2204	mctx->eflags);
				2205	if (IS_WORD_CONTEXT (context))
				2206	return trtable[ch + SBC_MAX];
				2207	else
				2208	return trtable[ch];
				2209	}
				2210
				2211	if (!build_trtable (mctx->dfa, state))
				2212	{
				2213	*err = REG_ESPACE;
				2214	return NULL;
				2215	}
				2216
				2217	/* Retry, we now have a transition table. */
				2218	}
				2219	}
				2220
				2221	/* Update the state_log if we need */
				2222	re_dfastate_t *
				2223	internal_function
				2224	merge_state_with_log (reg_errcode_t err, re_match_context_t mctx,
				2225	re_dfastate_t *next_state)
				2226	{
				2227	const re_dfa_t *const dfa = mctx->dfa;
				2228	int cur_idx = re_string_cur_idx (&mctx->input);
				2229
				2230	if (cur_idx > mctx->state_log_top)
				2231	{
				2232	mctx->state_log[cur_idx] = next_state;
				2233	mctx->state_log_top = cur_idx;
				2234	}
				2235	else if (mctx->state_log[cur_idx] == 0)
				2236	{
				2237	mctx->state_log[cur_idx] = next_state;
				2238	}
				2239	else
				2240	{
				2241	re_dfastate_t *pstate;
				2242	unsigned int context;
				2243	re_node_set next_nodes, log_nodes, table_nodes = NULL;
				2244	/* If (state_log[cur_idx] != 0), it implies that cur_idx is
				2245	the destination of a multibyte char/collating element/
				2246	back reference. Then the next state is the union set of
				2247	these destinations and the results of the transition table. */
				2248	pstate = mctx->state_log[cur_idx];
				2249	log_nodes = pstate->entrance_nodes;
				2250	if (next_state != NULL)
				2251	{
				2252	table_nodes = next_state->entrance_nodes;
				2253	*err = re_node_set_init_union (&next_nodes, table_nodes,
				2254	log_nodes);
				2255	if (BE (*err != REG_NOERROR, 0))
				2256	return NULL;
				2257	}
				2258	else
				2259	next_nodes = *log_nodes;
				2260	/* Note: We already add the nodes of the initial state,
				2261	then we don't need to add them here. */
				2262
				2263	context = re_string_context_at (&mctx->input,
				2264	re_string_cur_idx (&mctx->input) - 1,
				2265	mctx->eflags);
				2266	next_state = mctx->state_log[cur_idx]
				2267	= re_acquire_state_context (err, dfa, &next_nodes, context);
				2268	/* We don't need to check errors here, since the return value of
				2269	this function is next_state and ERR is already set. */
				2270
				2271	if (table_nodes != NULL)
				2272	re_node_set_free (&next_nodes);
				2273	}
				2274
				2275	if (BE (dfa->nbackref, 0) && next_state != NULL)
				2276	{
				2277	/* Check OP_OPEN_SUBEXP in the current state in case that we use them
				2278	later. We must check them here, since the back references in the
				2279	next state might use them. */
				2280	*err = check_subexp_matching_top (mctx, &next_state->nodes,
				2281	cur_idx);
				2282	if (BE (*err != REG_NOERROR, 0))
				2283	return NULL;
				2284
				2285	/* If the next state has back references. */
				2286	if (next_state->has_backref)
				2287	{
				2288	*err = transit_state_bkref (mctx, &next_state->nodes);
				2289	if (BE (*err != REG_NOERROR, 0))
				2290	return NULL;
				2291	next_state = mctx->state_log[cur_idx];
				2292	}
				2293	}
				2294
				2295	return next_state;
				2296	}
				2297
				2298	/* Skip bytes in the input that correspond to part of a
				2299	multi-byte match, then look in the log for a state
				2300	from which to restart matching. */
				2301	re_dfastate_t *
				2302	internal_function
				2303	find_recover_state (reg_errcode_t err, re_match_context_t mctx)
				2304	{
				2305	re_dfastate_t *cur_state;
				2306	do
				2307	{
				2308	int max = mctx->state_log_top;
				2309	int cur_str_idx = re_string_cur_idx (&mctx->input);
				2310
				2311	do
				2312	{
				2313	if (++cur_str_idx > max)
				2314	return NULL;
				2315	re_string_skip_bytes (&mctx->input, 1);
				2316	}
				2317	while (mctx->state_log[cur_str_idx] == NULL);
				2318
				2319	cur_state = merge_state_with_log (err, mctx, NULL);
				2320	}
				2321	while (*err == REG_NOERROR && cur_state == NULL);
				2322	return cur_state;
				2323	}
				2324
				2325	/* Helper functions for transit_state. */
				2326
				2327	/* From the node set CUR_NODES, pick up the nodes whose types are
				2328	OP_OPEN_SUBEXP and which have corresponding back references in the regular
				2329	expression. And register them to use them later for evaluating the
				2330	correspoding back references. */
				2331
				2332	static reg_errcode_t
				2333	internal_function
				2334	check_subexp_matching_top (re_match_context_t mctx, re_node_set cur_nodes,
				2335	int str_idx)
				2336	{
				2337	const re_dfa_t *const dfa = mctx->dfa;
				2338	int node_idx;
				2339	reg_errcode_t err;
				2340
				2341	/* TODO: This isn't efficient.
				2342	Because there might be more than one nodes whose types are
				2343	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
				2344	nodes.
				2345	E.g. RE: (a){2} */
				2346	for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
				2347	{
				2348	int node = cur_nodes->elems[node_idx];
				2349	if (dfa->nodes[node].type == OP_OPEN_SUBEXP
				2350	&& dfa->nodes[node].opr.idx < BITSET_WORD_BITS
				2351	&& (dfa->used_bkref_map
				2352	& ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
				2353	{
				2354	err = match_ctx_add_subtop (mctx, node, str_idx);
				2355	if (BE (err != REG_NOERROR, 0))
				2356	return err;
				2357	}
				2358	}
				2359	return REG_NOERROR;
				2360	}
				2361
				2362	#if 0
				2363	/* Return the next state to which the current state STATE will transit by
				2364	accepting the current input byte. */
				2365
				2366	static re_dfastate_t *
				2367	transit_state_sb (reg_errcode_t err, re_match_context_t mctx,
				2368	re_dfastate_t *state)
				2369	{
				2370	const re_dfa_t *const dfa = mctx->dfa;
				2371	re_node_set next_nodes;
				2372	re_dfastate_t *next_state;
				2373	int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
				2374	unsigned int context;
				2375
				2376	*err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
				2377	if (BE (*err != REG_NOERROR, 0))
				2378	return NULL;
				2379	for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
				2380	{
				2381	int cur_node = state->nodes.elems[node_cnt];
				2382	if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
				2383	{
				2384	*err = re_node_set_merge (&next_nodes,
				2385	dfa->eclosures + dfa->nexts[cur_node]);
				2386	if (BE (*err != REG_NOERROR, 0))
				2387	{
				2388	re_node_set_free (&next_nodes);
				2389	return NULL;
				2390	}
				2391	}
				2392	}
				2393	context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
				2394	next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
				2395	/* We don't need to check errors here, since the return value of
				2396	this function is next_state and ERR is already set. */
				2397
				2398	re_node_set_free (&next_nodes);
				2399	re_string_skip_bytes (&mctx->input, 1);
				2400	return next_state;
				2401	}
				2402	#endif
				2403
				2404	#ifdef RE_ENABLE_I18N
				2405	static reg_errcode_t
				2406	internal_function
				2407	transit_state_mb (re_match_context_t mctx, re_dfastate_t pstate)
				2408	{
				2409	const re_dfa_t *const dfa = mctx->dfa;
				2410	reg_errcode_t err;
				2411	int i;
				2412
				2413	for (i = 0; i < pstate->nodes.nelem; ++i)
				2414	{
				2415	re_node_set dest_nodes, *new_nodes;
				2416	int cur_node_idx = pstate->nodes.elems[i];
				2417	int naccepted, dest_idx;
				2418	unsigned int context;
				2419	re_dfastate_t *dest_state;
				2420
				2421	if (!dfa->nodes[cur_node_idx].accept_mb)
				2422	continue;
				2423
				2424	if (dfa->nodes[cur_node_idx].constraint)
				2425	{
				2426	context = re_string_context_at (&mctx->input,
				2427	re_string_cur_idx (&mctx->input),
				2428	mctx->eflags);
				2429	if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
				2430	context))
				2431	continue;
				2432	}
				2433
				2434	/* How many bytes the node can accept? */
				2435	naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
				2436	re_string_cur_idx (&mctx->input));
				2437	if (naccepted == 0)
				2438	continue;
				2439
				2440	/* The node can accepts `naccepted' bytes. */
				2441	dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
				2442	mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
				2443	: mctx->max_mb_elem_len);
				2444	err = clean_state_log_if_needed (mctx, dest_idx);
				2445	if (BE (err != REG_NOERROR, 0))
				2446	return err;
				2447	#ifdef DEBUG
				2448	assert (dfa->nexts[cur_node_idx] != -1);
				2449	#endif
				2450	new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
				2451
				2452	dest_state = mctx->state_log[dest_idx];
				2453	if (dest_state == NULL)
				2454	dest_nodes = *new_nodes;
				2455	else
				2456	{
				2457	err = re_node_set_init_union (&dest_nodes,
				2458	dest_state->entrance_nodes, new_nodes);
				2459	if (BE (err != REG_NOERROR, 0))
				2460	return err;
				2461	}
				2462	context = re_string_context_at (&mctx->input, dest_idx - 1,
				2463	mctx->eflags);
				2464	mctx->state_log[dest_idx]
				2465	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
				2466	if (dest_state != NULL)
				2467	re_node_set_free (&dest_nodes);
				2468	if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
				2469	return err;
				2470	}
				2471	return REG_NOERROR;
				2472	}
				2473	#endif /* RE_ENABLE_I18N */
				2474
				2475	static reg_errcode_t
				2476	internal_function
				2477	transit_state_bkref (re_match_context_t mctx, const re_node_set nodes)
				2478	{
				2479	const re_dfa_t *const dfa = mctx->dfa;
				2480	reg_errcode_t err;
				2481	int i;
				2482	int cur_str_idx = re_string_cur_idx (&mctx->input);
				2483
				2484	for (i = 0; i < nodes->nelem; ++i)
				2485	{
				2486	int dest_str_idx, prev_nelem, bkc_idx;
				2487	int node_idx = nodes->elems[i];
				2488	unsigned int context;
				2489	const re_token_t *node = dfa->nodes + node_idx;
				2490	re_node_set *new_dest_nodes;
				2491
				2492	/* Check whether `node' is a backreference or not. */
				2493	if (node->type != OP_BACK_REF)
				2494	continue;
				2495
				2496	if (node->constraint)
				2497	{
				2498	context = re_string_context_at (&mctx->input, cur_str_idx,
				2499	mctx->eflags);
				2500	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
				2501	continue;
				2502	}
				2503
				2504	/* `node' is a backreference.
				2505	Check the substring which the substring matched. */
				2506	bkc_idx = mctx->nbkref_ents;
				2507	err = get_subexp (mctx, node_idx, cur_str_idx);
				2508	if (BE (err != REG_NOERROR, 0))
				2509	goto free_return;
				2510
				2511	/* And add the epsilon closures (which is `new_dest_nodes') of
				2512	the backreference to appropriate state_log. */
				2513	#ifdef DEBUG
				2514	assert (dfa->nexts[node_idx] != -1);
				2515	#endif
				2516	for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
				2517	{
				2518	int subexp_len;
				2519	re_dfastate_t *dest_state;
				2520	struct re_backref_cache_entry *bkref_ent;
				2521	bkref_ent = mctx->bkref_ents + bkc_idx;
				2522	if (bkref_ent->node != node_idx \|\| bkref_ent->str_idx != cur_str_idx)
				2523	continue;
				2524	subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
				2525	new_dest_nodes = (subexp_len == 0
				2526	? dfa->eclosures + dfa->edests[node_idx].elems[0]
				2527	: dfa->eclosures + dfa->nexts[node_idx]);
				2528	dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
				2529	- bkref_ent->subexp_from);
				2530	context = re_string_context_at (&mctx->input, dest_str_idx - 1,
				2531	mctx->eflags);
				2532	dest_state = mctx->state_log[dest_str_idx];
				2533	prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
				2534	: mctx->state_log[cur_str_idx]->nodes.nelem);
				2535	/* Add `new_dest_node' to state_log. */
				2536	if (dest_state == NULL)
				2537	{
				2538	mctx->state_log[dest_str_idx]
				2539	= re_acquire_state_context (&err, dfa, new_dest_nodes,
				2540	context);
				2541	if (BE (mctx->state_log[dest_str_idx] == NULL
				2542	&& err != REG_NOERROR, 0))
				2543	goto free_return;
				2544	}
				2545	else
				2546	{
				2547	re_node_set dest_nodes;
				2548	err = re_node_set_init_union (&dest_nodes,
				2549	dest_state->entrance_nodes,
				2550	new_dest_nodes);
				2551	if (BE (err != REG_NOERROR, 0))
				2552	{
				2553	re_node_set_free (&dest_nodes);
				2554	goto free_return;
				2555	}
				2556	mctx->state_log[dest_str_idx]
				2557	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
				2558	re_node_set_free (&dest_nodes);
				2559	if (BE (mctx->state_log[dest_str_idx] == NULL
				2560	&& err != REG_NOERROR, 0))
				2561	goto free_return;
				2562	}
				2563	/* We need to check recursively if the backreference can epsilon
				2564	transit. */
				2565	if (subexp_len == 0
				2566	&& mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
				2567	{
				2568	err = check_subexp_matching_top (mctx, new_dest_nodes,
				2569	cur_str_idx);
				2570	if (BE (err != REG_NOERROR, 0))
				2571	goto free_return;
				2572	err = transit_state_bkref (mctx, new_dest_nodes);
				2573	if (BE (err != REG_NOERROR, 0))
				2574	goto free_return;
				2575	}
				2576	}
				2577	}
				2578	err = REG_NOERROR;
				2579	free_return:
				2580	return err;
				2581	}
				2582
				2583	/* Enumerate all the candidates which the backreference BKREF_NODE can match
				2584	at BKREF_STR_IDX, and register them by match_ctx_add_entry().
				2585	Note that we might collect inappropriate candidates here.
				2586	However, the cost of checking them strictly here is too high, then we
				2587	delay these checking for prune_impossible_nodes(). */
				2588
				2589	static reg_errcode_t
				2590	internal_function
				2591	get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
				2592	{
				2593	const re_dfa_t *const dfa = mctx->dfa;
				2594	int subexp_num, sub_top_idx;
				2595	const char buf = (const char ) re_string_get_buffer (&mctx->input);
				2596	/* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
				2597	int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
				2598	if (cache_idx != -1)
				2599	{
				2600	const struct re_backref_cache_entry *entry
				2601	= mctx->bkref_ents + cache_idx;
				2602	do
				2603	if (entry->node == bkref_node)
				2604	return REG_NOERROR; /* We already checked it. */
				2605	while (entry++->more);
				2606	}
				2607
				2608	subexp_num = dfa->nodes[bkref_node].opr.idx;
				2609
				2610	/* For each sub expression */
				2611	for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
				2612	{
				2613	reg_errcode_t err;
				2614	re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
				2615	re_sub_match_last_t *sub_last;
				2616	int sub_last_idx, sl_str, bkref_str_off;
				2617
				2618	if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
				2619	continue; /* It isn't related. */
				2620
				2621	sl_str = sub_top->str_idx;
				2622	bkref_str_off = bkref_str_idx;
				2623	/* At first, check the last node of sub expressions we already
				2624	evaluated. */
				2625	for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
				2626	{
				2627	int sl_str_diff;
				2628	sub_last = sub_top->lasts[sub_last_idx];
				2629	sl_str_diff = sub_last->str_idx - sl_str;
				2630	/* The matched string by the sub expression match with the substring
				2631	at the back reference? */
				2632	if (sl_str_diff > 0)
				2633	{
				2634	if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
				2635	{
				2636	/* Not enough chars for a successful match. */
				2637	if (bkref_str_off + sl_str_diff > mctx->input.len)
				2638	break;
				2639
				2640	err = clean_state_log_if_needed (mctx,
				2641	bkref_str_off
				2642	+ sl_str_diff);
				2643	if (BE (err != REG_NOERROR, 0))
				2644	return err;
				2645	buf = (const char *) re_string_get_buffer (&mctx->input);
				2646	}
				2647	if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
				2648	/* We don't need to search this sub expression any more. */
				2649	break;
				2650	}
				2651	bkref_str_off += sl_str_diff;
				2652	sl_str += sl_str_diff;
				2653	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
				2654	bkref_str_idx);
				2655
				2656	/* Reload buf, since the preceding call might have reallocated
				2657	the buffer. */
				2658	buf = (const char *) re_string_get_buffer (&mctx->input);
				2659
				2660	if (err == REG_NOMATCH)
				2661	continue;
				2662	if (BE (err != REG_NOERROR, 0))
				2663	return err;
				2664	}
				2665
				2666	if (sub_last_idx < sub_top->nlasts)
				2667	continue;
				2668	if (sub_last_idx > 0)
				2669	++sl_str;
				2670	/* Then, search for the other last nodes of the sub expression. */
				2671	for (; sl_str <= bkref_str_idx; ++sl_str)
				2672	{
				2673	int cls_node, sl_str_off;
				2674	const re_node_set *nodes;
				2675	sl_str_off = sl_str - sub_top->str_idx;
				2676	/* The matched string by the sub expression match with the substring
				2677	at the back reference? */
				2678	if (sl_str_off > 0)
				2679	{
				2680	if (BE (bkref_str_off >= mctx->input.valid_len, 0))
				2681	{
				2682	/* If we are at the end of the input, we cannot match. */
				2683	if (bkref_str_off >= mctx->input.len)
				2684	break;
				2685
				2686	err = extend_buffers (mctx);
				2687	if (BE (err != REG_NOERROR, 0))
				2688	return err;
				2689
				2690	buf = (const char *) re_string_get_buffer (&mctx->input);
				2691	}
				2692	if (buf [bkref_str_off++] != buf[sl_str - 1])
				2693	break; /* We don't need to search this sub expression
				2694	any more. */
				2695	}
				2696	if (mctx->state_log[sl_str] == NULL)
				2697	continue;
				2698	/* Does this state have a ')' of the sub expression? */
				2699	nodes = &mctx->state_log[sl_str]->nodes;
				2700	cls_node = find_subexp_node (dfa, nodes, subexp_num,
				2701	OP_CLOSE_SUBEXP);
				2702	if (cls_node == -1)
				2703	continue; /* No. */
				2704	if (sub_top->path == NULL)
				2705	{
				2706	sub_top->path = calloc (sizeof (state_array_t),
				2707	sl_str - sub_top->str_idx + 1);
				2708	if (sub_top->path == NULL)
				2709	return REG_ESPACE;
				2710	}
				2711	/* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
				2712	in the current context? */
				2713	err = check_arrival (mctx, sub_top->path, sub_top->node,
				2714	sub_top->str_idx, cls_node, sl_str,
				2715	OP_CLOSE_SUBEXP);
				2716	if (err == REG_NOMATCH)
				2717	continue;
				2718	if (BE (err != REG_NOERROR, 0))
				2719	return err;
				2720	sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
				2721	if (BE (sub_last == NULL, 0))
				2722	return REG_ESPACE;
				2723	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
				2724	bkref_str_idx);
				2725	if (err == REG_NOMATCH)
				2726	continue;
				2727	}
				2728	}
				2729	return REG_NOERROR;
				2730	}
				2731
				2732	/* Helper functions for get_subexp(). */
				2733
				2734	/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
				2735	If it can arrive, register the sub expression expressed with SUB_TOP
				2736	and SUB_LAST. */
				2737
				2738	static reg_errcode_t
				2739	internal_function
				2740	get_subexp_sub (re_match_context_t mctx, const re_sub_match_top_t sub_top,
				2741	re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
				2742	{
				2743	reg_errcode_t err;
				2744	int to_idx;
				2745	/* Can the subexpression arrive the back reference? */
				2746	err = check_arrival (mctx, &sub_last->path, sub_last->node,
				2747	sub_last->str_idx, bkref_node, bkref_str,
				2748	OP_OPEN_SUBEXP);
				2749	if (err != REG_NOERROR)
				2750	return err;
				2751	err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
				2752	sub_last->str_idx);
				2753	if (BE (err != REG_NOERROR, 0))
				2754	return err;
				2755	to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
				2756	return clean_state_log_if_needed (mctx, to_idx);
				2757	}
				2758
				2759	/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
				2760	Search '(' if FL_OPEN, or search ')' otherwise.
				2761	TODO: This function isn't efficient...
				2762	Because there might be more than one nodes whose types are
				2763	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
				2764	nodes.
				2765	E.g. RE: (a){2} */
				2766
				2767	static int
				2768	internal_function
				2769	find_subexp_node (const re_dfa_t dfa, const re_node_set nodes,
				2770	int subexp_idx, int type)
				2771	{
				2772	int cls_idx;
				2773	for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
				2774	{
				2775	int cls_node = nodes->elems[cls_idx];
				2776	const re_token_t *node = dfa->nodes + cls_node;
				2777	if (node->type == type
				2778	&& node->opr.idx == subexp_idx)
				2779	return cls_node;
				2780	}
				2781	return -1;
				2782	}
				2783
				2784	/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
				2785	LAST_NODE at LAST_STR. We record the path onto PATH since it will be
				2786	heavily reused.
				2787	Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
				2788
				2789	static reg_errcode_t
				2790	internal_function
				2791	check_arrival (re_match_context_t mctx, state_array_t path, int top_node,
				2792	int top_str, int last_node, int last_str, int type)
				2793	{
				2794	const re_dfa_t *const dfa = mctx->dfa;
				2795	reg_errcode_t err = REG_NOERROR;
				2796	int subexp_num, backup_cur_idx, str_idx, null_cnt;
				2797	re_dfastate_t *cur_state = NULL;
				2798	re_node_set *cur_nodes, next_nodes;
				2799	re_dfastate_t **backup_state_log;
				2800	unsigned int context;
				2801
				2802	subexp_num = dfa->nodes[top_node].opr.idx;
				2803	/* Extend the buffer if we need. */
				2804	if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
				2805	{
				2806	re_dfastate_t **new_array;
				2807	int old_alloc = path->alloc;
				2808	path->alloc += last_str + mctx->max_mb_elem_len + 1;
				2809	new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
				2810	if (BE (new_array == NULL, 0))
				2811	{
				2812	path->alloc = old_alloc;
				2813	return REG_ESPACE;
				2814	}
				2815	path->array = new_array;
				2816	memset (new_array + old_alloc, '\0',
				2817	sizeof (re_dfastate_t ) (path->alloc - old_alloc));
				2818	}
				2819
				2820	str_idx = path->next_idx ?: top_str;
				2821
				2822	/* Temporary modify MCTX. */
				2823	backup_state_log = mctx->state_log;
				2824	backup_cur_idx = mctx->input.cur_idx;
				2825	mctx->state_log = path->array;
				2826	mctx->input.cur_idx = str_idx;
				2827
				2828	/* Setup initial node set. */
				2829	context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
				2830	if (str_idx == top_str)
				2831	{
				2832	err = re_node_set_init_1 (&next_nodes, top_node);
				2833	if (BE (err != REG_NOERROR, 0))
				2834	return err;
				2835	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
				2836	if (BE (err != REG_NOERROR, 0))
				2837	{
				2838	re_node_set_free (&next_nodes);
				2839	return err;
				2840	}
				2841	}
				2842	else
				2843	{
				2844	cur_state = mctx->state_log[str_idx];
				2845	if (cur_state && cur_state->has_backref)
				2846	{
				2847	err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
				2848	if (BE (err != REG_NOERROR, 0))
				2849	return err;
				2850	}
				2851	else
				2852	re_node_set_init_empty (&next_nodes);
				2853	}
				2854	if (str_idx == top_str \|\| (cur_state && cur_state->has_backref))
				2855	{
				2856	if (next_nodes.nelem)
				2857	{
				2858	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
				2859	subexp_num, type);
				2860	if (BE (err != REG_NOERROR, 0))
				2861	{
				2862	re_node_set_free (&next_nodes);
				2863	return err;
				2864	}
				2865	}
				2866	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
				2867	if (BE (cur_state == NULL && err != REG_NOERROR, 0))
				2868	{
				2869	re_node_set_free (&next_nodes);
				2870	return err;
				2871	}
				2872	mctx->state_log[str_idx] = cur_state;
				2873	}
				2874
				2875	for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
				2876	{
				2877	re_node_set_empty (&next_nodes);
				2878	if (mctx->state_log[str_idx + 1])
				2879	{
				2880	err = re_node_set_merge (&next_nodes,
				2881	&mctx->state_log[str_idx + 1]->nodes);
				2882	if (BE (err != REG_NOERROR, 0))
				2883	{
				2884	re_node_set_free (&next_nodes);
				2885	return err;
				2886	}
				2887	}
				2888	if (cur_state)
				2889	{
				2890	err = check_arrival_add_next_nodes (mctx, str_idx,
				2891	&cur_state->non_eps_nodes,
				2892	&next_nodes);
				2893	if (BE (err != REG_NOERROR, 0))
				2894	{
				2895	re_node_set_free (&next_nodes);
				2896	return err;
				2897	}
				2898	}
				2899	++str_idx;
				2900	if (next_nodes.nelem)
				2901	{
				2902	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
				2903	if (BE (err != REG_NOERROR, 0))
				2904	{
				2905	re_node_set_free (&next_nodes);
				2906	return err;
				2907	}
				2908	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
				2909	subexp_num, type);
				2910	if (BE (err != REG_NOERROR, 0))
				2911	{
				2912	re_node_set_free (&next_nodes);
				2913	return err;
				2914	}
				2915	}
				2916	context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
				2917	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
				2918	if (BE (cur_state == NULL && err != REG_NOERROR, 0))
				2919	{
				2920	re_node_set_free (&next_nodes);
				2921	return err;
				2922	}
				2923	mctx->state_log[str_idx] = cur_state;
				2924	null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
				2925	}
				2926	re_node_set_free (&next_nodes);
				2927	cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
				2928	: &mctx->state_log[last_str]->nodes);
				2929	path->next_idx = str_idx;
				2930
				2931	/* Fix MCTX. */
				2932	mctx->state_log = backup_state_log;
				2933	mctx->input.cur_idx = backup_cur_idx;
				2934
				2935	/* Then check the current node set has the node LAST_NODE. */
				2936	if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
				2937	return REG_NOERROR;
				2938
				2939	return REG_NOMATCH;
				2940	}
				2941
				2942	/* Helper functions for check_arrival. */
				2943
				2944	/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
				2945	to NEXT_NODES.
				2946	TODO: This function is similar to the functions transit_state*(),
				2947	however this function has many additional works.
				2948	Can't we unify them? */
				2949
				2950	static reg_errcode_t
				2951	internal_function
				2952	check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
				2953	re_node_set cur_nodes, re_node_set next_nodes)
				2954	{
				2955	const re_dfa_t *const dfa = mctx->dfa;
				2956	int result;
				2957	int cur_idx;
				2958	#ifdef RE_ENABLE_I18N
				2959	reg_errcode_t err = REG_NOERROR;
				2960	#endif
				2961	re_node_set union_set;
				2962	re_node_set_init_empty (&union_set);
				2963	for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
				2964	{
				2965	int naccepted = 0;
				2966	int cur_node = cur_nodes->elems[cur_idx];
				2967	#ifdef DEBUG
				2968	re_token_type_t type = dfa->nodes[cur_node].type;
				2969	assert (!IS_EPSILON_NODE (type));
				2970	#endif
				2971	#ifdef RE_ENABLE_I18N
				2972	/* If the node may accept `multi byte'. */
				2973	if (dfa->nodes[cur_node].accept_mb)
				2974	{
				2975	naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
				2976	str_idx);
				2977	if (naccepted > 1)
				2978	{
				2979	re_dfastate_t *dest_state;
				2980	int next_node = dfa->nexts[cur_node];
				2981	int next_idx = str_idx + naccepted;
				2982	dest_state = mctx->state_log[next_idx];
				2983	re_node_set_empty (&union_set);
				2984	if (dest_state)
				2985	{
				2986	err = re_node_set_merge (&union_set, &dest_state->nodes);
				2987	if (BE (err != REG_NOERROR, 0))
				2988	{
				2989	re_node_set_free (&union_set);
				2990	return err;
				2991	}
				2992	}
				2993	result = re_node_set_insert (&union_set, next_node);
				2994	if (BE (result < 0, 0))
				2995	{
				2996	re_node_set_free (&union_set);
				2997	return REG_ESPACE;
				2998	}
				2999	mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
				3000	&union_set);
				3001	if (BE (mctx->state_log[next_idx] == NULL
				3002	&& err != REG_NOERROR, 0))
				3003	{
				3004	re_node_set_free (&union_set);
				3005	return err;
				3006	}
				3007	}
				3008	}
				3009	#endif /* RE_ENABLE_I18N */
				3010	if (naccepted
				3011	\|\| check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
				3012	{
				3013	result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
				3014	if (BE (result < 0, 0))
				3015	{
				3016	re_node_set_free (&union_set);
				3017	return REG_ESPACE;
				3018	}
				3019	}
				3020	}
				3021	re_node_set_free (&union_set);
				3022	return REG_NOERROR;
				3023	}
				3024
				3025	/* For all the nodes in CUR_NODES, add the epsilon closures of them to
				3026	CUR_NODES, however exclude the nodes which are:
				3027	- inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
				3028	- out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
				3029	*/
				3030
				3031	static reg_errcode_t
				3032	internal_function
				3033	check_arrival_expand_ecl (const re_dfa_t dfa, re_node_set cur_nodes,
				3034	int ex_subexp, int type)
				3035	{
				3036	reg_errcode_t err;
				3037	int idx, outside_node;
				3038	re_node_set new_nodes;
				3039	#ifdef DEBUG
				3040	assert (cur_nodes->nelem);
				3041	#endif
				3042	err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
				3043	if (BE (err != REG_NOERROR, 0))
				3044	return err;
				3045	/* Create a new node set NEW_NODES with the nodes which are epsilon
				3046	closures of the node in CUR_NODES. */
				3047
				3048	for (idx = 0; idx < cur_nodes->nelem; ++idx)
				3049	{
				3050	int cur_node = cur_nodes->elems[idx];
				3051	const re_node_set *eclosure = dfa->eclosures + cur_node;
				3052	outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
				3053	if (outside_node == -1)
				3054	{
				3055	/* There are no problematic nodes, just merge them. */
				3056	err = re_node_set_merge (&new_nodes, eclosure);
				3057	if (BE (err != REG_NOERROR, 0))
				3058	{
				3059	re_node_set_free (&new_nodes);
				3060	return err;
				3061	}
				3062	}
				3063	else
				3064	{
				3065	/* There are problematic nodes, re-calculate incrementally. */
				3066	err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
				3067	ex_subexp, type);
				3068	if (BE (err != REG_NOERROR, 0))
				3069	{
				3070	re_node_set_free (&new_nodes);
				3071	return err;
				3072	}
				3073	}
				3074	}
				3075	re_node_set_free (cur_nodes);
				3076	*cur_nodes = new_nodes;
				3077	return REG_NOERROR;
				3078	}
				3079
				3080	/* Helper function for check_arrival_expand_ecl.
				3081	Check incrementally the epsilon closure of TARGET, and if it isn't
				3082	problematic append it to DST_NODES. */
				3083
				3084	static reg_errcode_t
				3085	internal_function
				3086	check_arrival_expand_ecl_sub (const re_dfa_t dfa, re_node_set dst_nodes,
				3087	int target, int ex_subexp, int type)
				3088	{
				3089	int cur_node;
				3090	for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
				3091	{
				3092	int err;
				3093
				3094	if (dfa->nodes[cur_node].type == type
				3095	&& dfa->nodes[cur_node].opr.idx == ex_subexp)
				3096	{
				3097	if (type == OP_CLOSE_SUBEXP)
				3098	{
				3099	err = re_node_set_insert (dst_nodes, cur_node);
				3100	if (BE (err == -1, 0))
				3101	return REG_ESPACE;
				3102	}
				3103	break;
				3104	}
				3105	err = re_node_set_insert (dst_nodes, cur_node);
				3106	if (BE (err == -1, 0))
				3107	return REG_ESPACE;
				3108	if (dfa->edests[cur_node].nelem == 0)
				3109	break;
				3110	if (dfa->edests[cur_node].nelem == 2)
				3111	{
				3112	err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
				3113	dfa->edests[cur_node].elems[1],
				3114	ex_subexp, type);
				3115	if (BE (err != REG_NOERROR, 0))
				3116	return err;
				3117	}
				3118	cur_node = dfa->edests[cur_node].elems[0];
				3119	}
				3120	return REG_NOERROR;
				3121	}
				3122
				3123
				3124	/* For all the back references in the current state, calculate the
				3125	destination of the back references by the appropriate entry
				3126	in MCTX->BKREF_ENTS. */
				3127
				3128	static reg_errcode_t
				3129	internal_function
				3130	expand_bkref_cache (re_match_context_t mctx, re_node_set cur_nodes,
				3131	int cur_str, int subexp_num, int type)
				3132	{
				3133	const re_dfa_t *const dfa = mctx->dfa;
				3134	reg_errcode_t err;
				3135	int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
				3136	struct re_backref_cache_entry *ent;
				3137
				3138	if (cache_idx_start == -1)
				3139	return REG_NOERROR;
				3140
				3141	restart:
				3142	ent = mctx->bkref_ents + cache_idx_start;
				3143	do
				3144	{
				3145	int to_idx, next_node;
				3146
				3147	/* Is this entry ENT is appropriate? */
				3148	if (!re_node_set_contains (cur_nodes, ent->node))
				3149	continue; /* No. */
				3150
				3151	to_idx = cur_str + ent->subexp_to - ent->subexp_from;
				3152	/* Calculate the destination of the back reference, and append it
				3153	to MCTX->STATE_LOG. */
				3154	if (to_idx == cur_str)
				3155	{
				3156	/* The backreference did epsilon transit, we must re-check all the
				3157	node in the current state. */
				3158	re_node_set new_dests;
				3159	reg_errcode_t err2, err3;
				3160	next_node = dfa->edests[ent->node].elems[0];
				3161	if (re_node_set_contains (cur_nodes, next_node))
				3162	continue;
				3163	err = re_node_set_init_1 (&new_dests, next_node);
				3164	err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
				3165	err3 = re_node_set_merge (cur_nodes, &new_dests);
				3166	re_node_set_free (&new_dests);
				3167	if (BE (err != REG_NOERROR \|\| err2 != REG_NOERROR
				3168	\|\| err3 != REG_NOERROR, 0))
				3169	{
				3170	err = (err != REG_NOERROR ? err
				3171	: (err2 != REG_NOERROR ? err2 : err3));
				3172	return err;
				3173	}
				3174	/* TODO: It is still inefficient... */
				3175	goto restart;
				3176	}
				3177	else
				3178	{
				3179	re_node_set union_set;
				3180	next_node = dfa->nexts[ent->node];
				3181	if (mctx->state_log[to_idx])
				3182	{
				3183	int ret;
				3184	if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
				3185	next_node))
				3186	continue;
				3187	err = re_node_set_init_copy (&union_set,
				3188	&mctx->state_log[to_idx]->nodes);
				3189	ret = re_node_set_insert (&union_set, next_node);
				3190	if (BE (err != REG_NOERROR \|\| ret < 0, 0))
				3191	{
				3192	re_node_set_free (&union_set);
				3193	err = err != REG_NOERROR ? err : REG_ESPACE;
				3194	return err;
				3195	}
				3196	}
				3197	else
				3198	{
				3199	err = re_node_set_init_1 (&union_set, next_node);
				3200	if (BE (err != REG_NOERROR, 0))
				3201	return err;
				3202	}
				3203	mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
				3204	re_node_set_free (&union_set);
				3205	if (BE (mctx->state_log[to_idx] == NULL
				3206	&& err != REG_NOERROR, 0))
				3207	return err;
				3208	}
				3209	}
				3210	while (ent++->more);
				3211	return REG_NOERROR;
				3212	}
				3213
				3214	/* Build transition table for the state.
				3215	Return 1 if succeeded, otherwise return NULL. */
				3216
				3217	static int
				3218	internal_function
				3219	build_trtable (const re_dfa_t dfa, re_dfastate_t state)
				3220	{
				3221	reg_errcode_t err;
				3222	int i, j, ch, need_word_trtable = 0;
				3223	bitset_word_t elem, mask;
				3224	bool dests_node_malloced = false;
				3225	bool dest_states_malloced = false;
				3226	int ndests; /* Number of the destination states from `state'. */
				3227	re_dfastate_t **trtable;
				3228	re_dfastate_t dest_states = NULL, dest_states_word, **dest_states_nl;
				3229	re_node_set follows, *dests_node;
				3230	bitset_t *dests_ch;
				3231	bitset_t acceptable;
				3232
				3233	struct dests_alloc
				3234	{
				3235	re_node_set dests_node[SBC_MAX];
				3236	bitset_t dests_ch[SBC_MAX];
				3237	} *dests_alloc;
				3238
				3239	/* We build DFA states which corresponds to the destination nodes
				3240	from `state'. `dests_node[i]' represents the nodes which i-th
				3241	destination state contains, and `dests_ch[i]' represents the
				3242	characters which i-th destination state accepts. */
				3243	if (__libc_use_alloca (sizeof (struct dests_alloc)))
				3244	dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
				3245	else
				3246	{
				3247	dests_alloc = re_malloc (struct dests_alloc, 1);
				3248	if (BE (dests_alloc == NULL, 0))
				3249	return 0;
				3250	dests_node_malloced = true;
				3251	}
				3252	dests_node = dests_alloc->dests_node;
				3253	dests_ch = dests_alloc->dests_ch;
				3254
				3255	/* Initialize transiton table. */
				3256	state->word_trtable = state->trtable = NULL;
				3257
				3258	/* At first, group all nodes belonging to `state' into several
				3259	destinations. */
				3260	ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
				3261	if (BE (ndests <= 0, 0))
				3262	{
				3263	if (dests_node_malloced)
				3264	free (dests_alloc);
				3265	/* Return 0 in case of an error, 1 otherwise. */
				3266	if (ndests == 0)
				3267	{
				3268	state->trtable = (re_dfastate_t **)
				3269	calloc (sizeof (re_dfastate_t *), SBC_MAX);
				3270	return 1;
				3271	}
				3272	return 0;
				3273	}
				3274
				3275	err = re_node_set_alloc (&follows, ndests + 1);
				3276	if (BE (err != REG_NOERROR, 0))
				3277	goto out_free;
				3278
				3279	if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
				3280	+ ndests * 3 * sizeof (re_dfastate_t *)))
				3281	dest_states = (re_dfastate_t **)
				3282	alloca (ndests * 3 * sizeof (re_dfastate_t *));
				3283	else
				3284	{
				3285	dest_states = (re_dfastate_t **)
				3286	malloc (ndests * 3 * sizeof (re_dfastate_t *));
				3287	if (BE (dest_states == NULL, 0))
				3288	{
				3289	out_free:
				3290	if (dest_states_malloced)
				3291	free (dest_states);
				3292	re_node_set_free (&follows);
				3293	for (i = 0; i < ndests; ++i)
				3294	re_node_set_free (dests_node + i);
				3295	if (dests_node_malloced)
				3296	free (dests_alloc);
				3297	return 0;
				3298	}
				3299	dest_states_malloced = true;
				3300	}
				3301	dest_states_word = dest_states + ndests;
				3302	dest_states_nl = dest_states_word + ndests;
				3303	bitset_empty (acceptable);
				3304
				3305	/* Then build the states for all destinations. */
				3306	for (i = 0; i < ndests; ++i)
				3307	{
				3308	int next_node;
				3309	re_node_set_empty (&follows);
				3310	/* Merge the follows of this destination states. */
				3311	for (j = 0; j < dests_node[i].nelem; ++j)
				3312	{
				3313	next_node = dfa->nexts[dests_node[i].elems[j]];
				3314	if (next_node != -1)
				3315	{
				3316	err = re_node_set_merge (&follows, dfa->eclosures + next_node);
				3317	if (BE (err != REG_NOERROR, 0))
				3318	goto out_free;
				3319	}
				3320	}
				3321	dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
				3322	if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
				3323	goto out_free;
				3324	/* If the new state has context constraint,
				3325	build appropriate states for these contexts. */
				3326	if (dest_states[i]->has_constraint)
				3327	{
				3328	dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
				3329	CONTEXT_WORD);
				3330	if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
				3331	goto out_free;
				3332
				3333	if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
				3334	need_word_trtable = 1;
				3335
				3336	dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
				3337	CONTEXT_NEWLINE);
				3338	if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
				3339	goto out_free;
				3340	}
				3341	else
				3342	{
				3343	dest_states_word[i] = dest_states[i];
				3344	dest_states_nl[i] = dest_states[i];
				3345	}
				3346	bitset_merge (acceptable, dests_ch[i]);
				3347	}
				3348
				3349	if (!BE (need_word_trtable, 0))
				3350	{
				3351	/* We don't care about whether the following character is a word
				3352	character, or we are in a single-byte character set so we can
				3353	discern by looking at the character code: allocate a
				3354	256-entry transition table. */
				3355	trtable = state->trtable = calloc (sizeof (re_dfastate_t *), SBC_MAX);
				3356	if (BE (trtable == NULL, 0))
				3357	goto out_free;
				3358
				3359	/* For all characters ch...: */
				3360	for (i = 0; i < BITSET_WORDS; ++i)
				3361	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
				3362	elem;
				3363	mask <<= 1, elem >>= 1, ++ch)
				3364	if (BE (elem & 1, 0))
				3365	{
				3366	/* There must be exactly one destination which accepts
				3367	character ch. See group_nodes_into_DFAstates. */
				3368	for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
				3369	;
				3370
				3371	/* j-th destination accepts the word character ch. */
				3372	if (dfa->word_char[i] & mask)
				3373	trtable[ch] = dest_states_word[j];
				3374	else
				3375	trtable[ch] = dest_states[j];
				3376	}
				3377	}
				3378	else
				3379	{
				3380	/* We care about whether the following character is a word
				3381	character, and we are in a multi-byte character set: discern
				3382	by looking at the character code: build two 256-entry
				3383	transition tables, one starting at trtable[0] and one
				3384	starting at trtable[SBC_MAX]. */
				3385	trtable = state->word_trtable = calloc (sizeof (re_dfastate_t ), 2 SBC_MAX);
				3386	if (BE (trtable == NULL, 0))
				3387	goto out_free;
				3388
				3389	/* For all characters ch...: */
				3390	for (i = 0; i < BITSET_WORDS; ++i)
				3391	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
				3392	elem;
				3393	mask <<= 1, elem >>= 1, ++ch)
				3394	if (BE (elem & 1, 0))
				3395	{
				3396	/* There must be exactly one destination which accepts
				3397	character ch. See group_nodes_into_DFAstates. */
				3398	for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
				3399	;
				3400
				3401	/* j-th destination accepts the word character ch. */
				3402	trtable[ch] = dest_states[j];
				3403	trtable[ch + SBC_MAX] = dest_states_word[j];
				3404	}
				3405	}
				3406
				3407	/* new line */
				3408	if (bitset_contain (acceptable, NEWLINE_CHAR))
				3409	{
				3410	/* The current state accepts newline character. */
				3411	for (j = 0; j < ndests; ++j)
				3412	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
				3413	{
				3414	/* k-th destination accepts newline character. */
				3415	trtable[NEWLINE_CHAR] = dest_states_nl[j];
				3416	if (need_word_trtable)
				3417	trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
				3418	/* There must be only one destination which accepts
				3419	newline. See group_nodes_into_DFAstates. */
				3420	break;
				3421	}
				3422	}
				3423
				3424	if (dest_states_malloced)
				3425	free (dest_states);
				3426
				3427	re_node_set_free (&follows);
				3428	for (i = 0; i < ndests; ++i)
				3429	re_node_set_free (dests_node + i);
				3430
				3431	if (dests_node_malloced)
				3432	free (dests_alloc);
				3433
				3434	return 1;
				3435	}
				3436
				3437	/* Group all nodes belonging to STATE into several destinations.
				3438	Then for all destinations, set the nodes belonging to the destination
				3439	to DESTS_NODE[i] and set the characters accepted by the destination
				3440	to DEST_CH[i]. This function return the number of destinations. */
				3441
				3442	static int
				3443	internal_function
				3444	group_nodes_into_DFAstates (const re_dfa_t dfa, const re_dfastate_t state,
				3445	re_node_set dests_node, bitset_t dests_ch)
				3446	{
				3447	reg_errcode_t err;
				3448	int result;
				3449	int i, j, k;
				3450	int ndests; /* Number of the destinations from `state'. */
				3451	bitset_t accepts; /* Characters a node can accept. */
				3452	const re_node_set *cur_nodes = &state->nodes;
				3453	bitset_empty (accepts);
				3454	ndests = 0;
				3455
				3456	/* For all the nodes belonging to `state', */
				3457	for (i = 0; i < cur_nodes->nelem; ++i)
				3458	{
				3459	re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
				3460	re_token_type_t type = node->type;
				3461	unsigned int constraint = node->constraint;
				3462
				3463	/* Enumerate all single byte character this node can accept. */
				3464	if (type == CHARACTER)
				3465	bitset_set (accepts, node->opr.c);
				3466	else if (type == SIMPLE_BRACKET)
				3467	{
				3468	bitset_merge (accepts, node->opr.sbcset);
				3469	}
				3470	else if (type == OP_PERIOD)
				3471	{
				3472	#ifdef RE_ENABLE_I18N
				3473	if (dfa->mb_cur_max > 1)
				3474	bitset_merge (accepts, dfa->sb_char);
				3475	else
				3476	#endif
				3477	bitset_set_all (accepts);
				3478	if (!(dfa->syntax & RE_DOT_NEWLINE))
				3479	bitset_clear (accepts, '\n');
				3480	if (dfa->syntax & RE_DOT_NOT_NULL)
				3481	bitset_clear (accepts, '\0');
				3482	}
				3483	#ifdef RE_ENABLE_I18N
				3484	else if (type == OP_UTF8_PERIOD)
				3485	{
				3486	memset (accepts, '\xff', sizeof (bitset_t) / 2);
				3487	if (!(dfa->syntax & RE_DOT_NEWLINE))
				3488	bitset_clear (accepts, '\n');
				3489	if (dfa->syntax & RE_DOT_NOT_NULL)
				3490	bitset_clear (accepts, '\0');
				3491	}
				3492	#endif
				3493	else
				3494	continue;
				3495
				3496	/* Check the `accepts' and sift the characters which are not
				3497	match it the context. */
				3498	if (constraint)
				3499	{
				3500	if (constraint & NEXT_NEWLINE_CONSTRAINT)
				3501	{
				3502	bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
				3503	bitset_empty (accepts);
				3504	if (accepts_newline)
				3505	bitset_set (accepts, NEWLINE_CHAR);
				3506	else
				3507	continue;
				3508	}
				3509	if (constraint & NEXT_ENDBUF_CONSTRAINT)
				3510	{
				3511	bitset_empty (accepts);
				3512	continue;
				3513	}
				3514
				3515	if (constraint & NEXT_WORD_CONSTRAINT)
				3516	{
				3517	bitset_word_t any_set = 0;
				3518	if (type == CHARACTER && !node->word_char)
				3519	{
				3520	bitset_empty (accepts);
				3521	continue;
				3522	}
				3523	#ifdef RE_ENABLE_I18N
				3524	if (dfa->mb_cur_max > 1)
				3525	for (j = 0; j < BITSET_WORDS; ++j)
				3526	any_set \|= (accepts[j] &= (dfa->word_char[j] \| ~dfa->sb_char[j]));
				3527	else
				3528	#endif
				3529	for (j = 0; j < BITSET_WORDS; ++j)
				3530	any_set \|= (accepts[j] &= dfa->word_char[j]);
				3531	if (!any_set)
				3532	continue;
				3533	}
				3534	if (constraint & NEXT_NOTWORD_CONSTRAINT)
				3535	{
				3536	bitset_word_t any_set = 0;
				3537	if (type == CHARACTER && node->word_char)
				3538	{
				3539	bitset_empty (accepts);
				3540	continue;
				3541	}
				3542	#ifdef RE_ENABLE_I18N
				3543	if (dfa->mb_cur_max > 1)
				3544	for (j = 0; j < BITSET_WORDS; ++j)
				3545	any_set \|= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
				3546	else
				3547	#endif
				3548	for (j = 0; j < BITSET_WORDS; ++j)
				3549	any_set \|= (accepts[j] &= ~dfa->word_char[j]);
				3550	if (!any_set)
				3551	continue;
				3552	}
				3553	}
				3554
				3555	/* Then divide `accepts' into DFA states, or create a new
				3556	state. Above, we make sure that accepts is not empty. */
				3557	for (j = 0; j < ndests; ++j)
				3558	{
				3559	bitset_t intersec; /* Intersection sets, see below. */
				3560	bitset_t remains;
				3561	/* Flags, see below. */
				3562	bitset_word_t has_intersec, not_subset, not_consumed;
				3563
				3564	/* Optimization, skip if this state doesn't accept the character. */
				3565	if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
				3566	continue;
				3567
				3568	/* Enumerate the intersection set of this state and `accepts'. */
				3569	has_intersec = 0;
				3570	for (k = 0; k < BITSET_WORDS; ++k)
				3571	has_intersec \|= intersec[k] = accepts[k] & dests_ch[j][k];
				3572	/* And skip if the intersection set is empty. */
				3573	if (!has_intersec)
				3574	continue;
				3575
				3576	/* Then check if this state is a subset of `accepts'. */
				3577	not_subset = not_consumed = 0;
				3578	for (k = 0; k < BITSET_WORDS; ++k)
				3579	{
				3580	not_subset \|= remains[k] = ~accepts[k] & dests_ch[j][k];
				3581	not_consumed \|= accepts[k] = accepts[k] & ~dests_ch[j][k];
				3582	}
				3583
				3584	/* If this state isn't a subset of `accepts', create a
				3585	new group state, which has the `remains'. */
				3586	if (not_subset)
				3587	{
				3588	bitset_copy (dests_ch[ndests], remains);
				3589	bitset_copy (dests_ch[j], intersec);
				3590	err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
				3591	if (BE (err != REG_NOERROR, 0))
				3592	goto error_return;
				3593	++ndests;
				3594	}
				3595
				3596	/* Put the position in the current group. */
				3597	result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
				3598	if (BE (result < 0, 0))
				3599	goto error_return;
				3600
				3601	/* If all characters are consumed, go to next node. */
				3602	if (!not_consumed)
				3603	break;
				3604	}
				3605	/* Some characters remain, create a new group. */
				3606	if (j == ndests)
				3607	{
				3608	bitset_copy (dests_ch[ndests], accepts);
				3609	err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
				3610	if (BE (err != REG_NOERROR, 0))
				3611	goto error_return;
				3612	++ndests;
				3613	bitset_empty (accepts);
				3614	}
				3615	}
				3616	return ndests;
				3617	error_return:
				3618	for (j = 0; j < ndests; ++j)
				3619	re_node_set_free (dests_node + j);
				3620	return -1;
				3621	}
				3622
				3623	#ifdef RE_ENABLE_I18N
				3624	/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
				3625	Return the number of the bytes the node accepts.
				3626	STR_IDX is the current index of the input string.
				3627
				3628	This function handles the nodes which can accept one character, or
				3629	one collating element like '.', '[a-z]', opposite to the other nodes
				3630	can only accept one byte. */
				3631
				3632	static int
				3633	internal_function
				3634	check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
				3635	const re_string_t *input, int str_idx)
				3636	{
				3637	const re_token_t *node = dfa->nodes + node_idx;
				3638	int char_len, elem_len;
				3639	int i;
				3640
				3641	if (BE (node->type == OP_UTF8_PERIOD, 0))
				3642	{
				3643	unsigned char c = re_string_byte_at (input, str_idx), d;
				3644	if (BE (c < 0xc2, 1))
				3645	return 0;
				3646
				3647	if (str_idx + 2 > input->len)
				3648	return 0;
				3649
				3650	d = re_string_byte_at (input, str_idx + 1);
				3651	if (c < 0xe0)
				3652	return (d < 0x80 \|\| d > 0xbf) ? 0 : 2;
				3653	else if (c < 0xf0)
				3654	{
				3655	char_len = 3;
				3656	if (c == 0xe0 && d < 0xa0)
				3657	return 0;
				3658	}
				3659	else if (c < 0xf8)
				3660	{
				3661	char_len = 4;
				3662	if (c == 0xf0 && d < 0x90)
				3663	return 0;
				3664	}
				3665	else if (c < 0xfc)
				3666	{
				3667	char_len = 5;
				3668	if (c == 0xf8 && d < 0x88)
				3669	return 0;
				3670	}
				3671	else if (c < 0xfe)
				3672	{
				3673	char_len = 6;
				3674	if (c == 0xfc && d < 0x84)
				3675	return 0;
				3676	}
				3677	else
				3678	return 0;
				3679
				3680	if (str_idx + char_len > input->len)
				3681	return 0;
				3682
				3683	for (i = 1; i < char_len; ++i)
				3684	{
				3685	d = re_string_byte_at (input, str_idx + i);
				3686	if (d < 0x80 \|\| d > 0xbf)
				3687	return 0;
				3688	}
				3689	return char_len;
				3690	}
				3691
				3692	char_len = re_string_char_size_at (input, str_idx);
				3693	if (node->type == OP_PERIOD)
				3694	{
				3695	if (char_len <= 1)
				3696	return 0;
				3697	/* FIXME: I don't think this if is needed, as both '\n'
				3698	and '\0' are char_len == 1. */
				3699	/* '.' accepts any one character except the following two cases. */
				3700	if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
				3701	re_string_byte_at (input, str_idx) == '\n') \|\|
				3702	((dfa->syntax & RE_DOT_NOT_NULL) &&
				3703	re_string_byte_at (input, str_idx) == '\0'))
				3704	return 0;
				3705	return char_len;
				3706	}
				3707
				3708	elem_len = re_string_elem_size_at (input, str_idx);
				3709	if ((elem_len <= 1 && char_len <= 1) \|\| char_len == 0)
				3710	return 0;
				3711
				3712	if (node->type == COMPLEX_BRACKET)
				3713	{
				3714	const re_charset_t *cset = node->opr.mbcset;
				3715	# if 0
				3716	const unsigned char *pin
				3717	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
				3718	int j;
				3719	uint32_t nrules;
				3720	# endif
				3721	int match_len = 0;
				3722	wchar_t wc = ((cset->nranges \|\| cset->nchar_classes \|\| cset->nmbchars)
				3723	? re_string_wchar_at (input, str_idx) : 0);
				3724
				3725	/* match with multibyte character? */
				3726	for (i = 0; i < cset->nmbchars; ++i)
				3727	if (wc == cset->mbchars[i])
				3728	{
				3729	match_len = char_len;
				3730	goto check_node_accept_bytes_match;
				3731	}
				3732	/* match with character_class? */
				3733	for (i = 0; i < cset->nchar_classes; ++i)
				3734	{
				3735	wctype_t wt = cset->char_classes[i];
				3736	if (__iswctype (wc, wt))
				3737	{
				3738	match_len = char_len;
				3739	goto check_node_accept_bytes_match;
				3740	}
				3741	}
				3742
				3743	# if 0
				3744	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
				3745	if (nrules != 0)
				3746	{
				3747	unsigned int in_collseq = 0;
				3748	const int32_t table, indirect;
				3749	const unsigned char weights, extra;
				3750	const char *collseqwc;
				3751	int32_t idx;
				3752	/* This #include defines a local function! */
				3753	# include <locale/weight.h>
				3754
				3755	/* match with collating_symbol? */
				3756	if (cset->ncoll_syms)
				3757	extra = (const unsigned char *)
				3758	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
				3759	for (i = 0; i < cset->ncoll_syms; ++i)
				3760	{
				3761	const unsigned char *coll_sym = extra + cset->coll_syms[i];
				3762	/* Compare the length of input collating element and
				3763	the length of current collating element. */
				3764	if (*coll_sym != elem_len)
				3765	continue;
				3766	/* Compare each bytes. */
				3767	for (j = 0; j < *coll_sym; j++)
				3768	if (pin[j] != coll_sym[1 + j])
				3769	break;
				3770	if (j == *coll_sym)
				3771	{
				3772	/* Match if every bytes is equal. */
				3773	match_len = j;
				3774	goto check_node_accept_bytes_match;
				3775	}
				3776	}
				3777
				3778	if (cset->nranges)
				3779	{
				3780	if (elem_len <= char_len)
				3781	{
				3782	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
				3783	in_collseq = __collseq_table_lookup (collseqwc, wc);
				3784	}
				3785	else
				3786	in_collseq = find_collation_sequence_value (pin, elem_len);
				3787	}
				3788	/* match with range expression? */
				3789	for (i = 0; i < cset->nranges; ++i)
				3790	if (cset->range_starts[i] <= in_collseq
				3791	&& in_collseq <= cset->range_ends[i])
				3792	{
				3793	match_len = elem_len;
				3794	goto check_node_accept_bytes_match;
				3795	}
				3796
				3797	/* match with equivalence_class? */
				3798	if (cset->nequiv_classes)
				3799	{
				3800	const unsigned char *cp = pin;
				3801	table = (const int32_t *)
				3802	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
				3803	weights = (const unsigned char *)
				3804	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
				3805	extra = (const unsigned char *)
				3806	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
				3807	indirect = (const int32_t *)
				3808	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
				3809	idx = findidx (&cp);
				3810	if (idx > 0)
				3811	for (i = 0; i < cset->nequiv_classes; ++i)
				3812	{
				3813	int32_t equiv_class_idx = cset->equiv_classes[i];
				3814	size_t weight_len = weights[idx];
				3815	if (weight_len == weights[equiv_class_idx])
				3816	{
				3817	int cnt = 0;
				3818	while (cnt <= weight_len
				3819	&& (weights[equiv_class_idx + 1 + cnt]
				3820	== weights[idx + 1 + cnt]))
				3821	++cnt;
				3822	if (cnt > weight_len)
				3823	{
				3824	match_len = elem_len;
				3825	goto check_node_accept_bytes_match;
				3826	}
				3827	}
				3828	}
				3829	}
				3830	}
				3831	else
				3832	# endif
				3833	{
				3834	/* match with range expression? */
				3835	wchar_t cmp_buf[6];
				3836
				3837	memset (cmp_buf, 0, sizeof(cmp_buf));
				3838	cmp_buf[2] = wc;
				3839	for (i = 0; i < cset->nranges; ++i)
				3840	{
				3841	cmp_buf[0] = cset->range_starts[i];
				3842	cmp_buf[4] = cset->range_ends[i];
				3843	if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
				3844	&& wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
				3845	{
				3846	match_len = char_len;
				3847	goto check_node_accept_bytes_match;
				3848	}
				3849	}
				3850	}
				3851
				3852	check_node_accept_bytes_match:
				3853	if (!cset->non_match)
				3854	return match_len;
				3855	if (match_len > 0)
				3856	return 0;
				3857	return (elem_len > char_len) ? elem_len : char_len;
				3858	}
				3859	return 0;
				3860	}
				3861
				3862	# if 0
				3863	static unsigned int
				3864	internal_function
				3865	find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
				3866	{
				3867	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
				3868	if (nrules == 0)
				3869	{
				3870	if (mbs_len == 1)
				3871	{
				3872	/* No valid character. Match it as a single byte character. */
				3873	const unsigned char collseq = (const unsigned char )
				3874	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
				3875	return collseq[mbs[0]];
				3876	}
				3877	return UINT_MAX;
				3878	}
				3879	else
				3880	{
				3881	int32_t idx;
				3882	const unsigned char extra = (const unsigned char )
				3883	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
				3884	int32_t extrasize = (const unsigned char *)
				3885	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
				3886
				3887	for (idx = 0; idx < extrasize;)
				3888	{
				3889	int mbs_cnt, found = 0;
				3890	int32_t elem_mbs_len;
				3891	/* Skip the name of collating element name. */
				3892	idx = idx + extra[idx] + 1;
				3893	elem_mbs_len = extra[idx++];
				3894	if (mbs_len == elem_mbs_len)
				3895	{
				3896	for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
				3897	if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
				3898	break;
				3899	if (mbs_cnt == elem_mbs_len)
				3900	/* Found the entry. */
				3901	found = 1;
				3902	}
				3903	/* Skip the byte sequence of the collating element. */
				3904	idx += elem_mbs_len;
				3905	/* Adjust for the alignment. */
				3906	idx = (idx + 3) & ~3;
				3907	/* Skip the collation sequence value. */
				3908	idx += sizeof (uint32_t);
				3909	/* Skip the wide char sequence of the collating element. */
				3910	idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
				3911	/* If we found the entry, return the sequence value. */
				3912	if (found)
				3913	return (uint32_t ) (extra + idx);
				3914	/* Skip the collation sequence value. */
				3915	idx += sizeof (uint32_t);
				3916	}
				3917	return UINT_MAX;
				3918	}
				3919	}
				3920	# endif
				3921	#endif /* RE_ENABLE_I18N */
				3922
				3923	/* Check whether the node accepts the byte which is IDX-th
				3924	byte of the INPUT. */
				3925
				3926	static int
				3927	internal_function
				3928	check_node_accept (const re_match_context_t mctx, const re_token_t node,
				3929	int idx)
				3930	{
				3931	unsigned char ch;
				3932	ch = re_string_byte_at (&mctx->input, idx);
				3933	switch (node->type)
				3934	{
				3935	case CHARACTER:
				3936	if (node->opr.c != ch)
				3937	return 0;
				3938	break;
				3939
				3940	case SIMPLE_BRACKET:
				3941	if (!bitset_contain (node->opr.sbcset, ch))
				3942	return 0;
				3943	break;
				3944
				3945	#ifdef RE_ENABLE_I18N
				3946	case OP_UTF8_PERIOD:
				3947	if (ch >= 0x80)
				3948	return 0;
				3949	/* FALLTHROUGH */
				3950	#endif
				3951	case OP_PERIOD:
				3952	if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
				3953	\|\| (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
				3954	return 0;
				3955	break;
				3956
				3957	default:
				3958	return 0;
				3959	}
				3960
				3961	if (node->constraint)
				3962	{
				3963	/* The node has constraints. Check whether the current context
				3964	satisfies the constraints. */
				3965	unsigned int context = re_string_context_at (&mctx->input, idx,
				3966	mctx->eflags);
				3967	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
				3968	return 0;
				3969	}
				3970
				3971	return 1;
				3972	}
				3973
				3974	/* Extend the buffers, if the buffers have run out. */
				3975
				3976	static reg_errcode_t
				3977	internal_function
				3978	extend_buffers (re_match_context_t *mctx)
				3979	{
				3980	reg_errcode_t ret;
				3981	re_string_t *pstr = &mctx->input;
				3982
				3983	/* Double the lengthes of the buffers. */
				3984	ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
				3985	if (BE (ret != REG_NOERROR, 0))
				3986	return ret;
				3987
				3988	if (mctx->state_log != NULL)
				3989	{
				3990	/* And double the length of state_log. */
				3991	/* XXX We have no indication of the size of this buffer. If this
				3992	allocation fail we have no indication that the state_log array
				3993	does not have the right size. */
				3994	re_dfastate_t *new_array = re_realloc (mctx->state_log, re_dfastate_t ,
				3995	pstr->bufs_len + 1);
				3996	if (BE (new_array == NULL, 0))
				3997	return REG_ESPACE;
				3998	mctx->state_log = new_array;
				3999	}
				4000
				4001	/* Then reconstruct the buffers. */
				4002	if (pstr->icase)
				4003	{
				4004	#ifdef RE_ENABLE_I18N
				4005	if (pstr->mb_cur_max > 1)
				4006	{
				4007	ret = build_wcs_upper_buffer (pstr);
				4008	if (BE (ret != REG_NOERROR, 0))
				4009	return ret;
				4010	}
				4011	else
				4012	#endif /* RE_ENABLE_I18N */
				4013	build_upper_buffer (pstr);
				4014	}
				4015	else
				4016	{
				4017	#ifdef RE_ENABLE_I18N
				4018	if (pstr->mb_cur_max > 1)
				4019	build_wcs_buffer (pstr);
				4020	else
				4021	#endif /* RE_ENABLE_I18N */
				4022	{
				4023	if (pstr->trans != NULL)
				4024	re_string_translate_buffer (pstr);
				4025	}
				4026	}
				4027	return REG_NOERROR;
				4028	}
				4029
				4030
				4031	/* Functions for matching context. */
				4032
				4033	/* Initialize MCTX. */
				4034
				4035	static reg_errcode_t
				4036	internal_function
				4037	match_ctx_init (re_match_context_t *mctx, int eflags, int n)
				4038	{
				4039	mctx->eflags = eflags;
				4040	mctx->match_last = -1;
				4041	if (n > 0)
				4042	{
				4043	mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
				4044	mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
				4045	if (BE (mctx->bkref_ents == NULL \|\| mctx->sub_tops == NULL, 0))
				4046	return REG_ESPACE;
				4047	}
				4048	/* Already zero-ed by the caller.
				4049	else
				4050	mctx->bkref_ents = NULL;
				4051	mctx->nbkref_ents = 0;
				4052	mctx->nsub_tops = 0; */
				4053	mctx->abkref_ents = n;
				4054	mctx->max_mb_elem_len = 1;
				4055	mctx->asub_tops = n;
				4056	return REG_NOERROR;
				4057	}
				4058
				4059	/* Clean the entries which depend on the current input in MCTX.
				4060	This function must be invoked when the matcher changes the start index
				4061	of the input, or changes the input string. */
				4062
				4063	static void
				4064	internal_function
				4065	match_ctx_clean (re_match_context_t *mctx)
				4066	{
				4067	int st_idx;
				4068	for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
				4069	{
				4070	int sl_idx;
				4071	re_sub_match_top_t *top = mctx->sub_tops[st_idx];
				4072	for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
				4073	{
				4074	re_sub_match_last_t *last = top->lasts[sl_idx];
				4075	re_free (last->path.array);
				4076	re_free (last);
				4077	}
				4078	re_free (top->lasts);
				4079	if (top->path)
				4080	{
				4081	re_free (top->path->array);
				4082	re_free (top->path);
				4083	}
				4084	free (top);
				4085	}
				4086
				4087	mctx->nsub_tops = 0;
				4088	mctx->nbkref_ents = 0;
				4089	}
				4090
				4091	/* Free all the memory associated with MCTX. */
				4092
				4093	static void
				4094	internal_function
				4095	match_ctx_free (re_match_context_t *mctx)
				4096	{
				4097	/* First, free all the memory associated with MCTX->SUB_TOPS. */
				4098	match_ctx_clean (mctx);
				4099	re_free (mctx->sub_tops);
				4100	re_free (mctx->bkref_ents);
				4101	}
				4102
				4103	/* Add a new backreference entry to MCTX.
				4104	Note that we assume that caller never call this function with duplicate
				4105	entry, and call with STR_IDX which isn't smaller than any existing entry.
				4106	*/
				4107
				4108	static reg_errcode_t
				4109	internal_function
				4110	match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
				4111	int to)
				4112	{
				4113	if (mctx->nbkref_ents >= mctx->abkref_ents)
				4114	{
				4115	struct re_backref_cache_entry* new_entry;
				4116	new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
				4117	mctx->abkref_ents * 2);
				4118	if (BE (new_entry == NULL, 0))
				4119	{
				4120	re_free (mctx->bkref_ents);
				4121	return REG_ESPACE;
				4122	}
				4123	mctx->bkref_ents = new_entry;
				4124	memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
				4125	sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
				4126	mctx->abkref_ents *= 2;
				4127	}
				4128	if (mctx->nbkref_ents > 0
				4129	&& mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
				4130	mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
				4131
				4132	mctx->bkref_ents[mctx->nbkref_ents].node = node;
				4133	mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
				4134	mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
				4135	mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
				4136
				4137	/* This is a cache that saves negative results of check_dst_limits_calc_pos.
				4138	If bit N is clear, means that this entry won't epsilon-transition to
				4139	an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
				4140	it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
				4141	such node.
				4142
				4143	A backreference does not epsilon-transition unless it is empty, so set
				4144	to all zeros if FROM != TO. */
				4145	mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
				4146	= (from == to ? ~0 : 0);
				4147
				4148	mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
				4149	if (mctx->max_mb_elem_len < to - from)
				4150	mctx->max_mb_elem_len = to - from;
				4151	return REG_NOERROR;
				4152	}
				4153
				4154	/* Search for the first entry which has the same str_idx, or -1 if none is
				4155	found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
				4156
				4157	static int
				4158	internal_function
				4159	search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
				4160	{
				4161	int left, right, mid, last;
				4162	last = right = mctx->nbkref_ents;
				4163	for (left = 0; left < right;)
				4164	{
				4165	mid = (left + right) / 2;
				4166	if (mctx->bkref_ents[mid].str_idx < str_idx)
				4167	left = mid + 1;
				4168	else
				4169	right = mid;
				4170	}
				4171	if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
				4172	return left;
				4173	else
				4174	return -1;
				4175	}
				4176
				4177	/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
				4178	at STR_IDX. */
				4179
				4180	static reg_errcode_t
				4181	internal_function
				4182	match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
				4183	{
				4184	#ifdef DEBUG
				4185	assert (mctx->sub_tops != NULL);
				4186	assert (mctx->asub_tops > 0);
				4187	#endif
				4188	if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
				4189	{
				4190	int new_asub_tops = mctx->asub_tops * 2;
				4191	re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
				4192	re_sub_match_top_t *,
				4193	new_asub_tops);
				4194	if (BE (new_array == NULL, 0))
				4195	return REG_ESPACE;
				4196	mctx->sub_tops = new_array;
				4197	mctx->asub_tops = new_asub_tops;
				4198	}
				4199	mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
				4200	if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
				4201	return REG_ESPACE;
				4202	mctx->sub_tops[mctx->nsub_tops]->node = node;
				4203	mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
				4204	return REG_NOERROR;
				4205	}
				4206
				4207	/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
				4208	at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
				4209
				4210	static re_sub_match_last_t *
				4211	internal_function
				4212	match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
				4213	{
				4214	re_sub_match_last_t *new_entry;
				4215	if (BE (subtop->nlasts == subtop->alasts, 0))
				4216	{
				4217	int new_alasts = 2 * subtop->alasts + 1;
				4218	re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
				4219	re_sub_match_last_t *,
				4220	new_alasts);
				4221	if (BE (new_array == NULL, 0))
				4222	return NULL;
				4223	subtop->lasts = new_array;
				4224	subtop->alasts = new_alasts;
				4225	}
				4226	new_entry = calloc (1, sizeof (re_sub_match_last_t));
				4227	if (BE (new_entry != NULL, 1))
				4228	{
				4229	subtop->lasts[subtop->nlasts] = new_entry;
				4230	new_entry->node = node;
				4231	new_entry->str_idx = str_idx;
				4232	++subtop->nlasts;
				4233	}
				4234	return new_entry;
				4235	}
				4236
				4237	static void
				4238	internal_function
				4239	sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
				4240	re_dfastate_t **limited_sts, int last_node, int last_str_idx)
				4241	{
				4242	sctx->sifted_states = sifted_sts;
				4243	sctx->limited_states = limited_sts;
				4244	sctx->last_node = last_node;
				4245	sctx->last_str_idx = last_str_idx;
				4246	re_node_set_init_empty (&sctx->limits);
				4247	}