Blame - src/kernel/linux/v4.19/scripts/genksyms/lex.l - T800

blob: d29c774f51b615026b82ae22e44ce03a676e6dd9 [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame]	1	/* Lexical analysis for genksyms.
				2	Copyright 1996, 1997 Linux International.
				3
				4	New implementation contributed by Richard Henderson <rth@tamu.edu>
				5	Based on original work by Bjorn Ekwall <bj0rn@blox.se>
				6
				7	Taken from Linux modutils 2.4.22.
				8
				9	This program is free software; you can redistribute it and/or modify it
				10	under the terms of the GNU General Public License as published by the
				11	Free Software Foundation; either version 2 of the License, or (at your
				12	option) any later version.
				13
				14	This program is distributed in the hope that it will be useful, but
				15	WITHOUT ANY WARRANTY; without even the implied warranty of
				16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				17	General Public License for more details.
				18
				19	You should have received a copy of the GNU General Public License
				20	along with this program; if not, write to the Free Software Foundation,
				21	Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
				22
				23
				24	%{
				25
				26	#include <limits.h>
				27	#include <stdlib.h>
				28	#include <string.h>
				29	#include <ctype.h>
				30
				31	#include "genksyms.h"
				32	#include "parse.tab.h"
				33
				34	/* We've got a two-level lexer here. We let flex do basic tokenization
				35	and then we categorize those basic tokens in the second stage. */
				36	#define YY_DECL static int yylex1(void)
				37
				38	%}
				39
				40	IDENT [A-Za-z_\$][A-Za-z0-9_\$]*
				41
				42	O_INT 0[0-7]*
				43	D_INT [1-9][0-9]*
				44	X_INT 0[Xx][0-9A-Fa-f]+
				45	I_SUF [Uu]\|[Ll]\|[Uu][Ll]\|[Ll][Uu]
				46	INT ({O_INT}\|{D_INT}\|{X_INT}){I_SUF}?
				47
				48	FRAC ([0-9]*\.[0-9]+)\|([0-9]+\.)
				49	EXP [Ee][+-]?[0-9]+
				50	F_SUF [FfLl]
				51	REAL ({FRAC}{EXP}?{F_SUF}?)\|([0-9]+{EXP}{F_SUF}?)
				52
				53	STRING L?\"([^\\\"]\\.)[^\\\"]*\"
				54	CHAR L?\'([^\\\']\\.)[^\\\']*\'
				55
				56	MC_TOKEN ([~%^&*+=\|<>/-]=)\|(&&)\|("\|\|")\|(->)\|(<<)\|(>>)
				57
				58	/* We don't do multiple input files. */
				59	%option noyywrap
				60
				61	%option noinput
				62
				63	%%
				64
				65
				66	/* Keep track of our location in the original source files. */
				67	^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME;
				68	^#.*\n cur_line++;
				69	\n cur_line++;
				70
				71	/* Ignore all other whitespace. */
				72	[ \t\f\v\r]+ ;
				73
				74
				75	{STRING} return STRING;
				76	{CHAR} return CHAR;
				77	{IDENT} return IDENT;
				78
				79	/* The Pedant requires that the other C multi-character tokens be
				80	recognized as tokens. We don't actually use them since we don't
				81	parse expressions, but we do want whitespace to be arranged
				82	around them properly. */
				83	{MC_TOKEN} return OTHER;
				84	{INT} return INT;
				85	{REAL} return REAL;
				86
				87	"..." return DOTS;
				88
				89	/* All other tokens are single characters. */
				90	. return yytext[0];
				91
				92
				93	%%
				94
				95	/* Bring in the keyword recognizer. */
				96
				97	#include "keywords.c"
				98
				99
				100	/* Macros to append to our phrase collection list. */
				101
				102	/*
				103	* We mark any token, that that equals to a known enumerator, as
				104	* SYM_ENUM_CONST. The parser will change this for struct and union tags later,
				105	* the only problem is struct and union members:
				106	* enum e { a, b }; struct s { int a, b; }
				107	* but in this case, the only effect will be, that the ABI checksums become
				108	* more volatile, which is acceptable. Also, such collisions are quite rare,
				109	* so far it was only observed in include/linux/telephony.h.
				110	*/
				111	#define _APP(T,L) do { \
				112	cur_node = next_node; \
				113	next_node = xmalloc(sizeof(*next_node)); \
				114	next_node->next = cur_node; \
				115	cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
				116	cur_node->tag = \
				117	find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
				118	SYM_ENUM_CONST : SYM_NORMAL ; \
				119	cur_node->in_source_file = in_source_file; \
				120	} while (0)
				121
				122	#define APP _APP(yytext, yyleng)
				123
				124
				125	/* The second stage lexer. Here we incorporate knowledge of the state
				126	of the parser to tailor the tokens that are returned. */
				127
				128	int
				129	yylex(void)
				130	{
				131	static enum {
				132	ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
				133	ST_BRACKET, ST_BRACE, ST_EXPRESSION,
				134	ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
				135	ST_TABLE_5, ST_TABLE_6
				136	} lexstate = ST_NOTSTARTED;
				137
				138	static int suppress_type_lookup, dont_want_brace_phrase;
				139	static struct string_list *next_node;
				140
				141	int token, count = 0;
				142	struct string_list *cur_node;
				143
				144	if (lexstate == ST_NOTSTARTED)
				145	{
				146	next_node = xmalloc(sizeof(*next_node));
				147	next_node->next = NULL;
				148	lexstate = ST_NORMAL;
				149	}
				150
				151	repeat:
				152	token = yylex1();
				153
				154	if (token == 0)
				155	return 0;
				156	else if (token == FILENAME)
				157	{
				158	char file, e;
				159
				160	/* Save the filename and line number for later error messages. */
				161
				162	if (cur_filename)
				163	free(cur_filename);
				164
				165	file = strchr(yytext, '\"')+1;
				166	e = strchr(file, '\"');
				167	*e = '\0';
				168	cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
				169	cur_line = atoi(yytext+2);
				170
				171	if (!source_file) {
				172	source_file = xstrdup(cur_filename);
				173	in_source_file = 1;
				174	} else {
				175	in_source_file = (strcmp(cur_filename, source_file) == 0);
				176	}
				177
				178	goto repeat;
				179	}
				180
				181	switch (lexstate)
				182	{
				183	case ST_NORMAL:
				184	switch (token)
				185	{
				186	case IDENT:
				187	APP;
				188	{
				189	int r = is_reserved_word(yytext, yyleng);
				190	if (r >= 0)
				191	{
				192	switch (token = r)
				193	{
				194	case ATTRIBUTE_KEYW:
				195	lexstate = ST_ATTRIBUTE;
				196	count = 0;
				197	goto repeat;
				198	case ASM_KEYW:
				199	lexstate = ST_ASM;
				200	count = 0;
				201	goto repeat;
				202	case TYPEOF_KEYW:
				203	lexstate = ST_TYPEOF;
				204	count = 0;
				205	goto repeat;
				206
				207	case STRUCT_KEYW:
				208	case UNION_KEYW:
				209	case ENUM_KEYW:
				210	dont_want_brace_phrase = 3;
				211	suppress_type_lookup = 2;
				212	goto fini;
				213
				214	case EXPORT_SYMBOL_KEYW:
				215	goto fini;
				216	}
				217	}
				218	if (!suppress_type_lookup)
				219	{
				220	if (find_symbol(yytext, SYM_TYPEDEF, 1))
				221	token = TYPE;
				222	}
				223	}
				224	break;
				225
				226	case '[':
				227	APP;
				228	lexstate = ST_BRACKET;
				229	count = 1;
				230	goto repeat;
				231
				232	case '{':
				233	APP;
				234	if (dont_want_brace_phrase)
				235	break;
				236	lexstate = ST_BRACE;
				237	count = 1;
				238	goto repeat;
				239
				240	case '=': case ':':
				241	APP;
				242	lexstate = ST_EXPRESSION;
				243	break;
				244
				245	case DOTS:
				246	default:
				247	APP;
				248	break;
				249	}
				250	break;
				251
				252	case ST_ATTRIBUTE:
				253	APP;
				254	switch (token)
				255	{
				256	case '(':
				257	++count;
				258	goto repeat;
				259	case ')':
				260	if (--count == 0)
				261	{
				262	lexstate = ST_NORMAL;
				263	token = ATTRIBUTE_PHRASE;
				264	break;
				265	}
				266	goto repeat;
				267	default:
				268	goto repeat;
				269	}
				270	break;
				271
				272	case ST_ASM:
				273	APP;
				274	switch (token)
				275	{
				276	case '(':
				277	++count;
				278	goto repeat;
				279	case ')':
				280	if (--count == 0)
				281	{
				282	lexstate = ST_NORMAL;
				283	token = ASM_PHRASE;
				284	break;
				285	}
				286	goto repeat;
				287	default:
				288	goto repeat;
				289	}
				290	break;
				291
				292	case ST_TYPEOF_1:
				293	if (token == IDENT)
				294	{
				295	if (is_reserved_word(yytext, yyleng) >= 0
				296	\|\| find_symbol(yytext, SYM_TYPEDEF, 1))
				297	{
				298	yyless(0);
				299	unput('(');
				300	lexstate = ST_NORMAL;
				301	token = TYPEOF_KEYW;
				302	break;
				303	}
				304	_APP("(", 1);
				305	}
				306	lexstate = ST_TYPEOF;
				307	/* FALLTHRU */
				308
				309	case ST_TYPEOF:
				310	switch (token)
				311	{
				312	case '(':
				313	if ( ++count == 1 )
				314	lexstate = ST_TYPEOF_1;
				315	else
				316	APP;
				317	goto repeat;
				318	case ')':
				319	APP;
				320	if (--count == 0)
				321	{
				322	lexstate = ST_NORMAL;
				323	token = TYPEOF_PHRASE;
				324	break;
				325	}
				326	goto repeat;
				327	default:
				328	APP;
				329	goto repeat;
				330	}
				331	break;
				332
				333	case ST_BRACKET:
				334	APP;
				335	switch (token)
				336	{
				337	case '[':
				338	++count;
				339	goto repeat;
				340	case ']':
				341	if (--count == 0)
				342	{
				343	lexstate = ST_NORMAL;
				344	token = BRACKET_PHRASE;
				345	break;
				346	}
				347	goto repeat;
				348	default:
				349	goto repeat;
				350	}
				351	break;
				352
				353	case ST_BRACE:
				354	APP;
				355	switch (token)
				356	{
				357	case '{':
				358	++count;
				359	goto repeat;
				360	case '}':
				361	if (--count == 0)
				362	{
				363	lexstate = ST_NORMAL;
				364	token = BRACE_PHRASE;
				365	break;
				366	}
				367	goto repeat;
				368	default:
				369	goto repeat;
				370	}
				371	break;
				372
				373	case ST_EXPRESSION:
				374	switch (token)
				375	{
				376	case '(': case '[': case '{':
				377	++count;
				378	APP;
				379	goto repeat;
				380	case '}':
				381	/* is this the last line of an enum declaration? */
				382	if (count == 0)
				383	{
				384	/* Put back the token we just read so's we can find it again
				385	after registering the expression. */
				386	unput(token);
				387
				388	lexstate = ST_NORMAL;
				389	token = EXPRESSION_PHRASE;
				390	break;
				391	}
				392	/* FALLTHRU */
				393	case ')': case ']':
				394	--count;
				395	APP;
				396	goto repeat;
				397	case ',': case ';':
				398	if (count == 0)
				399	{
				400	/* Put back the token we just read so's we can find it again
				401	after registering the expression. */
				402	unput(token);
				403
				404	lexstate = ST_NORMAL;
				405	token = EXPRESSION_PHRASE;
				406	break;
				407	}
				408	APP;
				409	goto repeat;
				410	default:
				411	APP;
				412	goto repeat;
				413	}
				414	break;
				415
				416	case ST_TABLE_1:
				417	goto repeat;
				418
				419	case ST_TABLE_2:
				420	if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
				421	{
				422	token = EXPORT_SYMBOL_KEYW;
				423	lexstate = ST_TABLE_5;
				424	APP;
				425	break;
				426	}
				427	lexstate = ST_TABLE_6;
				428	/* FALLTHRU */
				429
				430	case ST_TABLE_6:
				431	switch (token)
				432	{
				433	case '{': case '[': case '(':
				434	++count;
				435	break;
				436	case '}': case ']': case ')':
				437	--count;
				438	break;
				439	case ',':
				440	if (count == 0)
				441	lexstate = ST_TABLE_2;
				442	break;
				443	};
				444	goto repeat;
				445
				446	case ST_TABLE_3:
				447	goto repeat;
				448
				449	case ST_TABLE_4:
				450	if (token == ';')
				451	lexstate = ST_NORMAL;
				452	goto repeat;
				453
				454	case ST_TABLE_5:
				455	switch (token)
				456	{
				457	case ',':
				458	token = ';';
				459	lexstate = ST_TABLE_2;
				460	APP;
				461	break;
				462	default:
				463	APP;
				464	break;
				465	}
				466	break;
				467
				468	default:
				469	exit(1);
				470	}
				471	fini:
				472
				473	if (suppress_type_lookup > 0)
				474	--suppress_type_lookup;
				475	if (dont_want_brace_phrase > 0)
				476	--dont_want_brace_phrase;
				477
				478	yylval = &next_node->next;
				479
				480	return token;
				481	}