blob: 56abf8915e836e7fceec9bb742a6d96191b84033 [file] [log] [blame]
lh9ed821d2023-04-07 01:36:19 -07001/*
2 * Copyright (c) 2002 - 2005 Tony Finch <dot@dotat.at>. All rights reserved.
3 *
4 * This code is derived from software contributed to Berkeley by Dave Yost.
5 * It was rewritten to support ANSI C by Tony Finch. The original version of
6 * unifdef carried the following copyright notice. None of its code remains
7 * in this version (though some of the names remain).
8 *
9 * Copyright (c) 1985, 1993
10 * The Regents of the University of California. All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35
36#ifndef lint
37#if 0
38static const char copyright[] =
39"@(#) Copyright (c) 1985, 1993\n\
40 The Regents of the University of California. All rights reserved.\n";
41#endif
42#ifdef __IDSTRING
43__IDSTRING(Berkeley, "@(#)unifdef.c 8.1 (Berkeley) 6/6/93");
44__IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.8 2000/07/03 02:51:36 matt Exp $");
45__IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.171 2005/03/08 12:38:48 fanf2 Exp $");
46#endif
47#endif /* not lint */
48#ifdef __FBSDID
49__FBSDID("$FreeBSD: /repoman/r/ncvs/src/usr.bin/unifdef/unifdef.c,v 1.20 2005/05/21 09:55:09 ru Exp $");
50#endif
51
52/*
53 * unifdef - remove ifdef'ed lines
54 *
55 * Wishlist:
56 * provide an option which will append the name of the
57 * appropriate symbol after #else's and #endif's
58 * provide an option which will check symbols after
59 * #else's and #endif's to see that they match their
60 * corresponding #ifdef or #ifndef
61 *
62 * The first two items above require better buffer handling, which would
63 * also make it possible to handle all "dodgy" directives correctly.
64 */
65
66#include <errno.h>
67#include <ctype.h>
68#include <stdarg.h>
69#include <stdbool.h>
70#include <stdio.h>
71#include <stdlib.h>
72#include <string.h>
73#include <unistd.h>
74
/*
 * Avoid err.h since uClibc can disable these things.
 *
 * Local replacements for the BSD warnx()/errx()/err() family.  They rely
 * on GNU C extensions: statement expressions "({ ... })", named variadic
 * macro parameters ("args...") and the ", ## args" comma-swallowing form.
 *
 * vwarnx(fmt, args)     - print "unifdef: ", the message formatted from a
 *                         va_list, and a newline to stderr.
 * warnx(fmt, ...)       - same, but taking the arguments directly; fmt must
 *                         be a string literal because it is concatenated
 *                         with the prefix and the trailing newline.
 * errx(code, fmt, ...)  - warnx() followed by exit(code).
 * err(code, fmt, ...)   - errx() with ": <strerror(errno)>" appended.
 */
#define vwarnx(fmt, args) ({ fprintf(stderr, "unifdef: "); vfprintf(stderr, fmt, args); fprintf(stderr, "\n"); })
#define warnx(fmt, args...) fprintf(stderr, "unifdef: " fmt "\n", ## args)
#define errx(exit_code, fmt, args...) ({ warnx(fmt, ## args); exit(exit_code); })
#define err(exit_code, fmt, args...) errx(exit_code, fmt ": %s", ## args, strerror(errno))
80
/*
 * types of input lines:
 *
 * The first ten values are the directive types.  A directive that is
 * spread over more than one line is reported as its base type plus
 * LT_DODGY, so LT_DODGY .. LT_DODGY_LAST mirror LT_TRUEI .. LT_ENDIF.
 * The numeric order matters: it is the column order of trans_table and
 * the index order of linetype_name.
 */
typedef enum {
	LT_TRUEI,		/* a true #if with ignore flag */
	LT_FALSEI,		/* a false #if with ignore flag */
	LT_IF,			/* an unknown #if */
	LT_TRUE,		/* a true #if */
	LT_FALSE,		/* a false #if */
	LT_ELIF,		/* an unknown #elif */
	LT_ELTRUE,		/* a true #elif */
	LT_ELFALSE,		/* a false #elif */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,	/* dodgy variant of LT_ENDIF */
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_COUNT		/* number of line types (table dimension) */
} Linetype;

/* Printable names for debug output, indexed by Linetype. */
static char const * const linetype_name[] = {
	"TRUEI", "FALSEI", "IF", "TRUE", "FALSE",
	"ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF",
	"DODGY TRUEI", "DODGY FALSEI",
	"DODGY IF", "DODGY TRUE", "DODGY FALSE",
	"DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE",
	"DODGY ELSE", "DODGY ENDIF",
	"PLAIN", "EOF"
};
109
/*
 * state of #if processing
 *
 * One of these is kept per nesting level in ifstate[].  The numeric
 * order is the row order of trans_table and the index order of
 * ifstate_name.
 */
typedef enum {
	IS_OUTSIDE,		/* not inside any #if group at this level */
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT		/* number of states (table dimension) */
} Ifstate;

/* Printable names for debug output, indexed by Ifstate. */
static char const * const ifstate_name[] = {
	"OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX",
	"PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE",
	"PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE",
	"FALSE_TRAILER"
};
131
/*
 * state of comment parser
 *
 * NO_COMMENT is deliberately zero (false) so that the global incomment
 * can be tested directly as a truth value.
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT,	/* star-backslash-newline in a C comment */
	CHAR_LITERAL,		/* inside '' */
	STRING_LITERAL		/* inside "" */
} Comment_state;

/* Printable names for debug output, indexed by Comment_state. */
static char const * const comment_name[] = {
	"NO", "C", "CXX", "STARTING", "FINISHING", "CHAR", "STRING"
};
146
/*
 * state of preprocessor line parser
 *
 * Tracks whether the text seen so far on the current logical line still
 * allows it to be a preprocessor directive.
 */
typedef enum {
	LS_START,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;

/* Printable names for debug output, indexed by Line_state. */
static char const * const linestate_name[] = {
	"START", "HASH", "DIRTY"
};
157
/*
 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
 *
 * MAXDEPTH sizes the per-level stacks (ifstate, ignoring, stifline),
 * MAXLINE is the size handed to fgets() when reading a line, and
 * MAXSYMS sizes the symbol tables (symname, value, ignore).
 */
#define MAXDEPTH 64			/* maximum #if nesting */
#define MAXLINE 4096			/* maximum length of line */
#define MAXSYMS 4096			/* maximum number of symbols */

/*
 * Sometimes when editing a keyword the replacement text is longer, so
 * we leave some space at the end of the tline buffer to accommodate this.
 */
#define EDITSLOP 10
170
/*
 * Globals.
 */

/* Command-line option flags, set once in main(). */
static bool complement;		/* -c: do the complement */
static bool debugging;		/* -d: debugging reports */
static bool iocccok;		/* -e: fewer IOCCC errors */
static bool killconsts;		/* -k: eval constant #ifs */
static bool lnblank;		/* -l: blank deleted lines */
static bool lnnum;		/* -n: add #line directives */
static bool symlist;		/* -s: output symbol list */
static bool text;		/* -t: this is a text file */

/* Symbol table: three parallel arrays filled by addsym(). */
static const char *symname[MAXSYMS];	/* symbol name */
static const char *value[MAXSYMS];	/* -Dsym=value; NULL means undefined */
static bool ignore[MAXSYMS];		/* -iDsym or -iUsym */
static int nsyms;			/* number of symbols */

/* Input source. */
static FILE *input;			/* input file pointer */
static const char *filename;		/* input file name */
static int linenum;			/* current line number */

/* Current line. */
static char tline[MAXLINE+EDITSLOP];	/* input buffer plus space */
static char *keyword;			/* used for editing #elif's */

/* Parser and state-machine state; the arrays are indexed by depth. */
static Comment_state incomment;		/* comment parser state */
static Line_state linestate;		/* #if line parser state */
static Ifstate ifstate[MAXDEPTH];	/* #if processor state */
static bool ignoring[MAXDEPTH];		/* ignore comments state */
static int stifline[MAXDEPTH];		/* start of current #if */
static int depth;			/* current #if nesting */
static int delcount;			/* count of deleted lines */
static bool keepthis;			/* don't delete constant #if */

static int exitstat;			/* program exit status */
206
/* Forward declarations of the file-local functions defined below. */
static void addsym(bool, bool, char *);
static void debug(const char *, ...);
static void done(void);
static void error(const char *);
static int findsym(const char *);
static void flushline(bool);
static Linetype get_line(void);
static Linetype ifeval(const char **);
static void ignoreoff(void);
static void ignoreon(void);
static void keywordedit(const char *);
static void nest(void);
static void process(void);
static const char *skipcomment(const char *);
static const char *skipsym(const char *);
static void state(Ifstate);
static int strlcmp(const char *, const char *, size_t);
static void unnest(void);
static void usage(void);

/*
 * True when c cannot be part of an identifier, i.e. it is neither a
 * letter, a digit nor an underscore.  The argument is expanded several
 * times and is not parenthesised, so pass only simple, side-effect-free
 * expressions.
 */
#define endsym(c) (!isalpha((unsigned char)c) && !isdigit((unsigned char)c) && c != '_')
228
229/*
230 * The main program.
231 */
232int
233main(int argc, char *argv[])
234{
235 int opt;
236
237 while ((opt = getopt(argc, argv, "i:D:U:I:cdeklnst")) != -1)
238 switch (opt) {
239 case 'i': /* treat stuff controlled by these symbols as text */
240 /*
241 * For strict backwards-compatibility the U or D
242 * should be immediately after the -i but it doesn't
243 * matter much if we relax that requirement.
244 */
245 opt = *optarg++;
246 if (opt == 'D')
247 addsym(true, true, optarg);
248 else if (opt == 'U')
249 addsym(true, false, optarg);
250 else
251 usage();
252 break;
253 case 'D': /* define a symbol */
254 addsym(false, true, optarg);
255 break;
256 case 'U': /* undef a symbol */
257 addsym(false, false, optarg);
258 break;
259 case 'I':
260 /* no-op for compatibility with cpp */
261 break;
262 case 'c': /* treat -D as -U and vice versa */
263 complement = true;
264 break;
265 case 'd':
266 debugging = true;
267 break;
268 case 'e': /* fewer errors from dodgy lines */
269 iocccok = true;
270 break;
271 case 'k': /* process constant #ifs */
272 killconsts = true;
273 break;
274 case 'l': /* blank deleted lines instead of omitting them */
275 lnblank = true;
276 break;
277 case 'n': /* add #line directive after deleted lines */
278 lnnum = true;
279 break;
280 case 's': /* only output list of symbols that control #ifs */
281 symlist = true;
282 break;
283 case 't': /* don't parse C comments */
284 text = true;
285 break;
286 default:
287 usage();
288 }
289 argc -= optind;
290 argv += optind;
291 if (argc > 1) {
292 errx(2, "can only do one file");
293 } else if (argc == 1 && strcmp(*argv, "-") != 0) {
294 filename = *argv;
295 input = fopen(filename, "r");
296 if (input == NULL)
297 err(2, "can't open %s", filename);
298 } else {
299 filename = "[stdin]";
300 input = stdin;
301 }
302 process();
303 debug("bug at line %d", __LINE__);
304 abort(); /* bug */
305}
306
/*
 * Print the command-line synopsis to stderr and exit with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: unifdef [-cdeklnst] [-Ipath]"
	    " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n";

	fputs(synopsis, stderr);
	exit(2);
}
314
315/*
316 * A state transition function alters the global #if processing state
317 * in a particular way. The table below is indexed by the current
318 * processing state and the type of the current line.
319 *
320 * Nesting is handled by keeping a stack of states; some transition
321 * functions increase or decrease the depth. They also maintain the
322 * ignore state on a stack. In some complicated cases they have to
323 * alter the preprocessor directive, as follows.
324 *
325 * When we have processed a group that starts off with a known-false
326 * #if/#elif sequence (which has therefore been deleted) followed by a
327 * #elif that we don't understand and therefore must keep, we edit the
328 * latter into a #if to keep the nesting correct.
329 *
330 * When we find a true #elif in a group, the following block will
331 * always be kept and the rest of the sequence after the next #elif or
332 * #else will be discarded. We edit the #elif into a #else and the
333 * following directive to #endif since this has the desired behaviour.
334 *
335 * "Dodgy" directives are split across multiple lines, the most common
336 * example being a multi-line comment hanging off the right of the
337 * directive. We can handle them correctly only if there is no change
338 * from printing to dropping (or vice versa) caused by that directive.
339 * If the directive is the first of a group we have a choice between
340 * failing with an error, or passing it through unchanged instead of
341 * evaluating it. The latter is not the default to avoid questions from
342 * users about unifdef unexpectedly leaving behind preprocessor directives.
343 */
344typedef void state_fn(void);
345
346/* report an error */
347static void Eelif (void) { error("Inappropriate #elif"); }
348static void Eelse (void) { error("Inappropriate #else"); }
349static void Eendif(void) { error("Inappropriate #endif"); }
350static void Eeof (void) { error("Premature EOF"); }
351static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
352/* plain line handling */
353static void print (void) { flushline(true); }
354static void drop (void) { flushline(false); }
355/* output lacks group's start line */
356static void Strue (void) { drop(); ignoreoff(); state(IS_TRUE_PREFIX); }
357static void Sfalse(void) { drop(); ignoreoff(); state(IS_FALSE_PREFIX); }
358static void Selse (void) { drop(); state(IS_TRUE_ELSE); }
359/* print/pass this block */
360static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
361static void Pelse (void) { print(); state(IS_PASS_ELSE); }
362static void Pendif(void) { print(); unnest(); }
363/* discard this block */
364static void Dfalse(void) { drop(); ignoreoff(); state(IS_FALSE_TRAILER); }
365static void Delif (void) { drop(); ignoreoff(); state(IS_FALSE_MIDDLE); }
366static void Delse (void) { drop(); state(IS_FALSE_ELSE); }
367static void Dendif(void) { drop(); unnest(); }
368/* first line of group */
369static void Fdrop (void) { nest(); Dfalse(); }
370static void Fpass (void) { nest(); Pelif(); }
371static void Ftrue (void) { nest(); Strue(); }
372static void Ffalse(void) { nest(); Sfalse(); }
373/* variable pedantry for obfuscated lines */
374static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); }
375static void Oif (void) { if (!iocccok) Eioccc(); Fpass(); }
376static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); }
377/* ignore comments in this block */
378static void Idrop (void) { Fdrop(); ignoreon(); }
379static void Itrue (void) { Ftrue(); ignoreon(); }
380static void Ifalse(void) { Ffalse(); ignoreon(); }
381/* edit this line */
382static void Mpass (void) { strncpy(keyword, "if ", 4); Pelif(); }
383static void Mtrue (void) { keywordedit("else\n"); state(IS_TRUE_MIDDLE); }
384static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); }
385static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); }
386
/*
 * The state transition table.  Rows are indexed by the Ifstate of the
 * current nesting level, columns by the Linetype of the line just read;
 * process() calls the selected function once per input line.  Each row
 * holds the ten plain directive types, then the same ten in their
 * "dodgy" form, then PLAIN and EOF (see the legend at the bottom).
 */
static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
/* IS_OUTSIDE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eendif,
  print, done },
/* IS_FALSE_PREFIX */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
  drop,  Eeof },
/* IS_TRUE_PREFIX */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
  print, Eeof },
/* IS_PASS_MIDDLE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Pelif, Oelif, Oelif, Pelse, Pendif,
  print, Eeof },
/* IS_FALSE_MIDDLE */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
  drop,  Eeof },
/* IS_TRUE_MIDDLE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
  print, Eeof },
/* IS_PASS_ELSE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Pendif,
  print, Eeof },
/* IS_FALSE_ELSE */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
  drop,  Eeof },
/* IS_TRUE_ELSE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eioccc,
  print, Eeof },
/* IS_FALSE_TRAILER */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
  drop,  Eeof }
/*TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF
  TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF (DODGY)
  PLAIN  EOF */
};
432
433/*
434 * State machine utility functions
435 */
436static void
437done(void)
438{
439 if (incomment)
440 error("EOF in comment");
441 exit(exitstat);
442}
443static void
444ignoreoff(void)
445{
446 if (depth == 0) {
447 debug("bug at line %d", __LINE__);
448 abort(); /* bug */
449 }
450 ignoring[depth] = ignoring[depth-1];
451}
452static void
453ignoreon(void)
454{
455 ignoring[depth] = true;
456}
457static void
458keywordedit(const char *replacement)
459{
460 size_t size = tline + sizeof(tline) - keyword;
461 char *dst = keyword;
462 const char *src = replacement;
463 if (size != 0) {
464 while ((--size != 0) && (*src != '\0'))
465 *dst++ = *src++;
466 *dst = '\0';
467 }
468 print();
469}
470static void
471nest(void)
472{
473 depth += 1;
474 if (depth >= MAXDEPTH)
475 error("Too many levels of nesting");
476 stifline[depth] = linenum;
477}
478static void
479unnest(void)
480{
481 if (depth == 0) {
482 debug("bug at line %d", __LINE__);
483 abort(); /* bug */
484 }
485 depth -= 1;
486}
487static void
488state(Ifstate is)
489{
490 ifstate[depth] = is;
491}
492
493/*
494 * Write a line to the output or not, according to command line options.
495 */
496static void
497flushline(bool keep)
498{
499 if (symlist)
500 return;
501 if (keep ^ complement) {
502 if (lnnum && delcount > 0)
503 printf("#line %d\n", linenum);
504 fputs(tline, stdout);
505 delcount = 0;
506 } else {
507 if (lnblank)
508 putc('\n', stdout);
509 exitstat = 1;
510 delcount += 1;
511 }
512}
513
514/*
515 * The driver for the state machine.
516 */
517static void
518process(void)
519{
520 Linetype lineval;
521
522 for (;;) {
523 linenum++;
524 lineval = get_line();
525 trans_table[ifstate[depth]][lineval]();
526 debug("process %s -> %s depth %d",
527 linetype_name[lineval],
528 ifstate_name[ifstate[depth]], depth);
529 }
530}
531
/*
 * Parse a line and determine its type. We keep the preprocessor line
 * parser state between calls in the global variable linestate, with
 * help from skipcomment().
 *
 * Reads the next chunk of input into tline (fgets() is given MAXLINE,
 * so a longer physical line arrives in several pieces) and returns its
 * Linetype, or LT_EOF when fgets() returns NULL.  Side effects: updates
 * linestate and incomment, sets keyword to the directive name inside
 * tline, and may terminate the program through Eioccc().
 */
static Linetype
get_line(void)
{
	const char *cp;
	int cursym;
	int kwlen;
	Linetype retval;
	Comment_state wascomment;

	if (fgets(tline, MAXLINE, input) == NULL)
		return (LT_EOF);
	retval = LT_PLAIN;
	/* remember whether this line began inside a comment or literal */
	wascomment = incomment;
	cp = skipcomment(tline);
	if (linestate == LS_START) {
		if (*cp == '#') {
			linestate = LS_HASH;
			cp = skipcomment(cp + 1);
		} else if (*cp != '\0')
			linestate = LS_DIRTY;
	}
	if (!incomment && linestate == LS_HASH) {
		/* tline + offset yields a writable pointer to the keyword */
		keyword = tline + (cp - tline);
		cp = skipsym(cp);
		kwlen = cp - keyword;
		/* no way can we deal with a continuation inside a keyword */
		if (strncmp(cp, "\\\n", 2) == 0)
			Eioccc();
		if (strlcmp("ifdef", keyword, kwlen) == 0 ||
		    strlcmp("ifndef", keyword, kwlen) == 0) {
			cp = skipcomment(cp);
			if ((cursym = findsym(cp)) < 0)
				retval = LT_IF;
			else {
				/* keyword[2] is 'n' only for "ifndef" */
				retval = (keyword[2] == 'n')
				    ? LT_FALSE : LT_TRUE;
				/* a NULL value means the symbol is undefined */
				if (value[cursym] == NULL)
					retval = (retval == LT_TRUE)
					    ? LT_FALSE : LT_TRUE;
				if (ignore[cursym])
					retval = (retval == LT_TRUE)
					    ? LT_TRUEI : LT_FALSEI;
			}
			cp = skipsym(cp);
		} else if (strlcmp("if", keyword, kwlen) == 0)
			retval = ifeval(&cp);
		else if (strlcmp("elif", keyword, kwlen) == 0)
			/* shift LT_IF/LT_TRUE/LT_FALSE to the LT_EL* values */
			retval = ifeval(&cp) - LT_IF + LT_ELIF;
		else if (strlcmp("else", keyword, kwlen) == 0)
			retval = LT_ELSE;
		else if (strlcmp("endif", keyword, kwlen) == 0)
			retval = LT_ENDIF;
		else {
			linestate = LS_DIRTY;
			retval = LT_PLAIN;
		}
		cp = skipcomment(cp);
		if (*cp != '\0') {
			/*
			 * Something is left over after the directive, so a
			 * known result is downgraded to "unknown".
			 */
			linestate = LS_DIRTY;
			if (retval == LT_TRUE || retval == LT_FALSE ||
			    retval == LT_TRUEI || retval == LT_FALSEI)
				retval = LT_IF;
			if (retval == LT_ELTRUE || retval == LT_ELFALSE)
				retval = LT_ELIF;
		}
		/* a directive touching a multi-line comment is "dodgy" */
		if (retval != LT_PLAIN && (wascomment || incomment)) {
			retval += LT_DODGY;
			if (incomment)
				linestate = LS_DIRTY;
		}
		/* skipcomment should have changed the state */
// Disabled sanity check: this happens sometimes on valid files
//		if (linestate == LS_HASH) {
//			debug("bug at line %d", __LINE__);
//			abort(); /* bug */
//		}
	}
	if (linestate == LS_DIRTY) {
		/* scan the rest of the line to keep the comment state right */
		while (*cp != '\0')
			cp = skipcomment(cp + 1);
	}
	debug("parser %s comment %s line",
	    comment_name[incomment], linestate_name[linestate]);
	return (retval);
}
622
/*
 * These are the binary operators that are supported by the expression
 * evaluator. Note that if support for division is added then we also
 * need short-circuiting booleans because of divide-by-zero.
 *
 * Each one returns 1 when the relation holds and 0 otherwise.
 */
static int
op_lt(int a, int b)
{
	return (b > a);
}

static int
op_gt(int a, int b)
{
	return (b < a);
}

static int
op_le(int a, int b)
{
	return (!(a > b));
}

static int
op_ge(int a, int b)
{
	return (!(a < b));
}

static int
op_eq(int a, int b)
{
	return (!(a != b));
}

static int
op_ne(int a, int b)
{
	return (!(a == b));
}

static int
op_or(int a, int b)
{
	return (a != 0 || b != 0);
}

static int
op_and(int a, int b)
{
	return (a != 0 && b != 0);
}
636
/*
 * An evaluation function takes three arguments, as follows: (1) a pointer to
 * an element of the precedence table which lists the operators at the current
 * level of precedence; (2) a pointer to an integer which will receive the
 * value of the expression; and (3) a pointer to a char* that points to the
 * expression to be evaluated and that is updated to the end of the expression
 * when evaluation is complete. The function returns LT_FALSE if the value of
 * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the
 * expression could not be evaluated.
 */
struct ops;

typedef Linetype eval_fn(const struct ops *, int *, const char **);

static eval_fn eval_table, eval_unary;

/*
 * The precedence table. Expressions involving binary operators are evaluated
 * in a table-driven way by eval_table. When it evaluates a subexpression it
 * calls the inner function with its first argument pointing to the next
 * element of the table. Innermost expressions have special non-table-driven
 * handling.
 *
 * Rows run from lowest to highest precedence.  Each row has room for five
 * operators but lists at most four, so the zero-filled remainder supplies
 * the NULL str that ends eval_table's scan.  Operators are matched by
 * prefix, which is why "<=" and ">=" are listed before "<" and ">".
 */
static const struct ops {
	eval_fn *inner;			/* evaluator for the operands */
	struct op {
		const char *str;	/* operator spelling */
		/*
		 * Operand truth value (0 or 1) that decides the result
		 * whatever the other operand is; -1 if there is none.
		 */
		int short_circuit_val;
		int (*fn)(int, int);	/* computes the result */
	} op[5];
} eval_ops[] = {
	{ eval_table, { { "||", 1, op_or } } },
	{ eval_table, { { "&&", 0, op_and } } },
	{ eval_table, { { "==", -1, op_eq },
			{ "!=", -1, op_ne } } },
	{ eval_unary, { { "<=", -1, op_le },
			{ ">=", -1, op_ge },
			{ "<", -1, op_lt },
			{ ">", -1, op_gt } } }
};
677
678/*
679 * Function for evaluating the innermost parts of expressions, viz.
680 * "!expr", "(expr)", "defined(symbol)", "defined symbol", "symbol", "number".
681 * We reset the keepthis flag when we find a non-constant subexpression.
682 */
683// TODO: we use LT_IF both as "I don't know whether it's false or true"
684// (example: "#if defined FOO") and when we see syntax error
685// (example: "#if (1 || 2" - no closing paren!), but this is wrong.
686// Binary && and || need to distinguish these cases in order to handle this:
687// "#if defined KNOWN_UNDEFINED && FOO" - discard
688// "#if defined KNOWN_UNDEFINED && (syntax_error_here" - do not discard!
689static Linetype
690eval_unary(const struct ops *ops, int *valp, const char **cpp)
691{
692 const char *cp;
693 char *ep;
694 int sym;
695
696 cp = skipcomment(*cpp);
697 if (*cp == '!') {
698 debug("eval%d !", ops - eval_ops);
699 cp++;
700 if (eval_unary(ops, valp, &cp) == LT_IF) {
701 *cpp = cp;
702 return (LT_IF);
703 }
704 *valp = !*valp;
705
706 } else if (*cp == '(') {
707 Linetype expr_res;
708
709 cp++;
710 debug("eval%d (%s", ops - eval_ops, cp);
711 expr_res = eval_table(eval_ops, valp, &cp);
712 cp = skipcomment(cp);
713 *cpp = cp;
714 if (*cp++ != ')')
715 return (LT_IF);
716 *cpp = cp;
717 if (expr_res == LT_IF)
718 return (LT_IF);
719
720 } else if (isdigit((unsigned char)*cp)) {
721 debug("eval%d number", ops - eval_ops);
722 *valp = strtol(cp, &ep, 0);
723 cp = skipsym(cp);
724
725 } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
726 bool parens;
727
728 cp = skipcomment(cp+7);
729 debug("eval%d defined '%s'", ops - eval_ops, cp);
730 parens = (*cp == '(');
731 if (parens)
732 cp = skipcomment(cp+1);
733 sym = findsym(cp);
734 cp = skipsym(cp);
735 cp = skipcomment(cp);
736 if (parens) {
737 if (*cp != ')')
738 return (LT_IF);
739 cp = skipcomment(cp+1);
740 }
741 *cpp = cp;
742 if (sym < 0) {
743 debug("sym not found, returning LT_IF");
744 return (LT_IF);
745 }
746 *valp = (value[sym] != NULL);
747 keepthis = false;
748
749 } else if (!endsym(*cp)) {
750 debug("eval%d symbol", ops - eval_ops);
751 sym = findsym(cp);
752 cp = skipsym(cp);
753 *cpp = cp;
754 if (sym < 0)
755 return (LT_IF);
756 if (value[sym] == NULL)
757 *valp = 0;
758 else {
759 *valp = strtol(value[sym], &ep, 0);
760 if (*ep != '\0' || ep == value[sym])
761 return (LT_IF);
762 }
763 keepthis = false;
764
765 } else {
766 debug("eval%d bad expr", ops - eval_ops);
767 return (LT_IF);
768 }
769
770 *cpp = cp;
771 debug("eval%d = %d", ops - eval_ops, *valp);
772 return (*valp ? LT_TRUE : LT_FALSE);
773}
774
775/*
776 * Table-driven evaluation of binary operators.
777 */
778static Linetype
779eval_table(const struct ops *ops, int *valp, const char **cpp)
780{
781 Linetype left_side;
782 const struct op *op;
783 const char *cp;
784 int val;
785
786 debug("eval%d '%s'", ops - eval_ops, *cpp);
787 left_side = ops->inner(ops+1, valp, cpp);
788 cp = *cpp;
789
790 for (;;) {
791 Linetype right_side;
792
793 cp = skipcomment(cp);
794 for (op = ops->op; op->str != NULL; op++)
795 if (strncmp(cp, op->str, strlen(op->str)) == 0)
796 break;
797 if (op->str == NULL)
798 break;
799 cp += strlen(op->str);
800 debug("eval%d '%s'", ops - eval_ops, op->str);
801 right_side = ops->inner(ops+1, &val, &cp);
802 *cpp = cp;
803
804 /* If short_circuit_val is 0 or 1, we can ignore
805 * right side if left size is known, and its value
806 * (i.e., *valp) is 0 or !0, respectively */
807 if (left_side != LT_IF && op->short_circuit_val == !!*valp) {
808 debug("op->short_circuit_val:%d *valp:%d cp:'%s'",
809 op->short_circuit_val, *valp, cp);
810 *valp = !!*valp;
811 break;
812 }
813 /* Same for the right side */
814 if (right_side != LT_IF && op->short_circuit_val == !!val) {
815 debug("op->short_circuit_val:%d val:%d cp:'%s'",
816 op->short_circuit_val, val, cp);
817 left_side = right_side;
818 *valp = !!val;
819 break;
820 }
821
822 if (left_side == LT_IF || right_side == LT_IF)
823 return (LT_IF);
824 *valp = op->fn(*valp, val);
825 left_side = right_side;
826 }
827
828 debug("eval%d = %d LT_IF:%d", ops - eval_ops, *valp, (left_side == LT_IF));
829 if (left_side == LT_IF)
830 return (LT_IF);
831 return (*valp ? LT_TRUE : LT_FALSE);
832}
833
834/*
835 * Evaluate the expression on a #if or #elif line. If we can work out
836 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
837 * return just a generic LT_IF.
838 */
839static Linetype
840ifeval(const char **cpp)
841{
842 int ret;
843 int val;
844
845 debug("eval %s", *cpp);
846 keepthis = killconsts ? false : true;
847 ret = eval_table(eval_ops, &val, cpp);
848 debug("val:%d ret:%d keepthis:%d", val, ret, keepthis);
849 return (keepthis ? LT_IF : ret);
850}
851
/*
 * Skip over comments, strings, and character literals and stop at the
 * next character position that is not whitespace. Between calls we keep
 * the comment state in the global variable incomment, and we also adjust
 * the global variable linestate when we see a newline.
 * XXX: doesn't cope with the buffer splitting inside a state transition.
 *
 * Returns a pointer to the first character that was not skipped, which
 * is the terminating NUL if the rest of the buffer was consumed.  In
 * text mode (-t), or while comments are being ignored at the current
 * nesting level, only whitespace is skipped.
 */
static const char *
skipcomment(const char *cp)
{
	if (text || ignoring[depth]) {
		for (; isspace((unsigned char)*cp); cp++)
			if (*cp == '\n')
				linestate = LS_START;
		return (cp);
	}
	while (*cp != '\0')
		/* don't reset to LS_START after a line continuation */
		if (strncmp(cp, "\\\n", 2) == 0)
			cp += 2;
		else switch (incomment) {
		case NO_COMMENT:
			if (strncmp(cp, "/\\\n", 3) == 0) {
				/* a slash split from what follows it */
				incomment = STARTING_COMMENT;
				cp += 3;
			} else if (strncmp(cp, "/*", 2) == 0) {
				incomment = C_COMMENT;
				cp += 2;
			} else if (strncmp(cp, "//", 2) == 0) {
				incomment = CXX_COMMENT;
				cp += 2;
			} else if (strncmp(cp, "\'", 1) == 0) {
				/* a literal means this is not a directive line */
				incomment = CHAR_LITERAL;
				linestate = LS_DIRTY;
				cp += 1;
			} else if (strncmp(cp, "\"", 1) == 0) {
				incomment = STRING_LITERAL;
				linestate = LS_DIRTY;
				cp += 1;
			} else if (strncmp(cp, "\n", 1) == 0) {
				linestate = LS_START;
				cp += 1;
			} else if (strchr(" \t", *cp) != NULL) {
				cp += 1;
			} else
				/* first significant character: stop here */
				return (cp);
			continue;
		case CXX_COMMENT:
			if (strncmp(cp, "\n", 1) == 0) {
				incomment = NO_COMMENT;
				linestate = LS_START;
			}
			cp += 1;
			continue;
		case CHAR_LITERAL:
		case STRING_LITERAL:
			if ((incomment == CHAR_LITERAL && cp[0] == '\'') ||
			    (incomment == STRING_LITERAL && cp[0] == '\"')) {
				incomment = NO_COMMENT;
				cp += 1;
			} else if (cp[0] == '\\') {
				/* skip the escaped character, if there is one */
				if (cp[1] == '\0')
					cp += 1;
				else
					cp += 2;
			} else if (strncmp(cp, "\n", 1) == 0) {
				/* error() does not return */
				if (incomment == CHAR_LITERAL)
					error("unterminated char literal");
				else
					error("unterminated string literal");
			} else
				cp += 1;
			continue;
		case C_COMMENT:
			if (strncmp(cp, "*\\\n", 3) == 0) {
				/* a star split from what follows it */
				incomment = FINISHING_COMMENT;
				cp += 3;
			} else if (strncmp(cp, "*/", 2) == 0) {
				incomment = NO_COMMENT;
				cp += 2;
			} else
				cp += 1;
			continue;
		case STARTING_COMMENT:
			if (*cp == '*') {
				incomment = C_COMMENT;
				cp += 1;
			} else if (*cp == '/') {
				incomment = CXX_COMMENT;
				cp += 1;
			} else {
				/* the slash was not a comment opener after all */
				incomment = NO_COMMENT;
				linestate = LS_DIRTY;
			}
			continue;
		case FINISHING_COMMENT:
			if (*cp == '/') {
				incomment = NO_COMMENT;
				cp += 1;
			} else
				/* the star did not close the comment */
				incomment = C_COMMENT;
			continue;
		default:
			debug("bug at line %d", __LINE__);
			abort(); /* bug */
		}
	return (cp);
}
959}
960
961/*
962 * Skip over an identifier.
963 */
964static const char *
965skipsym(const char *cp)
966{
967 while (!endsym(*cp))
968 ++cp;
969 return (cp);
970}
971
972/*
973 * Look for the symbol in the symbol table. If is is found, we return
974 * the symbol table index, else we return -1.
975 */
976static int
977findsym(const char *str)
978{
979 const char *cp;
980 int symind;
981
982 cp = skipsym(str);
983 if (cp == str)
984 return (-1);
985 if (symlist) {
986 printf("%.*s\n", (int)(cp-str), str);
987 /* we don't care about the value of the symbol */
988 return (0);
989 }
990 for (symind = 0; symind < nsyms; ++symind) {
991 if (strlcmp(symname[symind], str, cp-str) == 0) {
992 debug("findsym %s %s", symname[symind],
993 value[symind] ? value[symind] : "");
994 return (symind);
995 }
996 }
997 return (-1);
998}
999
1000/*
1001 * Add a symbol to the symbol table.
1002 */
1003static void
1004addsym(bool ignorethis, bool definethis, char *sym)
1005{
1006 int symind;
1007 char *val;
1008
1009 symind = findsym(sym);
1010 if (symind < 0) {
1011 if (nsyms >= MAXSYMS)
1012 errx(2, "too many symbols");
1013 symind = nsyms++;
1014 }
1015 symname[symind] = sym;
1016 ignore[symind] = ignorethis;
1017 val = sym + (skipsym(sym) - sym);
1018 if (definethis) {
1019 if (*val == '=') {
1020 value[symind] = val+1;
1021 *val = '\0';
1022 } else if (*val == '\0')
1023 value[symind] = "";
1024 else
1025 usage();
1026 } else {
1027 if (*val != '\0')
1028 usage();
1029 value[symind] = NULL;
1030 }
1031}
1032
/*
 * Compare s with n characters of t.
 * The same as strncmp() except that it checks that s[n] == '\0'.
 *
 * Returns 0 only when s is exactly equal to the first n characters of t;
 * otherwise the result is non-zero.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	size_t i;

	for (i = 0; i < n && t[i] != '\0'; i++) {
		if (s[i] != t[i])
			return ((unsigned char)s[i] - (unsigned char)t[i]);
	}
	/* zero only if s ends exactly where the comparison stopped */
	return ((unsigned char)s[i]);
}
1046}
1047
1048/*
1049 * Diagnostics.
1050 */
1051static void
1052debug(const char *msg, ...)
1053{
1054 va_list ap;
1055
1056 if (debugging) {
1057 va_start(ap, msg);
1058 vwarnx(msg, ap);
1059 va_end(ap);
1060 }
1061}
1062
1063static void
1064error(const char *msg)
1065{
1066 if (depth == 0)
1067 warnx("%s: %d: %s", filename, linenum, msg);
1068 else
1069 warnx("%s: %d: %s (#if line %d depth %d)",
1070 filename, linenum, msg, stifline[depth], depth);
1071 errx(2, "output may be truncated");
1072}