| xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame] | 1 | #!/usr/bin/perl | 
|  | 2 |  | 
|  | 3 | # Check that use of symbols declared in a given header does not result | 
|  | 4 | # in any symbols being brought in that are not reserved with external | 
|  | 5 | # linkage for the given standard. | 
|  | 6 |  | 
|  | 7 | # Copyright (C) 2014-2016 Free Software Foundation, Inc. | 
|  | 8 | # This file is part of the GNU C Library. | 
|  | 9 |  | 
|  | 10 | # The GNU C Library is free software; you can redistribute it and/or | 
|  | 11 | # modify it under the terms of the GNU Lesser General Public | 
|  | 12 | # License as published by the Free Software Foundation; either | 
|  | 13 | # version 2.1 of the License, or (at your option) any later version. | 
|  | 14 |  | 
|  | 15 | # The GNU C Library is distributed in the hope that it will be useful, | 
|  | 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | 18 | # Lesser General Public License for more details. | 
|  | 19 |  | 
|  | 20 | # You should have received a copy of the GNU Lesser General Public | 
|  | 21 | # License along with the GNU C Library; if not, see | 
|  | 22 | # <http://www.gnu.org/licenses/>. | 
|  | 23 |  | 
|  | 24 | use GlibcConform; | 
|  | 25 | use Getopt::Long; | 
|  | 26 |  | 
# Parse the command line.  Every option takes a string argument and is
# stored in the correspondingly named global variable.
GetOptions ('header=s' => \$header, 'standard=s' => \$standard,
            'flags=s' => \$flags, 'cc=s' => \$CC, 'tmpdir=s' => \$tmpdir,
            'stdsyms=s' => \$stdsyms_file, 'libsyms=s' => \$libsyms_file,
            'readelf=s' => \$READELF);

# Load the list of symbols that are OK.  The --stdsyms file is read as
# one symbol name per line; each name becomes a key of %stdsyms.
%stdsyms = ();
{
  # Three-argument open with a lexical handle: the file name is never
  # interpreted as part of the open mode, and the global $_ is left
  # untouched by the read loop.
  open (my $stdsyms_fh, '<', $stdsyms_file)
    || die ("open $stdsyms_file: $!\n");
  while (my $line = <$stdsyms_fh>) {
    chomp ($line);
    $stdsyms{$line} = 1;
  }
  close ($stdsyms_fh) || die ("close $stdsyms_file: $!\n");
}

# The following whitelisted symbols are also allowed for now.
#
# * Bug 17576: stdin, stdout, stderr only reserved with external
# linkage when stdio.h included (and possibly not then), not
# generally.
#
# * Bug 18442: re_syntax_options wrongly brought in by regcomp and
# used by re_comp.
#
# * False positive: matherr only used conditionally.  matherrf/matherrl are used
# by IA64 too for the same reason.
#
@whitelist = qw(stdin stdout stderr re_syntax_options matherr matherrf
                matherrl);
foreach my $sym (@whitelist) {
  $stdsyms{$sym} = 1;
}
|  | 58 |  | 
# Return information about GLOBAL and WEAK symbols listed in readelf
# -s output.
#
# SYMS_FILE is the name of a text file to parse.  A line of the form
# "File: PATH" switches the current object name to the last path
# component of PATH; until such a line is seen the object name is
# SYMS_FILE itself.  Every other line is split on whitespace and is
# ignored unless it has at least eight fields, its fifth field (the
# binding) is GLOBAL or WEAK, and its eighth field (the symbol name)
# consists only of word characters.
#
# Returns a list of array references [FILE, SYM, BIND, DEFINED], where
# DEFINED is true when the seventh field (the section index) is not
# "UND".  Dies if the file cannot be opened or closed.
sub list_syms {
  my ($syms_file) = @_;
  # Three-argument open with a lexical handle, and a named line
  # variable, so that neither a global file handle nor the caller's
  # $_ is modified.
  open (my $syms_fh, '<', $syms_file) || die ("open $syms_file: $!\n");
  my ($file) = $syms_file;
  my (@ret) = ();
  while (my $line = <$syms_fh>) {
    chomp ($line);
    if ($line =~ /^File: (.*)/) {
      $file = $1;
      $file =~ s|^.*/||;
      next;
    }
    $line =~ s/^\s*//;
    # Architecture-specific st_other bits appear inside [] and disrupt
    # the format of readelf output.
    $line =~ s/\[.*?\]//;
    my (@fields) = split (/\s+/, $line);
    next if (@fields < 8);
    my ($bind, $ndx, $sym) = @fields[4, 6, 7];
    next if ($bind ne "GLOBAL" && $bind ne "WEAK");
    # Skip names that are not plain identifiers (for example names
    # carrying an "@VERSION" suffix).
    next if ($sym !~ /^\w+$/);
    push (@ret, [$file, $sym, $bind, $ndx ne "UND"]);
  }
  close ($syms_fh) || die ("close $syms_file: $!\n");
  return @ret;
}
|  | 95 |  | 
# Load information about GLOBAL and WEAK symbols defined or used in
# the standard libraries, and index it three ways.
# %seen_syms: object => symbols it defines strongly or references
# (i.e. everything except weak definitions).
%seen_syms = ();
# %strong_undef_syms: object => symbols it references with GLOBAL binding.
%strong_undef_syms = ();
# %sym_objs: symbol => objects defining it (strongly or weakly).
%sym_objs = ();
@sym_data = list_syms ($libsyms_file);
foreach my $entry (@sym_data) {
  my ($obj, $symname, $binding, $is_defined) = @$entry;
  my $is_global = ($binding eq "GLOBAL");
  # push creates the per-key array on first use, so no explicit
  # initialisation of the hash slots is needed.
  push (@{$sym_objs{$symname}}, $obj)
    if ($is_defined);
  push (@{$seen_syms{$obj}}, $symname)
    if ($is_global || !$is_defined);
  push (@{$strong_undef_syms{$obj}}, $symname)
    if ($is_global && !$is_defined);
}
|  | 126 |  | 
|  | 127 | # Determine what ELF-level symbols are brought in by use of C-level | 
|  | 128 | # symbols declared in the given header. | 
|  | 129 | # | 
|  | 130 | # The rules followed are heuristic and so may produce false positives | 
|  | 131 | # and false negatives. | 
|  | 132 | # | 
|  | 133 | # * All undefined symbols are considered of significance, but it is | 
|  | 134 | # possible that (a) any standard library definition is weak, so can be | 
|  | 135 | # overridden by the user's definition, and (b) the symbol is only used | 
|  | 136 | # conditionally and not if the program is limited to standard | 
|  | 137 | # functionality.  (matherr is an example of such a false positive.) | 
|  | 138 | # | 
|  | 139 | # * If a symbol reference is only brought in by the user using a data | 
|  | 140 | # symbol rather than a function from the standard library, this will | 
|  | 141 | # not be detected. | 
|  | 142 | # | 
|  | 143 | # * If a symbol reference is only brought in by crt*.o or libgcc, this | 
|  | 144 | # will not be detected. | 
|  | 145 | # | 
|  | 146 | # * If a symbol reference is only brought in through __builtin_foo in | 
|  | 147 | # a standard macro being compiled to call foo, this will not be | 
|  | 148 | # detected. | 
|  | 149 | # | 
|  | 150 | # * Header inclusions should be compiled several times with different | 
|  | 151 | # options such as -O2, -D_FORTIFY_SOURCE and -D_FILE_OFFSET_BITS=64 to | 
|  | 152 | # find out what symbols are undefined from such a compilation; this is | 
|  | 153 | # not yet implemented. | 
|  | 154 | # | 
|  | 155 | # * This script finds symbols referenced through use of macros on the | 
|  | 156 | # basis that if a macro calls an internal function, that function must | 
|  | 157 | # also be declared in the header.  However, the header might also | 
|  | 158 | # declare implementation-namespace functions that are not called by | 
|  | 159 | # any standard macro in the header, resulting in false positives for | 
|  | 160 | # any symbols brought in only through use of those | 
|  | 161 | # implementation-namespace functions. | 
|  | 162 | # | 
|  | 163 | # * Namespace issues can apply for dynamic linking as well as static | 
|  | 164 | # linking, when a call is from one shared library to another or uses a | 
|  | 165 | # PLT entry for a call within a shared library; such issues are only | 
|  | 166 | # detected by this script if the same namespace issue applies for | 
|  | 167 | # static linking. | 
|  | 168 |  | 
# Generate a C file that includes the header and takes the address of
# every function returned by list_exported_functions, compile it, and
# collect the symbols of the resulting object with readelf.
@c_syms = list_exported_functions ("$CC $flags", $standard, $header, $tmpdir);
# Temporary file names are made unique per process with the PID ($$).
$cincfile = "$tmpdir/undef-$$.c";
$cincfile_o = "$tmpdir/undef-$$.o";
$cincfile_sym = "$tmpdir/undef-$$.sym";
{
  # Three-argument open with a lexical handle: the path is never
  # interpreted as part of the open mode and no global handle is used.
  open (my $cinc_fh, '>', $cincfile) || die ("open $cincfile: $!\n");
  print $cinc_fh "#include <$header>\n";
  foreach my $sym (sort @c_syms) {
    print $cinc_fh "void *__glibc_test_$sym = (void *) &$sym;\n";
  }
  # Buffered write errors surface at close, so its result is checked.
  close ($cinc_fh) || die ("close $cincfile: $!\n");
}
# system returns non-zero on failure, hence "&& die".  The commands go
# through the shell on purpose: $CC and $flags may contain several words.
system ("$CC $flags -D_ISOMAC $CFLAGS{$standard} -c $cincfile -o $cincfile_o")
  && die ("compiling failed\n");
system ("LC_ALL=C $READELF -W -s $cincfile_o > $cincfile_sym")
  && die ("readelf failed\n");
@elf_syms = list_syms ($cincfile_sym);
unlink ($cincfile) || die ("unlink $cincfile: $!\n");
unlink ($cincfile_o) || die ("unlink $cincfile_o: $!\n");
unlink ($cincfile_sym) || die ("unlink $cincfile_sym: $!\n");
|  | 187 |  | 
# Compute the set of symbols brought in, starting from the strong
# undefined symbols of the test object and repeatedly adding every
# library object that defines a still-unprocessed undefined symbol.
# %seen_where: symbol => description of the chain that brought it in.
%seen_where = ();
# %files_seen: library objects already processed.
%files_seen = ();
# %all_undef: every strong undefined symbol found so far.
%all_undef = ();
# %current_undef: undefined symbols to resolve in the current round.
%current_undef = ();
foreach my $entry (@elf_syms) {
  my ($obj, $name, $bind, $defined) = @$entry;
  next unless ($bind eq "GLOBAL" && !$defined);
  my $origin = "[initial] $name";
  $seen_where{$name} = $origin;
  $all_undef{$name} = $origin;
  $current_undef{$name} = $origin;
}

# Each round handles the symbols discovered by the previous one; the
# loop ends when a round discovers no new undefined symbols.
while (%current_undef) {
  %new_undef = ();
  foreach my $sym (sort keys %current_undef) {
    my $chain = $current_undef{$sym};
    foreach my $obj (@{$sym_objs{$sym}}) {
      # Each object is processed once, by the first symbol reaching it.
      next if (defined ($files_seen{$obj}));
      $files_seen{$obj} = 1;
      my $via = "$chain -> [$obj]";
      foreach my $ssym (@{$seen_syms{$obj}}) {
        next if (defined ($seen_where{$ssym}));
        $seen_where{$ssym} = "$via $ssym";
      }
      foreach my $usym (@{$strong_undef_syms{$obj}}) {
        next if (defined ($all_undef{$usym}));
        $all_undef{$usym} = "$via $usym";
        $new_undef{$usym} = "$via $usym";
      }
    }
  }
  %current_undef = %new_undef;
}
|  | 224 |  | 
# Report every symbol brought in that neither starts with an underscore
# nor is listed in %stdsyms, printing the chain that brought it in.
# The exit status is 1 if anything was reported and 0 otherwise.
$ret = 0;
foreach my $sym (sort keys %seen_where) {
  next if ($sym =~ /^_/ || defined ($stdsyms{$sym}));
  print "$seen_where{$sym}\n";
  $ret = 1;
}

exit $ret;