| 1 | #! /usr/bin/perl |
| 2 | |
| 3 | # Check that use of symbols declared in a given header does not result |
| 4 | # in any symbols being brought in that are not reserved with external |
| 5 | # linkage for the given standard. |
| 6 | |
| 7 | # Copyright (C) 2014-2015 Free Software Foundation, Inc. |
| 8 | # This file is part of the GNU C Library. |
| 9 | |
| 10 | # The GNU C Library is free software; you can redistribute it and/or |
| 11 | # modify it under the terms of the GNU Lesser General Public |
| 12 | # License as published by the Free Software Foundation; either |
| 13 | # version 2.1 of the License, or (at your option) any later version. |
| 14 | |
| 15 | # The GNU C Library is distributed in the hope that it will be useful, |
| 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 18 | # Lesser General Public License for more details. |
| 19 | |
| 20 | # You should have received a copy of the GNU Lesser General Public |
| 21 | # License along with the GNU C Library; if not, see |
| 22 | # <http://www.gnu.org/licenses/>. |
| 23 | |
| 24 | use GlibcConform; |
| 25 | use Getopt::Long; |
| 26 | |
# Parse the command line.  Every option takes a string argument:
#   --header    header under test (included by the generated C file)
#   --standard  standard name, used to select the compiler flags
#   --flags     extra flags passed to the compiler
#   --cc        compiler command
#   --tmpdir    directory for temporary files
#   --stdsyms   file listing the symbols that are OK, one per line
#   --libsyms   file of readelf -s output for the standard libraries
#   --readelf   readelf command
# GetOptions returns false when the command line is malformed; stop
# here rather than continuing with option variables left unset.
GetOptions ('header=s' => \$header, 'standard=s' => \$standard,
            'flags=s' => \$flags, 'cc=s' => \$CC, 'tmpdir=s' => \$tmpdir,
            'stdsyms=s' => \$stdsyms_file, 'libsyms=s' => \$libsyms_file,
            'readelf=s' => \$READELF)
  || die ("$0: invalid command-line options\n");
| 31 | |
# Load the list of symbols that are OK.  Each line of the file named
# by --stdsyms is taken verbatim (minus the trailing newline) as one
# symbol name and recorded as a key of %stdsyms.
%stdsyms = ();
# Three-argument open with a lexical handle: the file name is never
# interpreted as part of the mode, and no global handle is left behind.
open (my $stdsyms_fh, '<', $stdsyms_file)
  || die ("open $stdsyms_file: $!\n");
while (my $line = <$stdsyms_fh>) {
  chomp ($line);
  $stdsyms{$line} = 1;
}
close ($stdsyms_fh) || die ("close $stdsyms_file: $!\n");
| 40 | |
| 41 | # The following whitelisted symbols are also allowed for now. |
| 42 | # |
| 43 | # * Bug 15421: lgamma wrongly sets signgam for ISO C. |
| 44 | # |
| 45 | # * Bug 17576: stdin, stdout, stderr only reserved with external |
| 46 | # linkage when stdio.h included (and possibly not then), not |
| 47 | # generally. |
| 48 | # |
| 49 | # * Bug 18442: re_syntax_options wrongly brought in by regcomp and |
| 50 | # used by re_comp. |
| 51 | # |
| 52 | # * False positive: matherr only used conditionally. matherrf/matherrl are used |
| 53 | # by IA64 too for the same reason. |
| 54 | # |
# Symbols tolerated in addition to those read from the stdsyms file;
# they are merged into %stdsyms so the final check treats both alike.
@whitelist = qw(signgam stdin stdout stderr re_syntax_options matherr matherrf
                matherrl);
# Hash slice: mark every whitelisted name as allowed in one assignment.
@stdsyms{@whitelist} = (1) x @whitelist;
| 60 | |
# Return information about GLOBAL and WEAK symbols listed in readelf
# -s output.
#
# Argument:
#   $syms_file  path of a file containing readelf -s output, optionally
#               with "File: NAME" lines introducing each object.
#
# Returns a list of array references [FILE, SYM, BIND, DEFINED]:
#   FILE     basename taken from the most recent "File:" line, or
#            $syms_file itself if no such line has been seen yet;
#   SYM      the symbol name (only names made purely of word characters
#            are reported);
#   BIND     "GLOBAL" or "WEAK";
#   DEFINED  true unless the symbol's section index is "UND".
#
# Dies if the file cannot be opened or closed.  The file is read through
# a lexical handle and a lexical line variable, so neither a global
# filehandle nor the caller's $_ is modified.
sub list_syms {
  my ($syms_file) = @_;
  open (my $syms_fh, '<', $syms_file) || die ("open $syms_file: $!\n");
  my ($file) = $syms_file;
  my (@ret) = ();
  while (my $line = <$syms_fh>) {
    chomp ($line);
    if ($line =~ /^File: (.*)/) {
      # Start of a new object: remember its name without any directory.
      $file = $1;
      $file =~ s|^.*/||;
      next;
    }
    $line =~ s/^\s*//;
    # Architecture-specific st_other bits appear inside [] and disrupt
    # the format of readelf output.
    $line =~ s/\[.*?\]//;
    my (@fields) = split (/\s+/, $line);
    # Symbol lines have at least eight columns:
    # Num Value Size Type Bind Vis Ndx Name.
    if (@fields < 8) {
      next;
    }
    my ($bind) = $fields[4];
    my ($ndx) = $fields[6];
    my ($sym) = $fields[7];
    if ($bind ne "GLOBAL" && $bind ne "WEAK") {
      next;
    }
    # Skip names containing anything but word characters.
    if ($sym !~ /^\w+$/) {
      next;
    }
    push (@ret, [$file, $sym, $bind, $ndx ne "UND"]);
  }
  close ($syms_fh) || die ("close $syms_file: $!\n");
  return @ret;
}
| 97 | |
# Index the GLOBAL and WEAK symbols defined or used in the standard
# libraries, as reported by list_syms for the --libsyms file.
#
# %strong_syms:       object => names of its GLOBAL symbols (defined or
#                     undefined).
# %strong_undef_syms: object => names of its GLOBAL undefined symbols.
# %sym_objs:          symbol => objects defining it (strongly or weakly).
%strong_syms = ();
%strong_undef_syms = ();
%sym_objs = ();
@sym_data = list_syms ($libsyms_file);
foreach my $entry (@sym_data) {
  my ($file, $name, $bind, $defined) = @$entry;
  # push creates the per-key array on first use.
  push (@{$sym_objs{$name}}, $file) if $defined;
  # Only strong (GLOBAL) symbols are recorded per object.
  next if $bind ne "GLOBAL";
  push (@{$strong_syms{$file}}, $name);
  push (@{$strong_undef_syms{$file}}, $name) if !$defined;
}
| 128 | |
| 129 | # Determine what ELF-level symbols are brought in by use of C-level |
| 130 | # symbols declared in the given header. |
| 131 | # |
| 132 | # The rules followed are heuristic and so may produce false positives |
| 133 | # and false negatives. |
| 134 | # |
| 135 | # * Weak undefined symbols are ignored; however, if a code path that |
| 136 | # references one (even just to check if its address is 0) is executed, |
| 137 | # that may conflict with a definition of that symbol in the user's |
| 138 | # program. |
| 139 | # |
| 140 | # * Strong undefined symbols are considered of significance, but it is |
| 141 | # possible that (a) any standard library definition is weak, so can be |
| 142 | # overridden by the user's definition, and (b) the symbol is only used |
| 143 | # conditionally and not if the program is limited to standard |
| 144 | # functionality. (matherr is an example of such a false positive.) |
| 145 | # |
| 146 | # * If a symbol reference is only brought in by the user using a data |
| 147 | # symbol rather than a function from the standard library, this will |
| 148 | # not be detected. |
| 149 | # |
| 150 | # * If a symbol reference is only brought in by crt*.o or libgcc, this |
| 151 | # will not be detected. |
| 152 | # |
| 153 | # * If a symbol reference is only brought in through __builtin_foo in |
| 154 | # a standard macro being compiled to call foo, this will not be |
| 155 | # detected. |
| 156 | # |
| 157 | # * Header inclusions should be compiled several times with different |
| 158 | # options such as -O2, -D_FORTIFY_SOURCE and -D_FILE_OFFSET_BITS=64 to |
| 159 | # find out what symbols are undefined from such a compilation; this is |
| 160 | # not yet implemented. |
| 161 | # |
| 162 | # * This script finds symbols referenced through use of macros on the |
| 163 | # basis that if a macro calls an internal function, that function must |
| 164 | # also be declared in the header. However, the header might also |
| 165 | # declare implementation-namespace functions that are not called by |
| 166 | # any standard macro in the header, resulting in false positives for |
| 167 | # any symbols brought in only through use of those |
| 168 | # implementation-namespace functions. |
| 169 | # |
| 170 | # * Namespace issues can apply for dynamic linking as well as static |
| 171 | # linking, when a call is from one shared library to another or uses a |
| 172 | # PLT entry for a call within a shared library; such issues are only |
| 173 | # detected by this script if the same namespace issue applies for |
| 174 | # static linking. |
| 175 | |
# Generate a C file that includes the header and takes the address of
# every name returned by list_exported_functions (not defined in this
# file; presumably provided by GlibcConform), compile it to an object,
# and read back the object's symbol table with readelf.  The result,
# @elf_syms, is in the format returned by list_syms.
@c_syms = list_exported_functions ("$CC $flags", $standard, $header, $tmpdir);
# Temporary file names carry the process ID ($$).
$cincfile = "$tmpdir/undef-$$.c";
$cincfile_o = "$tmpdir/undef-$$.o";
$cincfile_sym = "$tmpdir/undef-$$.sym";
# Three-argument open with a lexical handle: the file name is never
# interpreted as part of the mode, and no global handle is left behind.
open (my $cincfile_fh, '>', $cincfile) || die ("open $cincfile: $!\n");
print $cincfile_fh "#include <$header>\n";
foreach my $sym (sort @c_syms) {
  # Referencing the symbol's address makes it appear as an undefined
  # symbol in the compiled object.
  print $cincfile_fh "void *__glibc_test_$sym = (void *) &$sym;\n";
}
close ($cincfile_fh) || die ("close $cincfile: $!\n");
# %CFLAGS is not defined in this file (presumably from GlibcConform).
# system returns non-zero on failure, hence "&& die".
system ("$CC $flags -D_ISOMAC $CFLAGS{$standard} -c $cincfile -o $cincfile_o")
  && die ("compiling failed\n");
system ("LC_ALL=C $READELF -W -s $cincfile_o > $cincfile_sym")
  && die ("readelf failed\n");
@elf_syms = list_syms ($cincfile_sym);
unlink ($cincfile) || die ("unlink $cincfile: $!\n");
unlink ($cincfile_o) || die ("unlink $cincfile_o: $!\n");
unlink ($cincfile_sym) || die ("unlink $cincfile_sym: $!\n");
| 194 | |
# Seed the search with the strong undefined symbols of the test object.
#
# %strong_seen:   strong symbol => description of how it was reached.
# %files_seen:    library objects already visited.
# %all_undef:     every undefined symbol found so far => description.
# %current_undef: undefined symbols still to be resolved in this round.
%strong_seen = ();
%files_seen = ();
%all_undef = ();
%current_undef = ();
foreach my $entry (@elf_syms) {
  my (undef, $name, $bind, $defined) = @$entry;
  # Only GLOBAL references that the object itself does not define count.
  next if $defined || $bind ne "GLOBAL";
  my $origin = "[initial] $name";
  $strong_seen{$name} = $origin;
  $all_undef{$name} = $origin;
  $current_undef{$name} = $origin;
}
| 207 | |
# Follow undefined symbols round by round: every object defining a
# pending symbol is visited once, all of its strong symbols are noted
# in %strong_seen, and its not-yet-known strong undefined symbols form
# the next round.  Each description records the chain of references
# that led to the symbol.
while (%current_undef) {
  %new_undef = ();
  foreach my $sym (sort keys %current_undef) {
    my $chain = $current_undef{$sym};
    foreach my $file (@{$sym_objs{$sym}}) {
      next if defined ($files_seen{$file});
      $files_seen{$file} = 1;
      my $via = "$chain -> [$file]";
      foreach my $ssym (@{$strong_syms{$file}}) {
        # Keep the first description recorded for a symbol.
        $strong_seen{$ssym} = "$via $ssym"
          if !defined ($strong_seen{$ssym});
      }
      foreach my $usym (@{$strong_undef_syms{$file}}) {
        next if defined ($all_undef{$usym});
        $all_undef{$usym} = "$via $usym";
        $new_undef{$usym} = "$via $usym";
      }
    }
  }
  %current_undef = %new_undef;
}
| 231 | |
# Report every strong symbol reached that neither begins with an
# underscore nor appears in %stdsyms, printing the recorded chain of
# references; the exit status is 1 if anything was reported, else 0.
$ret = 0;
foreach my $sym (sort keys %strong_seen) {
  next if $sym =~ /^_/ || defined ($stdsyms{$sym});
  print "$strong_seen{$sym}\n";
  $ret = 1;
}

exit $ret;