#! /usr/bin/perl
2
3# Check that use of symbols declared in a given header does not result
4# in any symbols being brought in that are not reserved with external
5# linkage for the given standard.
6
7# Copyright (C) 2014-2015 Free Software Foundation, Inc.
8# This file is part of the GNU C Library.
9
10# The GNU C Library is free software; you can redistribute it and/or
11# modify it under the terms of the GNU Lesser General Public
12# License as published by the Free Software Foundation; either
13# version 2.1 of the License, or (at your option) any later version.
14
15# The GNU C Library is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18# Lesser General Public License for more details.
19
20# You should have received a copy of the GNU Lesser General Public
21# License along with the GNU C Library; if not, see
22# <http://www.gnu.org/licenses/>.
23
24use GlibcConform;
25use Getopt::Long;
26
# Parse the command line.  Every option takes a string argument:
#   --header    header named in the generated "#include <...>" line
#   --standard  standard name, used as the key into %CFLAGS
#   --flags     extra flags placed after the compiler command
#   --cc        compiler command
#   --tmpdir    directory in which the temporary files are created
#   --stdsyms   file listing the permitted symbols, one per line
#   --libsyms   file of "readelf -s" output parsed by list_syms
#   --readelf   readelf command
# GetOptions returns false when the command line is malformed (for
# example an unknown option or a missing argument); stop in that case
# rather than carrying on with undefined settings.
GetOptions ('header=s'   => \$header,
            'standard=s' => \$standard,
            'flags=s'    => \$flags,
            'cc=s'       => \$CC,
            'tmpdir=s'   => \$tmpdir,
            'stdsyms=s'  => \$stdsyms_file,
            'libsyms=s'  => \$libsyms_file,
            'readelf=s'  => \$READELF)
  || die ("invalid command-line options\n");
31
# Load the list of symbols that are OK: each line of $stdsyms_file is one
# symbol name, recorded as a key of %stdsyms.
%stdsyms = ();
# Three-argument open with a lexical handle, so the file name can never
# be taken as part of the open mode and no global handle is left behind.
open (my $stdsyms_fh, '<', $stdsyms_file)
  || die ("open $stdsyms_file: $!\n");
while (my $line = <$stdsyms_fh>) {
  chomp ($line);
  $stdsyms{$line} = 1;
}
close ($stdsyms_fh) || die ("close $stdsyms_file: $!\n");
40
41# The following whitelisted symbols are also allowed for now.
42#
43# * Bug 15421: lgamma wrongly sets signgam for ISO C.
44#
45# * Bug 17576: stdin, stdout, stderr only reserved with external
46# linkage when stdio.h included (and possibly not then), not
47# generally.
48#
49# * Bug 18442: re_syntax_options wrongly brought in by regcomp and
50# used by re_comp.
51#
52# * False positive: matherr only used conditionally. matherrf/matherrl are used
53# by IA64 too for the same reason.
54#
# Treat every exempted symbol exactly like a permitted one by adding it
# to %stdsyms.
@whitelist = qw(signgam stdin stdout stderr re_syntax_options
                matherr matherrf matherrl);
$stdsyms{$_} = 1 for @whitelist;
60
# Parse "readelf -s" output stored in the file SYMS_FILE and return
# information about the GLOBAL and WEAK symbols it lists.
#
# Each element of the returned list is a reference to an array
#   [FILE, NAME, BIND, DEFINED]
# where FILE comes from the most recent "File: ..." line with any
# directory part removed (or is SYMS_FILE itself when no such line has
# been seen yet), NAME is the symbol name, BIND is "GLOBAL" or "WEAK",
# and DEFINED is true unless the section index column is "UND".
#
# Lines with fewer than eight fields, with any other binding, or whose
# name is not made up solely of word characters are skipped.  Dies if
# SYMS_FILE cannot be opened or closed.
sub list_syms {
  my ($syms_file) = @_;
  # Three-argument open with a lexical handle: the name is used verbatim
  # (two-argument open strips surrounding whitespace and honours mode
  # characters), and no global handle is shared between calls.
  open (my $syms_fh, '<', $syms_file) || die ("open $syms_file: $!\n");
  my ($file) = $syms_file;
  my (@ret) = ();
  # Read into a lexical so that the caller's $_ is left alone.
  while (my $line = <$syms_fh>) {
    chomp ($line);
    if ($line =~ /^File: (.*)/) {
      $file = $1;
      $file =~ s|^.*/||;
      next;
    }
    $line =~ s/^\s*//;
    # Architecture-specific st_other bits appear inside [] and disrupt
    # the format of readelf output.
    $line =~ s/\[.*?\]//;
    my (@fields) = split (/\s+/, $line);
    next if @fields < 8;
    my ($bind, $ndx, $sym) = @fields[4, 6, 7];
    next if $bind ne "GLOBAL" && $bind ne "WEAK";
    next if $sym !~ /^\w+$/;
    push (@ret, [$file, $sym, $bind, $ndx ne "UND"]);
  }
  close ($syms_fh) || die ("close $syms_file: $!\n");
  return @ret;
}
97
# Index the GLOBAL and WEAK symbols that list_syms reports for
# $libsyms_file (the symbols defined or used in the standard libraries).
# Strong symbols (defined or undefined) from a given object.
%strong_syms = ();
# Strong undefined symbols from a given object.
%strong_undef_syms = ();
# Objects defining a given symbol (strongly or weakly).
%sym_objs = ();
@sym_data = list_syms ($libsyms_file);
for my $entry (@sym_data) {
  my ($obj, $name, $bind, $defined) = @$entry;
  # Pushing through a hash element creates its list on first use.
  push (@{$sym_objs{$name}}, $obj) if $defined;
  # Only strong (GLOBAL) symbols go into the per-object tables.
  next if $bind ne "GLOBAL";
  push (@{$strong_syms{$obj}}, $name);
  push (@{$strong_undef_syms{$obj}}, $name) unless $defined;
}
128
129# Determine what ELF-level symbols are brought in by use of C-level
130# symbols declared in the given header.
131#
132# The rules followed are heuristic and so may produce false positives
133# and false negatives.
134#
135# * Weak undefined symbols are ignored; however, if a code path that
136# references one (even just to check if its address is 0) is executed,
137# that may conflict with a definition of that symbol in the user's
138# program.
139#
# * Strong undefined symbols are considered of significance, but it is
141# possible that (a) any standard library definition is weak, so can be
142# overridden by the user's definition, and (b) the symbol is only used
143# conditionally and not if the program is limited to standard
144# functionality. (matherr is an example of such a false positive.)
145#
146# * If a symbol reference is only brought in by the user using a data
147# symbol rather than a function from the standard library, this will
148# not be detected.
149#
150# * If a symbol reference is only brought in by crt*.o or libgcc, this
151# will not be detected.
152#
153# * If a symbol reference is only brought in through __builtin_foo in
154# a standard macro being compiled to call foo, this will not be
155# detected.
156#
157# * Header inclusions should be compiled several times with different
158# options such as -O2, -D_FORTIFY_SOURCE and -D_FILE_OFFSET_BITS=64 to
159# find out what symbols are undefined from such a compilation; this is
160# not yet implemented.
161#
162# * This script finds symbols referenced through use of macros on the
163# basis that if a macro calls an internal function, that function must
164# also be declared in the header. However, the header might also
165# declare implementation-namespace functions that are not called by
166# any standard macro in the header, resulting in false positives for
167# any symbols brought in only through use of those
168# implementation-namespace functions.
169#
170# * Namespace issues can apply for dynamic linking as well as static
171# linking, when a call is from one shared library to another or uses a
172# PLT entry for a call within a shared library; such issues are only
173# detected by this script if the same namespace issue applies for
174# static linking.
175
# Write a C file that includes the header and takes the address of every
# name returned by list_exported_functions, compile it to an object file,
# dump that object's symbol table with readelf, and parse the dump into
# @elf_syms.  The three temporary files live in $tmpdir, carry this
# process's ID in their names, and are removed once the dump is parsed.
@c_syms = list_exported_functions ("$CC $flags", $standard, $header, $tmpdir);
$cincfile = "$tmpdir/undef-$$.c";
$cincfile_o = "$tmpdir/undef-$$.o";
$cincfile_sym = "$tmpdir/undef-$$.sym";
# Three-argument open with a lexical handle, so the path is used verbatim
# and no global bareword handle is involved.
open (my $cinc_fh, '>', $cincfile) || die ("open $cincfile: $!\n");
print $cinc_fh "#include <$header>\n";
foreach my $sym (sort @c_syms) {
  print $cinc_fh "void *__glibc_test_$sym = (void *) &$sym;\n";
}
# Buffered write errors only surface here, so the close must be checked.
close ($cinc_fh) || die ("close $cincfile: $!\n");
# system returns non-zero on failure, hence "&& die".
# NOTE(review): %CFLAGS is not set in this file; presumably it is
# provided by GlibcConform -- confirm.
system ("$CC $flags -D_ISOMAC $CFLAGS{$standard} -c $cincfile -o $cincfile_o")
  && die ("compiling failed\n");
system ("LC_ALL=C $READELF -W -s $cincfile_o > $cincfile_sym")
  && die ("readelf failed\n");
@elf_syms = list_syms ($cincfile_sym);
unlink ($cincfile) || die ("unlink $cincfile: $!\n");
unlink ($cincfile_o) || die ("unlink $cincfile_o: $!\n");
unlink ($cincfile_sym) || die ("unlink $cincfile_sym: $!\n");
194
# Start the search from the strong (GLOBAL) undefined symbols listed in
# @elf_syms.  Each table maps a symbol name to a description of how the
# symbol was reached; these starting entries are tagged "[initial]".
%strong_seen = ();
%files_seen = ();
%all_undef = ();
%current_undef = ();
for my $entry (@elf_syms) {
  my ($file, $name, $bind, $defined) = @$entry;
  next unless $bind eq "GLOBAL" && !$defined;
  my $origin = "[initial] $name";
  $strong_seen{$name} = $origin;
  $all_undef{$name} = $origin;
  $current_undef{$name} = $origin;
}
207
# Work outwards from the current set of undefined symbols until no new
# ones turn up.  Every object that defines a wanted symbol is visited at
# most once; visiting it records all of its strong symbols in
# %strong_seen and queues its not-yet-known strong undefined symbols for
# the next round.  Each recorded value is the chain of references that
# led to the symbol.
while (%current_undef) {
  %new_undef = ();
  for my $wanted (sort keys %current_undef) {
    my $chain = $current_undef{$wanted};
    for my $obj (@{$sym_objs{$wanted}}) {
      next if defined ($files_seen{$obj});
      $files_seen{$obj} = 1;
      for my $strong (@{$strong_syms{$obj}}) {
        # Keep the first chain found for a symbol.
        $strong_seen{$strong} = "$chain -> [$obj] $strong"
          unless defined ($strong_seen{$strong});
      }
      for my $needed (@{$strong_undef_syms{$obj}}) {
        next if defined ($all_undef{$needed});
        my $via = "$chain -> [$obj] $needed";
        $all_undef{$needed} = $via;
        $new_undef{$needed} = $via;
      }
    }
  }
  %current_undef = %new_undef;
}
231
# Print the reference chain of every strong symbol reached that neither
# starts with an underscore nor appears in %stdsyms.  The exit status is
# 1 if anything was printed and 0 otherwise.
$ret = 0;
for my $name (sort keys %strong_seen) {
  next if $name =~ /^_/ || defined ($stdsyms{$name});
  print "$strong_seen{$name}\n";
  $ret = 1;
}

exit $ret;