#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# Basic EEH selftest.
#
# Builds a list of function-0 PCI devices that are neither bridges nor
# virtual functions and whose PE is currently healthy, runs eeh_one_dev on
# each of them, and reports how many failed. A diff of `lspci` output from
# before and after the run is printed so missing devices are easy to spot.
#
# Helpers eeh_supported, pe_ok and eeh_one_dev come from eeh-functions.sh,
# which must be in the current directory.
#
# Exit status:
#   0                  every tested device recovered
#   1                  at least one device failed (or setup failed)
#   $KSELFTESTS_SKIP   EEH or the debugfs test hooks are unavailable

KSELFTESTS_SKIP=4

. ./eeh-functions.sh

if ! eeh_supported ; then
	echo "EEH not supported on this system, skipping"
	exit $KSELFTESTS_SKIP
fi

# Skip if *either* debugfs hook is absent; the message below treats them as
# a required set, so having only one of them is not enough to run the test.
# NOTE(review): presumably the sourced helpers use both files - confirm
# against eeh-functions.sh.
if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] || \
   [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
	echo "debugfs EEH testing files are missing. Is debugfs mounted?"
	exit $KSELFTESTS_SKIP
fi

# Snapshot the device list so it can be compared once testing is done.
pre_lspci=$(mktemp) || {
	echo "Failed to create a temporary file" >&2
	exit 1
}
# Remove the snapshot on every exit path, not just the successful one.
trap 'rm -f "$pre_lspci"' EXIT
lspci > "$pre_lspci"

# Bump the max freeze count to something absurd so we don't
# trip over it while breaking things.
echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes

# record the devices that we break in here. Assuming everything
# goes to plan we should get them back once the recover process
# is finished.
devices=""

# Build up a list of candidate devices (function 0 of each device only).
for dev_path in /sys/bus/pci/devices/*.0 ; do
	# If nothing matched, the pattern is left unexpanded; ignore it.
	[ -e "$dev_path" ] || continue
	dev="${dev_path##*/}"

	# skip bridges since we can't recover them (yet...)
	if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then
		echo "$dev, Skipped: bridge"
		continue
	fi

	# Skip VFs for now since we don't have a reliable way
	# to break them.
	if [ -e "/sys/bus/pci/devices/$dev/physfn" ] ; then
		echo "$dev, Skipped: virtfn"
		continue
	fi

	# Don't inject errors into an already-frozen PE. This happens with
	# PEs that contain multiple PCI devices (e.g. multi-function cards)
	# and injecting new errors during the recovery process will probably
	# result in the recovery failing and the device being marked as
	# failed.
	if ! pe_ok "$dev" ; then
		echo "$dev, Skipped: Bad initial PE state"
		continue
	fi

	echo "$dev, Added"

	# Add to this list of device to check
	devices="$devices $dev"
done

# Count the candidates by word-splitting the list into the positional
# parameters (the script takes no arguments, so nothing is lost).
# shellcheck disable=SC2086 # splitting $devices is intentional
set -- $devices
dev_count=$#
echo "Found ${dev_count} breakable devices..."

failed=0
for dev in $devices ; do
	echo "Breaking $dev..."

	# Re-check the PE: breaking an earlier device may have frozen a PE
	# shared with this one. That counts as a failure, not a skip.
	if ! pe_ok "$dev" ; then
		echo "Skipping $dev, Initial PE state is not ok"
		failed="$((failed + 1))"
		continue
	fi

	if ! eeh_one_dev "$dev" ; then
		failed="$((failed + 1))"
	fi
done

echo "$failed devices failed to recover ($dev_count tested)"
# Show any devices that appeared or disappeared during the run.
lspci | diff -u "$pre_lspci" -

# The exit status reflects whether every device recovered; the EXIT trap
# removes the snapshot file afterwards.
test "$failed" -eq 0
exit $?