// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * libata-eh.c - libata error handling
 *
 * Maintained by: Tejun Heo <tj@kernel.org>
 *                Please ALWAYS copy linux-ide@vger.kernel.org
 *                on emails.
 *
 * Copyright 2006 Tejun Heo <htejun@gmail.com>
 *
 * libata documentation is available via 'make {ps|pdf}docs',
 * as Documentation/driver-api/libata.rst
 *
 * Hardware documentation available from http://www.t13.org/ and
 * http://www.sata-io.org/
 */

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_dbg.h>
#include "../scsi/scsi_transport_api.h"

#include <linux/libata.h>

#include <trace/events/libata.h>
#include "libata.h"

enum {
	/* speed down verdicts */
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
	ATA_EH_SPDN_KEEP_ERRORS		= (1 << 3),

	/* error flags */
	ATA_EFLAG_IS_IO			= (1 << 0),
	ATA_EFLAG_DUBIOUS_XFER		= (1 << 1),
	ATA_EFLAG_OLD_ER		= (1 << 31),

	/* error categories */
	ATA_ECAT_NONE			= 0,
	ATA_ECAT_ATA_BUS		= 1,
	ATA_ECAT_TOUT_HSM		= 2,
	ATA_ECAT_UNK_DEV		= 3,
	ATA_ECAT_DUBIOUS_NONE		= 4,
	ATA_ECAT_DUBIOUS_ATA_BUS	= 5,
	ATA_ECAT_DUBIOUS_TOUT_HSM	= 6,
	ATA_ECAT_DUBIOUS_UNK_DEV	= 7,
	ATA_ECAT_NR			= 8,

	ATA_EH_CMD_DFL_TIMEOUT		= 5000,

	/* always put at least this amount of time between resets */
	ATA_EH_RESET_COOL_DOWN		= 5000,

	/* Waiting in ->prereset can never be reliable. It's
	 * sometimes nice to wait there but it can't be depended upon;
	 * otherwise, we wouldn't be resetting. Just give it enough
	 * time for most drives to spin up.
	 */
	ATA_EH_PRERESET_TIMEOUT		= 10000,
	ATA_EH_FASTDRAIN_INTERVAL	= 3000,

	ATA_EH_UA_TRIES			= 5,

	/* probe speed down parameters, see ata_eh_schedule_probe() */
	ATA_EH_PROBE_TRIAL_INTERVAL	= 60000,	/* 1 min */
	ATA_EH_PROBE_TRIALS		= 2,
};

/* The following table determines how we sequence resets. Each entry
 * represents timeout for that try. The first try can be soft or
 * hardreset. All others are hardreset if available. In most cases
 * the first reset w/ 10sec timeout should succeed. Following entries
 * are mostly for error handling, hotplug and those outlier devices that
 * take an exceptionally long time to recover from reset.
 */
static const unsigned long ata_eh_reset_timeouts[] = {
	10000,	/* most drives spin up by 10sec */
	10000,	/* > 99% working drives spin up before 20sec */
	35000,	/* give > 30 secs of idleness for outlier devices */
	5000,	/* and sweet one last chance */
	ULONG_MAX, /* > 1 min has elapsed, give up */
};

static const unsigned long ata_eh_identify_timeouts[] = {
	5000,	/* covers > 99% of successes and not too boring on failures */
	10000,	/* combined time till here is enough even for media access */
	30000,	/* for true idiots */
	ULONG_MAX,
};

static const unsigned long ata_eh_revalidate_timeouts[] = {
	15000,	/* Some drives are slow to read log pages when waking-up */
	15000,	/* combined time till here is enough even for media access */
	ULONG_MAX,
};

static const unsigned long ata_eh_flush_timeouts[] = {
	15000,	/* be generous with flush */
	15000,	/* ditto */
	30000,	/* and even more generous */
	ULONG_MAX,
};

static const unsigned long ata_eh_other_timeouts[] = {
	5000,	/* same rationale as identify timeout */
	10000,	/* ditto */
	/* but no merciful 30sec for other commands, it just isn't worth it */
	ULONG_MAX,
};

struct ata_eh_cmd_timeout_ent {
	const u8		*commands;
	const unsigned long	*timeouts;
};

/* The following table determines timeouts to use for EH internal
 * commands. Each table entry is a command class and matches the
 * commands the entry applies to and the timeout table to use.
 *
 * On the retry after a command timed out, the next timeout value from
 * the table is used. If the table doesn't contain further entries,
 * the last value is used.
 *
 * ehc->cmd_timeout_idx keeps track of which timeout to use per
 * command class, so if SET_FEATURES times out on the first try, the
 * next try will use the second timeout value only for that class.
 */
#define CMDS(cmds...)	(const u8 []){ cmds, 0 }
static const struct ata_eh_cmd_timeout_ent
ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
	{ .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
	  .timeouts = ata_eh_identify_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_LOG_EXT, ATA_CMD_READ_LOG_DMA_EXT),
	  .timeouts = ata_eh_revalidate_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_FEATURES),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
	  .timeouts = ata_eh_flush_timeouts },
};
#undef CMDS

static void __ata_port_freeze(struct ata_port *ap);
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
#else /* CONFIG_PM */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{ }

static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }
#endif /* CONFIG_PM */

static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi,
						const char *fmt, va_list args)
{
	ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
				    ATA_EH_DESC_LEN - ehi->desc_len,
				    fmt, args);
}

/**
 * __ata_ehi_push_desc - push error description without adding separator
 * @ehi: target EHI
 * @fmt: printf format string
 *
 * Format string according to @fmt and append it to @ehi->desc.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}

/**
 * ata_ehi_push_desc - push error description with separator
 * @ehi: target EHI
 * @fmt: printf format string
 *
 * Format string according to @fmt and append it to @ehi->desc.
 * If @ehi->desc is not empty, ", " is added in-between.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	if (ehi->desc_len)
		__ata_ehi_push_desc(ehi, ", ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}
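
/*
 * Illustrative sketch, not part of the original file: how an LLDD might
 * build up an error description with the helpers above while holding the
 * host lock. The function name and register value are hypothetical.
 */
#if 0
static void example_describe_error(struct ata_port *ap, u32 irq_stat)
{
	struct ata_eh_info *ehi = &ap->link.eh_info;

	ata_ehi_clear_desc(ehi);
	ata_ehi_push_desc(ehi, "irq_stat 0x%08x", irq_stat);
	ata_ehi_push_desc(ehi, "host bus error");	/* appended after ", " */
}
#endif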

/**
 * ata_ehi_clear_desc - clean error description
 * @ehi: target EHI
 *
 * Clear @ehi->desc.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_ehi_clear_desc(struct ata_eh_info *ehi)
{
	ehi->desc[0] = '\0';
	ehi->desc_len = 0;
}

/**
 * ata_port_desc - append port description
 * @ap: target ATA port
 * @fmt: printf format string
 *
 * Format string according to @fmt and append it to port
 * description. If port description is not empty, " " is added
 * in-between. This function is to be used while initializing
 * ata_host. The description is printed on host registration.
 *
 * LOCKING:
 * None.
 */
void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
{
	va_list args;

	WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING));

	if (ap->link.eh_info.desc_len)
		__ata_ehi_push_desc(&ap->link.eh_info, " ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
	va_end(args);
}

#ifdef CONFIG_PCI

/**
 * ata_port_pbar_desc - append PCI BAR description
 * @ap: target ATA port
 * @bar: target PCI BAR
 * @offset: offset into PCI BAR
 * @name: name of the area
 *
271 * contains the name, address, size and type of the BAR and
272 * appends it to the port description. If @offset is zero or
273 * positive, only name and offsetted address is appended.
274 *
 *
 * LOCKING:
 * None.
 */
void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
			const char *name)
{
	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
	char *type = "";
	unsigned long long start, len;

	if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
		type = "m";
	else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
		type = "i";

	start = (unsigned long long)pci_resource_start(pdev, bar);
	len = (unsigned long long)pci_resource_len(pdev, bar);

	if (offset < 0)
		ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start);
	else
		ata_port_desc(ap, "%s 0x%llx", name,
			      start + (unsigned long long)offset);
}

#endif /* CONFIG_PCI */

static int ata_lookup_timeout_table(u8 cmd)
{
	int i;

	for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
		const u8 *cur;

		for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++)
			if (*cur == cmd)
				return i;
	}

	return -1;
}

/**
 * ata_internal_cmd_timeout - determine timeout for an internal command
 * @dev: target device
 * @cmd: internal command to be issued
 *
 * Determine timeout for internal command @cmd for @dev.
 *
 * LOCKING:
 * EH context.
 *
 * RETURNS:
 * Determined timeout.
 */
unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return ATA_EH_CMD_DFL_TIMEOUT;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	return ata_eh_cmd_timeout_table[ent].timeouts[idx];
}

/**
 * ata_internal_cmd_timed_out - notification for internal command timeout
 * @dev: target device
 * @cmd: internal command which timed out
 *
 * Notify EH that internal command @cmd for @dev timed out. This
 * function should be called only for commands whose timeouts are
 * determined using ata_internal_cmd_timeout().
 *
 * LOCKING:
 * EH context.
 */
void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX)
		ehc->cmd_timeout_idx[dev->devno][ent]++;
}
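
/*
 * Illustrative sketch, not part of the original file: how an internal
 * command issuer (ata_exec_internal() in libata-core is the real one) is
 * expected to pair the two helpers above. example_do_issue() is a
 * hypothetical stand-in for actually executing the command.
 */
#if 0
static unsigned int example_issue_internal(struct ata_device *dev, u8 cmd)
{
	unsigned long timeout = ata_internal_cmd_timeout(dev, cmd);
	unsigned int err_mask;

	err_mask = example_do_issue(dev, cmd, timeout);
	if (err_mask & AC_ERR_TIMEOUT)
		ata_internal_cmd_timed_out(dev, cmd);	/* escalate next try */
	return err_mask;
}
#endif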
368
369static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
370 unsigned int err_mask)
371{
372 struct ata_ering_entry *ent;
373
374 WARN_ON(!err_mask);
375
376 ering->cursor++;
377 ering->cursor %= ATA_ERING_SIZE;
378
379 ent = &ering->ring[ering->cursor];
380 ent->eflags = eflags;
381 ent->err_mask = err_mask;
382 ent->timestamp = get_jiffies_64();
383}
384
385static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
386{
387 struct ata_ering_entry *ent = &ering->ring[ering->cursor];
388
389 if (ent->err_mask)
390 return ent;
391 return NULL;
392}
393
394int ata_ering_map(struct ata_ering *ering,
395 int (*map_fn)(struct ata_ering_entry *, void *),
396 void *arg)
397{
398 int idx, rc = 0;
399 struct ata_ering_entry *ent;
400
401 idx = ering->cursor;
402 do {
403 ent = &ering->ring[idx];
404 if (!ent->err_mask)
405 break;
406 rc = map_fn(ent, arg);
407 if (rc)
408 break;
409 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
410 } while (idx != ering->cursor);
411
412 return rc;
413}
414
415static int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg)
416{
417 ent->eflags |= ATA_EFLAG_OLD_ER;
418 return 0;
419}
420
421static void ata_ering_clear(struct ata_ering *ering)
422{
423 ata_ering_map(ering, ata_ering_clear_cb, NULL);
424}
425
426static unsigned int ata_eh_dev_action(struct ata_device *dev)
427{
428 struct ata_eh_context *ehc = &dev->link->eh_context;
429
430 return ehc->i.action | ehc->i.dev_action[dev->devno];
431}
432
433static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
434 struct ata_eh_info *ehi, unsigned int action)
435{
436 struct ata_device *tdev;
437
438 if (!dev) {
439 ehi->action &= ~action;
440 ata_for_each_dev(tdev, link, ALL)
441 ehi->dev_action[tdev->devno] &= ~action;
442 } else {
443 /* doesn't make sense for port-wide EH actions */
444 WARN_ON(!(action & ATA_EH_PERDEV_MASK));
445
446 /* break ehi->action into ehi->dev_action */
447 if (ehi->action & action) {
448 ata_for_each_dev(tdev, link, ALL)
449 ehi->dev_action[tdev->devno] |=
450 ehi->action & action;
451 ehi->action &= ~action;
452 }
453
454 /* turn off the specified per-dev action */
455 ehi->dev_action[dev->devno] &= ~action;
456 }
457}
458
459/**
460 * ata_eh_acquire - acquire EH ownership
461 * @ap: ATA port to acquire EH ownership for
462 *
463 * Acquire EH ownership for @ap. This is the basic exclusion
464 * mechanism for ports sharing a host. Only one port hanging off
465 * the same host can claim the ownership of EH.
466 *
467 * LOCKING:
468 * EH context.
469 */
470void ata_eh_acquire(struct ata_port *ap)
471{
472 mutex_lock(&ap->host->eh_mutex);
473 WARN_ON_ONCE(ap->host->eh_owner);
474 ap->host->eh_owner = current;
475}
476
477/**
478 * ata_eh_release - release EH ownership
479 * @ap: ATA port to release EH ownership for
480 *
 * Release EH ownership for @ap. The caller must have acquired
 * EH ownership using ata_eh_acquire() previously.
 *
 * LOCKING:
 * EH context.
 */
void ata_eh_release(struct ata_port *ap)
{
	WARN_ON_ONCE(ap->host->eh_owner != current);
	ap->host->eh_owner = NULL;
	mutex_unlock(&ap->host->eh_mutex);
}
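
/*
 * Illustrative sketch, not part of the original file: EH ownership
 * brackets a whole recovery run. Helpers that sleep, e.g. ata_msleep(),
 * drop and re-take the ownership so sibling ports sharing the host can
 * make progress with their own EH in the meantime.
 */
#if 0
static void example_eh_section(struct ata_port *ap)
{
	ata_eh_acquire(ap);
	/* reset/revalidate work for @ap goes here */
	ata_msleep(ap, 10);	/* releases and re-acquires EH ownership */
	ata_eh_release(ap);
}
#endif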

static void ata_eh_unload(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/* Restore SControl IPM and SPD for the next driver and
	 * disable attached devices.
	 */
	ata_for_each_link(link, ap, PMP_FIRST) {
		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
		ata_for_each_dev(dev, link, ALL)
			ata_dev_disable(dev);
	}

	/* freeze and set UNLOADED */
	spin_lock_irqsave(ap->lock, flags);

	ata_port_freeze(ap);			/* won't be thawed */
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;	/* clear pending from freeze */
	ap->pflags |= ATA_PFLAG_UNLOADED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_scsi_error - SCSI layer error handler callback
 * @host: SCSI host on which error occurred
 *
 * Handles SCSI-layer-thrown error events.
 *
 * LOCKING:
 * Inherited from SCSI layer (none, can sleep)
 *
 * RETURNS:
 * Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	LIST_HEAD(eh_work_q);

	DPRINTK("ENTER\n");

	spin_lock_irqsave(host->host_lock, flags);
	list_splice_init(&host->eh_cmd_q, &eh_work_q);
	spin_unlock_irqrestore(host->host_lock, flags);

	ata_scsi_cmd_error_handler(host, ap, &eh_work_q);

	/* If the SCSI timeout merely raced normal completion and there is
	 * nothing to recover (nr_timedout == 0), why exactly are we doing
	 * error recovery here?
	 */
	ata_scsi_port_error_handler(host, ap);

	/* finish or retry handled scmd's and clean up */
	WARN_ON(!list_empty(&eh_work_q));

	DPRINTK("EXIT\n");
}

/**
 * ata_scsi_cmd_error_handler - error callback for a list of commands
 * @host: scsi host containing the port
 * @ap: ATA port within the host
 * @eh_work_q: list of commands to process
 *
 * Process the given list of commands and return those finished to the
 * ap->eh_done_q. This function is the first part of the libata error
 * handler which processes a given list of failed commands.
 */
void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
				struct list_head *eh_work_q)
{
	int i;
	unsigned long flags;

	/* make sure sff pio task is not running */
	ata_sff_flush_pio_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout. When normal
	 * completion wins, the qc never reaches EH. When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point. In such cases, both types of
	 * completions are honored. A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	spin_lock_irqsave(ap->lock, flags);
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		/* This must occur under the ap->lock as we don't want
		 * a polled recovery to race the real interrupt handler.
		 *
		 * The lost_interrupt handler checks for any completed but
		 * non-notified command and completes much like an IRQ handler.
		 *
		 * We then fall into the error recovery code which will treat
		 * this as if normal completion won the race.
		 */
		if (ap->ops->lost_interrupt)
			ap->ops->lost_interrupt(ap);

		list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) {
			struct ata_queued_cmd *qc;

			ata_qc_for_each_raw(ap, qc, i) {
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs, they belong to EH from
		 * this point but the state of the controller is
		 * unknown. Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs. This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		/* initialize eh_tries */
		ap->eh_tries = ATA_EH_MAX_TRIES;
	}
	spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL(ata_scsi_cmd_error_handler);

/**
 * ata_scsi_port_error_handler - recover the port after the commands
 * @host: SCSI host containing the port
 * @ap: the ATA port
 *
 * Handle the recovery of the port @ap after all the commands
 * have been recovered.
 */
void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
{
	unsigned long flags;

	/* invoke error handler */
	if (ap->ops->error_handler) {
		struct ata_link *link;

		/* acquire EH ownership */
		ata_eh_acquire(ap);
 repeat:
		/* kill fast drain timer */
		del_timer_sync(&ap->fastdrain_timer);

		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		ata_for_each_link(link, ap, HOST_FIRST) {
			struct ata_eh_context *ehc = &link->eh_context;
			struct ata_device *dev;

			memset(&link->eh_context, 0, sizeof(link->eh_context));
			link->eh_context.i = link->eh_info;
			memset(&link->eh_info, 0, sizeof(link->eh_info));

			ata_for_each_dev(dev, link, ENABLED) {
				int devno = dev->devno;

				ehc->saved_xfer_mode[devno] = dev->xfer_mode;
				if (ata_ncq_enabled(dev))
					ehc->saved_ncq_enabled |= 1 << devno;
			}
		}

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		ap->excl_link = NULL;	/* don't maintain exclusion over EH */

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else {
			/* if unloading, commence suicide */
			if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
			    !(ap->pflags & ATA_PFLAG_UNLOADED))
				ata_eh_unload(ap);
			ata_eh_finish(ap);
		}

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point. Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			if (--ap->eh_tries) {
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_err(ap,
				     "EH pending after %d tries, giving up\n",
				     ATA_EH_MAX_TRIES);
			ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		}

		/* this run is complete, make sure EH info is clear */
		ata_for_each_link(link, ap, HOST_FIRST)
			memset(&link->eh_info, 0, sizeof(link->eh_info));

		/* end eh (clear host_eh_scheduled) while holding
		 * ap->lock such that if exception occurs after this
		 * point but before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		ap->ops->end_eh(ap);

		spin_unlock_irqrestore(ap->lock, flags);
		ata_eh_release(ap);
	} else {
		WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) &&
		 !(ap->flags & ATA_FLAG_SAS_HOST))
		schedule_delayed_work(&ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_info(ap, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler);

/**
 * ata_port_wait_eh - Wait for the currently pending EH to complete
 * @ap: Port to wait EH for
 *
 * Wait until the currently pending EH is complete.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

 retry:
	spin_lock_irqsave(ap->lock, flags);

	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		ata_msleep(ap, 10);
		goto retry;
	}
}
EXPORT_SYMBOL_GPL(ata_port_wait_eh);
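
/*
 * Illustrative sketch, not part of the original file: a common pattern
 * for forcing a synchronous EH pass -- schedule EH under ap->lock, then
 * sleep in ata_port_wait_eh() until EH_PENDING and EH_IN_PROGRESS clear.
 */
#if 0
static void example_sync_eh(struct ata_port *ap)
{
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	ata_port_schedule_eh(ap);
	spin_unlock_irqrestore(ap->lock, flags);

	ata_port_wait_eh(ap);
}
#endif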

static int ata_eh_nr_in_flight(struct ata_port *ap)
{
	struct ata_queued_cmd *qc;
	unsigned int tag;
	int nr = 0;

	/* count only non-internal commands */
	ata_qc_for_each(ap, qc, tag) {
		if (qc)
			nr++;
	}

	return nr;
}

void ata_eh_fastdrain_timerfn(struct timer_list *t)
{
	struct ata_port *ap = from_timer(ap, t, fastdrain_timer);
	unsigned long flags;
	int cnt;

	spin_lock_irqsave(ap->lock, flags);

	cnt = ata_eh_nr_in_flight(ap);

	/* are we done? */
	if (!cnt)
		goto out_unlock;

	if (cnt == ap->fastdrain_cnt) {
		struct ata_queued_cmd *qc;
		unsigned int tag;

		/* No progress during the last interval, tag all
		 * in-flight qcs as timed out and freeze the port.
		 */
		ata_qc_for_each(ap, qc, tag) {
			if (qc)
				qc->err_mask |= AC_ERR_TIMEOUT;
		}

		ata_port_freeze(ap);
	} else {
		/* some qcs have finished, give it another chance */
		ap->fastdrain_cnt = cnt;
		ap->fastdrain_timer.expires =
			ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
		add_timer(&ap->fastdrain_timer);
	}

 out_unlock:
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain
 * @ap: target ATA port
 * @fastdrain: activate fast drain
 *
 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain
 * is non-zero and EH wasn't pending before. Fast drain ensures
 * that EH kicks in in a timely manner.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
{
	int cnt;

	/* already scheduled? */
	if (ap->pflags & ATA_PFLAG_EH_PENDING)
		return;

	ap->pflags |= ATA_PFLAG_EH_PENDING;

	if (!fastdrain)
		return;

	/* do we have in-flight qcs? */
	cnt = ata_eh_nr_in_flight(ap);
	if (!cnt)
		return;

	/* activate fast drain */
	ap->fastdrain_cnt = cnt;
	ap->fastdrain_timer.expires =
		ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
	add_timer(&ap->fastdrain_timer);
}

/**
 * ata_qc_schedule_eh - schedule qc for error handling
 * @qc: command to schedule error handling for
 *
 * Schedule error handling for @qc. EH will kick in as soon as
 * other commands are drained.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;

	WARN_ON(!ap->ops->error_handler);

	qc->flags |= ATA_QCFLAG_FAILED;
	ata_eh_set_pending(ap, 1);

	/* The following will fail if timeout has already expired.
	 * ata_scsi_error() takes care of such scmds on EH entry.
	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
	 * this function completes.
	 */
	blk_abort_request(qc->scsicmd->request);
}

/**
 * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine
 * @ap: ATA port to schedule EH for
 *
 * LOCKING: inherited from ata_port_schedule_eh
 * spin_lock_irqsave(host lock)
 */
void ata_std_sched_eh(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->pflags & ATA_PFLAG_INITIALIZING)
		return;

	ata_eh_set_pending(ap, 1);
	scsi_schedule_eh(ap->scsi_host);

	DPRINTK("port EH scheduled\n");
}
EXPORT_SYMBOL_GPL(ata_std_sched_eh);

/**
 * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine
 * @ap: ATA port to end EH for
 *
 * In the libata object model there is a 1:1 mapping of ata_port to
 * shost, so host fields can be directly manipulated under ap->lock, in
 * the libsas case we need to hold a lock at the ha->level to coordinate
 * these events.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_std_end_eh(struct ata_port *ap)
{
	struct Scsi_Host *host = ap->scsi_host;

	host->host_eh_scheduled = 0;
}
EXPORT_SYMBOL(ata_std_end_eh);

/**
 * ata_port_schedule_eh - schedule error handling without a qc
 * @ap: ATA port to schedule EH for
 *
 * Schedule error handling for @ap. EH will kick in as soon as
 * all commands are drained.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	/* see: ata_std_sched_eh, unless you know better */
	ap->ops->sched_eh(ap);
}

static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
{
	struct ata_queued_cmd *qc;
	int tag, nr_aborted = 0;

	WARN_ON(!ap->ops->error_handler);

	/* we're gonna abort all commands, no need for fast drain */
	ata_eh_set_pending(ap, 0);

	/* include internal tag in iteration */
	ata_qc_for_each_with_internal(ap, qc, tag) {
		if (qc && (!link || qc->dev->link == link)) {
			qc->flags |= ATA_QCFLAG_FAILED;
			ata_qc_complete(qc);
			nr_aborted++;
		}
	}

	if (!nr_aborted)
		ata_port_schedule_eh(ap);

	return nr_aborted;
}

/**
 * ata_link_abort - abort all qc's on the link
 * @link: ATA link to abort qc's for
 *
 * Abort all active qc's on @link and schedule EH.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 *
 * RETURNS:
 * Number of aborted qc's.
 */
int ata_link_abort(struct ata_link *link)
{
	return ata_do_link_abort(link->ap, link);
}

/**
 * ata_port_abort - abort all qc's on the port
 * @ap: ATA port to abort qc's for
 *
 * Abort all active qc's of @ap and schedule EH.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 *
 * RETURNS:
 * Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
	return ata_do_link_abort(ap, NULL);
}

/**
 * __ata_port_freeze - freeze port
 * @ap: ATA port to freeze
 *
 * This function is called when HSM violation or some other
 * condition disrupts normal operation of the port. Frozen port
 * is not allowed to perform any operation until the port is
 * thawed, which usually follows a successful reset.
 *
 * ap->ops->freeze() callback can be used for freezing the port
 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a
 * port cannot be frozen hardware-wise, the interrupt handler
 * must ack and clear interrupts unconditionally while the port
 * is frozen.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->pflags |= ATA_PFLAG_FROZEN;

	DPRINTK("ata%u port frozen\n", ap->print_id);
}

/**
 * ata_port_freeze - abort & freeze port
 * @ap: ATA port to freeze
 *
 * Abort and freeze @ap. The freeze operation must be called
 * first, because some hardware requires special operations
 * before the taskfile registers are accessible.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 *
 * RETURNS:
 * Number of aborted commands.
 */
int ata_port_freeze(struct ata_port *ap)
{
	int nr_aborted;

	WARN_ON(!ap->ops->error_handler);

	__ata_port_freeze(ap);
	nr_aborted = ata_port_abort(ap);

	return nr_aborted;
}

/**
 * sata_async_notification - SATA async notification handler
 * @ap: ATA port where async notification is received
 *
 * Handler to be called when async notification via SDB FIS is
 * received. This function schedules EH if necessary.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 *
 * RETURNS:
 * 1 if EH is scheduled, 0 otherwise.
 */
int sata_async_notification(struct ata_port *ap)
{
	u32 sntf;
	int rc;

	if (!(ap->flags & ATA_FLAG_AN))
		return 0;

	rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
	if (rc == 0)
		sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);

	if (!sata_pmp_attached(ap) || rc) {
		/* PMP is not attached or SNTF is not available */
		if (!sata_pmp_attached(ap)) {
			/* PMP is not attached. Check whether ATAPI
			 * AN is configured. If so, notify media
			 * change.
			 */
			struct ata_device *dev = ap->link.device;

			if ((dev->class == ATA_DEV_ATAPI) &&
			    (dev->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(dev);
			return 0;
		} else {
			/* PMP is attached but SNTF is not available.
			 * ATAPI async media change notification is
			 * not used. The PMP must be reporting PHY
			 * status change, schedule EH.
			 */
			ata_port_schedule_eh(ap);
			return 1;
		}
	} else {
		/* PMP is attached and SNTF is available */
		struct ata_link *link;

		/* check and notify ATAPI AN */
		ata_for_each_link(link, ap, EDGE) {
			if (!(sntf & (1 << link->pmp)))
				continue;

			if ((link->device->class == ATA_DEV_ATAPI) &&
			    (link->device->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(link->device);
		}

		/* If PMP is reporting that PHY status of some
		 * downstream ports has changed, schedule EH.
		 */
		if (sntf & (1 << SATA_PMP_CTRL_PORT)) {
			ata_port_schedule_eh(ap);
			return 1;
		}

		return 0;
	}
}
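
/*
 * Illustrative sketch, not part of the original file: an LLDD interrupt
 * handler forwards SDB-FIS notification interrupts here. The interrupt
 * status bit below is a hypothetical name, not a real driver define.
 */
#if 0
static void example_port_intr(struct ata_port *ap, u32 status)
{
	if (status & EXAMPLE_IRQ_SDB_NOTIFY)
		sata_async_notification(ap);
}
#endif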

/**
 * ata_eh_freeze_port - EH helper to freeze port
 * @ap: ATA port to freeze
 *
 * Freeze @ap.
 *
 * LOCKING:
 * None.
 */
void ata_eh_freeze_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);
	__ata_port_freeze(ap);
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_eh_thaw_port - EH helper to thaw port
 * @ap: ATA port to thaw
 *
 * Thaw frozen port @ap.
 *
 * LOCKING:
 * None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_FROZEN;

	if (ap->ops->thaw)
		ap->ops->thaw(ap);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("ata%u port thawed\n", ap->print_id);
}

static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}

static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}

/**
 * ata_eh_qc_complete - Complete an active ATA command from EH
 * @qc: Command to complete
 *
 * Indicate to the mid and upper layers that an ATA command has
 * completed. To be used from EH.
 */
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;

	scmd->retries = scmd->allowed;
	__ata_eh_qc_complete(qc);
}

/**
 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
 * @qc: Command to retry
 *
 * Indicate to the mid and upper layers that an ATA command
 * should be retried. To be used from EH.
 *
 * SCSI midlayer limits the number of retries to scmd->allowed.
 * scmd->allowed is incremented for commands which get retried
 * due to unrelated failures (qc->err_mask is zero).
 */
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;

	if (!qc->err_mask)
		scmd->allowed++;
	__ata_eh_qc_complete(qc);
}

/**
 * ata_dev_disable - disable ATA device
 * @dev: ATA device to disable
 *
 * Disable @dev.
 *
 * LOCKING:
 * EH context.
 */
void ata_dev_disable(struct ata_device *dev)
{
	if (!ata_dev_enabled(dev))
		return;

	if (ata_msg_drv(dev->link->ap))
		ata_dev_warn(dev, "disabled\n");
	ata_acpi_on_disable(dev);
	ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET);
	dev->class++;

	/* From now till the next successful probe, ering is used to
	 * track probe failures. Clear accumulated device error info.
	 */
	ata_ering_clear(&dev->ering);
}

/**
 * ata_eh_detach_dev - detach ATA device
 * @dev: ATA device to detach
 *
 * Detach @dev.
 *
 * LOCKING:
 * None.
 */
void ata_eh_detach_dev(struct ata_device *dev)
{
	struct ata_link *link = dev->link;
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	ata_dev_disable(dev);

	spin_lock_irqsave(ap->lock, flags);

	dev->flags &= ~ATA_DFLAG_DETACH;

	if (ata_scsi_offline_dev(dev)) {
		dev->flags |= ATA_DFLAG_DETACHED;
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
	}

	/* clear per-dev EH info */
	ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK);
	ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK);
	ehc->saved_xfer_mode[dev->devno] = 0;
	ehc->saved_ncq_enabled &= ~(1 << dev->devno);

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_eh_about_to_do - about to perform eh_action
 * @link: target ATA link
 * @dev: target ATA dev for per-dev action (can be NULL)
 * @action: action about to be performed
 *
 * Called just before performing EH actions to clear related bits
 * in @link->eh_info such that eh actions are not unnecessarily
 * repeated.
 *
 * LOCKING:
 * None.
 */
void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
			unsigned int action)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_info *ehi = &link->eh_info;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);

	ata_eh_clear_action(link, dev, ehi, action);

	/* About to take EH action, set RECOVERED. Ignore actions on
	 * slave links as master will do them again.
	 */
	if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
		ap->pflags |= ATA_PFLAG_RECOVERED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_eh_done - EH action complete
 * @link: ATA link for which EH actions are complete
 * @dev: target ATA dev for per-dev action (can be NULL)
 * @action: action just completed
 *
 * Called right after performing EH actions to clear related bits
 * in @link->eh_context.
 *
 * LOCKING:
 * None.
 */
void ata_eh_done(struct ata_link *link, struct ata_device *dev,
		 unsigned int action)
{
	struct ata_eh_context *ehc = &link->eh_context;

	ata_eh_clear_action(link, dev, &ehc->i, action);
}

/**
 * ata_err_string - convert err_mask to descriptive string
 * @err_mask: error mask to convert to string
 *
 * Convert @err_mask to descriptive string. Errors are
 * prioritized according to severity and only the most severe
 * error is reported.
 *
 * LOCKING:
 * None.
 *
 * RETURNS:
 * Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
	if (err_mask & AC_ERR_HOST_BUS)
		return "host bus error";
	if (err_mask & AC_ERR_ATA_BUS)
		return "ATA bus error";
	if (err_mask & AC_ERR_TIMEOUT)
		return "timeout";
	if (err_mask & AC_ERR_HSM)
		return "HSM violation";
	if (err_mask & AC_ERR_SYSTEM)
		return "internal error";
	if (err_mask & AC_ERR_MEDIA)
		return "media error";
	if (err_mask & AC_ERR_INVALID)
		return "invalid argument";
	if (err_mask & AC_ERR_DEV)
		return "device error";
	if (err_mask & AC_ERR_NCQ)
		return "NCQ error";
	if (err_mask & AC_ERR_NODEV_HINT)
		return "Polling detection error";
	return "unknown error";
}

/**
 * ata_eh_read_log_10h - Read log page 10h for NCQ error details
 * @dev: Device to read log page 10h from
 * @tag: Resulting tag of the failed command
 * @tf: Resulting taskfile registers of the failed command
 *
 * Read log page 10h to obtain NCQ error details and clear error
 * condition.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno otherwise.
 */
static int ata_eh_read_log_10h(struct ata_device *dev,
			       int *tag, struct ata_taskfile *tf)
{
	u8 *buf = dev->link->ap->sector_buf;
	unsigned int err_mask;
	u8 csum;
	int i;

	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, 0, buf, 1);
	if (err_mask)
		return -EIO;

	csum = 0;
	for (i = 0; i < ATA_SECT_SIZE; i++)
		csum += buf[i];
	if (csum)
		ata_dev_warn(dev, "invalid checksum 0x%x on log page 10h\n",
			     csum);

	if (buf[0] & 0x80)
		return -ENOENT;

	*tag = buf[0] & 0x1f;

	tf->command = buf[2];
	tf->feature = buf[3];
	tf->lbal = buf[4];
	tf->lbam = buf[5];
	tf->lbah = buf[6];
	tf->device = buf[7];
	tf->hob_lbal = buf[8];
	tf->hob_lbam = buf[9];
	tf->hob_lbah = buf[10];
	tf->nsect = buf[12];
	tf->hob_nsect = buf[13];
	if (dev->class == ATA_DEV_ZAC && ata_id_has_ncq_autosense(dev->id))
		tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16];

	return 0;
}

/**
 * atapi_eh_tur - perform ATAPI TEST_UNIT_READY
 * @dev: target ATAPI device
 * @r_sense_key: out parameter for sense_key
 *
 * Perform ATAPI TEST_UNIT_READY.
 *
 * LOCKING:
 * EH context (may sleep).
 *
 * RETURNS:
 * 0 on success, AC_ERR_* mask on failure.
 */
unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
	struct ata_taskfile tf;
	unsigned int err_mask;

	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;
	tf.protocol = ATAPI_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
	if (err_mask == AC_ERR_DEV)
		*r_sense_key = tf.feature >> 4;
	return err_mask;
}

/**
 * ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT
 * @qc: qc to perform REQUEST_SENSE_DATA_EXT to
 * @cmd: scsi command for which the sense code should be set
 *
 * Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK
 * SENSE. This function is an EH helper.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_request_sense(struct ata_queued_cmd *qc,
				 struct scsi_cmnd *cmd)
{
	struct ata_device *dev = qc->dev;
	struct ata_taskfile tf;
	unsigned int err_mask;

	if (qc->ap->pflags & ATA_PFLAG_FROZEN) {
		ata_dev_warn(dev, "sense data available but port frozen\n");
		return;
	}

	if (!cmd || qc->flags & ATA_QCFLAG_SENSE_VALID)
		return;

	if (!ata_id_sense_reporting_enabled(dev->id)) {
		ata_dev_warn(qc->dev, "sense data reporting disabled\n");
		return;
	}

	DPRINTK("ATA request sense\n");

	ata_tf_init(dev, &tf);
	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
	tf.command = ATA_CMD_REQ_SENSE_DATA;
	tf.protocol = ATA_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
	/* Ignore err_mask; ATA_ERR might be set */
	if (tf.command & ATA_SENSE) {
		ata_scsi_set_sense(dev, cmd, tf.lbah, tf.lbam, tf.lbal);
		qc->flags |= ATA_QCFLAG_SENSE_VALID;
	} else {
		ata_dev_warn(dev, "request sense failed stat %02x emask %x\n",
			     tf.command, err_mask);
	}
}

/**
 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 * @dev: device to perform REQUEST_SENSE to
 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
 * @dfl_sense_key: default sense key to use
 *
 * Perform ATAPI REQUEST_SENSE after the device reported CHECK
 * SENSE. This function is an EH helper.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, AC_ERR_* mask on failure
 */
unsigned int atapi_eh_request_sense(struct ata_device *dev,
				    u8 *sense_buf, u8 dfl_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] =
		{ REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
	struct ata_port *ap = dev->link->ap;
	struct ata_taskfile tf;

	DPRINTK("ATAPI request sense\n");

	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* initialize sense_buf with the error register,
	 * for the case where they are -not- overwritten
	 */
	sense_buf[0] = 0x70;
	sense_buf[2] = dfl_sense_key;

	/* some devices time out if garbage left in tf */
	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATAPI_PROT_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATAPI_PROT_PIO;
		tf.lbam = SCSI_SENSE_BUFFERSIZE;
		tf.lbah = 0;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
}

/**
 * ata_eh_analyze_serror - analyze SError for a failed port
 * @link: ATA link to analyze SError for
 *
 * Analyze SError if available and further determine cause of
 * failure.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_analyze_serror(struct ata_link *link)
{
	struct ata_eh_context *ehc = &link->eh_context;
	u32 serror = ehc->i.serror;
	unsigned int err_mask = 0, action = 0;
	u32 hotplug_mask;

	if (serror & (SERR_PERSISTENT | SERR_DATA)) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_PROTOCOL) {
		err_mask |= AC_ERR_HSM;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_INTERNAL) {
		err_mask |= AC_ERR_SYSTEM;
		action |= ATA_EH_RESET;
	}

	/* Determine whether a hotplug event has occurred. Both
	 * SError.N/X are considered hotplug events for enabled or
	 * host links. For disabled PMP links, only N bit is
	 * considered as X bit is left at 1 for link plugging.
	 */
	if (link->lpm_policy > ATA_LPM_MAX_POWER)
		hotplug_mask = 0;	/* hotplug doesn't work w/ LPM */
	else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
		hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
	else
		hotplug_mask = SERR_PHYRDY_CHG;

	if (serror & hotplug_mask)
		ata_ehi_hotplugged(&ehc->i);

	ehc->i.err_mask |= err_mask;
	ehc->i.action |= action;
}

/**
 * ata_eh_analyze_ncq_error - analyze NCQ error
 * @link: ATA link to analyze NCQ error for
 *
 * Read log page 10h, determine the offending qc and acquire
 * error status TF. For NCQ device errors, all LLDDs have to do
 * is setting AC_ERR_DEV in ehi->err_mask. This function takes
 * care of the rest.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_eh_analyze_ncq_error(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev = link->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->pflags & ATA_PFLAG_FROZEN)
		return;

	/* is it NCQ device error? */
	if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has LLDD analyzed already? */
	ata_qc_for_each_raw(ap, qc, tag) {
		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */
	memset(&tf, 0, sizeof(tf));
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_link_err(link, "failed to read log page 10h (errno=%d)\n",
			     rc);
		return;
	}

	if (!(link->sactive & (1 << tag))) {
		ata_link_err(link, "log page 10h reported inactive tag %d\n",
			     tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
	qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
	if (dev->class == ATA_DEV_ZAC &&
	    ((qc->result_tf.command & ATA_SENSE) || qc->result_tf.auxiliary)) {
		char sense_key, asc, ascq;

		sense_key = (qc->result_tf.auxiliary >> 16) & 0xff;
		asc = (qc->result_tf.auxiliary >> 8) & 0xff;
		ascq = qc->result_tf.auxiliary & 0xff;
		ata_scsi_set_sense(dev, qc->scsicmd, sense_key, asc, ascq);
		ata_scsi_set_sense_information(dev, qc->scsicmd,
					       &qc->result_tf);
		qc->flags |= ATA_QCFLAG_SENSE_VALID;
	}

	ehc->i.err_mask &= ~AC_ERR_DEV;
}

/**
 * ata_eh_analyze_tf - analyze taskfile of a failed qc
 * @qc: qc to analyze
 * @tf: Taskfile registers to analyze
 *
 * Analyze taskfile of @qc and further determine cause of
 * failure. This function also requests ATAPI sense data if
 * available.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * Determined recovery action
 */
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
				      const struct ata_taskfile *tf)
{
	unsigned int tmp, action = 0;
	u8 stat = tf->command, err = tf->feature;

	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
		qc->err_mask |= AC_ERR_HSM;
		return ATA_EH_RESET;
	}

	if (stat & (ATA_ERR | ATA_DF)) {
		qc->err_mask |= AC_ERR_DEV;
		/*
		 * Sense data reporting does not work if the
		 * device fault bit is set.
		 */
		if (stat & ATA_DF)
			stat &= ~ATA_SENSE;
	} else {
		return 0;
	}

	switch (qc->dev->class) {
	case ATA_DEV_ZAC:
		if (stat & ATA_SENSE)
			ata_eh_request_sense(qc, qc->scsicmd);
		/* fall through */
	case ATA_DEV_ATA:
		if (err & ATA_ICRC)
			qc->err_mask |= AC_ERR_ATA_BUS;
		if (err & (ATA_UNC | ATA_AMNF))
			qc->err_mask |= AC_ERR_MEDIA;
		if (err & ATA_IDNF)
			qc->err_mask |= AC_ERR_INVALID;
		break;

	case ATA_DEV_ATAPI:
		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
			tmp = atapi_eh_request_sense(qc->dev,
						qc->scsicmd->sense_buffer,
						qc->result_tf.feature >> 4);
			if (!tmp)
				qc->flags |= ATA_QCFLAG_SENSE_VALID;
			else
				qc->err_mask |= tmp;
		}
	}

	if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
		int ret = scsi_check_sense(qc->scsicmd);
		/*
		 * SUCCESS here means that the sense code could be
		 * evaluated and should be passed to the upper layers
		 * for correct evaluation.
		 * FAILED means the sense code could not be interpreted
		 * and the device would need to be reset.
		 * NEEDS_RETRY and ADD_TO_MLQUEUE means that the
		 * command would need to be retried.
		 */
		if (ret == NEEDS_RETRY || ret == ADD_TO_MLQUEUE) {
			qc->flags |= ATA_QCFLAG_RETRY;
			qc->err_mask |= AC_ERR_OTHER;
		} else if (ret != SUCCESS) {
			qc->err_mask |= AC_ERR_HSM;
		}
	}
	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
		action |= ATA_EH_RESET;

	return action;
}

static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
				   int *xfer_ok)
{
	int base = 0;

	if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
		*xfer_ok = 1;

	if (!*xfer_ok)
		base = ATA_ECAT_DUBIOUS_NONE;

	if (err_mask & AC_ERR_ATA_BUS)
		return base + ATA_ECAT_ATA_BUS;

	if (err_mask & AC_ERR_TIMEOUT)
		return base + ATA_ECAT_TOUT_HSM;

	if (eflags & ATA_EFLAG_IS_IO) {
		if (err_mask & AC_ERR_HSM)
			return base + ATA_ECAT_TOUT_HSM;
		if ((err_mask &
		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
			return base + ATA_ECAT_UNK_DEV;
	}

	return 0;
}
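
/*
 * Worked example, not part of the original file: a bare AC_ERR_DEV error
 * on an IO command maps to ATA_ECAT_UNK_DEV; if no verified transfer has
 * been seen yet (*xfer_ok still 0 because every entry so far carried
 * ATA_EFLAG_DUBIOUS_XFER), the same error lands in the DUBIOUS_UNK_DEV
 * category instead.
 */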

struct speed_down_verdict_arg {
	u64 since;
	int xfer_ok;
	int nr_errors[ATA_ECAT_NR];
};

static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
{
	struct speed_down_verdict_arg *arg = void_arg;
	int cat;

	if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since))
		return -1;

	cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
				      &arg->xfer_ok);
	arg->nr_errors[cat]++;

	return 0;
}

/**
 * ata_eh_speed_down_verdict - Determine speed down verdict
 * @dev: Device of interest
 *
 * This function examines error ring of @dev and determines
 * whether NCQ needs to be turned off, transfer speed should be
 * stepped down, or falling back to PIO is necessary.
 *
 * ECAT_ATA_BUS : ATA_BUS error for any command
 *
 * ECAT_TOUT_HSM : TIMEOUT for any command or HSM violation for
 * IO commands
 *
 * ECAT_UNK_DEV : Unknown DEV error for IO commands
 *
 * ECAT_DUBIOUS_* : Identical to above three but occurred while
 * data transfer hasn't been verified.
 *
 * Verdicts are
 *
 * NCQ_OFF : Turn off NCQ.
 *
 * SPEED_DOWN : Speed down transfer speed but don't fall back
 * to PIO.
 *
 * FALLBACK_TO_PIO : Fall back to PIO.
 *
 * Even if multiple verdicts are returned, only one action is
 * taken per error. An action triggered by non-DUBIOUS errors
 * clears ering, while one triggered by DUBIOUS_* errors doesn't.
 * This is to expedite speed down decisions right after device is
 * initially configured.
 *
 * The following are speed down rules. #1 and #2 deal with
 * DUBIOUS errors.
 *
 * 1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *    occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 * 2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *    occurred during last 5 mins, NCQ_OFF.
 *
 * 3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *    occurred during last 5 mins, FALLBACK_TO_PIO.
 *
 * 4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *    during last 10 mins, NCQ_OFF.
 *
 * 5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *    UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 * LOCKING:
 * Inherited from caller.
 *
 * RETURNS:
 * OR of ATA_EH_SPDN_* flags.
 */
static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
{
	const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
	u64 j64 = get_jiffies_64();
	struct speed_down_verdict_arg arg;
	unsigned int verdict = 0;

	/* scan past 5 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j5mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1)
		verdict |= ATA_EH_SPDN_SPEED_DOWN |
			ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1)
		verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;

	/* scan past 10 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j10mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 3)
		verdict |= ATA_EH_SPDN_NCQ_OFF;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 ||
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_SPEED_DOWN;

	return verdict;
}
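
/*
 * Worked example, not part of the original file: two DUBIOUS_ATA_BUS
 * errors within the last 5 minutes trip rule #1 above, yielding
 * SPEED_DOWN | FALLBACK_TO_PIO | KEEP_ERRORS -- the ering is kept so
 * later errors can still escalate the verdict.
 */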
1979
1980/**
1981 * ata_eh_speed_down - record error and speed down if necessary
1982 * @dev: Failed device
1983 * @eflags: mask of ATA_EFLAG_* flags
1984 * @err_mask: err_mask of the error
1985 *
1986 * Record error and examine error history to determine whether
1987 * adjusting transmission speed is necessary. It also sets
1988 * transmission limits appropriately if such adjustment is
1989 * necessary.
1990 *
1991 * LOCKING:
1992 * Kernel thread context (may sleep).
1993 *
1994 * RETURNS:
1995 * Determined recovery action.
1996 */
1997static unsigned int ata_eh_speed_down(struct ata_device *dev,
1998 unsigned int eflags, unsigned int err_mask)
1999{
2000 struct ata_link *link = ata_dev_phys_link(dev);
2001 int xfer_ok = 0;
2002 unsigned int verdict;
2003 unsigned int action = 0;
2004
2005 /* don't bother if Cat-0 error */
2006 if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0)
2007 return 0;
2008
2009 /* record error and determine whether speed down is necessary */
2010 ata_ering_record(&dev->ering, eflags, err_mask);
2011 verdict = ata_eh_speed_down_verdict(dev);
2012
2013 /* turn off NCQ? */
2014 if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
2015 (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
2016 ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
2017 dev->flags |= ATA_DFLAG_NCQ_OFF;
2018 ata_dev_warn(dev, "NCQ disabled due to excessive errors\n");
2019 goto done;
2020 }
2021
2022 /* speed down? */
2023 if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
2024 /* speed down SATA link speed if possible */
2025 if (sata_down_spd_limit(link, 0) == 0) {
2026 action |= ATA_EH_RESET;
2027 goto done;
2028 }
2029
2030 /* lower transfer mode */
2031 if (dev->spdn_cnt < 2) {
2032 static const int dma_dnxfer_sel[] =
2033 { ATA_DNXFER_DMA, ATA_DNXFER_40C };
2034 static const int pio_dnxfer_sel[] =
2035 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
2036 int sel;
2037
2038 if (dev->xfer_shift != ATA_SHIFT_PIO)
2039 sel = dma_dnxfer_sel[dev->spdn_cnt];
2040 else
2041 sel = pio_dnxfer_sel[dev->spdn_cnt];
2042
2043 dev->spdn_cnt++;
2044
2045 if (ata_down_xfermask_limit(dev, sel) == 0) {
2046 action |= ATA_EH_RESET;
2047 goto done;
2048 }
2049 }
2050 }
2051
	/* Fall back to PIO?  Slowing down to PIO is meaningless for
	 * SATA ATA devices.  Consider it only for PATA devices and for
	 * ATAPI devices (including SATA ATAPI).
	 */
2055 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
2056 (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) &&
2057 (dev->xfer_shift != ATA_SHIFT_PIO)) {
2058 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
2059 dev->spdn_cnt = 0;
2060 action |= ATA_EH_RESET;
2061 goto done;
2062 }
2063 }
2064
2065 return 0;
2066 done:
2067 /* device has been slowed down, blow error history */
2068 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS))
2069 ata_ering_clear(&dev->ering);
2070 return action;
2071}
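
/*
 * Escalation sketch (an assumed sequence; the exact path depends on
 * what the controller and device support): for a SATA disk that keeps
 * accumulating SPEED_DOWN verdicts, the first action lowers the PHY
 * speed via sata_down_spd_limit(), e.g. 6.0 Gbps -> 3.0 Gbps.  Once
 * the link speed can't be lowered any further, the next two verdicts
 * step the transfer mode down instead (ATA_DNXFER_DMA, then
 * ATA_DNXFER_40C, as spdn_cnt goes 0 -> 2).  Only after that is
 * FALLBACK_TO_PIO considered, and then only for PATA/ATAPI as noted
 * above.  Every successful action requests ATA_EH_RESET so the new
 * limits take effect.
 */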
2072
2073/**
2074 * ata_eh_worth_retry - analyze error and decide whether to retry
2075 * @qc: qc to possibly retry
2076 *
2077 * Look at the cause of the error and decide if a retry
2078 * might be useful or not. We don't want to retry media errors
2079 * because the drive itself has probably already taken 10-30 seconds
2080 * doing its own internal retries before reporting the failure.
2081 */
2082static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc)
2083{
2084 if (qc->err_mask & AC_ERR_MEDIA)
2085 return 0; /* don't retry media errors */
2086 if (qc->flags & ATA_QCFLAG_IO)
2087 return 1; /* otherwise retry anything from fs stack */
2088 if (qc->err_mask & AC_ERR_INVALID)
2089 return 0; /* don't retry these */
2090 return qc->err_mask != AC_ERR_DEV; /* retry if not dev error */
2091}
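
/*
 * For example (illustrative): a READ DMA issued by the block layer
 * (ATA_QCFLAG_IO) that failed with AC_ERR_TIMEOUT is retried, while
 * the same command failing with AC_ERR_MEDIA is completed with an
 * error right away.  A non-IO command failing with a bare AC_ERR_DEV
 * is not retried either, since the device itself rejected it.
 */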
2092
2093/**
2094 * ata_eh_quiet - check if we need to be quiet about a command error
2095 * @qc: qc to check
2096 *
 * Look at the qc flags and its SCSI command request flags to determine
2098 * if we need to be quiet about the command failure.
2099 */
2100static inline bool ata_eh_quiet(struct ata_queued_cmd *qc)
2101{
2102 if (qc->scsicmd &&
2103 qc->scsicmd->request->rq_flags & RQF_QUIET)
2104 qc->flags |= ATA_QCFLAG_QUIET;
2105 return qc->flags & ATA_QCFLAG_QUIET;
2106}
2107
2108/**
2109 * ata_eh_link_autopsy - analyze error and determine recovery action
2110 * @link: host link to perform autopsy on
2111 *
2112 * Analyze why @link failed and determine which recovery actions
2113 * are needed. This function also sets more detailed AC_ERR_*
 * values and fills in sense data for ATAPI CHECK CONDITION.
2115 *
2116 * LOCKING:
2117 * Kernel thread context (may sleep).
2118 */
2119static void ata_eh_link_autopsy(struct ata_link *link)
2120{
2121 struct ata_port *ap = link->ap;
2122 struct ata_eh_context *ehc = &link->eh_context;
2123 struct ata_queued_cmd *qc;
2124 struct ata_device *dev;
2125 unsigned int all_err_mask = 0, eflags = 0;
2126 int tag, nr_failed = 0, nr_quiet = 0;
2127 u32 serror;
2128 int rc;
2129
2130 DPRINTK("ENTER\n");
2131
2132 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
2133 return;
2134
2135 /* obtain and analyze SError */
2136 rc = sata_scr_read(link, SCR_ERROR, &serror);
2137 if (rc == 0) {
2138 ehc->i.serror |= serror;
2139 ata_eh_analyze_serror(link);
2140 } else if (rc != -EOPNOTSUPP) {
2141 /* SError read failed, force reset and probing */
2142 ehc->i.probe_mask |= ATA_ALL_DEVICES;
2143 ehc->i.action |= ATA_EH_RESET;
2144 ehc->i.err_mask |= AC_ERR_OTHER;
2145 }
2146
2147 /* analyze NCQ failure */
2148 ata_eh_analyze_ncq_error(link);
2149
2150 /* any real error trumps AC_ERR_OTHER */
2151 if (ehc->i.err_mask & ~AC_ERR_OTHER)
2152 ehc->i.err_mask &= ~AC_ERR_OTHER;
2153
2154 all_err_mask |= ehc->i.err_mask;
2155
2156 ata_qc_for_each_raw(ap, qc, tag) {
2157 if (!(qc->flags & ATA_QCFLAG_FAILED) ||
2158 ata_dev_phys_link(qc->dev) != link)
2159 continue;
2160
2161 /* inherit upper level err_mask */
2162 qc->err_mask |= ehc->i.err_mask;
2163
2164 /* analyze TF */
2165 ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);
2166
2167 /* DEV errors are probably spurious in case of ATA_BUS error */
2168 if (qc->err_mask & AC_ERR_ATA_BUS)
2169 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
2170 AC_ERR_INVALID);
2171
2172 /* any real error trumps unknown error */
2173 if (qc->err_mask & ~AC_ERR_OTHER)
2174 qc->err_mask &= ~AC_ERR_OTHER;
2175
2176 /*
2177 * SENSE_VALID trumps dev/unknown error and revalidation. Upper
2178 * layers will determine whether the command is worth retrying
2179 * based on the sense data and device class/type. Otherwise,
2180 * determine directly if the command is worth retrying using its
2181 * error mask and flags.
2182 */
2183 if (qc->flags & ATA_QCFLAG_SENSE_VALID)
2184 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
2185 else if (ata_eh_worth_retry(qc))
2186 qc->flags |= ATA_QCFLAG_RETRY;
2187
2188 /* accumulate error info */
2189 ehc->i.dev = qc->dev;
2190 all_err_mask |= qc->err_mask;
2191 if (qc->flags & ATA_QCFLAG_IO)
2192 eflags |= ATA_EFLAG_IS_IO;
2193 trace_ata_eh_link_autopsy_qc(qc);
2194
2195 /* Count quiet errors */
2196 if (ata_eh_quiet(qc))
2197 nr_quiet++;
2198 nr_failed++;
2199 }
2200
2201 /* If all failed commands requested silence, then be quiet */
2202 if (nr_quiet == nr_failed)
2203 ehc->i.flags |= ATA_EHI_QUIET;
2204
2205 /* enforce default EH actions */
2206 if (ap->pflags & ATA_PFLAG_FROZEN ||
2207 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
2208 ehc->i.action |= ATA_EH_RESET;
2209 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) ||
2210 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV)))
2211 ehc->i.action |= ATA_EH_REVALIDATE;
2212
2213 /* If we have offending qcs and the associated failed device,
2214 * perform per-dev EH action only on the offending device.
2215 */
2216 if (ehc->i.dev) {
2217 ehc->i.dev_action[ehc->i.dev->devno] |=
2218 ehc->i.action & ATA_EH_PERDEV_MASK;
2219 ehc->i.action &= ~ATA_EH_PERDEV_MASK;
2220 }
2221
2222 /* propagate timeout to host link */
2223 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link))
2224 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT;
2225
2226 /* record error and consider speeding down */
2227 dev = ehc->i.dev;
	if (!dev && ata_link_max_devices(link) == 1 &&
	    ata_dev_enabled(link->device))
2230 dev = link->device;
2231
2232 if (dev) {
2233 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER)
2234 eflags |= ATA_EFLAG_DUBIOUS_XFER;
2235 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
2236 trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask);
2237 }
2238 DPRINTK("EXIT\n");
2239}
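
/*
 * Decision sketch (illustrative): a single NCQ write failing with a
 * media error sets neither AC_ERR_HSM nor AC_ERR_TIMEOUT, so autopsy
 * settles for ATA_EH_REVALIDATE on the offending device.  A frozen
 * port or any HSM violation/timeout escalates to ATA_EH_RESET instead,
 * regardless of what else was observed.
 */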
2240
2241/**
2242 * ata_eh_autopsy - analyze error and determine recovery action
2243 * @ap: host port to perform autopsy on
2244 *
2245 * Analyze all links of @ap and determine why they failed and
2246 * which recovery actions are needed.
2247 *
2248 * LOCKING:
2249 * Kernel thread context (may sleep).
2250 */
2251void ata_eh_autopsy(struct ata_port *ap)
2252{
2253 struct ata_link *link;
2254
2255 ata_for_each_link(link, ap, EDGE)
2256 ata_eh_link_autopsy(link);
2257
2258 /* Handle the frigging slave link. Autopsy is done similarly
2259 * but actions and flags are transferred over to the master
2260 * link and handled from there.
2261 */
2262 if (ap->slave_link) {
2263 struct ata_eh_context *mehc = &ap->link.eh_context;
2264 struct ata_eh_context *sehc = &ap->slave_link->eh_context;
2265
2266 /* transfer control flags from master to slave */
2267 sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK;
2268
2269 /* perform autopsy on the slave link */
2270 ata_eh_link_autopsy(ap->slave_link);
2271
2272 /* transfer actions from slave to master and clear slave */
2273 ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
2274 mehc->i.action |= sehc->i.action;
2275 mehc->i.dev_action[1] |= sehc->i.dev_action[1];
2276 mehc->i.flags |= sehc->i.flags;
2277 ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
2278 }
2279
2280 /* Autopsy of fanout ports can affect host link autopsy.
2281 * Perform host link autopsy last.
2282 */
2283 if (sata_pmp_attached(ap))
2284 ata_eh_link_autopsy(&ap->link);
2285}
2286
2287/**
2288 * ata_get_cmd_descript - get description for ATA command
2289 * @command: ATA command code to get description for
2290 *
2291 * Return a textual description of the given command, or NULL if the
2292 * command is not known.
2293 *
2294 * LOCKING:
2295 * None
2296 */
2297const char *ata_get_cmd_descript(u8 command)
2298{
2299#ifdef CONFIG_ATA_VERBOSE_ERROR
2300 static const struct
2301 {
2302 u8 command;
2303 const char *text;
2304 } cmd_descr[] = {
2305 { ATA_CMD_DEV_RESET, "DEVICE RESET" },
2306 { ATA_CMD_CHK_POWER, "CHECK POWER MODE" },
2307 { ATA_CMD_STANDBY, "STANDBY" },
2308 { ATA_CMD_IDLE, "IDLE" },
2309 { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" },
2310 { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" },
2311 { ATA_CMD_DOWNLOAD_MICRO_DMA, "DOWNLOAD MICROCODE DMA" },
2312 { ATA_CMD_NOP, "NOP" },
2313 { ATA_CMD_FLUSH, "FLUSH CACHE" },
2314 { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" },
2315 { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" },
2316 { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" },
2317 { ATA_CMD_SERVICE, "SERVICE" },
2318 { ATA_CMD_READ, "READ DMA" },
2319 { ATA_CMD_READ_EXT, "READ DMA EXT" },
2320 { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" },
2321 { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" },
2322 { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" },
2323 { ATA_CMD_WRITE, "WRITE DMA" },
2324 { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" },
2325 { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" },
2326 { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" },
2327 { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" },
2328 { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" },
2329 { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" },
2330 { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" },
2331 { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" },
2332 { ATA_CMD_NCQ_NON_DATA, "NCQ NON-DATA" },
2333 { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" },
2334 { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" },
2335 { ATA_CMD_PIO_READ, "READ SECTOR(S)" },
2336 { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" },
2337 { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" },
2338 { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" },
2339 { ATA_CMD_READ_MULTI, "READ MULTIPLE" },
2340 { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" },
2341 { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" },
2342 { ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" },
2343 { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" },
2344 { ATA_CMD_SET_FEATURES, "SET FEATURES" },
2345 { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" },
2346 { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" },
2347 { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" },
2348 { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" },
2349 { ATA_CMD_STANDBYNOW1, "STANDBY IMMEDIATE" },
2350 { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" },
2351 { ATA_CMD_SLEEP, "SLEEP" },
2352 { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" },
2353 { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" },
2354 { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" },
2355 { ATA_CMD_SET_MAX, "SET MAX ADDRESS" },
2356 { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" },
2357 { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" },
2358 { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" },
2359 { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" },
2360 { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" },
2361 { ATA_CMD_TRUSTED_NONDATA, "TRUSTED NON-DATA" },
2362 { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" },
2363 { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" },
2364 { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" },
2365 { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" },
2366 { ATA_CMD_PMP_READ, "READ BUFFER" },
2367 { ATA_CMD_PMP_READ_DMA, "READ BUFFER DMA" },
2368 { ATA_CMD_PMP_WRITE, "WRITE BUFFER" },
2369 { ATA_CMD_PMP_WRITE_DMA, "WRITE BUFFER DMA" },
2370 { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" },
2371 { ATA_CMD_SEC_SET_PASS, "SECURITY SET PASSWORD" },
2372 { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" },
2373 { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" },
2374 { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" },
2375 { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" },
2376 { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" },
2377 { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" },
2378 { ATA_CMD_SMART, "SMART" },
2379 { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" },
2380 { ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" },
2381 { ATA_CMD_DSM, "DATA SET MANAGEMENT" },
2382 { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" },
2383 { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" },
2384 { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" },
2385 { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" },
2386 { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" },
2387 { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" },
2388 { ATA_CMD_REQ_SENSE_DATA, "REQUEST SENSE DATA EXT" },
2389 { ATA_CMD_SANITIZE_DEVICE, "SANITIZE DEVICE" },
2390 { ATA_CMD_ZAC_MGMT_IN, "ZAC MANAGEMENT IN" },
2391 { ATA_CMD_ZAC_MGMT_OUT, "ZAC MANAGEMENT OUT" },
2392 { ATA_CMD_READ_LONG, "READ LONG (with retries)" },
2393 { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" },
2394 { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" },
2395 { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" },
2396 { ATA_CMD_RESTORE, "RECALIBRATE" },
2397 { 0, NULL } /* terminate list */
2398 };
2399
2400 unsigned int i;
2401 for (i = 0; cmd_descr[i].text; i++)
2402 if (cmd_descr[i].command == command)
2403 return cmd_descr[i].text;
2404#endif
2405
2406 return NULL;
2407}
2408EXPORT_SYMBOL_GPL(ata_get_cmd_descript);
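
/*
 * Example (hypothetical caller): print a human-readable name for a
 * failed command, falling back to the raw opcode when the table has no
 * entry or CONFIG_ATA_VERBOSE_ERROR is not set (in which case the
 * function always returns NULL):
 *
 *	const char *name = ata_get_cmd_descript(qc->tf.command);
 *
 *	if (name)
 *		ata_dev_err(qc->dev, "failed command: %s\n", name);
 *	else
 *		ata_dev_err(qc->dev, "failed command: 0x%02x\n",
 *			    qc->tf.command);
 */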
2409
2410/**
2411 * ata_eh_link_report - report error handling to user
2412 * @link: ATA link EH is going on
2413 *
2414 * Report EH to user.
2415 *
2416 * LOCKING:
2417 * None.
2418 */
2419static void ata_eh_link_report(struct ata_link *link)
2420{
2421 struct ata_port *ap = link->ap;
2422 struct ata_eh_context *ehc = &link->eh_context;
2423 struct ata_queued_cmd *qc;
2424 const char *frozen, *desc;
2425 char tries_buf[16] = "";
2426 int tag, nr_failed = 0;
2427
2428 if (ehc->i.flags & ATA_EHI_QUIET)
2429 return;
2430
2431 desc = NULL;
2432 if (ehc->i.desc[0] != '\0')
2433 desc = ehc->i.desc;
2434
2435 ata_qc_for_each_raw(ap, qc, tag) {
2436 if (!(qc->flags & ATA_QCFLAG_FAILED) ||
2437 ata_dev_phys_link(qc->dev) != link ||
2438 ((qc->flags & ATA_QCFLAG_QUIET) &&
2439 qc->err_mask == AC_ERR_DEV))
2440 continue;
2441 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
2442 continue;
2443
2444 nr_failed++;
2445 }
2446
2447 if (!nr_failed && !ehc->i.err_mask)
2448 return;
2449
2450 frozen = "";
2451 if (ap->pflags & ATA_PFLAG_FROZEN)
2452 frozen = " frozen";
2453
2454 if (ap->eh_tries < ATA_EH_MAX_TRIES)
2455 snprintf(tries_buf, sizeof(tries_buf), " t%d",
2456 ap->eh_tries);
2457
2458 if (ehc->i.dev) {
2459 ata_dev_err(ehc->i.dev, "exception Emask 0x%x "
2460 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
2461 ehc->i.err_mask, link->sactive, ehc->i.serror,
2462 ehc->i.action, frozen, tries_buf);
2463 if (desc)
2464 ata_dev_err(ehc->i.dev, "%s\n", desc);
2465 } else {
2466 ata_link_err(link, "exception Emask 0x%x "
2467 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
2468 ehc->i.err_mask, link->sactive, ehc->i.serror,
2469 ehc->i.action, frozen, tries_buf);
2470 if (desc)
2471 ata_link_err(link, "%s\n", desc);
2472 }
2473
2474#ifdef CONFIG_ATA_VERBOSE_ERROR
2475 if (ehc->i.serror)
2476 ata_link_err(link,
2477 "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n",
2478 ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "",
2479 ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "",
2480 ehc->i.serror & SERR_DATA ? "UnrecovData " : "",
2481 ehc->i.serror & SERR_PERSISTENT ? "Persist " : "",
2482 ehc->i.serror & SERR_PROTOCOL ? "Proto " : "",
2483 ehc->i.serror & SERR_INTERNAL ? "HostInt " : "",
2484 ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "",
2485 ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "",
2486 ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "",
2487 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "",
2488 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "",
2489 ehc->i.serror & SERR_CRC ? "BadCRC " : "",
2490 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "",
2491 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "",
2492 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "",
2493 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "",
2494 ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : "");
2495#endif
2496
2497 ata_qc_for_each_raw(ap, qc, tag) {
2498 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;
2499 char data_buf[20] = "";
2500 char cdb_buf[70] = "";
2501
2502 if (!(qc->flags & ATA_QCFLAG_FAILED) ||
2503 ata_dev_phys_link(qc->dev) != link || !qc->err_mask)
2504 continue;
2505
2506 if (qc->dma_dir != DMA_NONE) {
2507 static const char *dma_str[] = {
2508 [DMA_BIDIRECTIONAL] = "bidi",
2509 [DMA_TO_DEVICE] = "out",
2510 [DMA_FROM_DEVICE] = "in",
2511 };
2512 const char *prot_str = NULL;
2513
2514 switch (qc->tf.protocol) {
2515 case ATA_PROT_UNKNOWN:
2516 prot_str = "unknown";
2517 break;
2518 case ATA_PROT_NODATA:
2519 prot_str = "nodata";
2520 break;
2521 case ATA_PROT_PIO:
2522 prot_str = "pio";
2523 break;
2524 case ATA_PROT_DMA:
2525 prot_str = "dma";
2526 break;
2527 case ATA_PROT_NCQ:
2528 prot_str = "ncq dma";
2529 break;
2530 case ATA_PROT_NCQ_NODATA:
2531 prot_str = "ncq nodata";
2532 break;
2533 case ATAPI_PROT_NODATA:
2534 prot_str = "nodata";
2535 break;
2536 case ATAPI_PROT_PIO:
2537 prot_str = "pio";
2538 break;
2539 case ATAPI_PROT_DMA:
2540 prot_str = "dma";
2541 break;
2542 }
2543 snprintf(data_buf, sizeof(data_buf), " %s %u %s",
2544 prot_str, qc->nbytes, dma_str[qc->dma_dir]);
2545 }
2546
2547 if (ata_is_atapi(qc->tf.protocol)) {
2548 const u8 *cdb = qc->cdb;
2549 size_t cdb_len = qc->dev->cdb_len;
2550
2551 if (qc->scsicmd) {
2552 cdb = qc->scsicmd->cmnd;
2553 cdb_len = qc->scsicmd->cmd_len;
2554 }
2555 __scsi_format_command(cdb_buf, sizeof(cdb_buf),
2556 cdb, cdb_len);
2557 } else {
2558 const char *descr = ata_get_cmd_descript(cmd->command);
2559 if (descr)
2560 ata_dev_err(qc->dev, "failed command: %s\n",
2561 descr);
2562 }
2563
2564 ata_dev_err(qc->dev,
2565 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
2566 "tag %d%s\n %s"
2567 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
2568 "Emask 0x%x (%s)%s\n",
2569 cmd->command, cmd->feature, cmd->nsect,
2570 cmd->lbal, cmd->lbam, cmd->lbah,
2571 cmd->hob_feature, cmd->hob_nsect,
2572 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
2573 cmd->device, qc->tag, data_buf, cdb_buf,
2574 res->command, res->feature, res->nsect,
2575 res->lbal, res->lbam, res->lbah,
2576 res->hob_feature, res->hob_nsect,
2577 res->hob_lbal, res->hob_lbam, res->hob_lbah,
2578 res->device, qc->err_mask, ata_err_string(qc->err_mask),
2579 qc->err_mask & AC_ERR_NCQ ? " <F>" : "");
2580
2581#ifdef CONFIG_ATA_VERBOSE_ERROR
2582 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ |
2583 ATA_SENSE | ATA_ERR)) {
2584 if (res->command & ATA_BUSY)
2585 ata_dev_err(qc->dev, "status: { Busy }\n");
2586 else
2587 ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n",
2588 res->command & ATA_DRDY ? "DRDY " : "",
2589 res->command & ATA_DF ? "DF " : "",
2590 res->command & ATA_DRQ ? "DRQ " : "",
2591 res->command & ATA_SENSE ? "SENSE " : "",
2592 res->command & ATA_ERR ? "ERR " : "");
2593 }
2594
2595 if (cmd->command != ATA_CMD_PACKET &&
2596 (res->feature & (ATA_ICRC | ATA_UNC | ATA_AMNF |
2597 ATA_IDNF | ATA_ABORTED)))
2598 ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n",
2599 res->feature & ATA_ICRC ? "ICRC " : "",
2600 res->feature & ATA_UNC ? "UNC " : "",
2601 res->feature & ATA_AMNF ? "AMNF " : "",
2602 res->feature & ATA_IDNF ? "IDNF " : "",
2603 res->feature & ATA_ABORTED ? "ABRT " : "");
2604#endif
2605 }
2606}
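
/*
 * Sample report (illustrative values only), roughly as it would appear
 * in the kernel log for a timed-out NCQ read on a frozen port:
 *
 *	ata3.00: exception Emask 0x0 SAct 0x1 SErr 0x0 action 0x6 frozen
 *	ata3.00: failed command: READ FPDMA QUEUED
 *	ata3.00: cmd 60/08:00:00:08:00/00:00:00:00:00/40 tag 0 ncq dma 4096 in
 *	         res 40/00:00:00:00:00/00:00:00:00:00/00 Emask 0x4 (timeout)
 *	ata3.00: status: { DRDY }
 */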
2607
2608/**
2609 * ata_eh_report - report error handling to user
2610 * @ap: ATA port to report EH about
2611 *
2612 * Report EH to user.
2613 *
2614 * LOCKING:
2615 * None.
2616 */
2617void ata_eh_report(struct ata_port *ap)
2618{
2619 struct ata_link *link;
2620
2621 ata_for_each_link(link, ap, HOST_FIRST)
2622 ata_eh_link_report(link);
2623}
2624
2625static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset,
2626 unsigned int *classes, unsigned long deadline,
2627 bool clear_classes)
2628{
2629 struct ata_device *dev;
2630
2631 if (clear_classes)
2632 ata_for_each_dev(dev, link, ALL)
2633 classes[dev->devno] = ATA_DEV_UNKNOWN;
2634
2635 return reset(link, classes, deadline);
2636}
2637
2638static int ata_eh_followup_srst_needed(struct ata_link *link, int rc)
2639{
2640 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link))
2641 return 0;
2642 if (rc == -EAGAIN)
2643 return 1;
2644 if (sata_pmp_supported(link->ap) && ata_is_host_link(link))
2645 return 1;
2646 return 0;
2647}
2648
2649int ata_eh_reset(struct ata_link *link, int classify,
2650 ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
2651 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
2652{
2653 struct ata_port *ap = link->ap;
2654 struct ata_link *slave = ap->slave_link;
2655 struct ata_eh_context *ehc = &link->eh_context;
2656 struct ata_eh_context *sehc = slave ? &slave->eh_context : NULL;
2657 unsigned int *classes = ehc->classes;
2658 unsigned int lflags = link->flags;
2659 int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
2660 int max_tries = 0, try = 0;
2661 struct ata_link *failed_link;
2662 struct ata_device *dev;
2663 unsigned long deadline, now;
2664 ata_reset_fn_t reset;
2665 unsigned long flags;
2666 u32 sstatus;
2667 int nr_unknown, rc;
2668
2669 /*
2670 * Prepare to reset
2671 */
2672 while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX)
2673 max_tries++;
2674 if (link->flags & ATA_LFLAG_RST_ONCE)
2675 max_tries = 1;
2676 if (link->flags & ATA_LFLAG_NO_HRST)
2677 hardreset = NULL;
2678 if (link->flags & ATA_LFLAG_NO_SRST)
2679 softreset = NULL;
2680
2681 /* make sure each reset attempt is at least COOL_DOWN apart */
2682 if (ehc->i.flags & ATA_EHI_DID_RESET) {
2683 now = jiffies;
2684 WARN_ON(time_after(ehc->last_reset, now));
2685 deadline = ata_deadline(ehc->last_reset,
2686 ATA_EH_RESET_COOL_DOWN);
2687 if (time_before(now, deadline))
2688 schedule_timeout_uninterruptible(deadline - now);
2689 }
2690
2691 spin_lock_irqsave(ap->lock, flags);
2692 ap->pflags |= ATA_PFLAG_RESETTING;
2693 spin_unlock_irqrestore(ap->lock, flags);
2694
2695 ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
2696
2697 ata_for_each_dev(dev, link, ALL) {
2698 /* If we issue an SRST then an ATA drive (not ATAPI)
2699 * may change configuration and be in PIO0 timing. If
2700 * we do a hard reset (or are coming from power on)
2701 * this is true for ATA or ATAPI. Until we've set a
2702 * suitable controller mode we should not touch the
2703 * bus as we may be talking too fast.
2704 */
2705 dev->pio_mode = XFER_PIO_0;
2706 dev->dma_mode = 0xff;
2707
2708 /* If the controller has a pio mode setup function
2709 * then use it to set the chipset to rights. Don't
2710 * touch the DMA setup as that will be dealt with when
2711 * configuring devices.
2712 */
2713 if (ap->ops->set_piomode)
2714 ap->ops->set_piomode(ap, dev);
2715 }
2716
2717 /* prefer hardreset */
2718 reset = NULL;
2719 ehc->i.action &= ~ATA_EH_RESET;
2720 if (hardreset) {
2721 reset = hardreset;
2722 ehc->i.action |= ATA_EH_HARDRESET;
2723 } else if (softreset) {
2724 reset = softreset;
2725 ehc->i.action |= ATA_EH_SOFTRESET;
2726 }
2727
2728 if (prereset) {
2729 unsigned long deadline = ata_deadline(jiffies,
2730 ATA_EH_PRERESET_TIMEOUT);
2731
2732 if (slave) {
2733 sehc->i.action &= ~ATA_EH_RESET;
2734 sehc->i.action |= ehc->i.action;
2735 }
2736
2737 rc = prereset(link, deadline);
2738
2739 /* If present, do prereset on slave link too. Reset
2740 * is skipped iff both master and slave links report
2741 * -ENOENT or clear ATA_EH_RESET.
2742 */
2743 if (slave && (rc == 0 || rc == -ENOENT)) {
2744 int tmp;
2745
2746 tmp = prereset(slave, deadline);
2747 if (tmp != -ENOENT)
2748 rc = tmp;
2749
2750 ehc->i.action |= sehc->i.action;
2751 }
2752
2753 if (rc) {
2754 if (rc == -ENOENT) {
2755 ata_link_dbg(link, "port disabled--ignoring\n");
2756 ehc->i.action &= ~ATA_EH_RESET;
2757
2758 ata_for_each_dev(dev, link, ALL)
2759 classes[dev->devno] = ATA_DEV_NONE;
2760
2761 rc = 0;
2762 } else
2763 ata_link_err(link,
2764 "prereset failed (errno=%d)\n",
2765 rc);
2766 goto out;
2767 }
2768
2769 /* prereset() might have cleared ATA_EH_RESET. If so,
2770 * bang classes, thaw and return.
2771 */
2772 if (reset && !(ehc->i.action & ATA_EH_RESET)) {
2773 ata_for_each_dev(dev, link, ALL)
2774 classes[dev->devno] = ATA_DEV_NONE;
2775 if ((ap->pflags & ATA_PFLAG_FROZEN) &&
2776 ata_is_host_link(link))
2777 ata_eh_thaw_port(ap);
2778 rc = 0;
2779 goto out;
2780 }
2781 }
2782
2783 retry:
2784 /*
2785 * Perform reset
2786 */
2787 if (ata_is_host_link(link))
2788 ata_eh_freeze_port(ap);
2789
2790 deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]);
2791
2792 if (reset) {
2793 if (verbose)
2794 ata_link_info(link, "%s resetting link\n",
2795 reset == softreset ? "soft" : "hard");
2796
2797 /* mark that this EH session started with reset */
2798 ehc->last_reset = jiffies;
2799 if (reset == hardreset)
2800 ehc->i.flags |= ATA_EHI_DID_HARDRESET;
2801 else
2802 ehc->i.flags |= ATA_EHI_DID_SOFTRESET;
2803
2804 rc = ata_do_reset(link, reset, classes, deadline, true);
2805 if (rc && rc != -EAGAIN) {
2806 failed_link = link;
2807 goto fail;
2808 }
2809
2810 /* hardreset slave link if existent */
2811 if (slave && reset == hardreset) {
2812 int tmp;
2813
2814 if (verbose)
2815 ata_link_info(slave, "hard resetting link\n");
2816
2817 ata_eh_about_to_do(slave, NULL, ATA_EH_RESET);
2818 tmp = ata_do_reset(slave, reset, classes, deadline,
2819 false);
2820 switch (tmp) {
2821 case -EAGAIN:
				rc = -EAGAIN;
				/* fall through */
2823 case 0:
2824 break;
2825 default:
2826 failed_link = slave;
2827 rc = tmp;
2828 goto fail;
2829 }
2830 }
2831
2832 /* perform follow-up SRST if necessary */
2833 if (reset == hardreset &&
2834 ata_eh_followup_srst_needed(link, rc)) {
2835 reset = softreset;
2836
2837 if (!reset) {
2838 ata_link_err(link,
2839 "follow-up softreset required but no softreset available\n");
2840 failed_link = link;
2841 rc = -EINVAL;
2842 goto fail;
2843 }
2844
2845 ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
2846 rc = ata_do_reset(link, reset, classes, deadline, true);
2847 if (rc) {
2848 failed_link = link;
2849 goto fail;
2850 }
2851 }
2852 } else {
2853 if (verbose)
2854 ata_link_info(link,
2855 "no reset method available, skipping reset\n");
2856 if (!(lflags & ATA_LFLAG_ASSUME_CLASS))
2857 lflags |= ATA_LFLAG_ASSUME_ATA;
2858 }
2859
2860 /*
2861 * Post-reset processing
2862 */
2863 ata_for_each_dev(dev, link, ALL) {
2864 /* After the reset, the device state is PIO 0 and the
2865 * controller state is undefined. Reset also wakes up
2866 * drives from sleeping mode.
2867 */
2868 dev->pio_mode = XFER_PIO_0;
2869 dev->flags &= ~ATA_DFLAG_SLEEPING;
2870
2871 if (ata_phys_link_offline(ata_dev_phys_link(dev)))
2872 continue;
2873
2874 /* apply class override */
2875 if (lflags & ATA_LFLAG_ASSUME_ATA)
2876 classes[dev->devno] = ATA_DEV_ATA;
2877 else if (lflags & ATA_LFLAG_ASSUME_SEMB)
2878 classes[dev->devno] = ATA_DEV_SEMB_UNSUP;
2879 }
2880
2881 /* record current link speed */
2882 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0)
2883 link->sata_spd = (sstatus >> 4) & 0xf;
2884 if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0)
2885 slave->sata_spd = (sstatus >> 4) & 0xf;
2886
2887 /* thaw the port */
2888 if (ata_is_host_link(link))
2889 ata_eh_thaw_port(ap);
2890
2891 /* postreset() should clear hardware SError. Although SError
2892 * is cleared during link resume, clearing SError here is
2893 * necessary as some PHYs raise hotplug events after SRST.
	 * This introduces a race condition where hotplug occurs between
	 * reset and here.  This race is mitigated by cross-checking
	 * link onlineness and the classification result later.
2897 */
2898 if (postreset) {
2899 postreset(link, classes);
2900 if (slave)
2901 postreset(slave, classes);
2902 }
2903
2904 /* clear cached SError */
2905 spin_lock_irqsave(link->ap->lock, flags);
2906 link->eh_info.serror = 0;
2907 if (slave)
2908 slave->eh_info.serror = 0;
2909 spin_unlock_irqrestore(link->ap->lock, flags);
2910
2911 if (ap->pflags & ATA_PFLAG_FROZEN)
2912 ata_eh_thaw_port(ap);
2913
2914 /*
2915 * Make sure onlineness and classification result correspond.
	 * Hotplug could have happened during reset, and some
	 * controllers fail to wait while a drive is spinning up after
	 * being hotplugged, causing misdetection.  By cross-checking
2919 * link on/offlineness and classification result, those
2920 * conditions can be reliably detected and retried.
2921 */
2922 nr_unknown = 0;
2923 ata_for_each_dev(dev, link, ALL) {
2924 if (ata_phys_link_online(ata_dev_phys_link(dev))) {
2925 if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
2926 ata_dev_dbg(dev, "link online but device misclassified\n");
2927 classes[dev->devno] = ATA_DEV_NONE;
2928 nr_unknown++;
2929 }
2930 } else if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
2931 if (ata_class_enabled(classes[dev->devno]))
2932 ata_dev_dbg(dev,
2933 "link offline, clearing class %d to NONE\n",
2934 classes[dev->devno]);
2935 classes[dev->devno] = ATA_DEV_NONE;
2936 } else if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
2937 ata_dev_dbg(dev,
2938 "link status unknown, clearing UNKNOWN to NONE\n");
2939 classes[dev->devno] = ATA_DEV_NONE;
2940 }
2941 }
2942
2943 if (classify && nr_unknown) {
2944 if (try < max_tries) {
2945 ata_link_warn(link,
2946 "link online but %d devices misclassified, retrying\n",
2947 nr_unknown);
2948 failed_link = link;
2949 rc = -EAGAIN;
2950 goto fail;
2951 }
2952 ata_link_warn(link,
2953 "link online but %d devices misclassified, "
2954 "device detection might fail\n", nr_unknown);
2955 }
2956
2957 /* reset successful, schedule revalidation */
2958 ata_eh_done(link, NULL, ATA_EH_RESET);
2959 if (slave)
2960 ata_eh_done(slave, NULL, ATA_EH_RESET);
2961 ehc->last_reset = jiffies; /* update to completion time */
2962 ehc->i.action |= ATA_EH_REVALIDATE;
2963 link->lpm_policy = ATA_LPM_UNKNOWN; /* reset LPM state */
2964
2965 rc = 0;
2966 out:
2967 /* clear hotplug flag */
2968 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
2969 if (slave)
2970 sehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
2971
2972 spin_lock_irqsave(ap->lock, flags);
2973 ap->pflags &= ~ATA_PFLAG_RESETTING;
2974 spin_unlock_irqrestore(ap->lock, flags);
2975
2976 return rc;
2977
2978 fail:
2979 /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */
2980 if (!ata_is_host_link(link) &&
2981 sata_scr_read(link, SCR_STATUS, &sstatus))
2982 rc = -ERESTART;
2983
2984 if (try >= max_tries) {
2985 /*
2986 * Thaw host port even if reset failed, so that the port
2987 * can be retried on the next phy event. This risks
2988 * repeated EH runs but seems to be a better tradeoff than
2989 * shutting down a port after a botched hotplug attempt.
2990 */
2991 if (ata_is_host_link(link))
2992 ata_eh_thaw_port(ap);
2993 goto out;
2994 }
2995
2996 now = jiffies;
2997 if (time_before(now, deadline)) {
2998 unsigned long delta = deadline - now;
2999
3000 ata_link_warn(failed_link,
3001 "reset failed (errno=%d), retrying in %u secs\n",
3002 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));
3003
3004 ata_eh_release(ap);
3005 while (delta)
3006 delta = schedule_timeout_uninterruptible(delta);
3007 ata_eh_acquire(ap);
3008 }
3009
3010 /*
	 * While disks spin up behind a PMP, some controllers fail to send SRST.
3012 * They need to be reset - as well as the PMP - before retrying.
3013 */
3014 if (rc == -ERESTART) {
3015 if (ata_is_host_link(link))
3016 ata_eh_thaw_port(ap);
3017 goto out;
3018 }
3019
3020 if (try == max_tries - 1) {
3021 sata_down_spd_limit(link, 0);
3022 if (slave)
3023 sata_down_spd_limit(slave, 0);
3024 } else if (rc == -EPIPE)
3025 sata_down_spd_limit(failed_link, 0);
3026
3027 if (hardreset)
3028 reset = hardreset;
3029 goto retry;
3030}
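
/*
 * Retry cadence sketch (derived from ata_eh_reset_timeouts; actual
 * wall-clock times shift with cool-down waits and controller delays):
 * a stubborn link gets a first reset with a 10s deadline, a second
 * with 10s, a third with 35s and a final one with 5s, with at least
 * ATA_EH_RESET_COOL_DOWN (5s) enforced between attempts, before
 * ata_eh_reset() gives up and fails the link.
 */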
3031
3032static inline void ata_eh_pull_park_action(struct ata_port *ap)
3033{
3034 struct ata_link *link;
3035 struct ata_device *dev;
3036 unsigned long flags;
3037
3038 /*
3039 * This function can be thought of as an extended version of
3040 * ata_eh_about_to_do() specially crafted to accommodate the
3041 * requirements of ATA_EH_PARK handling. Since the EH thread
3042 * does not leave the do {} while () loop in ata_eh_recover as
3043 * long as the timeout for a park request to *one* device on
3044 * the port has not expired, and since we still want to pick
3045 * up park requests to other devices on the same port or
3046 * timeout updates for the same device, we have to pull
3047 * ATA_EH_PARK actions from eh_info into eh_context.i
3048 * ourselves at the beginning of each pass over the loop.
3049 *
3050 * Additionally, all write accesses to &ap->park_req_pending
3051 * through reinit_completion() (see below) or complete_all()
3052 * (see ata_scsi_park_store()) are protected by the host lock.
3053 * As a result we have that park_req_pending.done is zero on
3054 * exit from this function, i.e. when ATA_EH_PARK actions for
3055 * *all* devices on port ap have been pulled into the
3056 * respective eh_context structs. If, and only if,
3057 * park_req_pending.done is non-zero by the time we reach
3058 * wait_for_completion_timeout(), another ATA_EH_PARK action
3059 * has been scheduled for at least one of the devices on port
3060 * ap and we have to cycle over the do {} while () loop in
3061 * ata_eh_recover() again.
3062 */
3063
3064 spin_lock_irqsave(ap->lock, flags);
3065 reinit_completion(&ap->park_req_pending);
3066 ata_for_each_link(link, ap, EDGE) {
3067 ata_for_each_dev(dev, link, ALL) {
3068 struct ata_eh_info *ehi = &link->eh_info;
3069
3070 link->eh_context.i.dev_action[dev->devno] |=
3071 ehi->dev_action[dev->devno] & ATA_EH_PARK;
3072 ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK);
3073 }
3074 }
3075 spin_unlock_irqrestore(ap->lock, flags);
3076}
3077
3078static void ata_eh_park_issue_cmd(struct ata_device *dev, int park)
3079{
3080 struct ata_eh_context *ehc = &dev->link->eh_context;
3081 struct ata_taskfile tf;
3082 unsigned int err_mask;
3083
3084 ata_tf_init(dev, &tf);
3085 if (park) {
3086 ehc->unloaded_mask |= 1 << dev->devno;
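		/* IDLE IMMEDIATE with UNLOAD FEATURE (assumed per ACS):
		 * feature 0x44 with LBA 0x554e4c ("UNL" in ASCII) asks
		 * the drive to park its heads; a successful unload is
		 * acknowledged with lbal == 0xc4, checked below.
		 */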
3087 tf.command = ATA_CMD_IDLEIMMEDIATE;
3088 tf.feature = 0x44;
3089 tf.lbal = 0x4c;
3090 tf.lbam = 0x4e;
3091 tf.lbah = 0x55;
3092 } else {
3093 ehc->unloaded_mask &= ~(1 << dev->devno);
3094 tf.command = ATA_CMD_CHK_POWER;
3095 }
3096
3097 tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
3098 tf.protocol = ATA_PROT_NODATA;
3099 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
3100 if (park && (err_mask || tf.lbal != 0xc4)) {
3101 ata_dev_err(dev, "head unload failed!\n");
3102 ehc->unloaded_mask &= ~(1 << dev->devno);
3103 }
3104}
3105
3106static int ata_eh_revalidate_and_attach(struct ata_link *link,
3107 struct ata_device **r_failed_dev)
3108{
3109 struct ata_port *ap = link->ap;
3110 struct ata_eh_context *ehc = &link->eh_context;
3111 struct ata_device *dev;
3112 unsigned int new_mask = 0;
3113 unsigned long flags;
3114 int rc = 0;
3115
3116 DPRINTK("ENTER\n");
3117
3118 /* For PATA drive side cable detection to work, IDENTIFY must
3119 * be done backwards such that PDIAG- is released by the slave
3120 * device before the master device is identified.
3121 */
3122 ata_for_each_dev(dev, link, ALL_REVERSE) {
3123 unsigned int action = ata_eh_dev_action(dev);
3124 unsigned int readid_flags = 0;
3125
3126 if (ehc->i.flags & ATA_EHI_DID_RESET)
3127 readid_flags |= ATA_READID_POSTRESET;
3128
3129 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) {
3130 WARN_ON(dev->class == ATA_DEV_PMP);
3131
3132 if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
3133 rc = -EIO;
3134 goto err;
3135 }
3136
3137 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE);
3138 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno],
3139 readid_flags);
3140 if (rc)
3141 goto err;
3142
3143 ata_eh_done(link, dev, ATA_EH_REVALIDATE);
3144
3145 /* Configuration may have changed, reconfigure
3146 * transfer mode.
3147 */
3148 ehc->i.flags |= ATA_EHI_SETMODE;
3149
3150 /* schedule the scsi_rescan_device() here */
3151 schedule_work(&(ap->scsi_rescan_task));
3152 } else if (dev->class == ATA_DEV_UNKNOWN &&
3153 ehc->tries[dev->devno] &&
3154 ata_class_enabled(ehc->classes[dev->devno])) {
			/* Temporarily set dev->class; it will be
			 * set permanently once all configurations are
			 * complete.  This is necessary because new
3158 * device configuration is done in two
3159 * separate loops.
3160 */
3161 dev->class = ehc->classes[dev->devno];
3162
3163 if (dev->class == ATA_DEV_PMP)
3164 rc = sata_pmp_attach(dev);
3165 else
3166 rc = ata_dev_read_id(dev, &dev->class,
3167 readid_flags, dev->id);
3168
3169 /* read_id might have changed class, store and reset */
3170 ehc->classes[dev->devno] = dev->class;
3171 dev->class = ATA_DEV_UNKNOWN;
3172
3173 switch (rc) {
3174 case 0:
3175 /* clear error info accumulated during probe */
3176 ata_ering_clear(&dev->ering);
3177 new_mask |= 1 << dev->devno;
3178 break;
3179 case -ENOENT:
3180 /* IDENTIFY was issued to non-existent
3181 * device. No need to reset. Just
3182 * thaw and ignore the device.
3183 */
3184 ata_eh_thaw_port(ap);
3185 break;
3186 default:
3187 goto err;
3188 }
3189 }
3190 }
3191
3192 /* PDIAG- should have been released, ask cable type if post-reset */
3193 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) {
3194 if (ap->ops->cable_detect)
3195 ap->cbl = ap->ops->cable_detect(ap);
3196 ata_force_cbl(ap);
3197 }
3198
	/* Configure new devices forward such that the user doesn't see
3200 * device detection messages backwards.
3201 */
3202 ata_for_each_dev(dev, link, ALL) {
3203 if (!(new_mask & (1 << dev->devno)))
3204 continue;
3205
3206 dev->class = ehc->classes[dev->devno];
3207
3208 if (dev->class == ATA_DEV_PMP)
3209 continue;
3210
3211 ehc->i.flags |= ATA_EHI_PRINTINFO;
3212 rc = ata_dev_configure(dev);
3213 ehc->i.flags &= ~ATA_EHI_PRINTINFO;
3214 if (rc) {
3215 dev->class = ATA_DEV_UNKNOWN;
3216 goto err;
3217 }
3218
3219 spin_lock_irqsave(ap->lock, flags);
3220 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
3221 spin_unlock_irqrestore(ap->lock, flags);
3222
3223 /* new device discovered, configure xfermode */
3224 ehc->i.flags |= ATA_EHI_SETMODE;
3225 }
3226
3227 return 0;
3228
3229 err:
3230 *r_failed_dev = dev;
3231 DPRINTK("EXIT rc=%d\n", rc);
3232 return rc;
3233}
3234
3235/**
3236 * ata_set_mode - Program timings and issue SET FEATURES - XFER
3237 * @link: link on which timings will be programmed
3238 * @r_failed_dev: out parameter for failed device
3239 *
3240 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If
3241 * ata_set_mode() fails, pointer to the failing device is
3242 * returned in @r_failed_dev.
3243 *
3244 * LOCKING:
3245 * PCI/etc. bus probe sem.
3246 *
3247 * RETURNS:
3248 * 0 on success, negative errno otherwise
3249 */
3250int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
3251{
3252 struct ata_port *ap = link->ap;
3253 struct ata_device *dev;
3254 int rc;
3255
3256 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */
3257 ata_for_each_dev(dev, link, ENABLED) {
3258 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) {
3259 struct ata_ering_entry *ent;
3260
3261 ent = ata_ering_top(&dev->ering);
3262 if (ent)
3263 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER;
3264 }
3265 }
3266
3267 /* has private set_mode? */
3268 if (ap->ops->set_mode)
3269 rc = ap->ops->set_mode(link, r_failed_dev);
3270 else
3271 rc = ata_do_set_mode(link, r_failed_dev);
3272
3273 /* if transfer mode has changed, set DUBIOUS_XFER on device */
3274 ata_for_each_dev(dev, link, ENABLED) {
3275 struct ata_eh_context *ehc = &link->eh_context;
3276 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno];
3277 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno));
3278
3279 if (dev->xfer_mode != saved_xfer_mode ||
3280 ata_ncq_enabled(dev) != saved_ncq)
3281 dev->flags |= ATA_DFLAG_DUBIOUS_XFER;
3282 }
3283
3284 return rc;
3285}
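
/*
 * DUBIOUS_XFER lifecycle sketch (assumed flow): ata_set_mode() flags a
 * device whose transfer mode or NCQ setting just changed; errors that
 * occur while the flag is set are recorded with ATA_EFLAG_DUBIOUS_XFER
 * and land in the ECAT_DUBIOUS_* buckets; and the flag is cleared once
 * a data transfer completes successfully, which is why the first loop
 * above also drops ATA_EFLAG_DUBIOUS_XFER from the newest ering entry.
 */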
3286
3287/**
3288 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset
3289 * @dev: ATAPI device to clear UA for
3290 *
3291 * Resets and other operations can make an ATAPI device raise
3292 * UNIT ATTENTION which causes the next operation to fail. This
3293 * function clears UA.
3294 *
3295 * LOCKING:
3296 * EH context (may sleep).
3297 *
3298 * RETURNS:
3299 * 0 on success, -errno on failure.
3300 */
3301static int atapi_eh_clear_ua(struct ata_device *dev)
3302{
3303 int i;
3304
3305 for (i = 0; i < ATA_EH_UA_TRIES; i++) {
3306 u8 *sense_buffer = dev->link->ap->sector_buf;
3307 u8 sense_key = 0;
3308 unsigned int err_mask;
3309
3310 err_mask = atapi_eh_tur(dev, &sense_key);
3311 if (err_mask != 0 && err_mask != AC_ERR_DEV) {
3312 ata_dev_warn(dev,
3313 "TEST_UNIT_READY failed (err_mask=0x%x)\n",
3314 err_mask);
3315 return -EIO;
3316 }
3317
3318 if (!err_mask || sense_key != UNIT_ATTENTION)
3319 return 0;
3320
3321 err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key);
3322 if (err_mask) {
3323 ata_dev_warn(dev, "failed to clear "
3324 "UNIT ATTENTION (err_mask=0x%x)\n", err_mask);
3325 return -EIO;
3326 }
3327 }
3328
3329 ata_dev_warn(dev, "UNIT ATTENTION persists after %d tries\n",
3330 ATA_EH_UA_TRIES);
3331
3332 return 0;
3333}
3334
3335/**
3336 * ata_eh_maybe_retry_flush - Retry FLUSH if necessary
3337 * @dev: ATA device which may need FLUSH retry
3338 *
 *	If @dev failed FLUSH, it needs to be reported to the upper layer
 *	immediately as it means that @dev failed to remap and has already
 *	lost at least a sector, so further FLUSH retries won't make any
 *	difference to the lost sector.  However, if FLUSH failed for some
 *	other reason, for example a transmission error, it needs to be
 *	retried.
3345 *
3346 * This function determines whether FLUSH failure retry is
3347 * necessary and performs it if so.
3348 *
3349 * RETURNS:
3350 * 0 if EH can continue, -errno if EH needs to be repeated.
3351 */
3352static int ata_eh_maybe_retry_flush(struct ata_device *dev)
3353{
3354 struct ata_link *link = dev->link;
3355 struct ata_port *ap = link->ap;
3356 struct ata_queued_cmd *qc;
3357 struct ata_taskfile tf;
3358 unsigned int err_mask;
3359 int rc = 0;
3360
3361 /* did flush fail for this device? */
3362 if (!ata_tag_valid(link->active_tag))
3363 return 0;
3364
3365 qc = __ata_qc_from_tag(ap, link->active_tag);
3366 if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT &&
3367 qc->tf.command != ATA_CMD_FLUSH))
3368 return 0;
3369
3370 /* if the device failed it, it should be reported to upper layers */
3371 if (qc->err_mask & AC_ERR_DEV)
3372 return 0;
3373
3374 /* flush failed for some other reason, give it another shot */
3375 ata_tf_init(dev, &tf);
3376
3377 tf.command = qc->tf.command;
3378 tf.flags |= ATA_TFLAG_DEVICE;
3379 tf.protocol = ATA_PROT_NODATA;
3380
3381 ata_dev_warn(dev, "retrying FLUSH 0x%x Emask 0x%x\n",
3382 tf.command, qc->err_mask);
3383
3384 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
3385 if (!err_mask) {
3386 /*
3387 * FLUSH is complete but there's no way to
3388 * successfully complete a failed command from EH.
3389 * Making sure retry is allowed at least once and
3390 * retrying it should do the trick - whatever was in
3391 * the cache is already on the platter and this won't
		 * cause an infinite loop.
3393 */
3394 qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1);
3395 } else {
3396 ata_dev_warn(dev, "FLUSH failed Emask 0x%x\n",
3397 err_mask);
3398 rc = -EIO;
3399
3400 /* if device failed it, report it to upper layers */
3401 if (err_mask & AC_ERR_DEV) {
3402 qc->err_mask |= AC_ERR_DEV;
3403 qc->result_tf = tf;
3404 if (!(ap->pflags & ATA_PFLAG_FROZEN))
3405 rc = 0;
3406 }
3407 }
3408 return rc;
3409}
3410
3411/**
3412 * ata_eh_set_lpm - configure SATA interface power management
3413 * @link: link to configure power management
3414 * @policy: the link power management policy
3415 * @r_failed_dev: out parameter for failed device
3416 *
3417 * Enable SATA Interface power management. This will enable
3418 * Device Interface Power Management (DIPM) for min_power and
3419 * medium_power_with_dipm policies, and then call driver specific
3420 * callbacks for enabling Host Initiated Power management.
3421 *
3422 * LOCKING:
3423 * EH context.
3424 *
3425 * RETURNS:
3426 * 0 on success, -errno on failure.
3427 */
3428static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
3429 struct ata_device **r_failed_dev)
3430{
3431 struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL;
3432 struct ata_eh_context *ehc = &link->eh_context;
3433 struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL;
3434 enum ata_lpm_policy old_policy = link->lpm_policy;
3435 bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM;
3436 unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM;
3437 unsigned int err_mask;
3438 int rc;
3439
3440 /* if the link or host doesn't do LPM, noop */
3441 if ((link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm))
3442 return 0;
3443
3444 /*
	 * DIPM is enabled only for ATA_LPM_MED_POWER_WITH_DIPM and
	 * deeper policies as some devices misbehave when the host
	 * NACKs transition to SLUMBER.  Order device and link
	 * configurations such that the host always allows DIPM
	 * requests.
3449 */
3450 ata_for_each_dev(dev, link, ENABLED) {
3451 bool hipm = ata_id_has_hipm(dev->id);
3452 bool dipm = ata_id_has_dipm(dev->id) && !no_dipm;
3453
		/* find the first enabled device and the first LPM-enabled device */
3455 if (!link_dev)
3456 link_dev = dev;
3457
3458 if (!lpm_dev && (hipm || dipm))
3459 lpm_dev = dev;
3460
3461 hints &= ~ATA_LPM_EMPTY;
3462 if (!hipm)
3463 hints &= ~ATA_LPM_HIPM;
3464
3465 /* disable DIPM before changing link config */
3466 if (policy < ATA_LPM_MED_POWER_WITH_DIPM && dipm) {
3467 err_mask = ata_dev_set_feature(dev,
3468 SETFEATURES_SATA_DISABLE, SATA_DIPM);
3469 if (err_mask && err_mask != AC_ERR_DEV) {
3470 ata_dev_warn(dev,
3471 "failed to disable DIPM, Emask 0x%x\n",
3472 err_mask);
3473 rc = -EIO;
3474 goto fail;
3475 }
3476 }
3477 }
3478
3479 if (ap) {
3480 rc = ap->ops->set_lpm(link, policy, hints);
3481 if (!rc && ap->slave_link)
3482 rc = ap->ops->set_lpm(ap->slave_link, policy, hints);
3483 } else
3484 rc = sata_pmp_set_lpm(link, policy, hints);
3485
3486 /*
3487 * Attribute link config failure to the first (LPM) enabled
3488 * device on the link.
3489 */
3490 if (rc) {
3491 if (rc == -EOPNOTSUPP) {
3492 link->flags |= ATA_LFLAG_NO_LPM;
3493 return 0;
3494 }
3495 dev = lpm_dev ? lpm_dev : link_dev;
3496 goto fail;
3497 }
3498
3499 /*
3500 * Low level driver acked the transition. Issue DIPM command
3501 * with the new policy set.
3502 */
3503 link->lpm_policy = policy;
3504 if (ap && ap->slave_link)
3505 ap->slave_link->lpm_policy = policy;
3506
3507 /* host config updated, enable DIPM if transitioning to MIN_POWER */
3508 ata_for_each_dev(dev, link, ENABLED) {
3509 if (policy >= ATA_LPM_MED_POWER_WITH_DIPM && !no_dipm &&
3510 ata_id_has_dipm(dev->id)) {
3511 err_mask = ata_dev_set_feature(dev,
3512 SETFEATURES_SATA_ENABLE, SATA_DIPM);
3513 if (err_mask && err_mask != AC_ERR_DEV) {
3514 ata_dev_warn(dev,
3515 "failed to enable DIPM, Emask 0x%x\n",
3516 err_mask);
3517 rc = -EIO;
3518 goto fail;
3519 }
3520 }
3521 }
3522
3523 link->last_lpm_change = jiffies;
3524 link->flags |= ATA_LFLAG_CHANGED;
3525
3526 return 0;
3527
3528fail:
3529 /* restore the old policy */
3530 link->lpm_policy = old_policy;
3531 if (ap && ap->slave_link)
3532 ap->slave_link->lpm_policy = old_policy;
3533
3534 /* if no device or only one more chance is left, disable LPM */
3535 if (!dev || ehc->tries[dev->devno] <= 2) {
3536 ata_link_warn(link, "disabling LPM on the link\n");
3537 link->flags |= ATA_LFLAG_NO_LPM;
3538 }
3539 if (r_failed_dev)
3540 *r_failed_dev = dev;
3541 return rc;
3542}
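
/*
 * Ordering sketch: when moving to a policy shallower than
 * ATA_LPM_MED_POWER_WITH_DIPM, device DIPM is disabled before the host
 * link configuration changes; when moving to MED_POWER_WITH_DIPM or
 * deeper, DIPM is enabled only after ->set_lpm() has succeeded.
 * Either way the host side is configured to grant DIPM requests for as
 * long as the device may issue them, which is the ordering the comment
 * above calls out.
 */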
3543
3544int ata_link_nr_enabled(struct ata_link *link)
3545{
3546 struct ata_device *dev;
3547 int cnt = 0;
3548
3549 ata_for_each_dev(dev, link, ENABLED)
3550 cnt++;
3551 return cnt;
3552}
3553
3554static int ata_link_nr_vacant(struct ata_link *link)
3555{
3556 struct ata_device *dev;
3557 int cnt = 0;
3558
3559 ata_for_each_dev(dev, link, ALL)
3560 if (dev->class == ATA_DEV_UNKNOWN)
3561 cnt++;
3562 return cnt;
3563}
3564
3565static int ata_eh_skip_recovery(struct ata_link *link)
3566{
3567 struct ata_port *ap = link->ap;
3568 struct ata_eh_context *ehc = &link->eh_context;
3569 struct ata_device *dev;
3570
3571 /* skip disabled links */
3572 if (link->flags & ATA_LFLAG_DISABLED)
3573 return 1;
3574
3575 /* skip if explicitly requested */
3576 if (ehc->i.flags & ATA_EHI_NO_RECOVERY)
3577 return 1;
3578
3579 /* thaw frozen port and recover failed devices */
3580 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link))
3581 return 0;
3582
3583 /* reset at least once if reset is requested */
3584 if ((ehc->i.action & ATA_EH_RESET) &&
3585 !(ehc->i.flags & ATA_EHI_DID_RESET))
3586 return 0;
3587
3588 /* skip if class codes for all vacant slots are ATA_DEV_NONE */
3589 ata_for_each_dev(dev, link, ALL) {
3590 if (dev->class == ATA_DEV_UNKNOWN &&
3591 ehc->classes[dev->devno] != ATA_DEV_NONE)
3592 return 0;
3593 }
3594
3595 return 1;
3596}
3597
3598static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg)
3599{
3600 u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL);
3601 u64 now = get_jiffies_64();
3602 int *trials = void_arg;
3603
3604 if ((ent->eflags & ATA_EFLAG_OLD_ER) ||
3605 (ent->timestamp < now - min(now, interval)))
3606 return -1;
3607
3608 (*trials)++;
3609 return 0;
3610}
3611
3612static int ata_eh_schedule_probe(struct ata_device *dev)
3613{
3614 struct ata_eh_context *ehc = &dev->link->eh_context;
3615 struct ata_link *link = ata_dev_phys_link(dev);
3616 int trials = 0;
3617
3618 if (!(ehc->i.probe_mask & (1 << dev->devno)) ||
3619 (ehc->did_probe_mask & (1 << dev->devno)))
3620 return 0;
3621
3622 ata_eh_detach_dev(dev);
3623 ata_dev_init(dev);
3624 ehc->did_probe_mask |= (1 << dev->devno);
3625 ehc->i.action |= ATA_EH_RESET;
3626 ehc->saved_xfer_mode[dev->devno] = 0;
3627 ehc->saved_ncq_enabled &= ~(1 << dev->devno);
3628
	/* the link may be in a deep sleep, wake it up */
3630 if (link->lpm_policy > ATA_LPM_MAX_POWER) {
3631 if (ata_is_host_link(link))
3632 link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER,
3633 ATA_LPM_EMPTY);
3634 else
3635 sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER,
3636 ATA_LPM_EMPTY);
3637 }
3638
3639 /* Record and count probe trials on the ering. The specific
3640 * error mask used is irrelevant. Because a successful device
3641 * detection clears the ering, this count accumulates only if
3642 * there are consecutive failed probes.
3643 *
	 * If the count exceeds ATA_EH_PROBE_TRIALS within the last
	 * ATA_EH_PROBE_TRIAL_INTERVAL, the link speed is forced down
	 * to 1.5Gbps.
3647 *
3648 * This is to work around cases where failed link speed
3649 * negotiation results in device misdetection leading to
3650 * infinite DEVXCHG or PHRDY CHG events.
3651 */
3652 ata_ering_record(&dev->ering, 0, AC_ERR_OTHER);
3653 ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials);
3654
3655 if (trials > ATA_EH_PROBE_TRIALS)
3656 sata_down_spd_limit(link, 1);
3657
3658 return 1;
3659}
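
/*
 * Illustrative timeline: each failed probe records one entry on the
 * ering, so a device that fails detection three times within
 * ATA_EH_PROBE_TRIAL_INTERVAL (one minute) pushes trials above
 * ATA_EH_PROBE_TRIALS (2), and sata_down_spd_limit(link, 1) then pins
 * the link at 1.5Gbps to break the DEVXCHG/PHY-event loop described
 * above.
 */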
3660
3661static int ata_eh_handle_dev_fail(struct ata_device *dev, int err)
3662{
3663 struct ata_eh_context *ehc = &dev->link->eh_context;
3664
3665 /* -EAGAIN from EH routine indicates retry without prejudice.
3666 * The requester is responsible for ensuring forward progress.
3667 */
3668 if (err != -EAGAIN)
3669 ehc->tries[dev->devno]--;
3670
3671 switch (err) {
3672 case -ENODEV:
3673 /* device missing or wrong IDENTIFY data, schedule probing */
3674 ehc->i.probe_mask |= (1 << dev->devno);
3675 /* fall through */
3676 case -EINVAL:
3677 /* give it just one more chance */
3678 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
3679 /* fall through */
3680 case -EIO:
3681 if (ehc->tries[dev->devno] == 1) {
3682 /* This is the last chance, better to slow
3683 * down than lose it.
3684 */
3685 sata_down_spd_limit(ata_dev_phys_link(dev), 0);
3686 if (dev->pio_mode > XFER_PIO_0)
3687 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
3688 }
3689 }
3690
3691 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {
3692 /* disable device if it has used up all its chances */
3693 ata_dev_disable(dev);
3694
3695 /* detach if offline */
3696 if (ata_phys_link_offline(ata_dev_phys_link(dev)))
3697 ata_eh_detach_dev(dev);
3698
3699 /* schedule probe if necessary */
3700 if (ata_eh_schedule_probe(dev)) {
3701 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
3702 memset(ehc->cmd_timeout_idx[dev->devno], 0,
3703 sizeof(ehc->cmd_timeout_idx[dev->devno]));
3704 }
3705
3706 return 1;
3707 } else {
3708 ehc->i.action |= ATA_EH_RESET;
3709 return 0;
3710 }
3711}
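
/*
 * Example (illustrative, assuming the default ATA_EH_DEV_TRIES of 3):
 * a device that keeps failing revalidation with -EIO loses one try per
 * failure, gets its link speed and transfer mode lowered on its last
 * try, and is disabled once the tries reach zero - at which point it
 * is either detached (if the link is offline) or handed a fresh probe
 * with a reset tries budget via ata_eh_schedule_probe().
 */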
3712
3713/**
3714 * ata_eh_recover - recover host port after error
3715 * @ap: host port to recover
3716 * @prereset: prereset method (can be NULL)
3717 * @softreset: softreset method (can be NULL)
3718 * @hardreset: hardreset method (can be NULL)
3719 * @postreset: postreset method (can be NULL)
3720 * @r_failed_link: out parameter for failed link
3721 *
 *	This is the alpha and omega, yin and yang, heart and soul of
3723 * libata exception handling. On entry, actions required to
3724 * recover each link and hotplug requests are recorded in the
3725 * link's eh_context. This function executes all the operations
 *	with appropriate retries and fallbacks to resurrect failed
3727 * devices, detach goners and greet newcomers.
3728 *
3729 * LOCKING:
3730 * Kernel thread context (may sleep).
3731 *
3732 * RETURNS:
3733 * 0 on success, -errno on failure.
3734 */
int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
		   ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
		   ata_postreset_fn_t postreset,
		   struct ata_link **r_failed_link)
{
	struct ata_link *link;
	struct ata_device *dev;
	int rc, nr_fails;
	unsigned long flags, deadline;

	DPRINTK("ENTER\n");

	/* prep for recovery */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* re-enable link? */
		if (ehc->i.action & ATA_EH_ENABLE_LINK) {
			ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK);
			spin_lock_irqsave(ap->lock, flags);
			link->flags &= ~ATA_LFLAG_DISABLED;
			spin_unlock_irqrestore(ap->lock, flags);
			ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK);
		}

		ata_for_each_dev(dev, link, ALL) {
			if (link->flags & ATA_LFLAG_NO_RETRY)
				ehc->tries[dev->devno] = 1;
			else
				ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;

			/* collect port action mask recorded in dev actions */
			ehc->i.action |= ehc->i.dev_action[dev->devno] &
					 ~ATA_EH_PERDEV_MASK;
			ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK;

			/* process hotplug request */
			if (dev->flags & ATA_DFLAG_DETACH)
				ata_eh_detach_dev(dev);

			/* schedule probe if necessary */
			if (!ata_dev_enabled(dev))
				ata_eh_schedule_probe(dev);
		}
	}

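	/*
	 * Recovery proper.  If any link fails below, the offending
	 * device is charged via ata_eh_handle_dev_fail() and the whole
	 * sequence restarts from this label until the per-device tries
	 * budgets run out.
	 */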
 retry:
	rc = 0;

	/* if UNLOADING, finish immediately */
	if (ap->pflags & ATA_PFLAG_UNLOADING)
		goto out;

	/* prep for EH */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* skip EH if possible. */
		if (ata_eh_skip_recovery(link))
			ehc->i.action = 0;

		ata_for_each_dev(dev, link, ALL)
			ehc->classes[dev->devno] = ATA_DEV_UNKNOWN;
	}

	/* reset */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		if (!(ehc->i.action & ATA_EH_RESET))
			continue;

		rc = ata_eh_reset(link, ata_link_nr_vacant(link),
				  prereset, softreset, hardreset, postreset);
		if (rc) {
			ata_link_err(link, "reset failed, giving up\n");
			goto out;
		}
	}

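	/*
	 * Head-unload (PARK) handling: issue unload commands to devices
	 * whose unpark deadline still lies in the future, then wait
	 * until the last deadline passes or a new park request arrives
	 * via ap->park_req_pending.
	 */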
	do {
		unsigned long now;

		/*
		 * clears ATA_EH_PARK in eh_info and resets
		 * ap->park_req_pending
		 */
		ata_eh_pull_park_action(ap);

		deadline = jiffies;
		ata_for_each_link(link, ap, EDGE) {
			ata_for_each_dev(dev, link, ALL) {
				struct ata_eh_context *ehc = &link->eh_context;
				unsigned long tmp;

				if (dev->class != ATA_DEV_ATA &&
				    dev->class != ATA_DEV_ZAC)
					continue;
				if (!(ehc->i.dev_action[dev->devno] &
				      ATA_EH_PARK))
					continue;
				tmp = dev->unpark_deadline;
				if (time_before(deadline, tmp))
					deadline = tmp;
				else if (time_before_eq(tmp, jiffies))
					continue;
				if (ehc->unloaded_mask & (1 << dev->devno))
					continue;

				ata_eh_park_issue_cmd(dev, 1);
			}
		}

		now = jiffies;
		if (time_before_eq(deadline, now))
			break;

		ata_eh_release(ap);
		deadline = wait_for_completion_timeout(&ap->park_req_pending,
						       deadline - now);
		ata_eh_acquire(ap);
	} while (deadline);
	ata_for_each_link(link, ap, EDGE) {
		ata_for_each_dev(dev, link, ALL) {
			if (!(link->eh_context.unloaded_mask &
			      (1 << dev->devno)))
				continue;

			ata_eh_park_issue_cmd(dev, 0);
			ata_eh_done(link, dev, ATA_EH_PARK);
		}
	}

	/* the rest */
	nr_fails = 0;
	ata_for_each_link(link, ap, PMP_FIRST) {
		struct ata_eh_context *ehc = &link->eh_context;

		if (sata_pmp_attached(ap) && ata_is_host_link(link))
			goto config_lpm;

		/* revalidate existing devices and attach new ones */
		rc = ata_eh_revalidate_and_attach(link, &dev);
		if (rc)
			goto rest_fail;

		/* if a PMP got attached, return; PMP EH will take care of it */
		if (link->device->class == ATA_DEV_PMP) {
			ehc->i.action = 0;
			return 0;
		}

		/* configure transfer mode if necessary */
		if (ehc->i.flags & ATA_EHI_SETMODE) {
			rc = ata_set_mode(link, &dev);
			if (rc)
				goto rest_fail;
			ehc->i.flags &= ~ATA_EHI_SETMODE;
		}

		/* If reset has been issued, clear UA to avoid
		 * disrupting the current users of the device.
		 */
		if (ehc->i.flags & ATA_EHI_DID_RESET) {
			ata_for_each_dev(dev, link, ALL) {
				if (dev->class != ATA_DEV_ATAPI)
					continue;
				rc = atapi_eh_clear_ua(dev);
				if (rc)
					goto rest_fail;
				if (zpodd_dev_enabled(dev))
					zpodd_post_poweron(dev);
			}
		}

		/* retry flush if necessary */
		ata_for_each_dev(dev, link, ALL) {
			if (dev->class != ATA_DEV_ATA &&
			    dev->class != ATA_DEV_ZAC)
				continue;
			rc = ata_eh_maybe_retry_flush(dev);
			if (rc)
				goto rest_fail;
		}

	config_lpm:
		/* configure link power saving */
		if (link->lpm_policy != ap->target_lpm_policy) {
			rc = ata_eh_set_lpm(link, ap->target_lpm_policy, &dev);
			if (rc)
				goto rest_fail;
		}

		/* this link is okay now */
		ehc->i.flags = 0;
		continue;

	rest_fail:
		nr_fails++;
		if (dev)
			ata_eh_handle_dev_fail(dev, rc);

		if (ap->pflags & ATA_PFLAG_FROZEN) {
			/* PMP reset requires working host port.
			 * Can't retry if it's frozen.
			 */
			if (sata_pmp_attached(ap))
				goto out;
			break;
		}
	}

	if (nr_fails)
		goto retry;

 out:
	if (rc && r_failed_link)
		*r_failed_link = link;

	DPRINTK("EXIT, rc=%d\n", rc);
	return rc;
}

/**
 * ata_eh_finish - finish up EH
 * @ap: host port to finish EH for
 *
 * Recovery is complete.  Clean up EH states and retry or finish
 * failed qcs.
 *
 * LOCKING:
 * None.
 */
void ata_eh_finish(struct ata_port *ap)
{
	struct ata_queued_cmd *qc;
	int tag;

	/* retry or finish qcs */
	ata_qc_for_each_raw(ap, qc, tag) {
		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask) {
			/* FIXME: Once EH migration is complete,
			 * generate sense data in this function,
			 * considering both err_mask and tf.
			 */
			if (qc->flags & ATA_QCFLAG_RETRY)
				ata_eh_qc_retry(qc);
			else
				ata_eh_qc_complete(qc);
		} else {
			if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
				ata_eh_qc_complete(qc);
			} else {
				/* feed zero TF to sense generation */
				memset(&qc->result_tf, 0, sizeof(qc->result_tf));
				ata_eh_qc_retry(qc);
			}
		}
	}

	/* make sure nr_active_links is zero after EH */
	WARN_ON(ap->nr_active_links);
	ap->nr_active_links = 0;
}

/**
 * ata_do_eh - do standard error handling
 * @ap: host port to handle error for
 * @prereset: prereset method (can be NULL)
 * @softreset: softreset method (can be NULL)
 * @hardreset: hardreset method (can be NULL)
 * @postreset: postreset method (can be NULL)
 *
 * Perform standard error handling sequence.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
	       ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
	       ata_postreset_fn_t postreset)
{
	struct ata_device *dev;
	int rc;

	ata_eh_autopsy(ap);
	ata_eh_report(ap);

	rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset,
			    NULL);
	if (rc) {
		ata_for_each_dev(dev, &ap->link, ALL)
			ata_dev_disable(dev);
	}

	ata_eh_finish(ap);
}
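
/*
 * Usage sketch (illustrative only, compiled out): a low-level driver
 * whose controller needs quiescing around the standard recovery
 * sequence can call ata_do_eh() from its ->error_handler with its own
 * reset methods, mirroring what ata_std_error_handler() below does
 * with the generic ones.  The foo_* names are hypothetical; the
 * ata_std_prereset/sata_std_hardreset/ata_std_postreset helpers are
 * the stock libata ones.
 */
#if 0
static int foo_hardreset(struct ata_link *link, unsigned int *class,
			 unsigned long deadline)
{
	/* controller-specific PHY handling would go here */
	return sata_std_hardreset(link, class, deadline);
}

static void foo_error_handler(struct ata_port *ap)
{
	foo_stop_dma_engine(ap);	/* hypothetical driver helper */

	ata_do_eh(ap, ata_std_prereset, NULL, foo_hardreset,
		  ata_std_postreset);
}
#endif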

/**
 * ata_std_error_handler - standard error handler
 * @ap: host port to handle error for
 *
 * Standard error handler
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_std_error_handler(struct ata_port *ap)
{
	struct ata_port_operations *ops = ap->ops;
	ata_reset_fn_t hardreset = ops->hardreset;

	/* ignore built-in hardreset if SCR access is not available */
	if (hardreset == sata_std_hardreset && !sata_scr_valid(&ap->link))
		hardreset = NULL;

	ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset);
}
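
/*
 * Wiring sketch (illustrative only, compiled out): drivers normally
 * pick up ata_std_error_handler() through ops inheritance (assuming
 * the base port ops still set ->error_handler to it, as they have
 * historically); naming it explicitly in a driver's
 * ata_port_operations is equivalent.  The foo_* hooks are
 * hypothetical.
 */
#if 0
static struct ata_port_operations foo_port_ops = {
	.inherits	= &sata_port_ops,
	.error_handler	= ata_std_error_handler,	/* explicit for clarity */
	.qc_prep	= foo_qc_prep,
	.qc_issue	= foo_qc_issue,
};
#endif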

#ifdef CONFIG_PM
/**
 * ata_eh_handle_port_suspend - perform port suspend operation
 * @ap: port to suspend
 *
 * Suspend @ap.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{
	unsigned long flags;
	int rc = 0;
	struct ata_device *dev;

	/* are we suspending? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    ap->pm_mesg.event & PM_EVENT_RESUME) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);

	/*
	 * If we have a ZPODD attached, check its zero
	 * power ready status before the port is frozen.
	 * Only needed for runtime suspend.
	 */
	if (PMSG_IS_AUTO(ap->pm_mesg)) {
		ata_for_each_dev(dev, &ap->link, ENABLED) {
			if (zpodd_dev_enabled(dev))
				zpodd_on_suspend(dev);
		}
	}

	/* tell ACPI we're suspending */
	rc = ata_acpi_on_suspend(ap);
	if (rc)
		goto out;

	/* suspend */
	ata_eh_freeze_port(ap);

	if (ap->ops->port_suspend)
		rc = ap->ops->port_suspend(ap, ap->pm_mesg);

	ata_acpi_set_state(ap, ap->pm_mesg);
 out:
	/* update the flags */
	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_PM_PENDING;
	if (rc == 0)
		ap->pflags |= ATA_PFLAG_SUSPENDED;
	else if (ap->pflags & ATA_PFLAG_FROZEN)
		ata_port_schedule_eh(ap);

	spin_unlock_irqrestore(ap->lock, flags);
}
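
/*
 * Note on the pm flag handshake implemented by the suspend/resume
 * helpers in this section (summarized from the code; the pending flag
 * is set by the PM request path elsewhere in libata):
 *
 *   request - ATA_PFLAG_PM_PENDING set and ap->pm_mesg filled in
 *   suspend - PM_PENDING cleared; ATA_PFLAG_SUSPENDED set on success,
 *             EH rescheduled if a frozen port failed to suspend
 *   resume  - both PM_PENDING and SUSPENDED cleared
 */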

/**
 * ata_eh_handle_port_resume - perform port resume operation
 * @ap: port to resume
 *
 * Resume @ap.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_resume(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/* are we resuming? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    !(ap->pm_mesg.event & PM_EVENT_RESUME)) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED));

	/*
	 * Error timestamps are kept in jiffies, which doesn't advance
	 * while the machine is suspended, and PHY events during resume
	 * aren't uncommon.  Combined, the two can lead to unnecessary
	 * speed downs if the machine is suspended and resumed
	 * repeatedly.  Clear the error history.
	 */
	ata_for_each_link(link, ap, HOST_FIRST)
		ata_for_each_dev(dev, link, ALL)
			ata_ering_clear(&dev->ering);

	ata_acpi_set_state(ap, ap->pm_mesg);

	if (ap->ops->port_resume)
		ap->ops->port_resume(ap);

	/* tell ACPI that we're resuming */
	ata_acpi_on_resume(ap);

	/* update the flags */
	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
	spin_unlock_irqrestore(ap->lock, flags);
}
#endif /* CONFIG_PM */