1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * This file implements the error recovery as a core part of PCIe error
4  * reporting. When a PCIe error is delivered, an error message will be
5  * collected and printed to console, then, an error recovery procedure
6  * will be executed by following the PCI error recovery rules.
7  *
8  * Copyright (C) 2006 Intel Corp.
9  *	Tom Long Nguyen (tom.l.nguyen@intel.com)
10  *	Zhang Yanmin (yanmin.zhang@intel.com)
11  */
12 
13 #include <linux/pci.h>
14 #include <linux/module.h>
15 #include <linux/pci.h>
16 #include <linux/kernel.h>
17 #include <linux/errno.h>
18 #include <linux/aer.h>
19 #include "portdrv.h"
20 #include "../pci.h"
21 
22 struct aer_broadcast_data {
23 	enum pci_channel_state state;
24 	enum pci_ers_result result;
25 };
26 
merge_result(enum pci_ers_result orig,enum pci_ers_result new)27 static pci_ers_result_t merge_result(enum pci_ers_result orig,
28 				  enum pci_ers_result new)
29 {
30 	if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
31 		return PCI_ERS_RESULT_NO_AER_DRIVER;
32 
33 	if (new == PCI_ERS_RESULT_NONE)
34 		return orig;
35 
36 	switch (orig) {
37 	case PCI_ERS_RESULT_CAN_RECOVER:
38 	case PCI_ERS_RESULT_RECOVERED:
39 		orig = new;
40 		break;
41 	case PCI_ERS_RESULT_DISCONNECT:
42 		if (new == PCI_ERS_RESULT_NEED_RESET)
43 			orig = PCI_ERS_RESULT_NEED_RESET;
44 		break;
45 	default:
46 		break;
47 	}
48 
49 	return orig;
50 }
51 
report_error_detected(struct pci_dev * dev,void * data)52 static int report_error_detected(struct pci_dev *dev, void *data)
53 {
54 	pci_ers_result_t vote;
55 	const struct pci_error_handlers *err_handler;
56 	struct aer_broadcast_data *result_data;
57 
58 	result_data = (struct aer_broadcast_data *) data;
59 
60 	device_lock(&dev->dev);
61 	dev->error_state = result_data->state;
62 
63 	if (!dev->driver ||
64 		!dev->driver->err_handler ||
65 		!dev->driver->err_handler->error_detected) {
66 		/*
67 		 * If any device in the subtree does not have an error_detected
68 		 * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
69 		 * error callbacks of "any" device in the subtree, and will
70 		 * exit in the disconnected error state.
71 		 */
72 		if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
73 			vote = PCI_ERS_RESULT_NO_AER_DRIVER;
74 		else
75 			vote = PCI_ERS_RESULT_NONE;
76 	} else {
77 		err_handler = dev->driver->err_handler;
78 		vote = err_handler->error_detected(dev, result_data->state);
79 		pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
80 	}
81 
82 	result_data->result = merge_result(result_data->result, vote);
83 	device_unlock(&dev->dev);
84 	return 0;
85 }
86 
report_mmio_enabled(struct pci_dev * dev,void * data)87 static int report_mmio_enabled(struct pci_dev *dev, void *data)
88 {
89 	pci_ers_result_t vote;
90 	const struct pci_error_handlers *err_handler;
91 	struct aer_broadcast_data *result_data;
92 
93 	result_data = (struct aer_broadcast_data *) data;
94 
95 	device_lock(&dev->dev);
96 	if (!dev->driver ||
97 		!dev->driver->err_handler ||
98 		!dev->driver->err_handler->mmio_enabled)
99 		goto out;
100 
101 	err_handler = dev->driver->err_handler;
102 	vote = err_handler->mmio_enabled(dev);
103 	result_data->result = merge_result(result_data->result, vote);
104 out:
105 	device_unlock(&dev->dev);
106 	return 0;
107 }
108 
report_slot_reset(struct pci_dev * dev,void * data)109 static int report_slot_reset(struct pci_dev *dev, void *data)
110 {
111 	pci_ers_result_t vote;
112 	const struct pci_error_handlers *err_handler;
113 	struct aer_broadcast_data *result_data;
114 
115 	result_data = (struct aer_broadcast_data *) data;
116 
117 	device_lock(&dev->dev);
118 	if (!dev->driver ||
119 		!dev->driver->err_handler ||
120 		!dev->driver->err_handler->slot_reset)
121 		goto out;
122 
123 	err_handler = dev->driver->err_handler;
124 	vote = err_handler->slot_reset(dev);
125 	result_data->result = merge_result(result_data->result, vote);
126 out:
127 	device_unlock(&dev->dev);
128 	return 0;
129 }
130 
report_resume(struct pci_dev * dev,void * data)131 static int report_resume(struct pci_dev *dev, void *data)
132 {
133 	const struct pci_error_handlers *err_handler;
134 
135 	device_lock(&dev->dev);
136 	dev->error_state = pci_channel_io_normal;
137 
138 	if (!dev->driver ||
139 		!dev->driver->err_handler ||
140 		!dev->driver->err_handler->resume)
141 		goto out;
142 
143 	err_handler = dev->driver->err_handler;
144 	err_handler->resume(dev);
145 	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
146 out:
147 	device_unlock(&dev->dev);
148 	return 0;
149 }
150 
151 /**
152  * default_reset_link - default reset function
153  * @dev: pointer to pci_dev data structure
154  *
155  * Invoked when performing link reset on a Downstream Port or a
156  * Root Port with no aer driver.
157  */
default_reset_link(struct pci_dev * dev)158 static pci_ers_result_t default_reset_link(struct pci_dev *dev)
159 {
160 	int rc;
161 
162 	rc = pci_bus_error_reset(dev);
163 	pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
164 	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
165 }
166 
reset_link(struct pci_dev * dev,u32 service)167 static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
168 {
169 	pci_ers_result_t status;
170 	struct pcie_port_service_driver *driver = NULL;
171 
172 	driver = pcie_port_find_service(dev, service);
173 	if (driver && driver->reset_link) {
174 		status = driver->reset_link(dev);
175 	} else if (dev->has_secondary_link) {
176 		status = default_reset_link(dev);
177 	} else {
178 		pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
179 			pci_name(dev));
180 		return PCI_ERS_RESULT_DISCONNECT;
181 	}
182 
183 	if (status != PCI_ERS_RESULT_RECOVERED) {
184 		pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
185 			pci_name(dev));
186 		return PCI_ERS_RESULT_DISCONNECT;
187 	}
188 
189 	return status;
190 }
191 
192 /**
193  * broadcast_error_message - handle message broadcast to downstream drivers
194  * @dev: pointer to from where in a hierarchy message is broadcasted down
195  * @state: error state
196  * @error_mesg: message to print
197  * @cb: callback to be broadcasted
198  *
199  * Invoked during error recovery process. Once being invoked, the content
200  * of error severity will be broadcasted to all downstream drivers in a
201  * hierarchy in question.
202  */
broadcast_error_message(struct pci_dev * dev,enum pci_channel_state state,char * error_mesg,int (* cb)(struct pci_dev *,void *))203 static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
204 	enum pci_channel_state state,
205 	char *error_mesg,
206 	int (*cb)(struct pci_dev *, void *))
207 {
208 	struct aer_broadcast_data result_data;
209 
210 	pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg);
211 	result_data.state = state;
212 	if (cb == report_error_detected)
213 		result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
214 	else
215 		result_data.result = PCI_ERS_RESULT_RECOVERED;
216 
217 	pci_walk_bus(dev->subordinate, cb, &result_data);
218 	return result_data.result;
219 }
220 
221 /**
222  * pcie_do_fatal_recovery - handle fatal error recovery process
223  * @dev: pointer to a pci_dev data structure of agent detecting an error
224  *
225  * Invoked when an error is fatal. Once being invoked, removes the devices
226  * beneath this AER agent, followed by reset link e.g. secondary bus reset
227  * followed by re-enumeration of devices.
228  */
pcie_do_fatal_recovery(struct pci_dev * dev,u32 service)229 void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service)
230 {
231 	struct pci_dev *udev;
232 	struct pci_bus *parent;
233 	struct pci_dev *pdev, *temp;
234 	pci_ers_result_t result;
235 
236 	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
237 		udev = dev;
238 	else
239 		udev = dev->bus->self;
240 
241 	parent = udev->subordinate;
242 	pci_lock_rescan_remove();
243 	pci_dev_get(dev);
244 	list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
245 					 bus_list) {
246 		pci_dev_get(pdev);
247 		pci_dev_set_disconnected(pdev, NULL);
248 		if (pci_has_subordinate(pdev))
249 			pci_walk_bus(pdev->subordinate,
250 				     pci_dev_set_disconnected, NULL);
251 		pci_stop_and_remove_bus_device(pdev);
252 		pci_dev_put(pdev);
253 	}
254 
255 	result = reset_link(udev, service);
256 
257 	if ((service == PCIE_PORT_SERVICE_AER) &&
258 	    (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) {
259 		/*
260 		 * If the error is reported by a bridge, we think this error
261 		 * is related to the downstream link of the bridge, so we
262 		 * do error recovery on all subordinates of the bridge instead
263 		 * of the bridge and clear the error status of the bridge.
264 		 */
265 		pci_aer_clear_fatal_status(dev);
266 		pci_aer_clear_device_status(dev);
267 	}
268 
269 	if (result == PCI_ERS_RESULT_RECOVERED) {
270 		if (pcie_wait_for_link(udev, true))
271 			pci_rescan_bus(udev->bus);
272 		pci_info(dev, "Device recovery from fatal error successful\n");
273 	} else {
274 		pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
275 		pci_info(dev, "Device recovery from fatal error failed\n");
276 	}
277 
278 	pci_dev_put(dev);
279 	pci_unlock_rescan_remove();
280 }
281 
282 /**
283  * pcie_do_nonfatal_recovery - handle nonfatal error recovery process
284  * @dev: pointer to a pci_dev data structure of agent detecting an error
285  *
286  * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
287  * error detected message to all downstream drivers within a hierarchy in
288  * question and return the returned code.
289  */
pcie_do_nonfatal_recovery(struct pci_dev * dev)290 void pcie_do_nonfatal_recovery(struct pci_dev *dev)
291 {
292 	pci_ers_result_t status;
293 	enum pci_channel_state state;
294 
295 	state = pci_channel_io_normal;
296 
297 	/*
298 	 * Error recovery runs on all subordinates of the first downstream port.
299 	 * If the downstream port detected the error, it is cleared at the end.
300 	 */
301 	if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
302 	      pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM))
303 		dev = dev->bus->self;
304 
305 	status = broadcast_error_message(dev,
306 			state,
307 			"error_detected",
308 			report_error_detected);
309 
310 	if (status == PCI_ERS_RESULT_CAN_RECOVER)
311 		status = broadcast_error_message(dev,
312 				state,
313 				"mmio_enabled",
314 				report_mmio_enabled);
315 
316 	if (status == PCI_ERS_RESULT_NEED_RESET) {
317 		/*
318 		 * TODO: Should call platform-specific
319 		 * functions to reset slot before calling
320 		 * drivers' slot_reset callbacks?
321 		 */
322 		status = broadcast_error_message(dev,
323 				state,
324 				"slot_reset",
325 				report_slot_reset);
326 	}
327 
328 	if (status != PCI_ERS_RESULT_RECOVERED)
329 		goto failed;
330 
331 	broadcast_error_message(dev,
332 				state,
333 				"resume",
334 				report_resume);
335 
336 	pci_aer_clear_device_status(dev);
337 	pci_cleanup_aer_uncorrect_error_status(dev);
338 	pci_info(dev, "AER: Device recovery successful\n");
339 	return;
340 
341 failed:
342 	pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
343 
344 	/* TODO: Should kernel panic here? */
345 	pci_info(dev, "AER: Device recovery failed\n");
346 }
347