/*
 * Copyright 2017 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/hugetlb.h>
#include <linux/sched/mm.h>
#include <asm/pnv-pci.h>
#include <misc/cxllib.h>

#include "cxl.h"

#define CXL_INVALID_DRA			~0ull
#define CXL_DUMMY_READ_SIZE		128
#define CXL_DUMMY_READ_ALIGN		8
#define CXL_CAPI_WINDOW_START		0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE	48
#define CXL_XSL_CONFIG_CURRENT_VERSION	CXL_XSL_CONFIG_VERSION1

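/*
 * Check whether a PCI slot can be used for CAPI: no flags are
 * supported yet, the CPU must be running in hypervisor mode on a
 * POWER9 chip, the slot must not sit behind a switch, and it must be
 * wired to a CAPP unit.
 */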
bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	/* No flags currently supported */
	if (flags)
		return false;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return false;

	if (!cxl_is_power9())
		return false;

	if (cxl_slot_is_switched(dev))
		return false;

	/* on p9, some pci slots are not connected to a CAPP unit */
	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);

static DEFINE_MUTEX(dra_mutex);
static u64 dummy_read_addr = CXL_INVALID_DRA;

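/*
 * Allocate the dummy read buffer and keep the physical address of a
 * suitably aligned region within it in dummy_read_addr. It is
 * allocated once, under dra_mutex, and reported to callers through
 * the XSL configuration (cfg->dra).
 */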
static int allocate_dummy_read_buf(void)
{
	u64 buf, vaddr;
	size_t buf_size;

	/*
	 * The dummy read buffer is 128 bytes long, aligned on a
	 * 256-byte boundary, and we need its physical address.
	 */
	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
		(~0ull << CXL_DUMMY_READ_ALIGN);

	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
	     "Dummy read buffer alignment issue");
	dummy_read_addr = virt_to_phys((void *) vaddr);
	return 0;
}

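/*
 * Fill in the XSL configuration for the device: config version, the
 * dsnctl value for the CAPP unit routing to the slot, the CAPI window
 * address and (log) size, and the dummy read address. The dummy read
 * buffer is allocated on first use, under dra_mutex.
 */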
int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	mutex_lock(&dra_mutex);
	if (dummy_read_addr == CXL_INVALID_DRA) {
		rc = allocate_dummy_read_buf();
		if (rc) {
			mutex_unlock(&dra_mutex);
			return rc;
		}
	}
	mutex_unlock(&dra_mutex);

	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return rc;

	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
	if (rc)
		return rc;

	cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
	cfg->bar_addr = CXL_CAPI_WINDOW_START;
	cfg->dra = dummy_read_addr;
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);

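/*
 * Switch the PHB between PCI and CXL (CAPI) mode. Only switching to
 * CXL mode with DMA on TVT#1 is supported; asking for PCI mode merely
 * turns snooping off and reports an error (see below).
 */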
int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
			   unsigned long flags)
{
	int rc = 0;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	switch (mode) {
	case CXL_MODE_PCI:
		/*
		 * We currently don't support going back to PCI mode.
		 * However, we'll turn the invalidations off, so that
		 * the firmware doesn't have to ack them and can do
		 * things like reset, etc. with no worries.
		 * So always return EPERM (can't go back to PCI) or
		 * EBUSY if we couldn't even turn off snooping.
		 */
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
		if (rc)
			rc = -EBUSY;
		else
			rc = -EPERM;
		break;
	case CXL_MODE_CXL:
		/* DMA only supported on TVT1 for the time being */
		if (flags != CXL_MODE_DMA_TVT1)
			return -EINVAL;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
		if (rc)
			return rc;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
		break;
	default:
		rc = -EINVAL;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);

/*
 * When switching the PHB to CAPI mode, the TVT#1 entry for
 * the Partitionable Endpoint is set in bypass mode, like
 * in PCI mode.
 * Configure the device DMA to use TVT#1, which is done
 * by calling dma_set_mask() with a sufficiently large mask.
 */
int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
{
	int rc;

	if (flags)
		return -EINVAL;

	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_set_device_dma);

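/*
 * Return the attributes needed to set up a Process Element for 'task':
 * state register value, logical partition ID, the mm context id (pid)
 * and thread ID. With a NULL task, the attributes describe a kernel
 * context (pid and tid are 0).
 */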
int cxllib_get_PE_attributes(struct task_struct *task,
			     unsigned long translation_mode,
			     struct cxllib_pe_attributes *attr)
{
	struct mm_struct *mm = NULL;

	if (translation_mode != CXL_TRANSLATED_MODE &&
	    translation_mode != CXL_REAL_MODE)
		return -EINVAL;

	attr->sr = cxl_calculate_sr(false,
				task == NULL,
				translation_mode == CXL_REAL_MODE,
				true);
	attr->lpid = mfspr(SPRN_LPID);
	if (task) {
		mm = get_task_mm(task);
		if (mm == NULL)
			return -EINVAL;
		/*
		 * Caller is keeping a reference on mm_users for as long
		 * as XSL uses the memory context
		 */
		attr->pid = mm->context.id;
		mmput(mm);
		attr->tid = task->thread.tidr;
	} else {
		attr->pid = 0;
		attr->tid = 0;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);

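/*
 * Look up the VMA covering 'addr' and return its boundaries and page
 * size, under the mmap_sem read lock.
 */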
static int get_vma_info(struct mm_struct *mm, u64 addr,
			u64 *vma_start, u64 *vma_end,
			unsigned long *page_size)
{
	struct vm_area_struct *vma = NULL;
	int rc = 0;

	down_read(&mm->mmap_sem);

	vma = find_vma(mm, addr);
	if (!vma) {
		rc = -EFAULT;
		goto out;
	}
	*page_size = vma_kernel_pagesize(vma);
	*vma_start = vma->vm_start;
	*vma_end = vma->vm_end;
out:
	up_read(&mm->mmap_sem);
	return rc;
}

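/*
 * Fault in all the pages covering the range [addr, addr + size), one
 * page at a time, refreshing the VMA information whenever the current
 * page falls outside the last known VMA.
 */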
int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
{
	int rc;
	u64 dar, vma_start, vma_end;
	unsigned long page_size;

	if (mm == NULL)
		return -EFAULT;

	/*
	 * The buffer we have to process can extend over several pages
	 * and may also cover several VMAs.
	 * We iterate over all the pages. The page size could vary
	 * between VMAs.
	 */
	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
	if (rc)
		return rc;

	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
	     dar += page_size) {
		if (dar < vma_start || dar >= vma_end) {
			/*
			 * We don't hold the mm->mmap_sem semaphore
			 * while iterating, since the semaphore is
			 * required by one of the lower-level page
			 * fault processing functions and it could
			 * create a deadlock.
			 *
			 * It means the VMAs can be altered between two
			 * loop iterations and we could theoretically
			 * miss a page (however unlikely). But that's
			 * not really a problem, as the driver will
			 * retry the access, get another page fault on
			 * the missing page and call us again.
			 */
			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
					  &page_size);
			if (rc)
				return rc;
		}

		rc = cxl_handle_mm_fault(mm, flags, dar);
		if (rc)
			return -EFAULT;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_handle_fault);