1/****************************************************************************** 2 * mcelog.c 3 * Driver for receiving and transferring machine check error infomation 4 * 5 * Copyright (c) 2012 Intel Corporation 6 * Author: Liu, Jinsong <jinsong.liu@intel.com> 7 * Author: Jiang, Yunhong <yunhong.jiang@intel.com> 8 * Author: Ke, Liping <liping.ke@intel.com> 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License version 2 12 * as published by the Free Software Foundation; or, when distributed 13 * separately from the Linux kernel or incorporated into other 14 * software packages, subject to the following license: 15 * 16 * Permission is hereby granted, free of charge, to any person obtaining a copy 17 * of this source file (the "Software"), to deal in the Software without 18 * restriction, including without limitation the rights to use, copy, modify, 19 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 20 * and to permit persons to whom the Software is furnished to do so, subject to 21 * the following conditions: 22 * 23 * The above copyright notice and this permission notice shall be included in 24 * all copies or substantial portions of the Software. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 32 * IN THE SOFTWARE. 33 */ 34 35#define pr_fmt(fmt) "xen_mcelog: " fmt 36 37#include <linux/init.h> 38#include <linux/types.h> 39#include <linux/kernel.h> 40#include <linux/slab.h> 41#include <linux/fs.h> 42#include <linux/device.h> 43#include <linux/miscdevice.h> 44#include <linux/uaccess.h> 45#include <linux/capability.h> 46#include <linux/poll.h> 47#include <linux/sched.h> 48 49#include <xen/interface/xen.h> 50#include <xen/events.h> 51#include <xen/interface/vcpu.h> 52#include <xen/xen.h> 53#include <asm/xen/hypercall.h> 54#include <asm/xen/hypervisor.h> 55 56static struct mc_info g_mi; 57static struct mcinfo_logical_cpu *g_physinfo; 58static uint32_t ncpus; 59 60static DEFINE_MUTEX(mcelog_lock); 61 62static struct xen_mce_log xen_mcelog = { 63 .signature = XEN_MCE_LOG_SIGNATURE, 64 .len = XEN_MCE_LOG_LEN, 65 .recordlen = sizeof(struct xen_mce), 66}; 67 68static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock); 69static int xen_mce_chrdev_open_count; /* #times opened */ 70static int xen_mce_chrdev_open_exclu; /* already open exclusive? */ 71 72static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait); 73 74static int xen_mce_chrdev_open(struct inode *inode, struct file *file) 75{ 76 spin_lock(&xen_mce_chrdev_state_lock); 77 78 if (xen_mce_chrdev_open_exclu || 79 (xen_mce_chrdev_open_count && (file->f_flags & O_EXCL))) { 80 spin_unlock(&xen_mce_chrdev_state_lock); 81 82 return -EBUSY; 83 } 84 85 if (file->f_flags & O_EXCL) 86 xen_mce_chrdev_open_exclu = 1; 87 xen_mce_chrdev_open_count++; 88 89 spin_unlock(&xen_mce_chrdev_state_lock); 90 91 return nonseekable_open(inode, file); 92} 93 94static int xen_mce_chrdev_release(struct inode *inode, struct file *file) 95{ 96 spin_lock(&xen_mce_chrdev_state_lock); 97 98 xen_mce_chrdev_open_count--; 99 xen_mce_chrdev_open_exclu = 0; 100 101 spin_unlock(&xen_mce_chrdev_state_lock); 102 103 return 0; 104} 105 106static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf, 107 size_t usize, loff_t *off) 108{ 109 char __user *buf = ubuf; 110 unsigned num; 111 int i, err; 112 113 mutex_lock(&mcelog_lock); 114 115 num = xen_mcelog.next; 116 117 /* Only supports full reads right now */ 118 err = -EINVAL; 119 if (*off != 0 || usize < XEN_MCE_LOG_LEN*sizeof(struct xen_mce)) 120 goto out; 121 122 err = 0; 123 for (i = 0; i < num; i++) { 124 struct xen_mce *m = &xen_mcelog.entry[i]; 125 126 err |= copy_to_user(buf, m, sizeof(*m)); 127 buf += sizeof(*m); 128 } 129 130 memset(xen_mcelog.entry, 0, num * sizeof(struct xen_mce)); 131 xen_mcelog.next = 0; 132 133 if (err) 134 err = -EFAULT; 135 136out: 137 mutex_unlock(&mcelog_lock); 138 139 return err ? err : buf - ubuf; 140} 141 142static unsigned int xen_mce_chrdev_poll(struct file *file, poll_table *wait) 143{ 144 poll_wait(file, &xen_mce_chrdev_wait, wait); 145 146 if (xen_mcelog.next) 147 return POLLIN | POLLRDNORM; 148 149 return 0; 150} 151 152static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd, 153 unsigned long arg) 154{ 155 int __user *p = (int __user *)arg; 156 157 if (!capable(CAP_SYS_ADMIN)) 158 return -EPERM; 159 160 switch (cmd) { 161 case MCE_GET_RECORD_LEN: 162 return put_user(sizeof(struct xen_mce), p); 163 case MCE_GET_LOG_LEN: 164 return put_user(XEN_MCE_LOG_LEN, p); 165 case MCE_GETCLEAR_FLAGS: { 166 unsigned flags; 167 168 do { 169 flags = xen_mcelog.flags; 170 } while (cmpxchg(&xen_mcelog.flags, flags, 0) != flags); 171 172 return put_user(flags, p); 173 } 174 default: 175 return -ENOTTY; 176 } 177} 178 179static const struct file_operations xen_mce_chrdev_ops = { 180 .open = xen_mce_chrdev_open, 181 .release = xen_mce_chrdev_release, 182 .read = xen_mce_chrdev_read, 183 .poll = xen_mce_chrdev_poll, 184 .unlocked_ioctl = xen_mce_chrdev_ioctl, 185 .llseek = no_llseek, 186}; 187 188static struct miscdevice xen_mce_chrdev_device = { 189 MISC_MCELOG_MINOR, 190 "mcelog", 191 &xen_mce_chrdev_ops, 192}; 193 194/* 195 * Caller should hold the mcelog_lock 196 */ 197static void xen_mce_log(struct xen_mce *mce) 198{ 199 unsigned entry; 200 201 entry = xen_mcelog.next; 202 203 /* 204 * When the buffer fills up discard new entries. 205 * Assume that the earlier errors are the more 206 * interesting ones: 207 */ 208 if (entry >= XEN_MCE_LOG_LEN) { 209 set_bit(XEN_MCE_OVERFLOW, 210 (unsigned long *)&xen_mcelog.flags); 211 return; 212 } 213 214 memcpy(xen_mcelog.entry + entry, mce, sizeof(struct xen_mce)); 215 216 xen_mcelog.next++; 217} 218 219static int convert_log(struct mc_info *mi) 220{ 221 struct mcinfo_common *mic; 222 struct mcinfo_global *mc_global; 223 struct mcinfo_bank *mc_bank; 224 struct xen_mce m; 225 uint32_t i; 226 227 mic = NULL; 228 x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL); 229 if (unlikely(!mic)) { 230 pr_warn("Failed to find global error info\n"); 231 return -ENODEV; 232 } 233 234 memset(&m, 0, sizeof(struct xen_mce)); 235 236 mc_global = (struct mcinfo_global *)mic; 237 m.mcgstatus = mc_global->mc_gstatus; 238 m.apicid = mc_global->mc_apicid; 239 240 for (i = 0; i < ncpus; i++) 241 if (g_physinfo[i].mc_apicid == m.apicid) 242 break; 243 if (unlikely(i == ncpus)) { 244 pr_warn("Failed to match cpu with apicid %d\n", m.apicid); 245 return -ENODEV; 246 } 247 248 m.socketid = g_physinfo[i].mc_chipid; 249 m.cpu = m.extcpu = g_physinfo[i].mc_cpunr; 250 m.cpuvendor = (__u8)g_physinfo[i].mc_vendor; 251 m.mcgcap = g_physinfo[i].mc_msrvalues[__MC_MSR_MCGCAP].value; 252 253 mic = NULL; 254 x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK); 255 if (unlikely(!mic)) { 256 pr_warn("Fail to find bank error info\n"); 257 return -ENODEV; 258 } 259 260 do { 261 if ((!mic) || (mic->size == 0) || 262 (mic->type != MC_TYPE_GLOBAL && 263 mic->type != MC_TYPE_BANK && 264 mic->type != MC_TYPE_EXTENDED && 265 mic->type != MC_TYPE_RECOVERY)) 266 break; 267 268 if (mic->type == MC_TYPE_BANK) { 269 mc_bank = (struct mcinfo_bank *)mic; 270 m.misc = mc_bank->mc_misc; 271 m.status = mc_bank->mc_status; 272 m.addr = mc_bank->mc_addr; 273 m.tsc = mc_bank->mc_tsc; 274 m.bank = mc_bank->mc_bank; 275 m.finished = 1; 276 /*log this record*/ 277 xen_mce_log(&m); 278 } 279 mic = x86_mcinfo_next(mic); 280 } while (1); 281 282 return 0; 283} 284 285static int mc_queue_handle(uint32_t flags) 286{ 287 struct xen_mc mc_op; 288 int ret = 0; 289 290 mc_op.cmd = XEN_MC_fetch; 291 mc_op.interface_version = XEN_MCA_INTERFACE_VERSION; 292 set_xen_guest_handle(mc_op.u.mc_fetch.data, &g_mi); 293 do { 294 mc_op.u.mc_fetch.flags = flags; 295 ret = HYPERVISOR_mca(&mc_op); 296 if (ret) { 297 pr_err("Failed to fetch %surgent error log\n", 298 flags == XEN_MC_URGENT ? "" : "non"); 299 break; 300 } 301 302 if (mc_op.u.mc_fetch.flags & XEN_MC_NODATA || 303 mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED) 304 break; 305 else { 306 ret = convert_log(&g_mi); 307 if (ret) 308 pr_warn("Failed to convert this error log, continue acking it anyway\n"); 309 310 mc_op.u.mc_fetch.flags = flags | XEN_MC_ACK; 311 ret = HYPERVISOR_mca(&mc_op); 312 if (ret) { 313 pr_err("Failed to ack previous error log\n"); 314 break; 315 } 316 } 317 } while (1); 318 319 return ret; 320} 321 322/* virq handler for machine check error info*/ 323static void xen_mce_work_fn(struct work_struct *work) 324{ 325 int err; 326 327 mutex_lock(&mcelog_lock); 328 329 /* urgent mc_info */ 330 err = mc_queue_handle(XEN_MC_URGENT); 331 if (err) 332 pr_err("Failed to handle urgent mc_info queue, continue handling nonurgent mc_info queue anyway\n"); 333 334 /* nonurgent mc_info */ 335 err = mc_queue_handle(XEN_MC_NONURGENT); 336 if (err) 337 pr_err("Failed to handle nonurgent mc_info queue\n"); 338 339 /* wake processes polling /dev/mcelog */ 340 wake_up_interruptible(&xen_mce_chrdev_wait); 341 342 mutex_unlock(&mcelog_lock); 343} 344static DECLARE_WORK(xen_mce_work, xen_mce_work_fn); 345 346static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) 347{ 348 schedule_work(&xen_mce_work); 349 return IRQ_HANDLED; 350} 351 352static int bind_virq_for_mce(void) 353{ 354 int ret; 355 struct xen_mc mc_op; 356 357 memset(&mc_op, 0, sizeof(struct xen_mc)); 358 359 /* Fetch physical CPU Numbers */ 360 mc_op.cmd = XEN_MC_physcpuinfo; 361 mc_op.interface_version = XEN_MCA_INTERFACE_VERSION; 362 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); 363 ret = HYPERVISOR_mca(&mc_op); 364 if (ret) { 365 pr_err("Failed to get CPU numbers\n"); 366 return ret; 367 } 368 369 /* Fetch each CPU Physical Info for later reference*/ 370 ncpus = mc_op.u.mc_physcpuinfo.ncpus; 371 g_physinfo = kcalloc(ncpus, sizeof(struct mcinfo_logical_cpu), 372 GFP_KERNEL); 373 if (!g_physinfo) 374 return -ENOMEM; 375 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); 376 ret = HYPERVISOR_mca(&mc_op); 377 if (ret) { 378 pr_err("Failed to get CPU info\n"); 379 kfree(g_physinfo); 380 return ret; 381 } 382 383 ret = bind_virq_to_irqhandler(VIRQ_MCA, 0, 384 xen_mce_interrupt, 0, "mce", NULL); 385 if (ret < 0) { 386 pr_err("Failed to bind virq\n"); 387 kfree(g_physinfo); 388 return ret; 389 } 390 391 return 0; 392} 393 394static int __init xen_late_init_mcelog(void) 395{ 396 int ret; 397 398 /* Only DOM0 is responsible for MCE logging */ 399 if (!xen_initial_domain()) 400 return -ENODEV; 401 402 /* register character device /dev/mcelog for xen mcelog */ 403 ret = misc_register(&xen_mce_chrdev_device); 404 if (ret) 405 return ret; 406 407 ret = bind_virq_for_mce(); 408 if (ret) 409 goto deregister; 410 411 return 0; 412 413deregister: 414 misc_deregister(&xen_mce_chrdev_device); 415 return ret; 416} 417device_initcall(xen_late_init_mcelog); 418