#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <poll.h>
#include <signal.h>
#include <fcntl.h>
#include <elf.h>

#include <vector>
#include <string>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <algorithm>
#include <functional>
#include <tuple>
#include <utility>
using namespace std;


#define MAXN 512      // number of data pages mapped per perf ring buffer (power of two)
#define MAXCPU 128    // upper bound on the number of CPUs we attach to
#define error(msg) do { perror(msg); exit(1); } while(0)

static char gflag_kernel_only = 0;

//--------------------------------Tree for call chain and report-------------------------------
//
// One node of the call tree. `c` counts the samples that passed through this
// node; `s` maps a callee name to its child node. Nodes are never freed: the
// tree lives until the process exits.
struct TNode {
    int c=0;
    unordered_map<string, TNode*> s;

    // Record one step from this node into callee `n` (creating the child on
    // first use), bump this node's sample count and return the child.
    struct TNode *add(string n) {
        c++;
        TNode*& child = s[n];   // single lookup; value-initialised to nullptr when new
        if (child==nullptr) child = new TNode();
        return child;
    }

    // Write this node's children to `fp` as nested HTML <li> items, ordered by
    // descending sample count. Children below 1% of this node's count are
    // omitted. `k` is the next unused checkbox id; the next unused id after
    // this subtree is returned.
    int printit(FILE *fp, int k) {
        if (s.size()) {
            using tt = tuple<int, string, TNode*>;
            vector<tt> xx;
            xx.reserve(s.size());
            for (const auto& x: s) xx.push_back(make_tuple(x.second->c, x.first, x.second));
            sort(begin(xx), end(xx), greater<tt>());
            for (const auto& x: xx) {
                int count = get<0>(x);
                if (100.0*count/c<1) continue;
                const string& name = get<1>(x);
                TNode* nx = get<2>(x);
                fprintf(fp, "<li>\n");
                fprintf(fp, "<input type=\"checkbox\" id=\"c%d\" />\n", k);
                fprintf(fp, "<label class=\"tree_label\" for=\"c%d\">%s(%.3f%% %d/%d)</label>\n", k, name.c_str(), 100.0*count/c, count, c);
                fprintf(fp, "<ul>\n");
                k = nx->printit(fp, k+1);
                fprintf(fp, "</ul>\n");
                fprintf(fp, "</li>\n");
            }
        }
        return k;
    }
};

//--------------------------------symbols-------------------------------------------
// runtime start address -> (function name, function size)
using STORE_T = map<unsigned long long, pair<string, unsigned long long>>;
// kernel address -> symbol name
using K_STORE_T = map<unsigned long long, string>;

/*
 * Load FUNC symbols from a 64-bit ELF file into `store`, relocating each
 * symbol's virtual address to the address it has in the running process.
 *
 *   fp        ELF file, positioned at offset 0
 *   v_addr    start address of the executable mapping in the process
 *   v_size    size of that mapping (currently unused)
 *   v_offset  file offset of that mapping; used to pick the matching
 *             executable program header
 *   store     receives relocated address -> (name, size); when two symbols
 *             share an address the one with the larger size wins
 *
 * Errors are reported on stdout/stderr and end the parse early; symbols
 * stored before the error are kept.
 */
void parse_elf64(FILE *fp, unsigned long long v_addr, unsigned long long v_size, unsigned long long v_offset, STORE_T& store) {
    (void)v_size;
    Elf64_Ehdr ehdr;
    if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) return;

    // Find the executable program header whose file offset matches the mapping.
    unsigned long long p_vaddr = 0, p_size = 0;
    unsigned long long offset = ehdr.e_phoff;
    int n = ehdr.e_phnum;
    int i;
    Elf64_Phdr phdr;
    for (i=0; i<n; i++) {
        if (fseek(fp, offset, SEEK_SET)<0) { perror("fail to seek"); return; }
        if (fread(&phdr, sizeof(phdr), 1, fp) != 1) { perror("fail to read program header"); return; }
        if ((phdr.p_flags&PF_X) && phdr.p_offset == v_offset) {
            p_vaddr = phdr.p_vaddr;
            p_size = phdr.p_memsz; if (p_size==0) p_size = 0xffffffff;
            break;
        }
        offset += ehdr.e_phentsize;
    }
    if (i>=n) { printf("No program header match offset found, fail to load\n"); return; }

    // Load all section headers.
    n = ehdr.e_shnum;
    offset = ehdr.e_shoff;
    Elf64_Shdr shdr;
    vector<Elf64_Shdr> headers;
    headers.reserve(n);
    for (i=0; i<n; i++) {
        if (fseek(fp, offset, SEEK_SET)<0) { perror("fail to seek"); return; }
        if (fread(&shdr, sizeof(shdr), 1, fp) != 1) { perror("fail to read sec header"); return; }
        headers.push_back(shdr);
        offset += ehdr.e_shentsize;
    }

    // Walk every symbol table and collect the function symbols.
    Elf64_Sym symb;
    char fname[128];
    for (const Elf64_Shdr& sec: headers) {
        if (sec.sh_type != SHT_SYMTAB && sec.sh_type != SHT_DYNSYM) continue;
        unsigned long long item_size = sec.sh_entsize;
        unsigned long long link = sec.sh_link;
        // sh_link indexes the section holding the symbol names; reject a
        // missing or out-of-range link instead of reading outside `headers`.
        if (link == 0 || link >= headers.size()) continue;
        // A zero entry size would never advance the loop below.
        if (item_size == 0) continue;
        unsigned long long names = headers[link].sh_offset;
        for (unsigned long long k=0; k+item_size<=sec.sh_size; k+=item_size) {
            if (fseek(fp, sec.sh_offset+k, SEEK_SET)<0) continue;
            if (fread(&symb, sizeof(symb), 1, fp) != 1) continue;
            if (ELF64_ST_TYPE(symb.st_info) != STT_FUNC) continue;
            if (symb.st_shndx == 0) continue;                 // undefined symbol
            unsigned long long fsize = symb.st_size;
            unsigned long long faddr = symb.st_value;
            if (faddr > p_vaddr+p_size) continue;             // beyond the matched segment
            if (symb.st_name == 0) continue;                  // unnamed
            if (fseek(fp, names+symb.st_name, SEEK_SET)<0) continue;
            // fgets may read past the name's terminating NUL, but building the
            // std::string from the buffer stops at that NUL.
            if (fgets(fname, sizeof(fname), fp)==NULL) continue;
            faddr = faddr-p_vaddr+v_addr;
            auto it = store.find(faddr);
            if (it == store.end()) store[faddr] = make_pair(string(fname), fsize);
            else if (it->second.second < fsize) it->second = make_pair(string(fname), fsize);
        }
    }
}

/*
 * Open the ELF file at `path` and add its function symbols to `store`,
 * relocated for a mapping at `addr` of `size` bytes with file offset `offset`.
 * Returns 0 on success, -1 on I/O error or when the file is not ELF, and -2
 * for 32-bit ELF files, which are not supported.
 */
int load_symbol_from_file(const char *path, unsigned long long addr, unsigned long long size, unsigned long long offset, STORE_T& store) {
    printf("loading symble from %s\n", path);
    FILE *fp = fopen(path, "rb");
    if (fp==NULL) { perror("fail to open file"); return -1; }
    char ident[EI_NIDENT], c;
    int err=0;
    if (fread(ident, sizeof(ident), 1, fp) != 1) { perror("fail to read ident"); err=-1; goto end; }
    // Check the whole 4-byte magic, not only the leading 0x7f.
    if (memcmp(ident, ELFMAG, SELFMAG) != 0) { printf("not a elf file\n"); err=-1; goto end; }
    c = ident[EI_CLASS];
    if (fseek(fp, 0, SEEK_SET)<0) { perror("fail to rewind"); err=-1; goto end; }
    if (c == ELFCLASS32) {
        printf("32bit elf not supported yet\n"); err=-2; goto end;
    } else if (c == ELFCLASS64) {
        parse_elf64(fp, addr, size, offset, store);
    }

end:
    fclose(fp);
    return err;
}

/*
 * Parse a hexadecimal number from `p`. Leading spaces/tabs and an optional
 * "0x"/"0X" prefix are skipped. On return *n holds the index just past the
 * last hex digit (counted from `p`), or 0 when no digit was parsed; callers
 * treat *n == 0 as failure. Returns the parsed value (0 on failure).
 */
static unsigned long long parse_hex(char *p, int *n) {
    unsigned long long r=0;
    int i=0;
    *n = 0;
    while (p[i]==' '||p[i]=='\t') i++;
    if (p[i]==0) return 0;
    if (p[i]=='0' && (p[i+1]=='x'||p[i+1]=='X')) i+=2;
    int first = i;
    for (;; i++) {
        char ch = p[i];
        int v;
        if (ch>='0'&&ch<='9') v = ch-'0';
        else if (ch>='a'&&ch<='f') v = 10+ch-'a';
        else if (ch>='A'&&ch<='F') v = 10+ch-'A';
        else break;
        r = (r<<4)+v;
    }
    if (i==first) return 0;   // nothing but whitespace/prefix: report failure
    *n = i;
    return r;
}

/*
 * Build the symbol store of process `pid` by scanning /proc/<pid>/maps and
 * loading function symbols from every executable, file-backed mapping. Files
 * are opened through /proc/<pid>/root. Returns a heap-allocated store owned by
 * the caller, or NULL when the maps file cannot be opened or no symbol was
 * found.
 */
STORE_T* load_symbol_pid(int pid) {
    printf("loading symbols for %d\n", pid);
    char line[512], path[320];
    snprintf(path, sizeof(path), "/proc/%d/maps", pid);
    FILE* fp = fopen(path, "r");
    if (fp==NULL) return NULL;
    STORE_T *store = new STORE_T();
    unsigned long long start, end, offset, inode;
    int i, c;
    char fname[256], xx[64], xxx[32], mod[16], idx[16];
    while (fgets(line, sizeof(line), fp) != NULL) {
        // Fields: address range, permissions, offset, device, inode, path.
        // Widths keep each %s inside its buffer.
        if (sscanf(line, "%63s %15s %31s %15s %llu %255s", xx, mod, xxx, idx, &inode, fname)!=6) continue;
        // Address range "start-end".
        start = parse_hex(xx, &c); if (c==0) continue;
        i = c;
        if (xx[i]!='-') continue;
        i++;
        end = parse_hex(xx+i, &c); if (c==0) continue;
        // Only executable mappings are of interest.
        if (strchr(mod, 'x')==NULL) continue;
        // Only mappings that refer to a file (absolute path).
        if (fname[0]!='/') continue;
        offset = parse_hex(xxx, &c); if (c==0) continue;
        int len = snprintf(path, sizeof(path), "/proc/%d/root%s", pid, fname);
        if (len<0 || (size_t)len>=sizeof(path)) continue;   // path would be truncated
        load_symbol_from_file(path, start, end-start, offset, *store);
    }
    fclose(fp);
    if (store->size()==0) {
        delete store;
        store = NULL;
    }
    return store;
}

/*
 * Parse kernel function symbols (types 't' and 'T') from /proc/kallsyms.
 * Returns a heap-allocated store owned by the caller, or NULL when the file
 * cannot be opened.
 */
K_STORE_T* load_kernel() {
    FILE* fp = fopen("/proc/kallsyms", "r");
    if (fp == NULL) return NULL;
    unsigned long long addr;
    int c;
    K_STORE_T* store = new K_STORE_T();
    char bb[128], adr[128], type[8], name[128];
    while (fgets(bb, sizeof(bb), fp) != NULL) {
        if (sscanf(bb, "%127s %7s %127s", adr, type, name)!=3) continue;
        if (type[0]!='t'&&type[0]!='T') continue;
        addr = parse_hex(adr, &c); if (c==0) continue;
        (*store)[addr] = string(name);
    }
    fclose(fp);   // close before returning; the original never reached its fclose
    return store;
}

//------------------------------perf profiler-------------------------
// Thin wrapper around the perf_event_open system call.
static long perf_event_open(struct perf_event_attr *perf_event, pid_t pid, int cpu, int group_fd, unsigned long flags) {
    return syscall(__NR_perf_event_open, perf_event, pid, cpu, group_fd, flags);
}

// Per-pid symbol stores, loaded on first sample; a NULL entry means loading failed.
unordered_map<int, STORE_T*> pid_symbols;
K_STORE_T* kernel_symbols = NULL;

struct pollfd polls[MAXCPU];
// res for cleanup: perf fd -> (mmap address, last processed data_head)
static long long psize;
map<int, pair<void*, long long>> res;
TNode* gnode = NULL;

unordered_map<unsigned long long, string> unknowns;

// Set by the signal handler; the main loop polls it and shuts down.
static volatile sig_atomic_t g_stop = 0;

// SIGINT/SIGTERM handler: only sets a flag. The report is written later from
// main(), outside signal context, because int_exit() uses stdio and exit(),
// which are not async-signal-safe.
static void on_signal(int) {
    g_stop = 1;
}

/*
 * Release the perf buffers, write ./report.html from the call tree, print the
 * `unknowns` table and terminate the process with status 0. The argument is
 * ignored. Call from normal context only, not from a signal handler.
 */
void int_exit(int _) {
    (void)_;
    for (auto& x: res) {
        munmap(x.second.first, (1+MAXN)*psize);
        close(x.first);
    }
    res.clear();
    if (gnode!=NULL) {
        FILE* fp = fopen("./report.html", "w");
        if (fp) {
            fprintf(fp, "<head> <link rel=\"stylesheet\" href=\"report.css\"> <script src=\"report.js\"> </script> </head>\n");
            fprintf(fp, "<ul class=\"tree\">\n");
            gnode->printit(fp, 0);
            fprintf(fp, "</ul>\n");
            fclose(fp);
            printf("report done\n");
        }
        gnode = NULL;
    }
    printf("---------------------unknowns-----------------\n");
    for (auto x=unknowns.begin(); x!=unknowns.end(); x++) {
        printf("0x%llx --?> %s\n", (*x).first, (*x).second.c_str());
    }
    exit(0);
}

/*
 * Process one record of a perf ring buffer and, for sample records, add its
 * call chain to the global call tree.
 *
 *   base    start of the ring buffer data area
 *   size    size of the data area in bytes
 *   offset  position of the record (reduced modulo size)
 *
 * The record is read as: header, 4-byte pid (+4 bytes skipped), 8-byte entry
 * count, then that many 8-byte addresses, matching the
 * PERF_SAMPLE_TID|PERF_SAMPLE_CALLCHAIN sample type requested in main().
 * The chain is walked from the last entry to the first. An address that
 * cannot be attributed to a function is skipped, except that after the first
 * resolved user frame an approximate "name?" node is added instead.
 *
 * Returns the record size from the header, or -1 when the entry count is
 * implausible (> 128) and the caller should resynchronise.
 */
int process_event(char *base, unsigned long long size, unsigned long long offset) {
    offset%=size;
    // assuming the header would fit within size
    struct perf_event_header* p = (struct perf_event_header*) (base+offset);
    offset+=sizeof(*p); if (offset>=size) offset-=size;
    if (p->type != PERF_RECORD_SAMPLE) return p->size;
    // pid, tid
    int pid = *((int *)(base+offset)); offset+=8; if (offset>=size) offset-=size;
    unsigned long long nr = *((unsigned long long*)(base+offset)); offset+=8; if (offset>=size) offset-=size;
    if (nr>128) return -1;
    if (nr==0) return p->size;

    if (gnode==NULL) gnode=new TNode();
    TNode* r = gnode;
    if (pid_symbols.count(pid)==0) pid_symbols[pid] = load_symbol_pid(pid);
    STORE_T* px = pid_symbols[pid];
    // First chain entry; its top byte is the reference for telling kernel-side
    // entries from user-side ones below.
    unsigned long long addr0 = *((unsigned long long *)(base+offset));
    char user_mark=0, start_mark=0;
    for (int i=nr-1; i>=0; i--) {
        unsigned long long o = i*8+offset; if (o>=size) o-=size;   // entries may wrap around the ring
        unsigned long long addr = *((unsigned long long*)(base+o));
        if (addr==0) continue; // something wrong?
        if ((addr>>56)==(addr0>>56) && (p->misc&PERF_RECORD_MISC_KERNEL)) {
            // Skip the first kernel-side entry after user frames; no idea how
            // to resolve it correctly for now.
            // NOTE(review): presumably this is a context-marker entry - confirm.
            if (user_mark) { user_mark=0; continue; }
            // check in kernel
            if (kernel_symbols&&!kernel_symbols->empty()) {
                auto x = kernel_symbols->upper_bound(addr);
                if (x==kernel_symbols->begin()) {
                    r = r->add(string("unknown"));
                } else {
                    x--;
                    r = r->add(x->second);
                }
            } else {
                r = r->add(string("unknown"));
            }
            continue;
        }
        // user-side entry
        if (gflag_kernel_only) continue;
        if (px) {
            auto x = px->upper_bound(addr);
            if (x==px->begin()) {
                // Below the first known symbol.
                if (start_mark) {
                    auto y = x->second;
                    r = r->add(y.first+"?");
                }
            } else {
                x--;
                auto y = x->second;
                if (y.second && addr>x->first+y.second) {
                    // Past the end of the preceding symbol: name the gap after
                    // its neighbours.
                    if (start_mark) {
                        x++;
                        if (x==px->end()) r = r->add(y.first+"??");
                        else {
                            auto z = x->second;
                            r = r->add(y.first+"?"+z.first);
                        }
                    }
                } else {
                    start_mark=1;
                    r = r->add(y.first);
                }
            }
        }
        user_mark=1;
    }
    // add() only bumps the parent's counter, so count the sample on the
    // innermost resolved frame too. Otherwise leaf functions always show a
    // count of 0 and are filtered out of the report.
    if (r!=gnode) r->c++;
    return p->size;
}

/*
 * Usage: <prog> <pid>   (a negative pid selects kernel-only call chains)
 *
 * Finds the cgroup of the pid, attaches a CPU-clock sampling perf event to
 * that cgroup on every online CPU, collects call chains until poll() fails or
 * SIGINT/SIGTERM arrives, then writes ./report.html via int_exit().
 */
int main(int argc, char *argv[]) {
    if (argc<2) { printf("Need pid\n"); return 1; }
    kernel_symbols = load_kernel();
    int pid = atoi(argv[1]);
    if (pid<0) { gflag_kernel_only = 1; pid=-pid; }
    if (pid==0) { printf("invalid pid %s\n", argv[1]); return 1; }

    // find cgroup
    char xb[256], xb2[sizeof(xb)+32];   // xb2 holds a fixed prefix plus a path taken from xb
    int i, j, k, fd;
    void* addr;
    snprintf(xb, sizeof(xb), "/proc/%d/cgroup", pid);
    FILE* fp = fopen(xb, "r");
    if (fp==NULL) error("fail to open cgroup file");
    char *p;
    xb2[0]=0;
    int cgroup_name_len=0;
    while ((p = fgets(xb, sizeof(xb), fp)) != NULL) {
        // Each line is "id:controllers:path".
        i=0; while (p[i]&&p[i]!=':') i++;
        if (p[i]==0) continue;
        if (strstr(p, "perf_event")) {
            // Line naming the perf_event controller: use it and stop.
            i++; while (p[i]!=':'&&p[i]) i++;
            if (p[i]!=':') continue;
            i++;
            j=i; while (p[j]!='\r'&&p[j]!='\n'&&p[j]!=0) j++;
            p[j]=0;
            snprintf(xb2, sizeof(xb2), "/sys/fs/cgroup/perf_event%s", p+i);
            cgroup_name_len=j-i;
            break;
        } else if (p[i+1]==':') {
            // Empty controller list: remember it, but keep looking for a
            // perf_event line.
            i+=2;
            j=i; while (p[j]!='\r'&&p[j]!='\n'&&p[j]!=0) j++;
            p[j]=0;
            snprintf(xb2, sizeof(xb2), "/sys/fs/cgroup/%s", p+i);
            cgroup_name_len=j-i;
        }
    }
    fclose(fp);
    // Not an errno failure, so do not use perror() here.
    if (xb2[0]==0) { fprintf(stderr, "no proper cgroup found\n"); return 1; }
    if (cgroup_name_len<2) {
        printf("cgroup %s seems to be root, not allowed\n", xb2);
        return -1;
    }
    printf("try to use cgroup %s\n", xb2);
    int cgroup_id = open(xb2, O_RDONLY|O_CLOEXEC);
    if (cgroup_id<0) { perror("error open cgroup dir"); return 1; }

    // start perf event
    psize = sysconf(_SC_PAGE_SIZE); // getpagesize();
    int cpu_num = sysconf(_SC_NPROCESSORS_ONLN);
    struct perf_event_attr attr;
    memset(&attr, 0, sizeof(attr));
    attr.type = PERF_TYPE_SOFTWARE;
    attr.size = sizeof(attr);
    attr.config = PERF_COUNT_SW_CPU_CLOCK;
    attr.sample_freq = 777; // adjust it
    attr.freq = 1;
    attr.wakeup_events = 16;
    attr.sample_type = PERF_SAMPLE_TID|PERF_SAMPLE_CALLCHAIN;
    attr.sample_max_stack = 32;
    if (gflag_kernel_only) attr.exclude_callchain_user = 1;
    for (i=0, k=0; i<cpu_num&&i<MAXCPU; i++) {
        printf("attaching cpu %d\n", i);
        fd = perf_event_open(&attr, cgroup_id, i, -1, PERF_FLAG_FD_CLOEXEC|PERF_FLAG_PID_CGROUP);
        if (fd<0) { perror("fail to open perf event"); continue; }
        // One metadata page followed by MAXN data pages.
        addr = mmap(NULL, (1+MAXN)*psize, PROT_READ, MAP_SHARED, fd, 0);
        if (addr == MAP_FAILED) { perror("mmap failed"); close(fd); continue; }
        res[fd] = make_pair(addr, 0);
        polls[k].fd = fd;
        polls[k].events = POLLIN;
        polls[k].revents = 0;
        k++;
    }
    if (k==0) { printf("no cpu event attached at all\n"); return 1; }

    signal(SIGINT, on_signal);
    signal(SIGTERM, on_signal);

    unsigned long long head;
    int event_size;
    struct perf_event_mmap_page *mp;
    while (!g_stop) {
        // Finite timeout so a signal arriving just before poll() is still
        // noticed promptly.
        int ready = poll(polls, k, 500);
        if (ready<0) break;        // interrupted by a signal, or a hard error
        if (ready==0) continue;    // timeout: re-check the stop flag
        for (i=0; i<k; i++) {
            if ((polls[i].revents&POLLIN)==0) continue;
            fd = polls[i].fd;
            addr = res[fd].first;
            mp = (struct perf_event_mmap_page *)addr;
            head = res[fd].second;
            // Pause output while the buffer is being read.
            ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 1);
            if (head>mp->data_head) head=mp->data_head;
            // Keep the distance to data_head within one buffer length.
            head = mp->data_head-((mp->data_head-head)%mp->data_size);
            while (head<mp->data_head) {
                event_size = process_event((char*)addr+mp->data_offset, mp->data_size, head);
                if (event_size<=0) {
                    // resync; a zero-sized record would otherwise loop forever
                    head=mp->data_head;
                    break;
                }
                head += event_size;
            }
            res[fd].second = mp->data_head;
            ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 0);
        }
    }

    int_exit(0);
    return 0;
}