#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <poll.h>
#include <signal.h>
#include <fcntl.h>
#include <elf.h>

#include <vector>
#include <string>
#include <map>
#include <unordered_map>
#include <algorithm>
#include <utility>
using namespace std;


#define MAXN 128      // number of data pages mapped per perf ring buffer (plus one header page)
#define MAXCPU 1024   // upper bound on the number of per-CPU perf events / poll slots
#define error(msg) do { perror(msg); exit(1); } while(0)
//--------------------------------Tree for call chain and report-------------------------------
//
/*
 * One node of the call-chain tree.
 *
 * `c` is incremented every time add() is called on the node, i.e. it counts
 * the samples that continued from this frame into a child frame. `s` maps a
 * child frame's name to its node. Nodes are allocated with `new` and never
 * freed by this struct.
 */
struct TNode {
    int c=0;
    unordered_map<string, TNode*> s;

    /* Count one more sample through this node and return the child named `n`,
     * creating it on first use. */
    struct TNode *add(const string& n) {
        c++;
        TNode*& child = s[n];   // single lookup; default-inserts nullptr for a new name
        if (child == nullptr) child = new TNode();
        return child;
    }

    /*
     * Write this node's children to `fp` as nested HTML <li> items, most
     * frequent first (ties broken by name, descending). Children accounting
     * for less than 1% of this node's count are skipped. `k` is the next
     * unused checkbox id; the next unused id after this subtree is returned.
     */
    int printit(FILE *fp, int k) {
        using Entry = pair<const string, TNode*>;
        vector<const Entry*> order;
        order.reserve(s.size());
        for (const auto& e : s) order.push_back(&e);
        sort(order.begin(), order.end(), [](const Entry* a, const Entry* b) {
            if (a->second->c != b->second->c) return a->second->c > b->second->c;
            return a->first > b->first;
        });
        for (const Entry* e : order) {
            const int count = e->second->c;
            // `c` is non-zero here: a node only has children after add() ran on it.
            if (100.0*count/c<1) continue;
            fprintf(fp, "<li>\n");
            fprintf(fp, "<input type=\"checkbox\" id=\"c%d\" />\n", k);
            fprintf(fp, "<label class=\"tree_label\" for=\"c%d\">%s(%.3f%% %d/%d)</label>\n", k, e->first.c_str(), 100.0*count/c, count, c);
            fprintf(fp, "<ul>\n");
            k = e->second->printit(fp, k+1);
            fprintf(fp, "</ul>\n");
            fprintf(fp, "</li>\n");
        }
        return k;
    }
};

//--------------------------------symbols-------------------------------------------
// user symbols: relocated start address -> (function name, function size)
using STORE_T = map<unsigned long long, pair<string, unsigned long long>>;
// kernel symbols: start address -> function name
using K_STORE_T = map<unsigned long long, string>;

/*
 * Read a NUL-terminated string from the current position of `fp` into `out`.
 * At most `max_len` bytes are kept so a corrupt string table cannot make the
 * read run away. Returns false only when nothing at all could be read.
 */
static bool read_cstring(FILE *fp, string& out, size_t max_len = 4096) {
    out.clear();
    int ch;
    while ((ch = fgetc(fp)) != EOF) {
        if (ch == 0) return true;
        out.push_back(static_cast<char>(ch));
        if (out.size() >= max_len) return true;
    }
    return !out.empty();
}

/*
 * Load FUNC symbols referring to the executable segment whose file offset is
 * `v_offset`, and relocate their addresses to the mapping that starts at
 * `v_addr`. Results are inserted into `store`, keyed by relocated address.
 *
 * fp       ELF64 file positioned at offset 0
 * v_addr   start address of the mapping in the target process
 * v_size   size of the mapping (currently unused)
 * v_offset file offset of the mapping; must equal the segment's p_offset
 */
void parse_elf64(FILE *fp, unsigned long long v_addr, unsigned long long v_size, unsigned long long v_offset, STORE_T& store) {
    (void)v_size;
    Elf64_Ehdr ehdr;
    if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) return;

    // Find the executable program header that backs this mapping.
    bool found = false;
    unsigned long long p_vaddr = 0, p_size = 0;
    unsigned long long offset = ehdr.e_phoff;
    for (int i = 0; i < ehdr.e_phnum; i++, offset += ehdr.e_phentsize) {
        Elf64_Phdr phdr;
        if (fseek(fp, offset, SEEK_SET) < 0) { perror("fail to seek"); return; }
        if (fread(&phdr, sizeof(phdr), 1, fp) != 1) { perror("fail to read program header"); return; }
        if ((phdr.p_flags & PF_X) && phdr.p_offset == v_offset) {
            p_vaddr = phdr.p_vaddr;
            p_size = phdr.p_memsz ? phdr.p_memsz : 0xffffffff;
            found = true;
            break;
        }
    }
    if (!found) { printf("No program header match offset found, fail to load\n"); return; }

    // Load all section headers; symbol tables refer to their string table by index.
    vector<Elf64_Shdr> headers;
    headers.reserve(ehdr.e_shnum);
    offset = ehdr.e_shoff;
    for (int i = 0; i < ehdr.e_shnum; i++, offset += ehdr.e_shentsize) {
        Elf64_Shdr shdr;
        if (fseek(fp, offset, SEEK_SET) < 0) { perror("fail to seek"); return; }
        if (fread(&shdr, sizeof(shdr), 1, fp) != 1) { perror("fail to read sec header"); return; }
        headers.push_back(shdr);
    }

    string name;
    for (const Elf64_Shdr& sec : headers) {
        if (sec.sh_type != SHT_SYMTAB && sec.sh_type != SHT_DYNSYM) continue;
        const unsigned long long item_size = sec.sh_entsize;
        // A zero entry size would never advance the loop below; a too-small one
        // cannot hold an Elf64_Sym.
        if (item_size < sizeof(Elf64_Sym)) continue;
        // sh_link is the index of the associated string table; reject bogus indexes.
        if (sec.sh_link == 0 || sec.sh_link >= headers.size()) continue;
        const unsigned long long names_off = headers[sec.sh_link].sh_offset;

        for (unsigned long long k = 0; k + item_size <= sec.sh_size; k += item_size) {
            Elf64_Sym symb;
            if (fseek(fp, sec.sh_offset + k, SEEK_SET) < 0) continue;
            if (fread(&symb, sizeof(symb), 1, fp) != 1) continue;
            if (ELF64_ST_TYPE(symb.st_info) != STT_FUNC) continue;
            if (symb.st_shndx == 0) continue;   // undefined symbol
            if (symb.st_size == 0) continue;
            if (symb.st_name == 0) continue;    // nameless symbol
            const unsigned long long faddr = symb.st_value;
            // Keep only symbols inside the matched segment; an address below
            // p_vaddr would wrap around when relocated.
            if (faddr < p_vaddr || faddr > p_vaddr + p_size) continue;
            if (fseek(fp, names_off + symb.st_name, SEEK_SET) < 0) continue;
            if (!read_cstring(fp, name)) continue;
            store[faddr - p_vaddr + v_addr] = make_pair(name, (unsigned long long)symb.st_size);
        }
    }
}

/*
 * Open the ELF file at `path` and load its function symbols for the mapping
 * [addr, addr+size) with file offset `offset` into `store`.
 * Returns 0 on success, -1 on I/O error or non-ELF input, -2 for 32-bit ELF.
 */
int load_symbol_from_file(const char *path, unsigned long long addr, unsigned long long size, unsigned long long offset, STORE_T& store) {
    printf("loading symble from %s\n", path);
    FILE *fp = fopen(path, "rb");
    if (fp==NULL) { perror("fail to open file"); return -1; }

    int err = 0;
    unsigned char ident[EI_NIDENT];
    if (fread(ident, sizeof(ident), 1, fp) != 1) {
        perror("fail to read ident");
        err = -1;
    } else if (memcmp(ident, ELFMAG, SELFMAG) != 0) {
        printf("not a elf file\n");
        err = -1;
    } else if (fseek(fp, 0, SEEK_SET) < 0) {
        perror("fail to rewind");
        err = -1;
    } else if (ident[EI_CLASS] == ELFCLASS32) {
        printf("32bit elf not supported yet\n");
        err = -2;
    } else if (ident[EI_CLASS] == ELFCLASS64) {
        parse_elf64(fp, addr, size, offset, store);
    }

    fclose(fp);
    return err;
}

/*
 * Parse a hexadecimal number from `p`, skipping leading blanks and an optional
 * two-character "0x"-style prefix. `*n` receives the number of characters
 * consumed (including the skipped blanks/prefix), or 0 when `p` holds only
 * blanks or does not start with a hex digit. Returns the parsed value.
 */
static unsigned long long parse_hex(const char *p, int *n) {
    unsigned long long r=0;
    int i=0;
    *n = 0;
    while(p[i]==' '||p[i]=='\t') i++;
    if (p[i]==0) return 0;
    if (p[i+1]=='x') i+=2;
    int v;
    while(p[i]) {
        if (p[i]>='0'&&p[i]<='9') v=p[i]-'0';
        else if (p[i]>='a'&&p[i]<='f') v=10+p[i]-'a';
        else if (p[i]>='A'&&p[i]<='F') v=10+p[i]-'A';
        else break;
        r=(r<<4)+v;
        i++;
    }
    *n = i;
    return r;
}

/*
 * Build the symbol table of process `pid` from every executable, file-backed
 * mapping listed in /proc/<pid>/maps. The backing files are opened through
 * /proc/<pid>/map_files/<start>-<end>.
 * Returns a newly allocated store (owned by the caller), or NULL when the
 * maps file cannot be opened.
 */
STORE_T* load_symbol_pid(int pid) {
    printf("loading symbols for %d\n", pid);
    char path[128];
    snprintf(path, sizeof(path), "/proc/%d/maps", pid);
    FILE* fp = fopen(path, "r");
    if (fp==NULL) return NULL;
    STORE_T *store = new STORE_T();
    // Large enough for the fixed fields plus a long pathname, so one fgets()
    // call normally returns one whole line.
    char line[4096 + 256];
    while (fgets(line, sizeof(line), fp) != NULL) {
        unsigned long long start = 0, end = 0, offset = 0;
        char perms[8];
        int consumed = 0;
        // Line format: start-end perms offset dev inode [pathname]
        if (sscanf(line, "%llx-%llx %7s %llx %n", &start, &end, perms, &offset, &consumed) < 4) continue;
        if (start == 0 || end == 0) continue;
        if (strchr(perms, 'x') == NULL) continue;            // executable mappings only
        // remaining should contain '/' indicating this mapping refers to a file
        if (strchr(line + consumed, '/') == NULL) continue;
        snprintf(path, sizeof(path), "/proc/%d/map_files/%llx-%llx", pid, start, end);
        load_symbol_from_file(path, start, end-start, offset, *store);
    }
    fclose(fp);
    return store;
}

/*
 * Parse kernel function symbols (types 't' and 'T') from /proc/kallsyms.
 * Returns a newly allocated store (owned by the caller), or NULL when the
 * file cannot be opened.
 */
K_STORE_T* load_kernel() {
    FILE* fp = fopen("/proc/kallsyms", "r");
    if (fp == NULL) return NULL;
    K_STORE_T* store = new K_STORE_T();
    char line[1024], adr[128], type[8], name[512];
    while (fgets(line, sizeof(line), fp) != NULL) {
        // Field widths keep each token inside its buffer.
        if (sscanf(line, "%127s %7s %511s", adr, type, name) != 3) continue;
        if (type[0]!='t'&&type[0]!='T') continue;
        int c = 0;
        const unsigned long long addr = parse_hex(adr, &c);
        if (c==0) continue;
        (*store)[addr] = string(name);
    }
    fclose(fp);   // must happen before returning, otherwise the handle leaks
    return store;
}

//------------------------------perf profiler-------------------------
/* Thin wrapper around the perf_event_open system call, which has no libc wrapper here. */
static long perf_event_open(struct perf_event_attr *perf_event, pid_t pid, int cpu, int group_fd, unsigned long flags) {
    return syscall(__NR_perf_event_open, perf_event, pid, cpu, group_fd, flags);
}
// per-process symbol tables, loaded lazily; a NULL value means loading failed
unordered_map<int, STORE_T*> pid_symbols;
K_STORE_T* kernel_symbols = NULL;

struct pollfd polls[MAXCPU];
// res for cleanup 246 static long long psize; 247 map<int, pair<void*, long long>> res; 248 TNode* gnode = NULL; 249 250 void int_exit(int _) { 251 for (auto x: res) { 252 auto y = x.second; 253 void* addr = y.first; 254 munmap(addr, (1+MAXN)*psize); 255 close(x.first); 256 } 257 res.clear(); 258 if (gnode!=NULL) { 259 FILE* fp = fopen("./report.html", "w"); 260 if (fp) { 261 fprintf(fp, "<head> <link rel=\"stylesheet\" href=\"report.css\"> <script src=\"report.js\"> </script> </head>\n"); 262 fprintf(fp, "<ul class=\"tree\">\n"); 263 gnode->printit(fp, 0); 264 fprintf(fp, "</ul>\n"); 265 fclose(fp); 266 printf("report done\n"); 267 } 268 gnode = NULL; 269 } 270 } 271 /* 272 perf call chain process 273 For now, if a address would not be located to some function, the address would be skipped. 274 */ 275 int process_event(char *base, unsigned long long size, unsigned long long offset) { 276 struct perf_event_header* p = NULL; 277 int pid, xpid; 278 unsigned long long time; 279 offset%=size; 280 // assuming the header would fit within size 281 p = (struct perf_event_header*) (base+offset); 282 offset+=sizeof(*p); if (offset>=size) offset-=size; 283 if (p->type != PERF_RECORD_SAMPLE) return p->size; 284 // pid, tip; 285 pid = *((int *)(base+offset)); offset+=8; if (offset>=size) offset-=size; 286 unsigned long long nr = *((unsigned long long*)(base+offset)); offset+=8; if (offset>=size) offset-=size; 287 unsigned long long addr, o, addr0; 288 if (nr) { 289 if (gnode==NULL) gnode=new TNode(); 290 char bb[64]; 291 TNode* r = gnode; 292 if (pid_symbols.count(pid)==0) pid_symbols[pid] = load_symbol_pid(pid); 293 STORE_T* px = pid_symbols[pid]; 294 addr0 = *((unsigned long long *)(base+offset)); 295 char user_mark = 0; 296 for (int i=nr-1; i>=0; i--) { 297 o = i*8+offset; if (o>=size) o-=size; 298 addr = *((unsigned long long*)(base+o)); 299 if ((addr>>56)==(addr0>>56) && (p->misc&PERF_RECORD_MISC_KERNEL)) { 300 // skip the cross line command, no idear how to correctly 
resolve it now. 301 if (user_mark) { user_mark=0; continue; } 302 // check in kernel 303 if (kernel_symbols&&!kernel_symbols->empty()) { 304 auto x = kernel_symbols->upper_bound(addr); 305 if (x==kernel_symbols->begin()) { 306 // sprintf(bb, "0x%llx", addr); r = r->add(string(bb)); 307 } else { 308 x--; 309 r = r->add((*x).second); 310 } 311 } else { 312 // sprintf(bb, "0x%llx", addr); r = r->add(string(bb)); 313 } 314 } else { 315 if (px) { 316 auto x = px->upper_bound(addr); 317 if (x==px->begin()) { 318 // sprintf(bb, "0x%llx", addr); r = r->add(string(bb)); 319 } else { 320 x--; 321 auto y = (*x).second; 322 if (addr>(*x).first+y.second) { 323 // r = r->add(y.first); 324 // sprintf(bb, "0x%llx", addr); r = r->add(string(bb)); 325 } else { 326 r = r->add(y.first); 327 } 328 } 329 } else { 330 // sprintf(bb, "0x%llx", addr); r = r->add(string(bb)); 331 } 332 user_mark=1; 333 } 334 } 335 } 336 return p->size; 337 } 338 339 int main(int argc, char *argv[]) { 340 if (argc != 2) { printf("need pid\n"); return 1; } 341 load_kernel(); 342 int pid = atoi(argv[1]); if (pid<=0) { printf("invalid pid %s\n", argv[1]); return 1; } 343 // find cgroup 344 char xb[256], xb2[256]; 345 int i, j, k; 346 int fd; 347 void *addr; 348 sprintf(xb, "/proc/%d/cgroup", pid); 349 FILE* fp = fopen(xb, "r"); 350 if (fp==NULL) error("fail to open cgroup file"); 351 char *p; 352 xb2[0]=0; 353 int cgroup_name_len=0; 354 while(1) { 355 p = fgets(xb, sizeof(xb), fp); if (p==NULL) break; 356 i=0; while(p[i]&&p[i]!=':') i++; if (p[i]==0) continue; 357 if (strstr(p, "perf_event")) { 358 i++; while(p[i]!=':'&&p[i]) i++; if (p[i]!=':') continue; i++; 359 j=i; while(p[j]!='\r'&&p[j]!='\n'&&p[j]!=0) j++; p[j]=0; 360 sprintf(xb2, "/sys/fs/cgroup/perf_event%s", p+i); 361 cgroup_name_len=j-i; 362 break; 363 } else if (p[i+1]==':') { 364 i+=2; j=i; while(p[j]!='\r'&&p[j]!='\n'&&p[j]!=0) j++; p[j]=0; 365 sprintf(xb2, "/sys/fs/cgroup/%s", p+i); 366 cgroup_name_len=j-i; 367 } 368 } 369 fclose(fp); 370 if 
(xb2[0]==0) error("no proper cgroup found\n"); 371 if (cgroup_name_len<2) { 372 printf("cgroup %s seems to be root, not allowed\n", xb2); 373 return -1; 374 } 375 printf("try to use cgroup %s\n", xb2); 376 int cgroup_id = open(xb2, O_CLOEXEC); 377 if (cgroup_id<=0) { perror("error open cgroup dir"); return 1; } 378 // start perf event 379 psize = sysconf(_SC_PAGE_SIZE); // getpagesize(); 380 int cpu_num = sysconf(_SC_NPROCESSORS_ONLN); 381 struct perf_event_attr attr; 382 memset(&attr, 0, sizeof(attr)); 383 attr.type = PERF_TYPE_SOFTWARE; 384 attr.size = sizeof(attr); 385 attr.config = PERF_COUNT_SW_CONTEXT_SWITCHES; 386 // attr.config = PERF_COUNT_SW_CPU_MIGRATIONS; 387 attr.sample_period = 1; 388 attr.wakeup_events = 32; 389 attr.sample_type = PERF_SAMPLE_TID|PERF_SAMPLE_CALLCHAIN; 390 attr.context_switch = 1; 391 // attr.sample_id_all = 1; 392 for (i=0, k=0; i<cpu_num&&i<MAXCPU; i++) { 393 printf("attaching cpu %d\n", i); 394 fd = perf_event_open(&attr, cgroup_id, i, -1, PERF_FLAG_FD_CLOEXEC|PERF_FLAG_PID_CGROUP); 395 if (fd<0) { perror("fail to open perf event"); continue; } 396 addr = mmap(NULL, (1+MAXN)*psize, PROT_READ, MAP_SHARED, fd, 0); 397 if (addr == MAP_FAILED) { perror("mmap failed"); close(fd); continue; } 398 res[fd] = make_pair(addr, 0); 399 polls[k].fd = fd; 400 polls[k].events = POLLIN; 401 polls[k].revents = 0; 402 k++; 403 } 404 if (k==0) { printf("no cpu event attached at all\n"); return 1; } 405 406 signal(SIGINT, int_exit); 407 signal(SIGTERM, int_exit); 408 409 unsigned long long head; 410 struct perf_event_mmap_page *mp; 411 while (poll(polls, k, -1)>0) { 412 for (i=0; i<k; i++) { 413 if ((polls[i].revents&POLLIN)==0) continue; 414 fd = polls[i].fd; 415 addr = res[fd].first; 416 mp = (struct perf_event_mmap_page *)addr; 417 head = res[fd].second; 418 if (head==mp->data_head) continue; 419 ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 1); 420 head = mp->data_head-((mp->data_head-head)%mp->data_size); 421 while(head<mp->data_head) 
head+=process_event((char*)addr+mp->data_offset, mp->data_size, head); 422 ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 0); 423 res[fd].second = mp->data_head; 424 } 425 } 426 427 int_exit(0); 428 return 0; 429 } 430