#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <poll.h>
#include <signal.h>
#include <fcntl.h>
#include <elf.h>

#include <vector>
#include <string>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <algorithm>
#include <functional>
#include <tuple>
#include <utility>
22 using namespace std;
23
24
// Page count used for every perf ring-buffer mapping: each mmap() is (1 + MAXN) pages long.
#define MAXN 512
// Capacity of the pollfd table, i.e. the largest number of CPUs that can be attached.
#define MAXCPU 128
// Report msg through perror() and terminate the process with exit status 1.
#define error(msg) do { perror(msg); exit(1); } while(0)

// Set to 1 by main() when the pid argument is negative; user-space frames are then skipped.
static char gflag_kernel_only = 0;
30
31 //--------------------------------Tree for call chain and report-------------------------------
32 //
/*
 * Node of the aggregated call-chain tree that is rendered into the HTML report.
 *   c - number of add() calls made on this node
 *   s - child nodes keyed by frame name; children are allocated by add() and
 *       are never freed by this struct
 */
struct TNode {
    int c=0;
    std::unordered_map<std::string, TNode*> s;

    /*
     * Count one more pass through this node and return the child named n,
     * creating it on first use.
     */
    struct TNode *add(std::string n) {
        c++;
        TNode *&child = s[n];          // single lookup; inserts a null slot when missing
        if (child==nullptr) child = new TNode();
        return child;
    }

    /*
     * Escape the characters that are special in HTML text and attribute values.
     * Frame names come from symbol tables of arbitrary binaries, so they must
     * not be able to inject markup into the report.
     */
    static std::string escape_html(const std::string &text) {
        std::string out;
        out.reserve(text.size());
        for (char ch: text) {
            switch (ch) {
            case '&': out += "&amp;"; break;
            case '<': out += "&lt;"; break;
            case '>': out += "&gt;"; break;
            case '"': out += "&quot;"; break;
            default: out += ch; break;
            }
        }
        return out;
    }

    /*
     * Write the children of this node to fp as nested <li> elements, largest
     * count first. Children whose count is below 1% of this node's count are
     * omitted together with their subtrees.
     *   fp - output stream of the report
     *   k  - first free numeric id for the generated checkbox/label pairs
     * Returns the next free id after everything printed by this call.
     */
    int printit(FILE *fp, int k) {
        if (s.empty()) return k;
        using tt = std::tuple<int, std::string, TNode*>;
        std::vector<tt> xx;
        xx.reserve(s.size());
        for (const auto &x: s) xx.emplace_back(x.second->c, x.first, x.second);
        // descending by count, then by name (names are unique, so the pointer never decides)
        std::sort(xx.begin(), xx.end(), std::greater<tt>());
        for (const auto &x: xx) {
            const int count = std::get<0>(x);
            if (100.0*count/c<1) continue;
            const std::string name = escape_html(std::get<1>(x));
            TNode *nx = std::get<2>(x);
            fprintf(fp, "<li>\n");
            fprintf(fp, "<input type=\"checkbox\" id=\"c%d\" />\n", k);
            fprintf(fp, "<label class=\"tree_label\" for=\"c%d\">%s(%.3f%% %d/%d)</label>\n", k, name.c_str(), 100.0*count/c, count, c);
            fprintf(fp, "<ul>\n");
            k = nx->printit(fp, k+1);
            fprintf(fp, "</ul>\n");
            fprintf(fp, "</li>\n");
        }
        return k;
    }
};
65
66 //--------------------------------symbols-------------------------------------------
// Per-process symbol table: relocated start address -> (symbol name, symbol size).
using STORE_T = std::map<unsigned long long, std::pair<std::string, unsigned long long>>;
// Kernel symbol table: start address -> symbol name.
using K_STORE_T = std::map<unsigned long long, std::string>;

/*
 * Load the FUNC symbols of a 64-bit ELF image into store, relocated to where
 * the image is mapped in the target process.
 *
 *   fp       - ELF file, positioned at its start
 *   v_addr   - address at which the executable mapping starts in the process
 *   v_size   - length of that mapping (currently unused)
 *   v_offset - file offset of that mapping; the executable program header
 *              whose p_offset equals it is the segment being described
 *   store    - receives relocated address -> (name, size); when two symbols
 *              share an address the one with the larger size is kept
 *
 * Nothing is stored when no executable segment matches v_offset or when a
 * program/section header cannot be read. Symbol tables with a zero or
 * undersized sh_entsize, or with an sh_link outside the section table, are
 * skipped instead of looping forever or indexing out of bounds.
 */
void parse_elf64(FILE *fp, unsigned long long v_addr, unsigned long long v_size, unsigned long long v_offset, STORE_T& store) {
    (void)v_size;
    Elf64_Ehdr ehdr;
    if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) return;

    // locate the executable segment backing the mapping
    unsigned long long p_vaddr = 0, p_size = 0;
    bool found = false;
    unsigned long long offset = ehdr.e_phoff;
    for (int i = 0; i < ehdr.e_phnum; i++, offset += ehdr.e_phentsize) {
        Elf64_Phdr phdr;
        if (fseek(fp, offset, SEEK_SET) < 0) { perror("fail to seek"); return; }
        if (fread(&phdr, sizeof(phdr), 1, fp) != 1) { perror("fail to read program header"); return; }
        if ((phdr.p_flags & PF_X) && phdr.p_offset == v_offset) {
            p_vaddr = phdr.p_vaddr;
            p_size = phdr.p_memsz ? phdr.p_memsz : 0xffffffff;
            found = true;
            break;
        }
    }
    if (!found) { printf("No program header match offset found, fail to load\n"); return; }

    // load section headers
    std::vector<Elf64_Shdr> headers;
    headers.reserve(ehdr.e_shnum);
    offset = ehdr.e_shoff;
    for (int i = 0; i < ehdr.e_shnum; i++, offset += ehdr.e_shentsize) {
        Elf64_Shdr shdr;
        if (fseek(fp, offset, SEEK_SET) < 0) { perror("fail to seek"); return; }
        if (fread(&shdr, sizeof(shdr), 1, fp) != 1) { perror("fail to read sec header"); return; }
        headers.push_back(shdr);
    }

    // walk every symbol table and keep the functions that live in the segment
    for (const Elf64_Shdr &table : headers) {
        if (table.sh_type != SHT_SYMTAB && table.sh_type != SHT_DYNSYM) continue;
        const unsigned long long item_size = table.sh_entsize;
        if (item_size < sizeof(Elf64_Sym)) continue;          // 0 would never advance the loop
        const unsigned long long link = table.sh_link;
        if (link == 0 || link >= headers.size()) continue;    // no usable string table
        const Elf64_Shdr &names = headers[link];
        const unsigned long long size = table.sh_size;
        for (unsigned long long k = 0; size - k >= item_size; k += item_size) {
            Elf64_Sym symb;
            // a failed read means the table is truncated; later entries cannot succeed either
            if (fseek(fp, table.sh_offset + k, SEEK_SET) < 0) break;
            if (fread(&symb, sizeof(symb), 1, fp) != 1) break;
            if (ELF64_ST_TYPE(symb.st_info) != STT_FUNC) continue;
            if (symb.st_shndx == 0) continue;                  // undefined (imported) symbol
            if (symb.st_name == 0 || symb.st_name >= names.sh_size) continue;
            const unsigned long long fsize = symb.st_size;
            unsigned long long faddr = symb.st_value;
            // keep only addresses inside [p_vaddr, p_vaddr + p_size]; written without
            // the addition so it cannot overflow, and with the lower bound so the
            // relocation below cannot wrap around
            if (faddr < p_vaddr || faddr - p_vaddr > p_size) continue;
            if (fseek(fp, names.sh_offset + symb.st_name, SEEK_SET) < 0) continue;
            char fname[128];
            const size_t got = fread(fname, 1, sizeof(fname) - 1, fp);
            if (got == 0) continue;
            fname[got] = 0;       // names longer than the buffer are truncated
            faddr = faddr - p_vaddr + v_addr;
            auto it = store.find(faddr);
            if (it == store.end()) store.emplace(faddr, std::make_pair(std::string(fname), fsize));
            else if (it->second.second < fsize) it->second = std::make_pair(std::string(fname), fsize);
        }
    }
}
151
load_symbol_from_file(const char * path,unsigned long long addr,unsigned long long size,unsigned long long offset,STORE_T & store)152 int load_symbol_from_file(const char *path, unsigned long long addr, unsigned long long size, unsigned long long offset, STORE_T& store) {
153 printf("loading symble from %s\n", path);
154 FILE *fp = fopen(path, "rb");
155 if (fp==NULL) { perror("fail to open file"); return -1; }
156 char ident[EI_NIDENT], c;
157 int err=0;
158 int rc = fread(ident, sizeof(ident), 1, fp);
159 if (rc != 1) { perror("fail to read ident"); err=-1; goto end; }
160 if (ident[0]!=0x7f) { printf("not a elf file\n"); err=-1; goto end; }
161 c=ident[4];
162 rc = fseek(fp, 0, SEEK_SET); if (rc<0) { perror("fail to rewind"); goto end; }
163 if (c == ELFCLASS32) {
164 printf("32bit elf not supported yet\n"); err=-2; goto end;
165 } else if (c == ELFCLASS64) {
166 parse_elf64(fp, addr, size, offset, store);
167 }
168
169 end:
170 fclose(fp);
171 return err;
172 }
173
/* Value of one hexadecimal digit, or -1 when ch is not a hex digit. */
static int hex_digit_value(char ch) {
    if (ch>='0'&&ch<='9') return ch-'0';
    if (ch>='a'&&ch<='f') return 10+ch-'a';
    if (ch>='A'&&ch<='F') return 10+ch-'A';
    return -1;
}

/*
 * Parse an unsigned hexadecimal number at the start of p.
 *
 * Leading spaces and tabs are skipped and an optional "0x"/"0X" prefix is
 * accepted when a hex digit follows it. Parsing stops at the first character
 * that is not a hex digit.
 *
 *   p - NUL-terminated text
 *   n - receives the index in p of the first character after the number, or 0
 *       when no hex digit was found at all
 *
 * Returns the parsed value (0 when nothing was parsed). Digits beyond 64 bits
 * are shifted out silently.
 */
static unsigned long long parse_hex(char *p, int *n) {
    unsigned long long r=0;
    int i=0;
    *n = 0;
    while(p[i]==' '||p[i]=='\t') i++;
    // only a real "0x" followed by a digit is a prefix; "1x5" is the number 1
    if (p[i]=='0' && (p[i+1]=='x'||p[i+1]=='X') && hex_digit_value(p[i+2])>=0) i+=2;
    const int first_digit = i;
    for (int v; (v = hex_digit_value(p[i])) >= 0; i++) r=(r<<4)+v;
    if (i==first_digit) return 0;   // no digit consumed: report failure through *n == 0
    *n = i;
    return r;
}
193
load_symbol_pid(int pid)194 STORE_T* load_symbol_pid(int pid) {
195 printf("loading symbols for %d\n", pid);
196 char bb[256];
197 sprintf(bb, "/proc/%d/maps", pid);
198 FILE* fp = fopen(bb, "r");
199 if (fp==NULL) return NULL;
200 STORE_T *store = new STORE_T();
201 unsigned long long start, end, offset, inode;
202 char *p;
203 int i, c, j;
204 char fname[128], xx[64], xxx[32], mod[16], idx[16];
205 while(1) {
206 p=fgets(bb, sizeof(bb), fp); if (p==NULL) break;
207 if (sscanf(p, "%s %s %s %s %lld %s", xx, mod, xxx, idx, &inode, fname)!=6) continue;
208 i=0; c=0;
209 start = parse_hex(xx, &c); if (c==0) continue; i+=c; if (p[i]!='-') continue; i++;
210 end = parse_hex(xx+i, &c); if (c==0) continue;
211 // parse type
212 for (j=0; j<8; j++) if (mod[j]=='x') break; if (j>=8) continue;
213 if (fname[0]!='/') continue;
214 offset = parse_hex(xxx, &c); if (c==0) continue;
215 // remaining should contains '/' indicating this mmap is refering to a file
216 sprintf(bb, "/proc/%d/root%s", pid, fname);
217 load_symbol_from_file(bb, start, end-start, offset, *store);
218 }
219 fclose(fp);
220 if (store->size()==0) {
221 delete store;
222 store = NULL;
223 }
224 return store;
225 }
226
227 /* parse kernel func symbols from /proc/kallsyms */
load_kernel()228 K_STORE_T* load_kernel() {
229 FILE* fp = fopen("/proc/kallsyms", "r");
230 if (fp == NULL) return NULL;
231 char *p;
232 unsigned long long addr;
233 int c;
234 K_STORE_T* store = new K_STORE_T();
235 char bb[128], adr[128], type[8], name[128];
236 while(1) {
237 p = fgets(bb, sizeof(bb), fp); if (p==NULL) break;
238 if (sscanf(p, "%s %s %s", adr, type, name)!=3) continue;;
239 if (type[0]!='t'&&type[0]!='T') continue;
240 addr=parse_hex(adr, &c); if (c==0) continue;
241 (*store)[addr] = string(name);
242 }
243 return store;
244 fclose(fp);
245 }
246
247 //------------------------------perf profiler-------------------------
/*
 * Thin wrapper that issues the perf_event_open system call with the given
 * arguments and hands back the raw syscall() result.
 */
static long perf_event_open(struct perf_event_attr *perf_event, pid_t pid, int cpu, int group_fd, unsigned long flags) {
    const long result = syscall(__NR_perf_event_open, perf_event, pid, cpu, group_fd, flags);
    return result;
}
251 unordered_map<int, STORE_T*> pid_symbols;
252 K_STORE_T* kernel_symbols = NULL;
253
254 struct pollfd polls[MAXCPU];
255 // res for cleanup
256 static long long psize;
257 map<int, pair<void*, long long>> res;
258 TNode* gnode = NULL;
259
260 unordered_map<unsigned long long, string> unknowns;
int_exit(int _)261 void int_exit(int _) {
262 for (auto x: res) {
263 auto y = x.second;
264 void* addr = y.first;
265 munmap(addr, (1+MAXN)*psize);
266 close(x.first);
267 }
268 res.clear();
269 if (gnode!=NULL) {
270 FILE* fp = fopen("./report.html", "w");
271 if (fp) {
272 fprintf(fp, "<head> <link rel=\"stylesheet\" href=\"report.css\"> <script src=\"report.js\"> </script> </head>\n");
273 fprintf(fp, "<ul class=\"tree\">\n");
274 gnode->printit(fp, 0);
275 fprintf(fp, "</ul>\n");
276 fclose(fp);
277 printf("report done\n");
278 }
279 gnode = NULL;
280 }
281 printf("---------------------unknowns-----------------\n");
282 for (auto x=unknowns.begin(); x!=unknowns.end(); x++) {
283 printf("0x%llx --?> %s\n", (*x).first, (*x).second.c_str());
284 }
285 exit(0);
286 }
287 /*
288 perf call chain process
289 For now, if a address would not be located to some function, the address would be skipped.
290 */
process_event(char * base,unsigned long long size,unsigned long long offset)291 int process_event(char *base, unsigned long long size, unsigned long long offset) {
292 struct perf_event_header* p = NULL;
293 int pid, xpid;
294 unsigned long long time;
295 offset%=size;
296 // assuming the header would fit within size
297 p = (struct perf_event_header*) (base+offset);
298 offset+=sizeof(*p); if (offset>=size) offset-=size;
299 if (p->type != PERF_RECORD_SAMPLE) return p->size;
300 // pid, tip;
301 pid = *((int *)(base+offset)); offset+=8; if (offset>=size) offset-=size;
302 unsigned long long nr = *((unsigned long long*)(base+offset)); offset+=8; if (offset>=size) offset-=size;
303 if (nr>128) return -1;
304 unsigned long long addr, o, addr0;
305 if (nr) {
306 if (gnode==NULL) gnode=new TNode();
307 char bb[64];
308 TNode* r = gnode;
309 if (pid_symbols.count(pid)==0) pid_symbols[pid] = load_symbol_pid(pid);
310 STORE_T* px = pid_symbols[pid];
311 addr0 = *((unsigned long long *)(base+offset));
312 char user_mark=0, start_mark=0;
313 for (int i=nr-1; i>=0; i--) {
314 o = i*8+offset; if (o>=size) o-=size;
315 addr = *((unsigned long long*)(base+o));
316 if (addr==0) continue; // something wrong?
317 if ((addr>>56)==(addr0>>56) && (p->misc&PERF_RECORD_MISC_KERNEL)) {
318 // skip the cross line command, no idear how to correctly resolve it now.
319 if (user_mark) { user_mark=0; continue; }
320 // check in kernel
321 if (kernel_symbols&&!kernel_symbols->empty()) {
322 auto x = kernel_symbols->upper_bound(addr);
323 if (x==kernel_symbols->begin()) {
324 // sprintf(bb, "0x%llx", addr); r = r->add(string(bb));
325 r = r->add(string("unknown"));
326 } else {
327 x--;
328 r = r->add((*x).second);
329 }
330 } else {
331 // sprintf(bb, "0x%llx", addr); r = r->add(string(bb));
332 r = r->add(string("unknown"));
333 }
334 } else {
335 if (gflag_kernel_only) continue;
336 if (px) {
337 auto x = px->upper_bound(addr);
338 if (x==px->begin()) {
339 // sprintf(bb, "0x%llx", addr); r = r->add(string(bb));
340 if (start_mark) {
341 auto y = (*x).second;
342 r = r->add(y.first+"?");
343 }
344 } else {
345 x--;
346 auto y = (*x).second;
347 if (y.second && addr>(*x).first+y.second) {
348 // r = r->add(y.first);
349 // sprintf(bb, "0x%llx", addr); r = r->add(string(bb));
350 if (start_mark) {
351 x++;
352 if (x==px->end()) r = r->add(y.first+"??");
353 else {
354 auto z = (*x).second;
355 r = r->add(y.first+"?"+z.first);
356 }
357 }
358 } else {
359 start_mark=1;
360 r = r->add(y.first);
361 }
362 }
363 } else {
364 // sprintf(bb, "0x%llx", addr); r = r->add(string(bb));
365 // r = r->add(string("unknown"));
366 }
367 user_mark=1;
368 }
369 }
370 }
371 return p->size;
372 }
373
main(int argc,char * argv[])374 int main(int argc, char *argv[]) {
375 kernel_symbols = load_kernel();
376 if (argc<2) { printf("Need pid\n"); return 1; }
377 int pid = atoi(argv[1]);
378 if (pid<0) { gflag_kernel_only = 1; pid=-pid; }
379 if (pid==0) { printf("invalid pid %s\n", argv[1]); return 1; }
380 // find cgroup
381 char xb[256], xb2[256];
382 int i, j, k, fd;
383 void* addr;
384 sprintf(xb, "/proc/%d/cgroup", pid);
385 FILE* fp = fopen(xb, "r");
386 if (fp==NULL) error("fail to open cgroup file");
387 char *p;
388 xb2[0]=0;
389 int cgroup_name_len=0;
390 while(1) {
391 p = fgets(xb, sizeof(xb), fp); if (p==NULL) break;
392 i=0; while(p[i]&&p[i]!=':') i++; if (p[i]==0) continue;
393 if (strstr(p, "perf_event")) {
394 i++; while(p[i]!=':'&&p[i]) i++; if (p[i]!=':') continue; i++;
395 j=i; while(p[j]!='\r'&&p[j]!='\n'&&p[j]!=0) j++; p[j]=0;
396 sprintf(xb2, "/sys/fs/cgroup/perf_event%s", p+i);
397 cgroup_name_len=j-i;
398 break;
399 } else if (p[i+1]==':') {
400 i+=2; j=i; while(p[j]!='\r'&&p[j]!='\n'&&p[j]!=0) j++; p[j]=0;
401 sprintf(xb2, "/sys/fs/cgroup/%s", p+i);
402 cgroup_name_len=j-i;
403 }
404 }
405 fclose(fp);
406 if (xb2[0]==0) error("no proper cgroup found\n");
407 if (cgroup_name_len<2) {
408 printf("cgroup %s seems to be root, not allowed\n", xb2);
409 return -1;
410 }
411 printf("try to use cgroup %s\n", xb2);
412 int cgroup_id = open(xb2, O_CLOEXEC);
413 if (cgroup_id<=0) { perror("error open cgroup dir"); return 1; }
414 // start perf event
415 psize = sysconf(_SC_PAGE_SIZE); // getpagesize();
416 int cpu_num = sysconf(_SC_NPROCESSORS_ONLN);
417 struct perf_event_attr attr;
418 memset(&attr, 0, sizeof(attr));
419 attr.type = PERF_TYPE_SOFTWARE;
420 attr.size = sizeof(attr);
421 attr.config = PERF_COUNT_SW_CPU_CLOCK;
422 attr.sample_freq = 777; // adjust it
423 attr.freq = 1;
424 attr.wakeup_events = 16;
425 attr.sample_type = PERF_SAMPLE_TID|PERF_SAMPLE_CALLCHAIN;
426 attr.sample_max_stack = 32;
427 if (gflag_kernel_only) attr.exclude_callchain_user = 1;
428 for (i=0, k=0; i<cpu_num&&i<MAXCPU; i++) {
429 printf("attaching cpu %d\n", i);
430 fd = perf_event_open(&attr, cgroup_id, i, -1, PERF_FLAG_FD_CLOEXEC|PERF_FLAG_PID_CGROUP);
431 if (fd<0) { perror("fail to open perf event"); continue; }
432 addr = mmap(NULL, (1+MAXN)*psize, PROT_READ, MAP_SHARED, fd, 0);
433 if (addr == MAP_FAILED) { perror("mmap failed"); close(fd); continue; }
434 res[fd] = make_pair(addr, 0);
435 polls[k].fd = fd;
436 polls[k].events = POLLIN;
437 polls[k].revents = 0;
438 k++;
439 }
440 if (k==0) { printf("no cpu event attached at all\n"); return 1; }
441
442 signal(SIGINT, int_exit);
443 signal(SIGTERM, int_exit);
444
445 unsigned long long head;
446 int event_size;
447 struct perf_event_mmap_page *mp;
448 while (poll(polls, k, -1)>0) {
449 // printf("wake\n");
450 for (i=0; i<k; i++) {
451 if ((polls[i].revents&POLLIN)==0) continue;
452 fd = polls[i].fd;
453 addr = res[fd].first;
454 mp = (struct perf_event_mmap_page *)addr;
455 head = res[fd].second;
456 ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 1);
457 if (head>mp->data_head) head=mp->data_head;
458 head = mp->data_head-((mp->data_head-head)%mp->data_size);
459 while(head<mp->data_head) {
460 event_size = process_event((char*)addr+mp->data_offset, mp->data_size, head);
461 if (event_size<0) {
462 // resync
463 head=mp->data_head;
464 break;
465 }
466 head += event_size;
467 }
468 res[fd].second = mp->data_head;
469 ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 0);
470 }
471 }
472
473 int_exit(0);
474 return 0;
475 }
476