xref: /linux-tools/perf/profiler/profiler.cpp (revision f82de63edc71b79d22ecd2c41529c9e8fd19de69)
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <unistd.h>
4 #include <string.h>
5 #include <sys/ioctl.h>
6 #include <linux/perf_event.h>
7 #include <asm/unistd.h>
8 #include <sys/mman.h>
9 #include <sys/stat.h>
10 #include <poll.h>
11 #include <signal.h>
12 #include <fcntl.h>
13 #include <elf.h>
14 #include <string.h>
15 
16 #include <vector>
17 #include <string>
18 #include <map>
19 #include <unordered_map>
20 #include <unordered_set>
21 #include <algorithm>
22 using namespace std;
23 
24 
/* Number of data pages mapped for each perf ring buffer (one extra page is added at mmap time). */
#define MAXN   512
/* Upper bound on the number of per-CPU perf events / poll slots. */
#define MAXCPU 128
/* Report `msg` through perror() and terminate the process with status 1. */
#define error(msg)   \
    do {             \
        perror(msg); \
        exit(1);     \
    } while (0)
28 //--------------------------------Tree for call chain and report-------------------------------
29 //
/*
 * Node of the call-chain tree.
 *
 *   c  number of times add() was called on this node, i.e. how many samples
 *      continued from this frame into some child frame.
 *   s  children keyed by frame name; child nodes are heap-allocated and owned
 *      by the tree (they are never freed).
 */
struct TNode {
    int c=0;
    std::unordered_map<std::string, TNode*> s;

    /* Count one more sample through this node and return the child called `n`,
     * creating it on first use. */
    struct TNode *add(std::string n) {
        c++;
        TNode *&slot = s[n];            // single lookup; value-initialised to nullptr when new
        if (slot == nullptr) slot = new TNode();
        return slot;
    }

    /* Escape the characters that are special in HTML text and attribute values.
     * Frame names come from symbol tables of arbitrary binaries and must not be
     * able to inject markup into the report. */
    static std::string escape_html(const std::string &raw) {
        std::string out;
        out.reserve(raw.size());
        for (char ch : raw) {
            switch (ch) {
                case '&': out += "&amp;";  break;
                case '<': out += "&lt;";   break;
                case '>': out += "&gt;";   break;
                case '"': out += "&quot;"; break;
                default:  out += ch;       break;
            }
        }
        return out;
    }

    /*
     * Write the children of this node to `fp` as nested <li>/<ul> elements,
     * most frequent child first (ties broken by name, descending). Children
     * accounting for less than 1% of this node's count are omitted together
     * with their subtrees.
     *
     * `k` is the next free numeric id for the checkbox/label pairs; the
     * function returns the next free id after everything it printed.
     */
    int printit(FILE *fp, int k) {
        if (s.empty()) return k;

        typedef std::pair<const std::string, TNode*> Entry;
        std::vector<const Entry*> order;
        order.reserve(s.size());
        for (const Entry &e : s) order.push_back(&e);
        std::sort(order.begin(), order.end(), [](const Entry *a, const Entry *b) {
            if (a->second->c != b->second->c) return a->second->c > b->second->c;
            return a->first > b->first;
        });

        for (const Entry *e : order) {
            const int count = e->second->c;
            if (100.0*count/c<1) continue;
            const std::string label = escape_html(e->first);
            fprintf(fp, "<li>\n");
            fprintf(fp, "<input type=\"checkbox\" id=\"c%d\" />\n", k);
            fprintf(fp, "<label class=\"tree_label\" for=\"c%d\">%s(%.3f%% %d/%d)</label>\n", k, label.c_str(), 100.0*count/c, count, c);
            fprintf(fp, "<ul>\n");
            k = e->second->printit(fp, k+1);
            fprintf(fp, "</ul>\n");
            fprintf(fp, "</li>\n");
        }
        return k;
    }
};
62 
63 //--------------------------------symbols-------------------------------------------
/* User-space symbols: relocated start address -> (function name, size in bytes). */
using STORE_T = std::map<unsigned long long, std::pair<std::string, unsigned long long>>;
/* Kernel symbols: start address -> function name. */
using K_STORE_T = std::map<unsigned long long, std::string>;

/*
 * Load the FUNC symbols of a 64-bit ELF file that belong to the executable
 * segment mapped from file offset `v_offset`, and store them relocated to the
 * address the segment is mapped at.
 *
 *   fp        stream positioned at the start of the ELF file
 *   v_addr    virtual address where the segment is mapped in the process
 *   v_size    size of the mapping (currently unused)
 *   v_offset  file offset of the mapping; must equal p_offset of an
 *             executable program header, otherwise nothing is loaded
 *   store     receives relocated_address -> (name, size); when two symbols
 *             share an address the one with the larger size wins
 *
 * Malformed input (unreadable headers, symbol tables with a bad entry size or
 * string-table link) is skipped instead of being trusted.
 */
void parse_elf64(FILE *fp, unsigned long long v_addr, unsigned long long v_size, unsigned long long v_offset, STORE_T& store) {
    (void)v_size;
    Elf64_Ehdr ehdr;
    if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) return;

    // Find the executable program header that backs this mapping.
    unsigned long long p_vaddr = 0, p_size = 0;
    bool found = false;
    unsigned long long offset = ehdr.e_phoff;
    for (int i = 0; i < ehdr.e_phnum; i++, offset += ehdr.e_phentsize) {
        Elf64_Phdr phdr;
        if (fseek(fp, (long)offset, SEEK_SET) < 0) { perror("fail to seek"); return; }
        if (fread(&phdr, sizeof(phdr), 1, fp) != 1) { perror("fail to read program header"); return; }
        if ((phdr.p_flags & PF_X) && phdr.p_offset == v_offset) {
            p_vaddr = phdr.p_vaddr;
            p_size = phdr.p_memsz;
            if (p_size == 0) p_size = 0xffffffff;
            found = true;
            break;
        }
    }
    if (!found) { printf("No program header match offset found, fail to load\n"); return; }

    // Read all section headers up front; symbol tables refer to their string
    // table by section index.
    std::vector<Elf64_Shdr> headers;
    headers.reserve(ehdr.e_shnum);
    offset = ehdr.e_shoff;
    for (int i = 0; i < ehdr.e_shnum; i++, offset += ehdr.e_shentsize) {
        Elf64_Shdr shdr;
        if (fseek(fp, (long)offset, SEEK_SET) < 0) { perror("fail to seek"); return; }
        if (fread(&shdr, sizeof(shdr), 1, fp) != 1) { perror("fail to read sec header"); return; }
        headers.push_back(shdr);
    }

    for (size_t idx = 0; idx < headers.size(); idx++) {
        const Elf64_Shdr &sec = headers[idx];
        if (sec.sh_type != SHT_SYMTAB && sec.sh_type != SHT_DYNSYM) continue;

        const unsigned long long link = sec.sh_link;
        if (link == 0 || link >= headers.size()) continue;      // no / invalid string table
        const unsigned long long item_size = sec.sh_entsize;
        if (item_size < sizeof(Elf64_Sym)) continue;            // 0 would never advance the loop
        const unsigned long long strtab = headers[link].sh_offset;

        for (unsigned long long k = 0; k + item_size <= sec.sh_size; k += item_size) {
            Elf64_Sym symb;
            if (fseek(fp, (long)(sec.sh_offset + k), SEEK_SET) < 0) continue;
            if (fread(&symb, sizeof(symb), 1, fp) != 1) continue;
            if (ELF64_ST_TYPE(symb.st_info) != STT_FUNC) continue;
            if (symb.st_shndx == 0) continue;                   // undefined symbol
            if (symb.st_name == 0) continue;                    // unnamed symbol
            const unsigned long long fsize = symb.st_size;
            unsigned long long faddr = symb.st_value;
            // Only symbols inside the selected segment can be relocated with
            // its base; anything below it would wrap around.
            if (faddr < p_vaddr || faddr > p_vaddr + p_size) continue;

            // Names are NUL-terminated in the string table; read a bounded
            // chunk and let the first NUL end the name.
            if (fseek(fp, (long)(strtab + symb.st_name), SEEK_SET) < 0) continue;
            char fname[128];
            size_t got = fread(fname, 1, sizeof(fname) - 1, fp);
            if (got == 0) continue;
            fname[got] = 0;

            faddr = faddr - p_vaddr + v_addr;
            STORE_T::iterator it = store.find(faddr);
            if (it == store.end()) {
                store[faddr] = std::make_pair(std::string(fname), fsize);
            } else if (it->second.second < fsize) {
                it->second = std::make_pair(std::string(fname), fsize);
            }
        }
    }
}
149 
150 int load_symbol_from_file(const char *path, unsigned long long addr, unsigned long long size, unsigned long long offset, STORE_T& store) {
151     printf("loading symble from %s\n", path);
152     FILE *fp = fopen(path, "rb");
153     if (fp==NULL) { perror("fail to open file"); return -1; }
154     char ident[EI_NIDENT], c;
155     int err=0;
156     int rc = fread(ident, sizeof(ident), 1, fp);
157     if (rc != 1) { perror("fail to read ident"); err=-1; goto end; }
158     if (ident[0]!=0x7f) { printf("not a elf file\n"); err=-1; goto  end; }
159     c=ident[4];
160     rc = fseek(fp, 0, SEEK_SET); if (rc<0) { perror("fail to rewind"); goto end; }
161     if (c == ELFCLASS32) {
162         printf("32bit elf not supported yet\n"); err=-2; goto end;
163     } else if (c == ELFCLASS64) {
164         parse_elf64(fp, addr, size, offset, store);
165     }
166 
167 end:
168     fclose(fp);
169     return err;
170 }
171 
/*
 * Parse a hexadecimal number at the start of `p`.
 *
 * Leading spaces and tabs are skipped. An optional "0x"/"0X" prefix is
 * accepted when it is followed by at least one hex digit. Parsing stops at
 * the first non-hex character.
 *
 *   p  NUL-terminated input
 *   n  out: number of characters consumed from `p` (whitespace and prefix
 *      included), or 0 when no hex digit was found
 *
 * Returns the parsed value (0 when nothing was parsed). Values wider than
 * 64 bits silently lose their high bits.
 */
static unsigned long long parse_hex(const char *p, int *n) {
    unsigned long long r = 0;
    int i = 0;
    *n = 0;
    while (p[i]==' ' || p[i]=='\t') i++;

    // Only treat "0x" as a prefix when a digit follows; otherwise the lone
    // '0' is the number itself.
    if (p[i]=='0' && (p[i+1]=='x' || p[i+1]=='X')) {
        const char d = p[i+2];
        if ((d>='0'&&d<='9') || (d>='a'&&d<='f') || (d>='A'&&d<='F')) i += 2;
    }

    int digits = 0;
    for (;; i++, digits++) {
        int v;
        if (p[i]>='0'&&p[i]<='9') v = p[i]-'0';
        else if (p[i]>='a'&&p[i]<='f') v = 10+p[i]-'a';
        else if (p[i]>='A'&&p[i]<='F') v = 10+p[i]-'A';
        else break;
        r = (r<<4) + v;
    }
    if (digits > 0) *n = i;
    return r;
}
191 
192 STORE_T*  load_symbol_pid(int pid) {
193     printf("loading symbols for %d\n", pid);
194     char bb[256];
195     sprintf(bb, "/proc/%d/maps", pid);
196     FILE* fp = fopen(bb, "r");
197     if (fp==NULL) return NULL;
198     STORE_T *store = new STORE_T();
199     unsigned long long start, end, offset, inode;
200     char *p;
201     int i, c, j;
202     char fname[128], xx[64], xxx[32], mod[16], idx[16];
203     while(1) {
204         p=fgets(bb, sizeof(bb), fp); if (p==NULL) break;
205         if (sscanf(p, "%s %s %s %s %lld %s", xx, mod, xxx, idx, &inode, fname)!=6) continue;
206         i=0; c=0;
207         start = parse_hex(xx, &c); if (c==0) continue; i+=c; if (p[i]!='-') continue; i++;
208         end = parse_hex(xx+i, &c); if (c==0) continue;
209         // parse type
210         for (j=0; j<8; j++) if (mod[j]=='x') break; if (j>=8) continue;
211         if (fname[0]!='/') continue;
212         offset = parse_hex(xxx, &c); if (c==0) continue;
213         // remaining should contains '/' indicating this mmap is refering to a file
214         sprintf(bb, "/proc/%d/root%s", pid, fname);
215         load_symbol_from_file(bb, start, end-start, offset, *store);
216     }
217     fclose(fp);
218     return store;
219 }
220 
221 /* parse kernel func symbols from /proc/kallsyms */
222 K_STORE_T* load_kernel() {
223     FILE* fp = fopen("/proc/kallsyms", "r");
224     if (fp == NULL) return NULL;
225     char *p;
226     unsigned long long addr;
227     int c;
228     K_STORE_T* store = new K_STORE_T();
229     char bb[128], adr[128], type[8], name[128];
230     while(1) {
231         p = fgets(bb, sizeof(bb), fp); if (p==NULL) break;
232         if (sscanf(p, "%s %s %s", adr, type, name)!=3) continue;;
233         if (type[0]!='t'&&type[0]!='T') continue;
234         addr=parse_hex(adr, &c); if (c==0) continue;
235         (*store)[addr] = string(name);
236     }
237     return store;
238     fclose(fp);
239 }
240 
241 //------------------------------perf profiler-------------------------
/* Invoke the perf_event_open system call through syscall(), forwarding every
 * argument unchanged and returning the raw syscall result. */
static long perf_event_open(struct perf_event_attr *perf_event, pid_t pid, int cpu, int group_fd, unsigned long flags) {
    const long result = syscall(__NR_perf_event_open, perf_event, pid, cpu, group_fd, flags);
    return result;
}
245 unordered_map<int, STORE_T*> pid_symbols;
246 K_STORE_T* kernel_symbols = NULL;
247 
248 struct pollfd polls[MAXCPU];
249 // res for cleanup
250 static long long psize;
251 map<int, pair<void*, long long>> res;
252 TNode* gnode = NULL;
253 
254 unordered_map<unsigned long long, string> unknowns;
255 void int_exit(int _) {
256     for (auto x: res) {
257         auto y = x.second;
258         void* addr = y.first;
259         munmap(addr, (1+MAXN)*psize);
260         close(x.first);
261     }
262     res.clear();
263     if (gnode!=NULL) {
264         FILE* fp = fopen("./report.html", "w");
265         if (fp) {
266             fprintf(fp, "<head> <link rel=\"stylesheet\" href=\"report.css\"> <script src=\"report.js\"> </script> </head>\n");
267             fprintf(fp, "<ul class=\"tree\">\n");
268             gnode->printit(fp, 0);
269             fprintf(fp, "</ul>\n");
270             fclose(fp);
271             printf("report done\n");
272         }
273         gnode = NULL;
274     }
275     printf("---------------------unknowns-----------------\n");
276     for (auto x=unknowns.begin(); x!=unknowns.end(); x++) {
277         printf("0x%llx  --?>  %s\n", (*x).first, (*x).second.c_str());
278     }
279     exit(0);
280 }
281 /*
282 perf call chain process
283 For now, if a address would not be located to some function, the address would be skipped.
284  */
285 int process_event(char *base, unsigned long long size, unsigned long long offset) {
286     struct perf_event_header* p = NULL;
287     int pid, xpid;
288     unsigned long long time;
289     offset%=size;
290     // assuming the header would fit within size
291     p = (struct perf_event_header*) (base+offset);
292     offset+=sizeof(*p); if (offset>=size) offset-=size;
293     if (p->type != PERF_RECORD_SAMPLE) return p->size;
294     // pid, tip;
295     pid = *((int *)(base+offset));  offset+=8; if (offset>=size) offset-=size;
296     unsigned long long nr = *((unsigned long long*)(base+offset)); offset+=8; if (offset>=size) offset-=size;
297     if (nr>128) return -1;
298     unsigned long long addr, o, addr0;
299     if (nr) {
300         if (gnode==NULL) gnode=new TNode();
301         char bb[64];
302         TNode* r = gnode;
303         if (pid_symbols.count(pid)==0) pid_symbols[pid] = load_symbol_pid(pid);
304         STORE_T* px = pid_symbols[pid];
305         addr0 = *((unsigned long long *)(base+offset));
306         char user_mark = 0;
307         for (int i=nr-1; i>=0; i--) {
308             o = i*8+offset; if (o>=size) o-=size;
309             addr = *((unsigned long long*)(base+o));
310             if (addr==0) continue; // something wrong?
311             if ((addr>>56)==(addr0>>56) && (p->misc&PERF_RECORD_MISC_KERNEL)) {
312                 // skip the cross line command, no idear how to correctly resolve it now.
313                 if (user_mark) { user_mark=0; continue; }
314                 // check in kernel
315                 if (kernel_symbols&&!kernel_symbols->empty()) {
316                     auto x = kernel_symbols->upper_bound(addr);
317                     if (x==kernel_symbols->begin()) {
318                         // sprintf(bb, "0x%llx", addr); r = r->add(string(bb));
319                         r = r->add(string("unknown"));
320                     } else {
321                         x--;
322                         r = r->add((*x).second);
323                     }
324                 } else {
325                     // sprintf(bb, "0x%llx", addr); r = r->add(string(bb));
326                     r = r->add(string("unknown"));
327                 }
328             } else {
329                 if (px) {
330                     auto x = px->upper_bound(addr);
331                     if (x==px->begin()) {
332                         // sprintf(bb, "0x%llx", addr); r = r->add(string(bb));
333                         auto y = (*x).second;
334                         r = r->add(y.first+"?");
335                     } else {
336                         x--;
337                         auto y = (*x).second;
338                         if (y.second && addr>(*x).first+y.second) {
339                             // r = r->add(y.first);
340                             // sprintf(bb, "0x%llx", addr); r = r->add(string(bb));
341                             x++;
342                             if (x==px->end()) r = r->add(y.first+"??");
343                             else {
344                                 auto y = (*x).second;
345                                 r = r->add(y.first+"?");
346                             }
347                         } else {
348                             r = r->add(y.first);
349                         }
350                     }
351                 } else {
352                     // sprintf(bb, "0x%llx", addr); r = r->add(string(bb));
353                     // r = r->add(string("unknown"));
354                     unknowns[addr] = "no pid symbol";
355                 }
356                 user_mark=1;
357             }
358         }
359     }
360     return p->size;
361 }
362 
363 int main(int argc, char *argv[]) {
364     kernel_symbols = load_kernel();
365     if (argc<2) { printf("Need pid\n"); return 1; }
366     int pid = atoi(argv[1]); if (pid<=0) { printf("invalid pid %s\n", argv[1]); return 1; }
367     // find cgroup
368     char xb[256], xb2[256];
369     int i, j, k, fd;
370     void* addr;
371     sprintf(xb, "/proc/%d/cgroup", pid);
372     FILE* fp = fopen(xb, "r");
373     if (fp==NULL) error("fail to open cgroup file");
374     char *p;
375     xb2[0]=0;
376     int cgroup_name_len=0;
377     while(1) {
378         p = fgets(xb, sizeof(xb), fp); if (p==NULL) break;
379         i=0; while(p[i]&&p[i]!=':') i++; if (p[i]==0) continue;
380         if (strstr(p, "perf_event")) {
381             i++; while(p[i]!=':'&&p[i]) i++;  if (p[i]!=':') continue; i++;
382             j=i; while(p[j]!='\r'&&p[j]!='\n'&&p[j]!=0) j++; p[j]=0;
383             sprintf(xb2, "/sys/fs/cgroup/perf_event%s", p+i);
384             cgroup_name_len=j-i;
385             break;
386         } else if (p[i+1]==':') {
387             i+=2; j=i; while(p[j]!='\r'&&p[j]!='\n'&&p[j]!=0) j++; p[j]=0;
388             sprintf(xb2, "/sys/fs/cgroup/%s", p+i);
389             cgroup_name_len=j-i;
390         }
391     }
392     fclose(fp);
393     if (xb2[0]==0) error("no proper cgroup found\n");
394     if (cgroup_name_len<2) {
395         printf("cgroup %s seems to be root, not allowed\n", xb2);
396         return -1;
397     }
398     printf("try to use cgroup %s\n", xb2);
399     int cgroup_id = open(xb2, O_CLOEXEC);
400     if (cgroup_id<=0) { perror("error open cgroup dir"); return 1; }
401     // start perf event
402     psize = sysconf(_SC_PAGE_SIZE); // getpagesize();
403     int cpu_num = sysconf(_SC_NPROCESSORS_ONLN);
404 	struct perf_event_attr attr;
405     memset(&attr, 0, sizeof(attr));
406     attr.type = PERF_TYPE_SOFTWARE;
407     attr.size = sizeof(attr);
408     attr.config = PERF_COUNT_SW_CPU_CLOCK;
409     attr.sample_freq = 777; // adjust it
410     attr.freq = 1;
411     attr.wakeup_events = 16;
412     attr.sample_type = PERF_SAMPLE_TID|PERF_SAMPLE_CALLCHAIN;
413     for (i=0, k=0; i<cpu_num&&i<MAXCPU; i++) {
414         printf("attaching cpu %d\n", i);
415         fd = perf_event_open(&attr, cgroup_id, i, -1, PERF_FLAG_FD_CLOEXEC|PERF_FLAG_PID_CGROUP);
416         if (fd<0) { perror("fail to open perf event"); continue; }
417         addr = mmap(NULL, (1+MAXN)*psize, PROT_READ, MAP_SHARED, fd, 0);
418         if (addr == MAP_FAILED) { perror("mmap failed"); close(fd); continue; }
419         res[fd] = make_pair(addr, 0);
420         polls[k].fd = fd;
421         polls[k].events = POLLIN;
422         polls[k].revents = 0;
423         k++;
424     }
425     if (k==0) { printf("no cpu event attached at all\n"); return 1; }
426 
427 	signal(SIGINT, int_exit);
428 	signal(SIGTERM, int_exit);
429 
430     unsigned long long head;
431     int event_size;
432     struct perf_event_mmap_page *mp;
433     while (poll(polls, k, -1)>0) {
434         // printf("wake\n");
435         for (i=0; i<k; i++) {
436             if ((polls[i].revents&POLLIN)==0) continue;
437             fd = polls[i].fd;
438             addr = res[fd].first;
439             mp = (struct perf_event_mmap_page *)addr;
440             head = res[fd].second;
441             ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 1);
442             if (head>mp->data_head) head=mp->data_head;
443             head = mp->data_head-((mp->data_head-head)%mp->data_size);
444             while(head<mp->data_head) {
445                 event_size = process_event((char*)addr+mp->data_offset, mp->data_size, head);
446                 if (event_size<0) {
447                     // resync
448                     head=mp->data_head;
449                     break;
450                 }
451                 head += event_size;
452             }
453             res[fd].second = mp->data_head;
454             ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 0);
455         }
456     }
457 
458     int_exit(0);
459     return 0;
460 }
461