1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <unistd.h>
4 #include <string.h>
5 #include <sys/ioctl.h>
6 #include <linux/perf_event.h>
7 #include <asm/unistd.h>
8 #include <sys/mman.h>
9 #include <sys/stat.h>
10 #include <poll.h>
11 #include <signal.h>
12 #include <fcntl.h>
13 #include <elf.h>
14 #include <string.h>
15
16 #include <vector>
17 #include <string>
18 #include <map>
19 #include <unordered_map>
20 #include <algorithm>
21 using namespace std;
22
23
// Number of data pages in each per-CPU perf ring buffer; every mapping is (1+MAXN) pages long.
#define MAXN 128
// Capacity of the polls[] array, i.e. the largest number of CPUs that can be attached.
#define MAXCPU 1024
// Print msg followed by the current errno text (perror) and terminate with exit status 1.
#define error(msg) do { perror(msg); exit(1); } while(0)
27 //--------------------------------Tree for call chain and report-------------------------------
28 //
/*
 * Prefix tree of call chains, rendered as a collapsible HTML list.
 *
 * Each node stands for one frame label. `c` is incremented on the node that
 * add() is called on, so it counts the chains that continue below this node;
 * a node that only ever ends a chain keeps c == 0.
 * Child nodes are heap allocated by add() and are never freed here.
 */
struct TNode {
    int c = 0;                                   // number of add() calls made on this node
    std::unordered_map<std::string, TNode*> s;   // children, keyed by frame label

    /*
     * Record one more chain passing through this node and return the child
     * for label n, creating it on first use.
     */
    struct TNode *add(std::string n) {
        c++;
        TNode *&slot = s[n];                     // single lookup; inserts nullptr when absent
        if (slot == nullptr) slot = new TNode();
        return slot;
    }

    /*
     * Write this node's children to fp as nested <li> elements.
     *
     * Children are emitted in descending order of their own counter (ties
     * broken by descending label); a child whose counter is below 1% of this
     * node's counter is omitted together with its subtree.
     *
     * k is the next free checkbox id; the return value is the next free id
     * after everything printed below this node.
     */
    int printit(FILE *fp, int k) {
        if (s.empty()) return k;

        // Sort pointers into the map instead of copying every label string.
        typedef std::pair<const std::string*, TNode*> Entry;
        std::vector<Entry> kids;
        kids.reserve(s.size());
        for (auto &kv : s) kids.push_back(Entry(&kv.first, kv.second));
        std::sort(kids.begin(), kids.end(), [](const Entry &a, const Entry &b) {
            if (a.second->c != b.second->c) return a.second->c > b.second->c;
            return *a.first > *b.first;
        });

        for (const Entry &kid : kids) {
            const int count = kid.second->c;
            if (100.0*count/c < 1) continue;
            // Labels contain process-controlled text (symbol names, comm,
            // hostname), so they must not be written into the markup raw.
            const std::string label = escape_html(*kid.first);
            fprintf(fp, "<li>\n");
            fprintf(fp, "<input type=\"checkbox\" id=\"c%d\" />\n", k);
            fprintf(fp, "<label class=\"tree_label\" for=\"c%d\">%s(%.3f%% %d/%d)</label>\n", k, label.c_str(), 100.0*count/c, count, c);
            fprintf(fp, "<ul>\n");
            k = kid.second->printit(fp, k+1);
            fprintf(fp, "</ul>\n");
            fprintf(fp, "</li>\n");
        }
        return k;
    }

private:
    /* Return text with the HTML metacharacters & < > " replaced by entities. */
    static std::string escape_html(const std::string &text) {
        std::string out;
        out.reserve(text.size());
        for (char ch : text) {
            switch (ch) {
            case '&': out += "&amp;"; break;
            case '<': out += "&lt;"; break;
            case '>': out += "&gt;"; break;
            case '"': out += "&quot;"; break;
            default:  out += ch; break;
            }
        }
        return out;
    }
};
61
62 //--------------------------------symbols-------------------------------------------
// User-space symbols: relocated function start address -> (function name, size in bytes).
using STORE_T = map<unsigned long long, pair<string, unsigned long long>>;
// Kernel symbols: address -> function name.
using K_STORE_T = map<unsigned long long, string>;
65
66 /*
67 * load FUNC symbols refering to the section indicated by the offset, relocate the virtual address
68 */
parse_elf64(FILE * fp,unsigned long long v_addr,unsigned long long v_size,unsigned long long v_offset,STORE_T & store)69 void parse_elf64(FILE *fp, unsigned long long v_addr, unsigned long long v_size, unsigned long long v_offset, STORE_T& store) {
70 // printf("read elf with offset 0x%llx, addr 0x%llx\n", v_offset, v_addr);
71 Elf64_Ehdr ehdr;
72 int rc = fread(&ehdr, sizeof(ehdr), 1, fp);
73 if (rc != 1) return;
74 int n, s, i;
75 unsigned long long offset;
76
77 // load program headers
78 unsigned long long p_vaddr, p_size;
79 n = ehdr.e_phnum;
80 s = ehdr.e_phentsize;
81 offset = ehdr.e_phoff;
82 Elf64_Phdr phdr;
83 for (i=0; i<n; i++) {
84 rc = fseek(fp, offset, SEEK_SET);
85 if (rc<0) { perror("fail to seek"); return; }
86 rc = fread(&phdr, sizeof(phdr), 1, fp);
87 if (rc != 1) { perror("fail to read program header"); return; }
88 if (phdr.p_flags&PF_X) {
89 if (phdr.p_offset == v_offset) {
90 p_vaddr = phdr.p_vaddr;
91 p_size = phdr.p_memsz; if (p_size==0) p_size = 0xffffffff;
92 break;
93 }
94 }
95 offset+=s;
96 }
97 if (i>=n) { printf("No program header match offset found, fail to load\n"); return; }
98
99 // load section headers
100 n = ehdr.e_shnum;
101 s = ehdr.e_shentsize;
102 offset = ehdr.e_shoff;
103 Elf64_Shdr shdr;
104 vector<Elf64_Shdr> headers;
105 for (int i=0; i<n; i++) {
106 rc = fseek(fp, offset, SEEK_SET);
107 if (rc<0) { perror("fail to seek"); return; }
108 rc = fread(&shdr, sizeof(shdr), 1, fp);
109 if (rc != 1) { perror("fail to read sec header"); return; }
110 headers.push_back(shdr);
111 offset+=s;
112 }
113 Elf64_Sym symb;
114 unsigned long long faddr, fsize;
115 unsigned long long size, item_size;
116 int link, ix, flink, k;
117 char fname[128];
118 for (int i=0; i<n; i++) {
119 switch(headers[i].sh_type) {
120 case SHT_SYMTAB:
121 case SHT_DYNSYM:
122 offset = headers[i].sh_offset;
123 size = headers[i].sh_size;
124 item_size = headers[i].sh_entsize;
125 link = headers[i].sh_link;
126 if (link<=0) break;
127 for (k=0; k+item_size<=size; k+=item_size) {
128 rc = fseek(fp, offset+k, SEEK_SET); if (rc<0) continue;
129 rc = fread(&symb, sizeof(symb), 1, fp); if (rc != 1) continue;
130 if (ELF64_ST_TYPE(symb.st_info) != STT_FUNC ) continue;
131 flink = symb.st_shndx; if (flink==0) continue;
132 fsize = symb.st_size; if (fsize==0) continue;
133 faddr = symb.st_value; if (faddr>p_vaddr+p_size) continue;
134 ix = symb.st_name; if (ix==0) continue;
135 rc = fseek(fp, headers[link].sh_offset+ix, SEEK_SET); if (rc<0) continue;
136 if (fgets(fname, sizeof(fname), fp)==NULL) continue;
137 faddr = faddr-p_vaddr+v_addr;
138 // printf("0x%llx +%lld > %s\n", faddr, fsize, fname);
139 store[faddr] = make_pair(string(fname), fsize);
140 }
141 break;
142 default:
143 break;
144 }
145 }
146 }
147
load_symbol_from_file(const char * path,unsigned long long addr,unsigned long long size,unsigned long long offset,STORE_T & store)148 int load_symbol_from_file(const char *path, unsigned long long addr, unsigned long long size, unsigned long long offset, STORE_T& store) {
149 printf("loading symble from %s\n", path);
150 FILE *fp = fopen(path, "rb");
151 if (fp==NULL) { perror("fail to open file"); return -1; }
152 char ident[EI_NIDENT], c;
153 int err=0;
154 int rc = fread(ident, sizeof(ident), 1, fp);
155 if (rc != 1) { perror("fail to read ident"); err=-1; goto end; }
156 if (ident[0]!=0x7f) { printf("not a elf file\n"); err=-1; goto end; }
157 c=ident[4];
158 rc = fseek(fp, 0, SEEK_SET); if (rc<0) { perror("fail to rewind"); goto end; }
159 if (c == ELFCLASS32) {
160 printf("32bit elf not supported yet\n"); err=-2; goto end;
161 } else if (c == ELFCLASS64) {
162 parse_elf64(fp, addr, size, offset, store);
163 } else {
164 printf("unknown elf type %d\n", c);
165 }
166
167 end:
168 fclose(fp);
169 return err;
170 }
171
/*
 * Parse a hexadecimal number at the start of p.
 *
 * Leading spaces and tabs are skipped and an optional "0x"/"0X" prefix is
 * accepted. Parsing stops at the first character that is not a hex digit.
 *
 * *n receives the number of characters consumed from p, counting the skipped
 * whitespace and prefix, so callers can advance with p + *n. When no hex
 * digit is found, *n is 0 and the return value is 0; that is the only
 * reliable failure indication, because 0 is also a valid parsed value.
 */
static unsigned long long parse_hex(const char *p, int *n) {
    unsigned long long value = 0;
    int i = 0;
    *n = 0;
    while (p[i] == ' ' || p[i] == '\t') i++;
    if (p[i] == 0) return 0;
    // Only a literal "0x"/"0X" counts as a prefix.
    if (p[i] == '0' && (p[i+1] == 'x' || p[i+1] == 'X')) i += 2;

    const int first_digit = i;
    for (; p[i]; i++) {
        int digit;
        if (p[i] >= '0' && p[i] <= '9') digit = p[i] - '0';
        else if (p[i] >= 'a' && p[i] <= 'f') digit = 10 + p[i] - 'a';
        else if (p[i] >= 'A' && p[i] <= 'F') digit = 10 + p[i] - 'A';
        else break;
        value = (value << 4) + digit;
    }
    if (i == first_digit) return 0;   // nothing parsed: leave *n at 0
    *n = i;
    return value;
}
191
load_symbol_pid(int pid,STORE_T * in)192 STORE_T* load_symbol_pid(int pid, STORE_T* in) {
193 printf("loading symbols for %d\n", pid);
194 char bb[128];
195 sprintf(bb, "/proc/%d/maps", pid);
196 FILE* fp = fopen(bb, "r");
197 if (fp==NULL) return NULL;
198 STORE_T *store = in;
199 unsigned long long start, end, offset=0;
200 if (store==NULL) store=new STORE_T();
201 char *p;
202 int i, c, j;
203 while(1) {
204 p=fgets(bb, sizeof(bb), fp); if (p==NULL) break;
205 i=0; c=0;
206 start = parse_hex(p, &c); if (start==0) continue; i+=c; if (p[i]!='-') continue; i++;
207 end = parse_hex(p+i, &c); if (end==0) continue; i+=c;
208 // parse type
209 for (j=0; j<8; j++) { if (p[i]=='x') break; i++; } if (j>=8) continue;
210 while(p[i]!=' '&&p[i]!='\t'&&p[i]!=0) i++; if (p[i]==0) continue;
211 offset = parse_hex(p+i, &c); if (c==0) continue;
212 // remaining should contains '/' indicating this mmap is refering to a file
213 while(p[i]&&p[i]!='/') i++; if (p[i]==0) continue;
214 sprintf(bb, "/proc/%d/map_files/%llx-%llx", pid, start, end);
215 load_symbol_from_file(bb, start, end-start, offset, *store);
216 }
217 fclose(fp);
218 return store;
219 }
220
221 /* parse kernel func symbols from /proc/kallsyms */
load_kernel()222 K_STORE_T* load_kernel() {
223 FILE* fp = fopen("/proc/kallsyms", "r");
224 if (fp == NULL) return NULL;
225 char *p;
226 unsigned long long addr;
227 int c;
228 K_STORE_T* store = new K_STORE_T();
229 char bb[128], adr[128], type[8], name[128];
230 while(1) {
231 p = fgets(bb, sizeof(bb), fp); if (p==NULL) break;
232 if (sscanf(p, "%s %s %s", adr, type, name)!=3) continue;;
233 if (type[0]!='t'&&type[0]!='T') continue;
234 addr=parse_hex(adr, &c); if (c==0) continue;
235 (*store)[addr] = string(name);
236 }
237 return store;
238 fclose(fp);
239 }
240
241 //------------------------------perf profiler-------------------------
/*
 * Invoke the perf_event_open system call through syscall().
 * Arguments are forwarded unchanged; the raw syscall result is returned.
 */
static long perf_event_open(struct perf_event_attr *perf_event, pid_t pid, int cpu, int group_fd, unsigned long flags) {
    const long result = syscall(__NR_perf_event_open, perf_event, pid, cpu, group_fd, flags);
    return result;
}
// pid -> symbol store returned by load_symbol_pid(); the value is NULL when loading failed
unordered_map<int, STORE_T*> pid_symbols;
// pid -> (command name, host name), appended to user-space frame labels
unordered_map<int, pair<string,string>> pid_infos;
// kernel symbols from load_kernel(); NULL until main() loads them, or when loading failed
K_STORE_T* kernel_symbols = NULL;

// one entry per successfully attached perf event fd
struct pollfd polls[MAXCPU];
// resources released by int_exit()
// system page size, set in main()
static long long psize;
// perf event fd -> (ring-buffer mapping address, data_head value already consumed)
map<int, pair<void*, long long>> res;
// root of the call-chain tree, created by process_event() on the first sample with frames
TNode* gnode = NULL;
// set to 1 once int_exit() has started
int exiting = 0;
255
int_exit(int x)256 void int_exit(int x) {
257 exiting = 1;
258 for (auto x: res) {
259 auto y = x.second;
260 void* addr = y.first;
261 munmap(addr, (1+MAXN)*psize);
262 close(x.first);
263 }
264 res.clear();
265 if (gnode!=NULL) {
266 FILE* fp = fopen("./report.html", "w");
267 if (fp) {
268 fprintf(fp, "<head> <link rel=\"stylesheet\" href=\"report.css\"> <script src=\"report.js\"> </script> </head>\n");
269 fprintf(fp, "<ul class=\"tree\">\n");
270 gnode->printit(fp, 0);
271 fprintf(fp, "</ul>\n");
272 fclose(fp);
273 printf("report done\n");
274 }
275 gnode = NULL;
276 }
277 exit(x);
278 }
279 /*
280 perf call chain process
281 For now, if a address would not be located to some function, the address would be skipped.
282 */
process_event(char * base,unsigned long long size,unsigned long long offset)283 int process_event(char *base, unsigned long long size, unsigned long long offset) {
284 struct perf_event_header* p = NULL;
285 int pid, xpid;
286 unsigned long long time;
287 offset%=size;
288 // assuming the header would fit within size
289 p = (struct perf_event_header*) (base+offset);
290 offset+=sizeof(*p); if (offset>=size) offset-=size;
291 if (p->type != PERF_RECORD_SAMPLE) return p->size;
292 // pid, tip;
293 pid = *((int *)(base+offset)); offset+=8; if (offset>=size) offset-=size;
294 unsigned long long nr = *((unsigned long long*)(base+offset)); offset+=8; if (offset>=size) offset-=size;
295 unsigned long long addr, o, addr0;
296 if (nr) {
297 if (gnode==NULL) gnode=new TNode();
298 char bb[64];
299 TNode* r = gnode;
300 if (pid_symbols.count(pid)==0) {
301 pid_symbols[pid] = load_symbol_pid(pid, NULL);
302 // load command
303 char b[128];
304 char comm[128]="NA";
305 char host[128]="NA";
306 sprintf(b, "/proc/%d/comm", pid);
307 FILE* fp = fopen(b, "r");
308 if (fp) {
309 fscanf(fp, "%s", comm);
310 fclose(fp);
311 }
312 sprintf(b, "/proc/%d/root/etc/hostname", pid);
313 fp = fopen(b, "r");
314 if (fp) {
315 fscanf(fp, "%s", host);
316 fclose(fp);
317 }
318 pid_infos[pid]=make_pair(string(comm), string(host));
319 }
320 STORE_T* px = pid_symbols[pid];
321 addr0 = *((unsigned long long *)(base+offset));
322 char user_mark = 0;
323 char need_reload= 0;
324 auto pinfo = pid_infos[pid];
325 for (int i=nr-1; i>=0; i--) {
326 o = i*8+offset; if (o>=size) o-=size;
327 addr = *((unsigned long long*)(base+o));
328 if ((addr>>56)==(addr0>>56) && (p->misc&PERF_RECORD_MISC_KERNEL)) {
329 // skip the cross line command, no idear how to correctly resolve it now.
330 if (user_mark) { user_mark=0; continue; }
331 // check in kernel
332 if (kernel_symbols&&!kernel_symbols->empty()) {
333 auto x = kernel_symbols->upper_bound(addr);
334 if (x==kernel_symbols->begin()) {
335 // sprintf(bb, "0x%llx", addr); r = r->add(string(bb));
336 } else {
337 x--;
338 r = r->add((*x).second);
339 }
340 } else {
341 // sprintf(bb, "0x%llx", addr); r = r->add(string(bb));
342 }
343 } else {
344 if (px) {
345 auto x = px->upper_bound(addr);
346 if (x==px->begin()) {
347 // sprintf(bb, "0x%llx", addr); r = r->add(string(bb));
348 r = r->add(string("unknwon[")+pinfo.first+'@'+pinfo.second+"]");
349 } else {
350 x--;
351 auto y = (*x).second;
352 if (addr>(*x).first+y.second) {
353 // r = r->add(y.first);
354 // sprintf(bb, "0x%llx", addr); r = r->add(string(bb));
355 r = r->add(string("unknwon[")+pinfo.first+'@'+pinfo.second+"]");
356 } else {
357 r = r->add(y.first+"["+pinfo.first+'@'+pinfo.second+"]");
358 }
359 }
360 } else {
361 // sprintf(bb, "0x%llx", addr); r = r->add(string(bb));
362 }
363 user_mark=1;
364 }
365 }
366 }
367 return p->size;
368 }
369
main(int argc,char * argv[])370 int main(int argc, char *argv[]) {
371 kernel_symbols = load_kernel();
372 if (argc<2) { printf("Need kprobe function name, e.g. %s <some rarely called kernel function>\n", argv[0]); return 1; }
373 int type;
374 char *func = argv[1];
375 FILE *fp = fopen("/sys/bus/event_source/devices/kprobe/type", "r");
376 if (fp == NULL) { printf("fail to find type for kprobe\n"); return 1; }
377 type = 0;
378 fscanf(fp, "%d", &type);
379 fclose(fp);
380 if (type <= 0) { printf("unexpected type %d\n", type); return 1; }
381 // start perf event
382 psize = sysconf(_SC_PAGE_SIZE); // getpagesize();
383 int cpu_num = sysconf(_SC_NPROCESSORS_ONLN);
384 struct perf_event_attr attr;
385 memset(&attr, 0, sizeof(attr));
386 attr.type = type;
387 attr.size = sizeof(attr);
388 attr.config = 0; // (1<<0) for kreprobe
389 attr.sample_period = 2;
390 attr.wakeup_events = 2;
391 attr.sample_type = PERF_SAMPLE_TID|PERF_SAMPLE_CALLCHAIN;
392 attr.kprobe_func = (__u64)func; // "do_sys_open"; // "bprm_execve";
393 attr.probe_offset = 0;
394 int fd, i, k;
395 void* addr;
396 for (i=0, k=0; i<cpu_num&&i<MAXCPU; i++) {
397 printf("attaching cpu %d\n", i);
398 fd = perf_event_open(&attr, -1, i, -1, PERF_FLAG_FD_CLOEXEC);
399 if (fd<0) { perror("fail to open perf event"); continue; }
400 addr = mmap(NULL, (1+MAXN)*psize, PROT_READ, MAP_SHARED, fd, 0);
401 if (addr == MAP_FAILED) { perror("mmap failed"); close(fd); continue; }
402 res[fd] = make_pair(addr, 0);
403 polls[k].fd = fd;
404 polls[k].events = POLLIN;
405 polls[k].revents = 0;
406 k++;
407 }
408 if (k==0) { printf("no cpu event attached at all\n"); return 1; }
409
410 signal(SIGINT, int_exit);
411 signal(SIGTERM, int_exit);
412
413 unsigned long long head;
414 struct perf_event_mmap_page *mp;
415 while (poll(polls, k, -1)>0) {
416 for (i=0; i<k; i++) {
417 if (exiting) break;
418 if ((polls[i].revents&POLLIN)==0) continue;
419 fd = polls[i].fd;
420 addr = res[fd].first;
421 mp = (struct perf_event_mmap_page *)addr;
422 head = res[fd].second;
423 if (head==mp->data_head) continue;
424 ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 1);
425 head = mp->data_head-((mp->data_head-head)%mp->data_size);
426 while(head<mp->data_head) head+=process_event((char*)addr+mp->data_offset, mp->data_size, head);
427 ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 0);
428 res[fd].second = mp->data_head;
429 }
430 }
431
432 int_exit(0);
433 return 0;
434 }
435