// Context-switch rate monitor: attaches one cpu-wide perf event per CPU to the
// perf_event cgroup of a given pid, reads PERF_RECORD_SWITCH_CPU_WIDE records
// from the per-CPU ring buffers and prints a count for every second seen.
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <poll.h>
#include <signal.h>
#include <fcntl.h>


#include <map>
#include <unordered_set>
#include <queue>
using namespace std;


#define error(msg) do { perror(msg); exit(1); } while(0)
// Number of data pages per ring buffer; the mapping is 1 metadata page + MAXN.
#define MAXN 128

enum {
    CG_LINE_LEN = 4096 + 256,       // one line of /proc/<pid>/cgroup
    CG_PATH_LEN = CG_LINE_LEN + 64, // longest prefix + any path from one line
    POLL_TIMEOUT_MS = 1000,         // upper bound on how long a stop request waits
    HOLD_BACK_SECONDS = 3           // see process_event()
};

static const unsigned long long kNsPerSec = 1000000000ULL;

/**
 * Thin wrapper around the perf_event_open(2) system call (glibc provides none).
 *
 * @return the new event file descriptor, or -1 with errno set.
 */
static long perf_event_open(struct perf_event_attr *perf_event, pid_t pid, int cpu, int group_fd, unsigned long flags) {
    return syscall(__NR_perf_event_open, perf_event, pid, cpu, group_fd, flags);
}

static void *addr = NULL;
static int fd = -1;
static long long psize;
// perf event fd -> (ring buffer mapping, data_head value already consumed)
map<int, pair<void*, long long>> res;

// Set from the signal handler; the main loop polls it.
static volatile sig_atomic_t g_stop = 0;

/**
 * Signal handler for SIGINT/SIGTERM.
 *
 * Only records the request. Tearing down the mappings here would pull memory
 * out from under the main loop and touch std::map from signal context, neither
 * of which is async-signal-safe.
 */
static void on_stop_signal(int s) {
    (void)s;
    g_stop = 1;
}

/**
 * Unmaps every ring buffer and closes every perf event fd recorded in `res`.
 *
 * Must be called from normal (non-signal) context. Calling it again is
 * harmless because `res` is emptied.
 *
 * @param s unused; kept so the signature stays that of a signal handler.
 */
void int_exit(int s) {
    (void)s;
    for (auto &entry : res) {
        munmap(entry.second.first, (1+MAXN)*psize);
        close(entry.first);
    }
    res.clear();
}

static unsigned long long cx_count=0, g_stime=0, g_total=0;
// Min-heap of record timestamps waiting to be accounted.
static priority_queue<unsigned long long, vector<unsigned long long>, greater<unsigned long long>> events;

// Advances a ring offset by `step` bytes, wrapping once at `size`.
static inline unsigned long long ring_advance(unsigned long long offset, unsigned long long step, unsigned long long size) {
    offset += step;
    if (offset >= size) offset -= size;
    return offset;
}

/**
 * Handles the record that starts at `offset` in the ring buffer data area.
 *
 * Only PERF_RECORD_SWITCH_CPU_WIDE records are accounted; every other type is
 * skipped. The record timestamp is pushed on a min-heap, and heap entries are
 * accounted once a record at least HOLD_BACK_SECONDS newer has been seen --
 * presumably so that records from different per-CPU buffers are counted in
 * timestamp order. Whenever the accounted second changes, the finished
 * second's count and the running total are printed.
 *
 * @param base   start of the data area
 * @param size   size of the data area in bytes
 * @param offset position of the record; reduced modulo `size`
 * @return the record size from its header (bytes to advance), or 0 if `size`
 *         is 0. A return of 0 means the caller cannot make progress.
 */
int process_event(char *base, unsigned long long size, unsigned long long offset) {
    if (size == 0) return 0;
    offset %= size;

    // assuming the header would fit within size
    struct perf_event_header hdr;
    memcpy(&hdr, base + offset, sizeof(hdr));
    if (hdr.type != PERF_RECORD_SWITCH_CPU_WIDE) return hdr.size;

    // Expected layout after the header, given sample_type TID|TIME|CPU with
    // sample_id_all: u32 next_prev_pid, u32 next_prev_tid, u32 pid, u32 tid,
    // u64 time, ... A record too short to hold the timestamp is skipped.
    if (hdr.size < sizeof(hdr) + 8 + 8 + sizeof(unsigned long long)) return hdr.size;
    offset = ring_advance(offset, sizeof(hdr), size);
    offset = ring_advance(offset, 8, size);   // next_prev_pid / next_prev_tid
    offset = ring_advance(offset, 8, size);   // pid / tid
    unsigned long long time = 0;
    memcpy(&time, base + offset, sizeof(time));

    events.push(time);
    const unsigned long long now_sec = time / kNsPerSec;
    while (!events.empty()) {
        const unsigned long long t = events.top() / kNsPerSec;
        if (now_sec < t || now_sec - t < HOLD_BACK_SECONDS) break;
        if (g_stime == 0) g_stime = t;
        if (g_stime < t) {
            // Second rolled over: report the finished one before counting
            // this event, so it is attributed to its own second.
            g_total += cx_count;
            printf("%10llu>: %10llu/s total: %-10llu\n", g_stime, cx_count, g_total); fflush(stdout);
            cx_count = 0;
            g_stime = t;
        }
        cx_count++;
        events.pop();
    }
    return hdr.size;
}

/**
 * Consumes all records written to one ring buffer since the last call.
 *
 * Output is paused while reading so the kernel does not overwrite records
 * under us (the buffer is mapped read-only). The saved position is the same
 * data_head snapshot that bounded the read, so nothing written after the
 * resume is skipped.
 */
static void drain_ring(int ring_fd, pair<void*, long long> &slot) {
    struct perf_event_mmap_page *mp = (struct perf_event_mmap_page *)slot.first;
    const unsigned long long last = slot.second;
    if (mp->data_size == 0) return;
    if (last == __atomic_load_n(&mp->data_head, __ATOMIC_ACQUIRE)) return;

    // Best effort: if pausing is unsupported we still read what is there.
    ioctl(ring_fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 1);
    // Acquire pairs with the kernel's publication of data_head.
    const unsigned long long end = __atomic_load_n(&mp->data_head, __ATOMIC_ACQUIRE);
    // If more than one buffer's worth was written, skip whole buffer lengths.
    // NOTE(review): after such an overrun the start may not be a record
    // boundary -- confirm whether resynchronisation is needed.
    unsigned long long cursor = end - ((end - last) % mp->data_size);
    char *data = (char *)slot.first + mp->data_offset;
    while (cursor < end) {
        const int consumed = process_event(data, mp->data_size, cursor);
        if (consumed <= 0) break;   // zero-sized record: stop instead of spinning
        cursor += consumed;
    }
    ioctl(ring_fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 0);
    slot.second = end;
}

#define MAXCPU 1024
struct pollfd polls[MAXCPU];

/**
 * Usage: <prog> <pid>
 *
 * Resolves the cgroup directory of <pid> from /proc/<pid>/cgroup (a v1
 * perf_event hierarchy is preferred, otherwise the entry with an empty
 * controller list), opens one perf event per CPU on that cgroup and processes
 * records until SIGINT/SIGTERM.
 *
 * @return 0 on normal shutdown, 1 on setup errors, -1 if the cgroup is root.
 */
int main(int argc, char *argv[]) {
    if (argc != 2) { printf("need pid\n"); return 1; }

    errno = 0;
    char *end = NULL;
    const long parsed = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || errno == ERANGE || parsed <= 0 || parsed > INT_MAX) {
        printf("invalid pid %s\n", argv[1]);
        return 1;
    }
    const int pid = (int)parsed;

    // find cgroup
    char proc_path[64];
    snprintf(proc_path, sizeof(proc_path), "/proc/%d/cgroup", pid);
    FILE *fp = fopen(proc_path, "r");
    if (fp == NULL) error("fail to open cgroup file");

    char line[CG_LINE_LEN];
    char cgroup_dir[CG_PATH_LEN];   // sized so the snprintf calls cannot truncate
    cgroup_dir[0] = 0;
    int cgroup_name_len = 0;
    while (fgets(line, sizeof(line), fp) != NULL) {
        // Each line is "hierarchy-id:controller-list:path".
        char *first_colon = strchr(line, ':');
        if (first_colon == NULL) continue;
        char *second_colon = strchr(first_colon + 1, ':');
        if (second_colon == NULL) continue;

        char *path = second_colon + 1;
        path[strcspn(path, "\r\n")] = 0;
        *second_colon = 0;          // isolate the controller list
        const char *controllers = first_colon + 1;

        if (strstr(controllers, "perf_event") != NULL) {
            snprintf(cgroup_dir, sizeof(cgroup_dir), "/sys/fs/cgroup/perf_event%s", path);
            cgroup_name_len = (int)strlen(path);
            break;                  // a dedicated perf_event hierarchy wins
        } else if (controllers[0] == 0) {
            // Empty controller list: remember it, but keep looking for a
            // perf_event hierarchy on later lines.
            snprintf(cgroup_dir, sizeof(cgroup_dir), "/sys/fs/cgroup/%s", path);
            cgroup_name_len = (int)strlen(path);
        }
    }
    fclose(fp);
    if (cgroup_dir[0] == 0) { fprintf(stderr, "no proper cgroup found\n"); return 1; }
    if (cgroup_name_len < 2) {
        printf("cgroup %s seems to be root, not allowed\n", cgroup_dir);
        return -1;
    }
    printf("try to use cgroup %s\n", cgroup_dir);
    const int cgroup_id = open(cgroup_dir, O_RDONLY | O_CLOEXEC);
    if (cgroup_id < 0) { perror("error open cgroup dir"); return 1; }

    // start perf event
    psize = sysconf(_SC_PAGE_SIZE); // getpagesize();
    // NOTE(review): CPU ids 0..n-1 are assumed to be the online ones; with
    // offline CPUs in between, some online CPUs would not be attached.
    const int cpu_num = sysconf(_SC_NPROCESSORS_ONLN);
    struct perf_event_attr attr;
    memset(&attr, 0, sizeof(attr));
    attr.type = PERF_TYPE_SOFTWARE;
    attr.size = sizeof(attr);
    attr.config = PERF_COUNT_SW_CONTEXT_SWITCHES;
    // attr.config = PERF_COUNT_SW_CPU_MIGRATIONS;
    attr.sample_period = 1;
    attr.wakeup_events = 32;
    attr.sample_type = PERF_SAMPLE_TID|PERF_SAMPLE_TIME|PERF_SAMPLE_CPU;
    attr.context_switch = 1;
    attr.sample_id_all = 1;

    int k = 0;
    for (int cpu = 0; cpu < cpu_num && cpu < MAXCPU; cpu++) {
        printf("attaching cpu %d\n", cpu);
        // With PERF_FLAG_PID_CGROUP the pid argument is the cgroup dir fd.
        fd = perf_event_open(&attr, cgroup_id, cpu, -1, PERF_FLAG_FD_CLOEXEC|PERF_FLAG_PID_CGROUP);
        if (fd < 0) { perror("fail to open perf event"); continue; }
        addr = mmap(NULL, (1+MAXN)*psize, PROT_READ, MAP_SHARED, fd, 0);
        if (addr == MAP_FAILED) { perror("mmap failed"); close(fd); continue; }
        res[fd] = make_pair(addr, 0);
        polls[k].fd = fd;
        polls[k].events = POLLIN;
        polls[k].revents = 0;
        k++;
    }
    if (k == 0) { printf("no cpu event attached at all\n"); close(cgroup_id); return 1; }

    signal(SIGINT, on_stop_signal);
    signal(SIGTERM, on_stop_signal);

    int active = k;
    while (!g_stop && active > 0) {
        // Finite timeout: a signal landing just before poll() is still
        // noticed within POLL_TIMEOUT_MS even if no event ever arrives.
        const int ready = poll(polls, k, POLL_TIMEOUT_MS);
        if (ready < 0) {
            if (errno == EINTR) continue;
            perror("poll failed");
            break;
        }
        if (ready == 0) continue;

        for (int i = 0; i < k; i++) {
            const short got = polls[i].revents;
            if (got & POLLIN) {
                fd = polls[i].fd;
                drain_ring(fd, res[fd]);
            }
            if (got & (POLLERR|POLLHUP|POLLNVAL)) {
                // A dead descriptor would make poll() return immediately
                // forever; negative fds are ignored by poll().
                polls[i].fd = -1;
                active--;
            }
        }
    }

    int_exit(0);
    close(cgroup_id);
    return 0;
}