xref: /linux-tools/perf/contextswitch/perf_cxt_switch_2.cpp (revision 342f926fd8205d4a7cd416c2ea8b61a513b3828a)
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <unistd.h>
4 #include <string.h>
5 #include <sys/ioctl.h>
6 #include <linux/perf_event.h>
7 #include <asm/unistd.h>
8 #include <sys/mman.h>
9 #include <sys/stat.h>
10 #include <poll.h>
11 #include <signal.h>
12 #include <fcntl.h>
13 
14 
15 #include <map>
16 #include <unordered_set>
17 #include <queue>
18 #include <algorithm>
19 using namespace std;
20 
21 
/* Print msg followed by the current errno description, then exit with status 1. */
#define error(msg)      \
    do {                \
        perror(msg);    \
        exit(1);        \
    } while (0)

/* Number of data pages per perf ring buffer; mappings span (1+MAXN) pages. */
#define MAXN 128
24 
/*
 * Raw perf_event_open(2) system call: forwards all five arguments
 * unchanged to syscall(__NR_perf_event_open, ...) and returns its result.
 */
static long perf_event_open(struct perf_event_attr *perf_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
    const long rc = syscall(__NR_perf_event_open, perf_event, pid, cpu, group_fd, flags);
    return rc;
}
// Scratch state written by main(): the mapping and descriptor of the perf
// event currently being attached or drained.
static void *addr = nullptr;
static int fd = -1;
// Page size in bytes; assigned in main() from sysconf(_SC_PAGE_SIZE) and
// used to size every ring-buffer mapping.
static long long psize;
// perf event fd -> (ring-buffer mapping, data_head value already consumed).
std::map<int, std::pair<void*, long long>> res;
// pid -> label cache filled by process_event().
static std::map<int, std::string> groups;
// label -> number of context switches counted for it.
static std::map<std::string, unsigned long long> stats;
34 
35 void int_exit(int _) {
36     for (auto x: res) {
37         auto y = x.second;
38         void* addr = y.first;
39         munmap(addr, (1+MAXN)*psize);
40         close(x.first);
41     }
42     res.clear();
43     if (!stats.empty()) {
44         unsigned long long s=0;
45         using xtt = pair<unsigned long long, string>;
46         vector<xtt> ss;
47         for (auto x:stats) {
48             ss.push_back(make_pair(x.second, x.first));
49             s+=x.second;
50         }
51         sort(ss.begin(), ss.end(), greater<xtt>());
52         printf("=======================\n");
53         for (auto x:ss) {
54             printf("%.3f%% (%lld/%lld) by [%s]\n", (100.0*x.first)/s, x.first, s, x.second.c_str());
55         }
56         stats.clear();
57     }
58 
59 }
60 
61 
62 
63 int process_event(char *base, unsigned long long size, unsigned long long offset) {
64     struct perf_event_header* p = NULL;
65     int pid, cpuid, xpid;
66     unsigned long long time;
67     offset%=size;
68     // assuming the header would fit within size
69     p = (struct perf_event_header*) (base+offset);
70     offset+=sizeof(*p); if (offset>=size) offset-=size;
71     if (p->type == PERF_RECORD_SAMPLE) {
72         return p->size;
73     } else if (p->type == PERF_RECORD_SWITCH) {
74         return p->size;
75     } else if (p->type == PERF_RECORD_SWITCH_CPU_WIDE) {
76         xpid = *((int *)(base+offset));
77         offset+=8; if (offset>=size) offset-=size;
78     } else {
79         return p->size;
80     }
81     if (p->misc&PERF_RECORD_MISC_SWITCH_OUT) {
82         if (groups.count(xpid)==0) {
83             char bb[64];
84             sprintf(bb, "/proc/%d/root/etc/hostname", xpid);
85             FILE *fp = fopen(bb, "r");
86             if (fp==NULL) {
87                 string a("unknown");
88                 groups[xpid]=a;
89                 stats[a]=stats[a]+1;
90             } else {
91                 fgets(bb, sizeof(bb), fp);
92                 int i=0; while(bb[i]!='\r'&&bb[i]!='\n'&&bb[i]!=0&&i<sizeof(bb)-1) i++; bb[i]=0;
93                 string a(bb);
94                 groups[xpid]=a;
95                 stats[a]=stats[a]+1;
96                 fclose(fp);
97             }
98         }
99         stats[groups[xpid]]+=1;
100     }
101     return p->size;
102 }
103 
/* Upper bound on the number of CPUs, and so perf event fds, main() attaches. */
#define MAXCPU 1024
/* poll(2) set; main() fills one slot per successfully attached CPU. */
pollfd polls[MAXCPU];
106 int main(int argc, char *argv[]) {
107     if (argc != 2) { printf("need pid\n"); return 1; }
108     int pid = atoi(argv[1]); if (pid<=0) { printf("invalid pid %s\n", argv[1]); return 1; }
109     // find cgroup
110     char xb[256], xb2[256];
111     int i, j, k;
112     sprintf(xb, "/proc/%d/cgroup", pid);
113     FILE* fp = fopen(xb, "r");
114     if (fp==NULL) error("fail to open cgroup file");
115     char *p;
116     xb2[0]=0;
117     int cgroup_name_len=0;
118     while(1) {
119         p = fgets(xb, sizeof(xb), fp); if (p==NULL) break;
120         i=0; while(p[i]&&p[i]!=':') i++; if (p[i]==0) continue;
121         if (strstr(p, "perf_event")) {
122             i++; while(p[i]!=':'&&p[i]) i++;  if (p[i]!=':') continue; i++;
123             j=i; while(p[j]!='\r'&&p[j]!='\n'&&p[j]!=0) j++; p[j]=0;
124             sprintf(xb2, "/sys/fs/cgroup/perf_event%s", p+i);
125             cgroup_name_len=j-i;
126             break;
127         } else if (p[i+1]==':') {
128             i+=2; j=i; while(p[j]!='\r'&&p[j]!='\n'&&p[j]!=0) j++; p[j]=0;
129             sprintf(xb2, "/sys/fs/cgroup/%s", p+i);
130             cgroup_name_len=j-i;
131         }
132     }
133     fclose(fp);
134     if (xb2[0]==0) error("no proper cgroup found\n");
135     if (cgroup_name_len<2) {
136         printf("cgroup %s seems to be root, not allowed\n", xb2);
137         return -1;
138     }
139     printf("try to use cgroup %s\n", xb2);
140     int cgroup_id = open(xb2, O_CLOEXEC);
141     if (cgroup_id<=0) { perror("error open cgroup dir"); return 1; }
142     // start perf event
143     psize = sysconf(_SC_PAGE_SIZE); // getpagesize();
144     int cpu_num = sysconf(_SC_NPROCESSORS_ONLN);
145 	struct perf_event_attr attr;
146     memset(&attr, 0, sizeof(attr));
147     attr.type = PERF_TYPE_SOFTWARE;
148     attr.size = sizeof(attr);
149     attr.config = PERF_COUNT_SW_CONTEXT_SWITCHES;
150     attr.sample_period = 1;
151     attr.wakeup_events = 32;
152     attr.sample_type = PERF_SAMPLE_TID|PERF_SAMPLE_TIME|PERF_SAMPLE_CPU;
153     attr.context_switch = 1;
154     attr.sample_id_all = 1;
155     for (i=0, k=0; i<cpu_num&&i<MAXCPU; i++) {
156         printf("attaching cpu %d\n", i);
157         fd = perf_event_open(&attr, cgroup_id, i, -1, PERF_FLAG_FD_CLOEXEC|PERF_FLAG_PID_CGROUP);
158         if (fd<0) { perror("fail to open perf event"); continue; }
159         addr = mmap(NULL, (1+MAXN)*psize, PROT_READ, MAP_SHARED, fd, 0);
160         if (addr == MAP_FAILED) { perror("mmap failed"); close(fd); continue; }
161         res[fd] = make_pair(addr, 0);
162         polls[k].fd = fd;
163         polls[k].events = POLLIN;
164         polls[k].revents = 0;
165         k++;
166     }
167     if (k==0) { printf("no cpu event attached at all\n"); return 1; }
168 
169 	signal(SIGINT, int_exit);
170 	signal(SIGTERM, int_exit);
171 
172     unsigned long long head;
173     struct perf_event_mmap_page *mp;
174     while (poll(polls, k, -1)>0) {
175         for (i=0; i<k; i++) {
176             if ((polls[i].revents&POLLIN)==0) continue;
177             fd = polls[i].fd;
178             addr = res[fd].first;
179             mp = (struct perf_event_mmap_page *)addr;
180             head = res[fd].second;
181             if (head==mp->data_head) continue;
182             ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 1);
183             head = mp->data_head-((mp->data_head-head)%mp->data_size);
184             while(head<mp->data_head) head+=process_event((char*)addr+mp->data_offset, mp->data_size, head);
185             ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 0);
186             res[fd].second = mp->data_head;
187         }
188     }
189 
190     int_exit(0);
191     return 0;
192 }
193