xref: /linux-tools/perf/contextswitch/perf_cxt_switch_1.cpp (revision 9b8b28847ddaf94410a5675c2f0ecf54338206b3)
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <unistd.h>
4 #include <string.h>
5 #include <sys/ioctl.h>
6 #include <linux/perf_event.h>
7 #include <asm/unistd.h>
8 #include <sys/mman.h>
9 #include <sys/stat.h>
10 #include <poll.h>
11 #include <signal.h>
12 #include <fcntl.h>
13 
14 
15 #include <map>
16 #include <unordered_set>
17 #include <queue>
18 using namespace std;
19 
20 
/*
 * error(msg): print msg followed by the description of the current errno
 * (via perror) and terminate the process with exit status 1.
 */
#define error(msg)       \
    do {                 \
        perror(msg);     \
        exit(1);         \
    } while (0)

/*
 * Number of pages mapped per perf buffer in addition to the first page
 * (every mapping in this file is (1 + MAXN) pages long).
 */
#define MAXN 128
23 
/*
 * Invoke the perf_event_open system call directly through syscall().
 *
 * All arguments are forwarded unchanged. Returns whatever syscall() returns:
 * the new event file descriptor on success, or -1 with errno set on failure.
 */
static long perf_event_open(struct perf_event_attr *perf_event, pid_t pid, int cpu, int group_fd, unsigned long flags) {
    const long rc = syscall(__NR_perf_event_open, perf_event, pid, cpu, group_fd, flags);
    return rc;
}
/* Base address of the perf mapping currently being worked on (scratch, set in main). */
static void *addr = nullptr;

/* perf event descriptor currently being worked on (scratch, set in main); -1 = none. */
static int fd = -1;

/* System page size in bytes; filled in by main before any buffer is mapped. */
static long long psize = 0;

/*
 * Every attached perf event, keyed by its file descriptor.
 * value.first  : base address of that event's mmap'ed buffer
 * value.second : data_head value up to which the buffer has been consumed
 */
std::map<int, std::pair<void *, long long> > res;
int_exit(int s)31 void int_exit(int s) {
32     for (auto x: res) {
33         auto y = x.second;
34         void* addr = y.first;
35         munmap(addr, (1+MAXN)*psize);
36         close(x.first);
37     }
38     res.clear();
39 }
40 
/*
 * State of the per-second context-switch report.
 *   cx_count : events counted so far for the second currently being accumulated
 *   g_stime  : that second (timestamp / 1e9); 0 means "no event counted yet"
 *   g_total  : sum of all per-second counts already printed
 */
static unsigned long long cx_count=0, g_stime=0, g_total=0;

/*
 * Min-heap of raw event timestamps (nanoseconds). Events are parked here and
 * only counted once a later event is at least a few seconds newer, so that
 * records arriving out of order from different buffers are still counted in
 * ascending time order.
 */
static std::priority_queue<unsigned long long, std::vector<unsigned long long>, std::greater<unsigned long long> > events;

/* Advance a ring-buffer offset by `step` bytes, wrapping once at `size`. */
static inline unsigned long long ring_advance(unsigned long long offset, unsigned long long step, unsigned long long size) {
    offset += step;
    if (offset >= size) offset -= size;
    return offset;
}

/*
 * Decode one perf record from a ring buffer and update the report state.
 *
 * base   : start of the ring buffer's data area
 * size   : size of the data area in bytes (offset is reduced modulo size)
 * offset : position of the record header; may be an unwrapped running counter
 *
 * Only PERF_RECORD_SWITCH_CPU_WIDE records are counted; every other record
 * type is skipped. For a counted record the timestamp is read 16 bytes after
 * the header, i.e. after two 8-byte fields (presumably next_prev_pid/tid and
 * the sample_id pid/tid, given the sample_type configured by the caller --
 * see perf_event_open(2)). The header and the 8-byte fields are assumed not
 * to straddle the wrap point.
 *
 * Whenever the oldest parked event belongs to a later second than the one
 * being accumulated, one report line for the finished second is printed to
 * stdout and flushed.
 *
 * Returns the record size taken from its header, so the caller can step to
 * the next record.
 */
int process_event(char *base, unsigned long long size, unsigned long long offset) {
    static const unsigned long long kNsecPerSec = 1000000000ULL;
    /* An event is counted only once a newer event is this many seconds ahead. */
    static const unsigned long long kHoldBackSec = 3;

    offset %= size;
    // assuming the header would fit within size
    const struct perf_event_header *hdr = (const struct perf_event_header *)(base + offset);
    if (hdr->type != PERF_RECORD_SWITCH_CPU_WIDE) {
        return hdr->size;
    }

    offset = ring_advance(offset, sizeof(*hdr), size);
    offset = ring_advance(offset, 8, size);  /* first pid/tid pair  */
    offset = ring_advance(offset, 8, size);  /* second pid/tid pair */

    /* memcpy instead of a casted load: no aliasing or alignment assumptions. */
    unsigned long long stamp_ns;
    memcpy(&stamp_ns, base + offset, sizeof(stamp_ns));

    events.push(stamp_ns);
    const unsigned long long now_sec = stamp_ns / kNsecPerSec;

    while (!events.empty()) {
        const unsigned long long sec = events.top() / kNsecPerSec;
        if (now_sec < sec || now_sec - sec < kHoldBackSec) break;

        if (g_stime == 0) g_stime = sec;
        if (g_stime < sec) {
            /* The accumulated second is complete: report it before this
             * event is counted, so the event lands in its own second. */
            g_total += cx_count;
            printf("%10llu>: %10llu/s   total: %-10llu\n", g_stime, cx_count, g_total);
            fflush(stdout);
            cx_count = 0;
            g_stime = sec;
        }
        cx_count++;
        events.pop();
    }
    return hdr->size;
}
83 
84 #define MAXCPU 1024
85 struct pollfd polls[MAXCPU];
main(int argc,char * argv[])86 int main(int argc, char *argv[]) {
87     if (argc != 2) { printf("need pid\n"); return 1; }
88     int pid = atoi(argv[1]); if (pid<=0) { printf("invalid pid %s\n", argv[1]); return 1; }
89     // find cgroup
90     char xb[256], xb2[256];
91     int i, j, k;
92     sprintf(xb, "/proc/%d/cgroup", pid);
93     FILE* fp = fopen(xb, "r");
94     if (fp==NULL) error("fail to open cgroup file");
95     char *p;
96     xb2[0]=0;
97     int cgroup_name_len=0;
98     while(1) {
99         p = fgets(xb, sizeof(xb), fp); if (p==NULL) break;
100         i=0; while(p[i]&&p[i]!=':') i++; if (p[i]==0) continue;
101         if (strstr(p, "perf_event")) {
102             i++; while(p[i]!=':'&&p[i]) i++;  if (p[i]!=':') continue; i++;
103             j=i; while(p[j]!='\r'&&p[j]!='\n'&&p[j]!=0) j++; p[j]=0;
104             sprintf(xb2, "/sys/fs/cgroup/perf_event%s", p+i);
105             cgroup_name_len=j-i;
106             break;
107         } else if (p[i+1]==':') {
108             i+=2; j=i; while(p[j]!='\r'&&p[j]!='\n'&&p[j]!=0) j++; p[j]=0;
109             sprintf(xb2, "/sys/fs/cgroup/%s", p+i);
110             cgroup_name_len=j-i;
111         }
112     }
113     fclose(fp);
114     if (xb2[0]==0) error("no proper cgroup found\n");
115     if (cgroup_name_len<2) {
116         printf("cgroup %s seems to be root, not allowed\n", xb2);
117         return -1;
118     }
119     printf("try to use cgroup %s\n", xb2);
120     int cgroup_id = open(xb2, O_CLOEXEC);
121     if (cgroup_id<=0) { perror("error open cgroup dir"); return 1; }
122     // start perf event
123     psize = sysconf(_SC_PAGE_SIZE); // getpagesize();
124     int cpu_num = sysconf(_SC_NPROCESSORS_ONLN);
125 	struct perf_event_attr attr;
126     memset(&attr, 0, sizeof(attr));
127     attr.type = PERF_TYPE_SOFTWARE;
128     attr.size = sizeof(attr);
129     attr.config = PERF_COUNT_SW_CONTEXT_SWITCHES;
130     // attr.config = PERF_COUNT_SW_CPU_MIGRATIONS;
131     attr.sample_period = 1;
132     attr.wakeup_events = 32;
133     attr.sample_type = PERF_SAMPLE_TID|PERF_SAMPLE_TIME|PERF_SAMPLE_CPU;
134     attr.context_switch = 1;
135     attr.sample_id_all = 1;
136     for (i=0, k=0; i<cpu_num&&i<MAXCPU; i++) {
137         printf("attaching cpu %d\n", i);
138         fd = perf_event_open(&attr, cgroup_id, i, -1, PERF_FLAG_FD_CLOEXEC|PERF_FLAG_PID_CGROUP);
139         if (fd<0) { perror("fail to open perf event"); continue; }
140         addr = mmap(NULL, (1+MAXN)*psize, PROT_READ, MAP_SHARED, fd, 0);
141         if (addr == MAP_FAILED) { perror("mmap failed"); close(fd); continue; }
142         res[fd] = make_pair(addr, 0);
143         polls[k].fd = fd;
144         polls[k].events = POLLIN;
145         polls[k].revents = 0;
146         k++;
147     }
148     if (k==0) { printf("no cpu event attached at all\n"); return 1; }
149 
150 	signal(SIGINT, int_exit);
151 	signal(SIGTERM, int_exit);
152 
153     unsigned long long head;
154     struct perf_event_mmap_page *mp;
155     while (poll(polls, k, -1)>0) {
156         for (i=0; i<k; i++) {
157             if ((polls[i].revents&POLLIN)==0) continue;
158             fd = polls[i].fd;
159             addr = res[fd].first;
160             mp = (struct perf_event_mmap_page *)addr;
161             head = res[fd].second;
162             if (head==mp->data_head) continue;
163             ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 1);
164             head = mp->data_head-((mp->data_head-head)%mp->data_size);
165             while(head<mp->data_head) head+=process_event((char*)addr+mp->data_offset, mp->data_size, head);
166             ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 0);
167             res[fd].second = mp->data_head;
168         }
169     }
170 
171     int_exit(0);
172     return 0;
173 }
174