perf mem -D report 输出字段的含义 [英] perf mem -D report

查看:80
本文介绍了 perf mem -D report 输出字段的含义,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在使用perf mem -t load record "commands"来分析系统内存访问延迟.之后,我运行perf mem -D report并得到以下结果:

I was using perf mem -t load record "commands" to profile system memory access latency. Afterwards, I ran perf mem -D report and got the following results:

[root@mdtm-server wenji]# perf mem -D report
# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL
 2054  2054 0xffffffff811186bf 0x016ffffe8fbffc804b0    49 0x68100842 /lib/modules/3.12.23/build/vmlinux:perf_event_aux_ctx
2054 2054 0xffffffff81321d6e 0xffff880c7fc87d44 7 0x68100142 /lib/modules/3.12.23/build/vmlinux:ghes_copy_tofrom_phys

"ADDR","DSRC","SYMBOL"是什么意思?

What does "ADDR", "DSRC", "SYMBOL" mean?

推荐答案

  • IP-加载/存储指令的PC;
  • SYMBOL-函数名称,包含此指令(IP);
  • ADDR-加载/存储指令所请求数据的虚拟内存地址(如果未使用--phys-data选项);
  • DSRC-"解码源"(Decoded Source).
    • IP - PC of the load/store instruction;
    • SYMBOL - name of function, containing this instruction (IP);
    • ADDR - virtual memory address of data, requested by load/store (if there was no --phys-data option)
    • DSRC - "Decoded Source".
    • DSRC-某些邮件列表中曾建议查阅"SDM Vol 3b 表18-41(PEBS记录中数据线性地址信息的布局)".

      DSRC - There was a recommendation in some mailing lists to check "SDM Vol 3b Table 18-41 (Layout of Data Linear Address Information in PEBS Record)".

      内核中也有生成DSRC的编码代码(dse来自硬件PEBS记录;返回的u64值即为dsrc):

      There is also DSRC encoding code in the kernel (dse comes from the hardware PEBS record; the returned u64 is the dsrc):

      http://lxr.free-electrons.com/source/arch/x86/kernel/cpu/perf_event_intel_ds.c?v=4.3#L28

       28 union intel_x86_pebs_dse {
       29         u64 val;
       30         struct {
       31                 unsigned int ld_dse:4;
       32                 unsigned int ld_stlb_miss:1;
       33                 unsigned int ld_locked:1;
       34                 unsigned int ld_reserved:26;
       35         };
       36         struct {
       37                 unsigned int st_l1d_hit:1;
       38                 unsigned int st_reserved1:3;
       39                 unsigned int st_stlb_miss:1;
       40                 unsigned int st_locked:1;
       41                 unsigned int st_reserved2:26;
       42         };
       43 };
      

      http://lxr.free-electrons.com/source/arch/x86/kernel/cpu/perf_event_intel_ds.c?v=4.3#L46

       46 /*
       47  * Map PEBS Load Latency Data Source encodings to generic
       48  * memory data source information
       49  */
       50 #define P(a, b) PERF_MEM_S(a, b)
       51 #define OP_LH (P(OP, LOAD) | P(LVL, HIT))
       52 #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
       53 
       54 static const u64 pebs_data_source[] = {
       55         P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
       56         OP_LH | P(LVL, L1)  | P(SNOOP, NONE),   /* 0x01: L1 local */
       57         OP_LH | P(LVL, LFB) | P(SNOOP, NONE),   /* 0x02: LFB hit */
       58         OP_LH | P(LVL, L2)  | P(SNOOP, NONE),   /* 0x03: L2 hit */
       59         OP_LH | P(LVL, L3)  | P(SNOOP, NONE),   /* 0x04: L3 hit */
       60         OP_LH | P(LVL, L3)  | P(SNOOP, MISS),   /* 0x05: L3 hit, snoop miss */
       61         OP_LH | P(LVL, L3)  | P(SNOOP, HIT),    /* 0x06: L3 hit, snoop hit */
       62         OP_LH | P(LVL, L3)  | P(SNOOP, HITM),   /* 0x07: L3 hit, snoop hitm */
       63         OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
       64         OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
       65         OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
       66         OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
       67         OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
       68         OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
       69         OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */
       70         OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
       71 };
       72 
       73 static u64 precise_store_data(u64 status)
       74 {
       75         union intel_x86_pebs_dse dse;
       76         u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
       77 
       78         dse.val = status;
       79 
       80         /*
       81          * bit 4: TLB access
       82          * 1 = stored missed 2nd level TLB
       83          *
       84          * so it either hit the walker or the OS
       85          * otherwise hit 2nd level TLB
       86          */
       87         if (dse.st_stlb_miss)
       88                 val |= P(TLB, MISS);
       89         else
       90                 val |= P(TLB, HIT);
       91 
       92         /*
       93          * bit 0: hit L1 data cache
       94          * if not set, then all we know is that
       95          * it missed L1D
       96          */
       97         if (dse.st_l1d_hit)
       98                 val |= P(LVL, HIT);
       99         else
      100                 val |= P(LVL, MISS);
      101 
      102         /*
      103          * bit 5: Locked prefix
      104          */
      105         if (dse.st_locked)
      106                 val |= P(LOCK, LOCKED);
      107 
      108         return val;
      109 }
      

      dsrc看起来是由若干PERF_MEM_*宏的值按位域组合而成:

      dsrc appears to be a combination of several PERF_MEM_* macro values packed into bitfields:

      http://lxr.free-electrons.com/source/include/uapi/linux/perf_event.h?v=4.3#L878

      878 union perf_mem_data_src {
      879         __u64 val;
      880         struct {
      881                 __u64   mem_op:5,       /* type of opcode */
      882                         mem_lvl:14,     /* memory hierarchy level */
      883                         mem_snoop:5,    /* snoop mode */
      884                         mem_lock:2,     /* lock instr */
      885                         mem_dtlb:7,     /* tlb access */
      886                         mem_rsvd:31;
      887         };
      888 };
      890 /* type of opcode (load/store/prefetch,code) */
      891 #define PERF_MEM_OP_NA          0x01 /* not available */
      892 #define PERF_MEM_OP_LOAD        0x02 /* load instruction */
      893 #define PERF_MEM_OP_STORE       0x04 /* store instruction */
      894 #define PERF_MEM_OP_PFETCH      0x08 /* prefetch */
      895 #define PERF_MEM_OP_EXEC        0x10 /* code (execution) */
      896 #define PERF_MEM_OP_SHIFT       0
      897 
      898 /* memory hierarchy (memory level, hit or miss) */
      899 #define PERF_MEM_LVL_NA         0x01  /* not available */
      900 #define PERF_MEM_LVL_HIT        0x02  /* hit level */
      901 #define PERF_MEM_LVL_MISS       0x04  /* miss level  */
      902 #define PERF_MEM_LVL_L1         0x08  /* L1 */
      903 #define PERF_MEM_LVL_LFB        0x10  /* Line Fill Buffer */
      904 #define PERF_MEM_LVL_L2         0x20  /* L2 */
      905 #define PERF_MEM_LVL_L3         0x40  /* L3 */
      906 #define PERF_MEM_LVL_LOC_RAM    0x80  /* Local DRAM */
      907 #define PERF_MEM_LVL_REM_RAM1   0x100 /* Remote DRAM (1 hop) */
      908 #define PERF_MEM_LVL_REM_RAM2   0x200 /* Remote DRAM (2 hops) */
      909 #define PERF_MEM_LVL_REM_CCE1   0x400 /* Remote Cache (1 hop) */
      910 #define PERF_MEM_LVL_REM_CCE2   0x800 /* Remote Cache (2 hops) */
      911 #define PERF_MEM_LVL_IO         0x1000 /* I/O memory */
      912 #define PERF_MEM_LVL_UNC        0x2000 /* Uncached memory */
      913 #define PERF_MEM_LVL_SHIFT      5
      914 
      915 /* snoop mode */
      916 #define PERF_MEM_SNOOP_NA       0x01 /* not available */
      917 #define PERF_MEM_SNOOP_NONE     0x02 /* no snoop */
      918 #define PERF_MEM_SNOOP_HIT      0x04 /* snoop hit */
      919 #define PERF_MEM_SNOOP_MISS     0x08 /* snoop miss */
      920 #define PERF_MEM_SNOOP_HITM     0x10 /* snoop hit modified */
      921 #define PERF_MEM_SNOOP_SHIFT    19
      922 
      923 /* locked instruction */
      924 #define PERF_MEM_LOCK_NA        0x01 /* not available */
      925 #define PERF_MEM_LOCK_LOCKED    0x02 /* locked transaction */
      926 #define PERF_MEM_LOCK_SHIFT     24
      927 
      928 /* TLB access */
      929 #define PERF_MEM_TLB_NA         0x01 /* not available */
      930 #define PERF_MEM_TLB_HIT        0x02 /* hit level */
      931 #define PERF_MEM_TLB_MISS       0x04 /* miss level */
      932 #define PERF_MEM_TLB_L1         0x08 /* L1 */
      933 #define PERF_MEM_TLB_L2         0x10 /* L2 */
      934 #define PERF_MEM_TLB_WK         0x20 /* Hardware Walker*/
      935 #define PERF_MEM_TLB_OS         0x40 /* OS fault handler */
      936 #define PERF_MEM_TLB_SHIFT      26
      937 
      938 #define PERF_MEM_S(a, s) \
      939         (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
      940 
      

      这篇关于 perf mem -D report 输出字段含义的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆