d combination.d

combination.d
/**
 * Builds factorial and inverse-factorial tables modulo 998244353 and
 * self-checks a binomial-coefficient helper built on top of them.
 */
void main(){
  import std.stdio, std.algorithm;

  // Prime modulus, so x^(mod-2) is the modular inverse of x (Fermat).
  const long mod=998244353;
  // Table size: factorials are available for 0 <= n < M.
  const int M=100_000;

  // fact[i] = i! mod `mod`.
  auto fact=new long[](M);
  fact[0]=fact[1]=1;
  foreach(i; 2..M) fact[i]=i*fact[i-1]%mod;

  // Returns a^x mod `mod` for x >= 0 (binary exponentiation).
  long powmod(long a, long x){
    if(x==0) return 1;
    else if(x==1) return a;
    else if(x&1) return a*powmod(a, x-1)%mod;
    else return powmod(a*a%mod, x/2);
  }

  // inv_fact[i] = (i!)^(-1) mod `mod`.
  // Only one modular exponentiation is needed: invert the largest factorial,
  // then walk downwards using 1/i! = (1/(i+1)!) * (i+1).
  auto inv_fact=new long[](M);
  inv_fact[M-1]=powmod(fact[M-1], mod-2);
  foreach_reverse(i; 0..M-1) inv_fact[i]=inv_fact[i+1]*(i+1)%mod;

  // Returns C(nn, rr) mod `mod`; 0 when rr is negative or larger than nn.
  // nn must be below M, the size of the precomputed tables.
  long comb(long nn, long rr){
    if(rr<0 || nn<rr) return 0L;
    assert(nn<M, "comb: nn exceeds the precomputed factorial table");
    long ret=fact[nn]%mod;
    (ret*=inv_fact[rr])%=mod;
    (ret*=inv_fact[nn-rr])%=mod;
    return ret;
  }

  assert(comb(4, 0)==1);
  assert(comb(4, 2)==6);
  assert(comb(4, 4)==1);
  assert(comb(4, 5)==0);
  assert(comb(4, -1)==0);
  assert(comb(10, 3)==120);
}

/*
  https://beta.atcoder.jp/contests/agc025/submissions/2611132
*/

d Dtrace片段用于进程内存分配测量

按大小,花费的时间等测量内存分配事件。

vmem-sbrk-change.d
// Example output:
//                1921  libumem.so.1`vmem_xalloc
//           value  ------------- Distribution ------------- count
//       268435456 |                                         0
//       536870912 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1
//      1073741824 |                                         0
//
//             2300  libumem.so.1`vmem_xalloc
//           value  ------------- Distribution ------------- count
//       536870912 |                                         0
//      1073741824 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1
//      2147483648 |                                         0
//
dtrace -qn '
    /* Remember when tracing began so each event can be keyed by its offset from start. */
    BEGIN {start = timestamp} 
/*
 * Fires on entry to every function of the target process whose name matches
 * *vmem_sbrk_alloc*, but only when arg1 is greater than 2<<27 (268435456).
 * NOTE(review): arg1 is presumably the requested size in bytes - confirm
 * against the libumem source for the traced build.
 */
pid$target::*vmem_sbrk_alloc*:entry /arg1 > 2<<27/ {
    /*
     * Key: milliseconds since BEGIN, the function that called us, and the
     * full user stack. Value: power-of-two distribution of arg1.
     */
    @[(timestamp-start)/1000000, ufunc(ucaller), ustack()] = quantize(arg1);
    }' -c '/path/to/process'

d Dtrace ARC访问类型代码段

用于识别进入ARC的请求类型的片段,即按需(Demand)访问与预取(Prefetch)访问。

arc-dem-or-pref.d
dtrace -qn '
  /* Print the CSV header and take the first interval stamp. */
  BEGIN {
    printf("timestamp,pool,type,count\n");
    /* Wall-clock time rounded down to a whole second (value is still in nanoseconds). */
    ts = walltimestamp - (walltimestamp % 1000000000) ;
  }
  /* Count every arc_read call, keyed by interval stamp, pool name and access type. */
  ::arc_read:entry {
    /*
     * Test bit 5 (0x20) of the value args[7] points to; set means Prefetch,
     * clear means Demand.
     * NOTE(review): assumes args[7] is the arc flags pointer and that bit 5 is
     * the prefetch flag in the running kernel - confirm against its arc.h.
     */
    this->checkPref = (*args[7] & (1<<5));
    this->pd = this->checkPref == 0x20 ? "Prefetch" : "Demand";
    this->spa = args[1];
    /* NOTE(review): if spa_name is an embedded array the NULL test is always true - confirm. */
    @c[ ts, this->spa->spa_name != NULL ? 
            this->spa->spa_name : "NA", this->pd ] = count();
}
  /* Every 5 seconds: emit the collected rows, clear them, and start a new interval stamp. */
  tick-5sec {
    printa("%d,%s,%s,%@d\n", @c); trunc(@c);
    ts = walltimestamp - (walltimestamp % 1000000000) ;
  }'

d 用于监视sd驱动程序sd_ready_and_valid函数的Dtrace脚本

在某些情况下,驱动器已发生故障但看起来仍然在线,此时它会报告自己尚未就绪。sd_ready_and_valid是调用者为确认设备可用而进行的检查,返回非零值意味着给定设备存在问题。

sd-ready-valid-csv.d
#!/usr/sbin/dtrace -Cs
#pragma D option quiet
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright (c) 2018 Sam Zaydel / RackTop Systems.
 *
 * sd-ready-valid-csv.d
 *
 * Description:
 * Script tracks return code from sd_ready_and_valid function, which tells
 * the caller (sdopen or sdioctl) whether a given drive is usable. 
 * When a device fails this test a non-zero value is returned and depending 
 * on state of drive we should see messages in the kernel log along the lines
 * of `drive offline`.
 */

/* Map an sd unit structure to its struct dev_info via un_sd->sd_dev. */
#define SD_TO_DEVINFO(un) ((struct dev_info *)((un)->un_sd->sd_dev))
/*
 * Driver name looked up in the kernel devnamesp table by major number.
 * Not referenced by the clauses below. The trailing comment holding a lone
 * backtick only balances the backtick used for the kernel symbol.
 */
#define DEV_NAME(un) \
      stringof(`devnamesp[SD_TO_DEVINFO(un)->devi_major].dn_name) /* ` */
/* Driver instance number of the unit. */
#define DEV_INST(un) (SD_TO_DEVINFO(un)->devi_instance)

/* On entry, stash the unit pointer in a thread-local so the return probe can see it. */
::sd_ready_and_valid:entry {
    self->un = args[0]->ssc_un;
}
/*
 * On return, count calls keyed by device id string, instance number and
 * args[1], which the file header describes as the function return code.
 */
::sd_ready_and_valid:return /self->un/ {
    @[stringof(SD_TO_DEVINFO(self->un)->devi_devid_str),
        DEV_INST(self->un), args[1]] = count();
    /* Release the thread-local so it does not leak into unrelated calls. */
    self->un = NULL;
}

/* At script exit, print a CSV header followed by one row per aggregation key. */
END {
    printf("device,instance,retcode,count\n");
    printa("%s,sd%d,%d,%@d\n", @);
}

d httf2018qual.d

httf2018qual.d
import std.stdio, std.string, std.conv, std.algorithm;
import std.exception, std.random, std.typecons, std.math;
import std.datetime;

// Module-level stopwatch, created stopped; main() starts it and Problem.solve polls it.
auto sw=StopWatch(AutoStart.no);

/**
 * Hill-climbing solver: keeps Q operations (x, y, h) whose combined
 * contribution `b` should match the input grid `a`, and repeatedly replaces
 * one random operation, keeping the change only when the score improves.
 */
class Problem{
  const int N=100;   // side length of both grids
  const int Q=1000;  // number of operations kept in the answer
  const int TL=6000; // ms

  int[N][N] a, b;    // a: grid read from input, b: grid produced by `ans`
  alias Query=Tuple!(int, "x", int, "y", int, "h");
  Query[Q] ans;
  int bestScore=0, tmpScore=0, itr=0;
  auto rnd=Random(0);

  /// Reads N whitespace-separated rows of integers from stdin into `a`.
  void stdInput(){
    foreach(row; 0..N){
      a[row]=readln.split.to!(int[]);
    }
  }

  /// Reads the same N rows from "in.txt" and requires that nothing follows them.
  void fileInput(){
    auto rows=File("in.txt").byLine.map!split;
    int row=0;
    while(row<N){
      a[row]=rows.front.to!(int[]);
      row++;
      rows.popFront;
    }
    enforce(rows.empty);
  }

  /// Runs the random replacement loop until 50 ms before the time limit.
  void solve(){
    init();
    while(sw.peek.msecs<(TL-50)){
      ++itr;
      // Draw order (slot, x, y, h) fixes the random sequence; keep it.
      auto slot=uniform(0, Q, rnd);
      auto nx=uniform(0, N, rnd);
      auto ny=uniform(0, N, rnd);
      auto nh=uniform!"[]"(1, N, rnd);

      // Swap the old operation for the candidate and rescore.
      add(ans[slot].y, ans[slot].x, ans[slot].h, -1);
      add(ny, nx, nh);
      tmpScore=calc();

      if(tmpScore<=bestScore){
        // Not strictly better: undo the candidate and restore the old operation.
        add(ny, nx, nh, -1);
        add(ans[slot].y, ans[slot].x, ans[slot].h);
        continue;
      }
      ans[slot]=Query(nx, ny, nh);
      bestScore=tmpScore;
    }
  }

  /// Fills `ans` with random operations, rebuilds `b` from them and scores it.
  void init(){
    foreach(k; 0..Q){
      ans[k].x=uniform(0, N, rnd);
      ans[k].y=uniform(0, N, rnd);
      ans[k].h=uniform!"[]"(1, N, rnd);
    }
    foreach(ref line; b) line[]=0;
    foreach(op; ans) add(op.y, op.x, op.h);
    bestScore=calc();
  }

  /**
   * Adds (sign=1) or removes (sign=-1) one operation from `b`: the centre
   * cell changes by h, and every in-grid cell at Manhattan distance z
   * (1 <= z < h) changes by h-z.
   */
  void add(int y, int x, int h, int sign=1){
    b[y][x]+=h*sign;
    foreach(dist; 1..h){
      immutable delta=(h-dist)*sign;
      foreach(dy; -dist..dist+1){
        immutable dx=dist-dy.abs;
        immutable row=y+dy;
        if(ok(row, x+dx)) b[row][x+dx]+=delta;
        // When dx is 0 the left and right cells coincide; count it once.
        if(dx!=0 && ok(row, x-dx)) b[row][x-dx]+=delta;
      }
    }
  }

  /// True when (y, x) lies inside the N x N grid.
  bool ok(int y, int x){
    if(y<0 || y>=N) return false;
    return 0<=x && x<N;
  }

  /// Score: 200000000 minus the sum of absolute cell differences between a and b.
  int calc(){
    auto score=2e8.to!(int);
    foreach(r; 0..N){
      foreach(c; 0..N){
        score-=(a[r][c]-b[r][c]).abs;
      }
    }
    return score;
  }

  /// Writes Q and then one "x y h" line per operation to stdout.
  void stdOutput(){
    writeln(Q);
    foreach(k; 0..Q){
      writeln(ans[k].x, " ", ans[k].y, " ", ans[k].h);
    }
  }

  /// Writes the same output to "out.txt".
  void fileOutput(){
    auto sink=File("out.txt", "w");
    sink.writeln(Q);
    foreach(k; 0..Q){
      sink.writeln(ans[k].x, " ", ans[k].y, " ", ans[k].h);
    }
  }

  /// Prints the iteration count and best score to stderr.
  void show(){
    stderr.writeln("itr = ", itr);
    stderr.writeln("score = ", bestScore);
  }
}

/// Entry point: times the whole run, solves one instance and reports statistics.
void main(){
  // Flip to false to read "in.txt" and write "out.txt" instead of using stdio.
  immutable bool useStdio=true;

  sw.start;

  auto solver=new Problem;
  if(useStdio) solver.stdInput;
  else solver.fileInput;

  solver.solve;

  if(useStdio) solver.stdOutput;
  else solver.fileOutput;

  sw.stop;
  solver.show;
}

d 用于观察内核内存回收(reap)的Dtrace脚本

收集内核内存回收(kmem reap)活动的信息。

kmem-reap-details.d
#!/usr/sbin/dtrace -s

#pragma D option quiet
#pragma D option destructive

/* Log wall-clock seconds, the high-resolution timestamp and freemem on entry to arc_kmem_reap_now. */
arc_kmem_reap_now:entry
{
        printf("%d ts=%d freemem=%d -> arc_kmem_reap_now\n",
            walltimestamp / 1000000000, timestamp, `freemem);
}

/* Same line on return, marked with "<-", so the two can be paired in the log. */
arc_kmem_reap_now:return
{
        printf("%d ts=%d freemem=%d <- arc_kmem_reap_now\n",
            walltimestamp / 1000000000, timestamp, `freemem);
}

/* Per-cache reap start: remember the cache pointer for the return probe and log its name. */
kmem_cache_reap:entry
{
        self->cp = args[0];
        printf("%d freemem=%d -> cache reap %s\n", walltimestamp / 1000000000,
            `freemem, stringof(args[0]->cache_name));
}

/* Per-cache reap end: only fires for threads that were seen entering. */
kmem_cache_reap:return
/self->cp != NULL/
{
        printf("%d freemem=%d <- cache reap %s\n", walltimestamp / 1000000000,
            `freemem, stringof(self->cp->cache_name));
        /* Clear the thread-local so a later unrelated return is not matched. */
        self->cp = NULL;
}

/* Magazine depot reap start: remember the cache pointer and log its name. */
kmem_depot_ws_reap:entry
{
        self->depot = args[0];
        printf("%d freemem=%d -> mag reap %s\n", walltimestamp / 1000000000,
            `freemem, stringof(args[0]->cache_name));
}

/*
 * Magazine depot reap end. This clause must be attached to the return probe:
 * on the entry probe it would run right after the clause above, print the
 * "<-" line before any work was done and clear self->depot immediately.
 */
kmem_depot_ws_reap:return
/self->depot != NULL/
{
        printf("%d freemem=%d <- mag reap %s\n", walltimestamp / 1000000000,
            `freemem, stringof(self->depot->cache_name));
        /* Clear the thread-local so a later unrelated return is not matched. */
        self->depot = NULL;
}

/*
 * Count calls to htable_steal_active in the global `steals`.
 * NOTE(review): the ".isra.2" suffix looks like a compiler-generated clone
 * name and is presumably specific to one kernel build - confirm the probe
 * exists on the target system.
 */
htable_steal_active.isra.2:entry
{
        steals++;
}

/*
 * Once per second per CPU: if the sample landed in kernel code (arg0 != 0)
 * and the running thread is the kmem_taskq thread, print where it is
 * executing together with up to 60 kernel stack frames.
 */
profile-1hz
/arg0 != 0 && curthread == `kmem_taskq->tq_thr._tq_thread/
{
        printf("%d kmem_taskq cpu=%d pc=%a", walltimestamp / 1000000000, cpu, arg0);
        stack(60);
}

/*
 * Once per second: one summary line with freemem, needfree, the steal
 * counter, ARC size and target (arcstat_size / arcstat_c), the number of
 * waiters on arc_reclaim_waiters_cv, loadavg.lg_total, and the kmem_taskq
 * backlog (tq_tasks minus tq_executed).
 */
tick-1sec
{
        printf("%d freemem=%d needfree=%d steals=%d arc_size=%d arc_c=%d waiters=%d load=%d tq=%d\n",
            walltimestamp / 1000000000,
            `freemem, `needfree, steals,
            `arc_stats.arcstat_size.value.ui64,
            `arc_stats.arcstat_c.value.ui64,
            ((condvar_impl_t *)&`arc_reclaim_waiters_cv)->cv_waiters,
            `loadavg.lg_total,
            `kmem_taskq->tq_tasks - `kmem_taskq->tq_executed);
}

d 用于观察内存不足(shortfall)和回收(reclaim)的Dtrace ARC代码片段

arc-kmem-reap-time-pagecnt.d
#!/usr/sbin/dtrace -qCs
/*
 * Measure how much time is spent in arc_kmem_reap_now function.
 * There are known issues with spending a long time in this call.
 * We want to know how much time was spent and how much was freed.
 */
/* Print the CSV header once. */
BEGIN {
  printf("timestamp,callDuration,pagesFree,pagesReclaimed\n");
}
/*
 * On entry, record the start time and the free page count. Both must be
 * thread-local (self->): clause-local (this->) values do not carry over
 * from the entry probe to the return probe.
 */
::arc_kmem_reap_now:entry {
  self->in = timestamp;
  self->free1 = `freemem;
}
/*
 * On return, emit one CSV row: wall-clock time, call duration in
 * microseconds, free pages at entry, and the change in free pages.
 */
::arc_kmem_reap_now:return /self->in/ {
  this->free2 = `freemem;
  /* How much we freed since entry into this function, could be negative! */
  this->freed = this->free2 - self->free1;
  printf("%ld,%u,%lu,%ld\n", walltimestamp,
          (timestamp-self->in)/1000, self->free1, (long)this->freed);
  /* Reset only after the values have been used to build the row. */
  self->in = 0;
  self->free1 = 0;
}
arc-avail-min-max.d
#!/usr/sbin/dtrace -qCs
/*
  * Snippet collects minimum and maximum available bytes to ARC.
  * Measurement is coming from a periodic call to  arc_available_memory,
  * which returns a signed value, where anything below zero signals
  * memory shortfall, and will result in reclaim activity.
  */
/* Index constants for the two slots of the l[] array. */
inline const char MIN = 0;
inline const char MAX = 1;
/*
 * shortfall: number of samples below zero in the current interval
 * x:         0 until the first sample of an interval has been taken
 * reclaim:   1 once the interval minimum has gone below zero
 * l[]:       l[MIN] / l[MAX] hold the interval minimum and maximum
 */
int shortfall;
int x;
char reclaim;
long l[char];

BEGIN {
  printf("timestamp,minAvail,maxAvail,reclaim,shortCnt\n");
}

/* First sample of an interval: seed both extremes and reset the counters. */
::arc_available_memory:return
/x == 0/
{
  l[MAX] = args[1];
  l[MIN] = l[MAX];
  x++;
  shortfall = 0;
  reclaim = 0;
}

/*
 * Every sample once the interval is seeded. This also runs for the seeding
 * sample itself, because x is already non-zero by the time this clause is
 * evaluated.
 */
::arc_available_memory:return
/x != 0/
{
  l[MIN] = l[MIN] <= args[1] ? l[MIN] : args[1];
  l[MAX] = l[MAX] >= args[1] ? l[MAX] : args[1];
  /* Sticky flag: stays 1 for the rest of the interval once the minimum is negative. */
  reclaim = (reclaim == 1 || l[MIN] < 0) ? 1 : 0;
  shortfall += args[1] < 0 ? 1 : 0;
}

/* Emit one CSV row every 5 seconds and start a new interval. */
tick-5sec
{
  printf("%ld,%ld,%ld,%d,%d\n", 
          walltimestamp, l[MIN], l[MAX], reclaim, shortfall);
  x = 0;
}
arc-avail-min-max-1liner.d
dtrace -qn '
  /*
   * Snippet collects minimum and maximum available bytes to ARC.
   * Measurement is coming from a periodic call to  arc_available_memory,
   * which returns a signed value, where anything below zero signals
   * memory shortfall, and will result in reclaim activity.
   */
  inline const char MIN = 0;
  inline const char MAX = 1;
  /*
   * shortfall: samples below zero in the interval; x: 0 until the interval
   * has its first sample; reclaim: 1 once the minimum went below zero;
   * l[MIN] / l[MAX]: interval minimum and maximum.
   */
  int shortfall; int x; char reclaim; long l[char];

  /* Header lists all five columns that the tick clause prints. */
  BEGIN { printf("timestamp,minAvail,maxAvail,reclaim,shortCnt\n"); }
  /* First sample of an interval: seed both extremes and reset the counters. */
  ::arc_available_memory:return /!x/ {
    l[MIN] = l[MAX] = args[1] ; x++ ;
    reclaim = 0 ; shortfall = 0 ;
  }
  ::arc_available_memory:return /x/ { /* Record minimum and maximum values */
    l[MIN] = args[1] < l[MIN] ? args[1] : l[MIN] ;
    l[MAX] = args[1] > l[MAX] ? args[1] : l[MAX] ;
    reclaim = reclaim == 1 ? 1 : l[MIN] < 0 ? 1 : 0 ; /* reclaim needed? */
    shortfall += args[1] < 0 ? 1 : 0 ;
  }
  /* Emit one CSV row every 5 seconds and start a new interval. */
  tick-5sec {
    printf("%ld,%ld,%ld,%d,%d\n", 
          walltimestamp, l[MIN], l[MAX], reclaim, shortfall) ;
    x = 0 ;
  }'

d 用于在BrickstorOS上获取详细IO指标的Dtrace脚本

利用io:::start和io:::done探测器收集较低级别的IO指标。进行各种测量,包括延迟范围、处理的字节数、吞吐量、各种速率、按IO类型统计的IO计数等。

bw-tput-iops-actv-time-hist-with-ts-csv.d
#!/usr/sbin/dtrace -Cs
#pragma D option quiet
#pragma D option dynvarsize=16M

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright (c) 2017 Sam Zaydel / RackTop Systems.
 *
 * bw-tput-iops-actv-time-hist-with-ts-csv.d
 * This script is meant to be a more informative replacement for the
 * iostat utility. It measures some features that iostat in my opinion
 * is not useful for. For example, iostat does not offer distributions
 * at all, whereas here we plot distributions of IO rates as observed
 * for each disk. Also, iostat does not offer operating ranges for
 * measurements, which here we define as max - min of some measurement.
 *
 * Currently, script is limited to scsi_vhci devices, because they are
 * ones we normally use for actual data storage. Anything else is either
 * a locally attached device, or something not used by system, i.e.
 * USB storage media, etc.
 */
unsigned long minlati[dev_t, int], maxlati[dev_t, int]; /* Latency min and max */
unsigned long minratei[dev_t], maxratei[dev_t];         /* IO Rate min and max */
unsigned long miniosz[dev_t], maxiosz[dev_t];           /* IO size min and max */
unsigned long iocnt[dev_t, int];    /* Completed IOs per device, indexed by R or W */
int pend[dev_t];                    /* IOs started but not yet completed, per device */
hrtime_t start[dev_t, uint64_t];    /* Issue time keyed by device and block number */
hrtime_t ival_timer[dev_t];         /* Time the per-device interval last (re)started */
hrtime_t ticks;                     /* Time of the previous tick-10sec firing */
/* Unit conversion constants for nanosecond timestamps. */
inline const int NSEC_PER_SEC = 1000000000;
inline const int NSEC_PER_MSEC = 1000000;
inline const int NSEC_PER_USEC = 1000;
inline const int SPA_MAXBLOCKSIZE = 128 << 10;
/* Second index into minlati / maxlati / iocnt: reads and writes. */
inline const int R = 0;
inline const int W = 1;

/*
 * These are the Output Parameter definitions for collected metrics
 *
 * sdname == Name of device, i.e. sd0
 * mpxiowwn == mpxio device, like: 5000cca24500f698
 * actvtm == amount of real, busy time spent processing IO
 * rangerlat, rangewlat == latency range: max - min for Reads and Writes
 * totbytes == total number of bytes Read and Written during sample interval
 * tput,maxtput == mean and maximum or burst throughput
 * ctrd,ctwr == Count of Reads and Writes over interval set in tick-Xsec clause
 * aviosz,rangeiosz == IO size mean and range: max - min
 * iops,maxiops == normalized IOPS mean and maximum
 * avKBps,rangeKBps == normalized IO rate and IO rate range: max - min
 * // Histogram of IO Rate distribution with 4 buckets //
 * [ ratelt1MBps  ] ==> 1 second interval with < 1,000KB/s bw
 * [ rate10MBps   ] ==> 1 second interval with < 10,000KB/s bw
 * [ rate100MBps  ] ==> 1 second interval with < 100,000KB/s bw
 * [ rate1GBps    ] ==> 1 second interval with < 1,000,000KB/s bw
 * avtime,maxtime == average and maximum IO completion latency
 * // Histogram of latency distribution with 6 buckets //
 * [ timegt1000ms ] ==> >1 second (SLOW!)
 * [ time100ms    ] ==> >50ms && 100ms<=    (not acceptable)
 * [ time50ms     ] ==> >25ms && 50ms<=     (acceptable)
 * [ time25ms     ] ==> >10ms && 25ms<=     (ok)
 * [ time10ms     ] ==> >1ms && 10ms<=      (good)
 * [ timelt1ms    ] ==> 1ms<                (probably cache)
 * // Histogram of IO size distribution with 7 buckets //
 * [ iosztiny     ] ==> 4K<
 * [ iosz4k       ] ==> between 4K and 8K
 * [ iosz8k       ] ==> between 8K and 16K
 * [ iosz16k      ] ==> between 16K and 32K
 * [ iosz32k      ] ==> between 32K and 64K
 * [ iosz64k      ] ==> between 64K and 128K
 * [ ioszbig      ] ==> anything 128K or above
 * avpending    == Average measured IOs in processing per sample interval
 * maxpending   == Maximum measured IOs in processing per sample interval
 * cterr        == Counter tracking number of errors per sample interval
 * ctretry      == Counter tracking number of retries per sample interval
 */

/*
 * Print the CSV header and take the first interval stamp: wall-clock time
 * rounded down to a whole second, used as the first key of every aggregation.
 */
BEGIN {
    printf("timestamp,sdname,mpxiowwn,actvtm,rangerlat,rangewlat,totbytes,tput,maxtput,ctrd,ctwr,aviosz,rangeiosz,iops,maxiops,avKBps,rangeKBps,ratelt1MBps,rate10MBps,rate100MBps,rate1GBps,avtime,maxtime,timegt1000ms,time100ms,time50ms,time25ms,time10ms,timelt1ms,iosztiny,iosz4k,iosz8k,iosz16k,iosz32k,iosz64k,ioszbig,avpending,maxpending,cterr,ctretry\n");
    interval = walltimestamp - (walltimestamp%1000000000);
}

/* 
 * Set interval timer initial value once for a given device.
 * Each subsequent update will happen roughly once per second,
 * or if no IO, whenever there is some IO that triggers io:::done
 * probe, at which point we determine that timer expired.
 */
io:::start
/ival_timer[args[0]->b_edev] == 0/  {
    ival_timer[args[0]->b_edev] = timestamp;
}

/* Record the issue time of this IO so io:::done can compute its latency. */
io:::start {
    start[args[0]->b_edev, args[0]->b_blkno] = timestamp;
    /* Increment pending IOs by 1 */
    pend[args[0]->b_edev]++;
}

/*
 * Completion of an IO whose start was recorded. Computes latency and rate
 * for this IO and feeds every per-device aggregation, keyed by
 * (interval, device name, path fragment).
 *
 * The minimum trackers (minratei, miniosz, minlati) treat 0 as "not set
 * yet": they start at 0 and are reset to 0 each interval, and an unsigned
 * value can never be below 0, so a plain "less than" test would leave the
 * minimum stuck at 0 and each range equal to the maximum.
 */
io:::done
/start[args[0]->b_edev, args[0]->b_blkno]/ {
    this->sn    = args[1]->dev_statname;
    /* 16 characters of the device path starting at offset 25. */
    this->p     = substr(args[1]->dev_pathname, 25, 16);
    this->p     = (strstr(this->p, "disk@") == 0 ||
                    strstr(this->p, "disk@") == "") ? this->p : "NA";
    this->x     = args[0]->b_bcount * 976562;
    this->elapsed = timestamp - start[args[0]->b_edev, args[0]->b_blkno];
    start[args[0]->b_edev, args[0]->b_blkno] = 0;
    
    /* Decrement pending IOs by 1, set to 0 if value < 0 */
    pend[args[0]->b_edev]--;
    pend[args[0]->b_edev] = 
        pend[args[0]->b_edev] > 0 ?
        pend[args[0]->b_edev] : 0; /* avoid underflow */

    /* Total Number of bytes per device */
    @totbytes[interval, this->sn, this->p]    = sum(args[0]->b_bcount);
    /* Total nanoseconds of active time per device */
    @actv_tm[interval, this->sn, this->p]     = sum(this->elapsed);

    /* 
     * Instead of converting nanoseconds to seconds we multiply
     * the top by NSEC_PER_SEC and the divide by delta in nanoseconds.
     * In extreme observations, i.e. stalled IO, we may have huge
     * this->elapsed values, in which case result will be 0 KB/s, even
     * if there in fact was a non-zero value of bytes transferred.
     */
    this->b_nsec = args[0]->b_bcount * NSEC_PER_SEC;
    this->kb_per_sec = (this->b_nsec / this->elapsed) >> 10;

    /* IO Rate histogram base10, limited at 1000000 KB/s | 976 MB/s */
    /* 1000KB/s <= */
    @ratelt1MBps[interval, this->sn, this->p]     = sum(this->kb_per_sec <= 0x3e8 ? 1 : 0);
    /* > 1000KB/s && 10,000KB/s <= */ 
    @rate10MBps[interval, this->sn, this->p]      = sum(this->kb_per_sec > 0x3e8 &&
                                            this->kb_per_sec <= 0x2710 ? 1 : 0);
    /* > 10,000KB/s && 100,000KB/s <= */ 
    @rate100MBps[interval, this->sn, this->p]     = sum(this->kb_per_sec > 0x2710 &&
                                            this->kb_per_sec <= 0x186a0 ? 1 : 0);
    /* > 100,000KB/s && 1,000,000 KB/s <= */ 
    @rate1GBps[interval, this->sn, this->p]       = sum(this->kb_per_sec > 0x186a0 &&
                                            this->kb_per_sec <= 0xf4240 ? 1 : 0);

    /*
     * Collect minimum and maximum observed rate for later measurement
     * of range for this metric.
     */
    minratei[args[0]->b_edev] = minratei[args[0]->b_edev] == 0 ?
        this->kb_per_sec : minratei[args[0]->b_edev] < this->kb_per_sec ?
        minratei[args[0]->b_edev] : this->kb_per_sec;

    maxratei[args[0]->b_edev] = maxratei[args[0]->b_edev] == 0 ?
        this->kb_per_sec : maxratei[args[0]->b_edev] > this->kb_per_sec ?
        maxratei[args[0]->b_edev] : this->kb_per_sec;

    /*
     * Measure IO rate range in KB/s. Done after the min/max update above so
     * that the current IO is included in the range.
     */
    @rangeKBps[interval, this->sn, this->p]     =
        max(maxratei[args[0]->b_edev] - minratei[args[0]->b_edev]);
    
    /* Actual Kbytes/sec per device */
    @avKBps[interval, this->sn, this->p]  = avg(this->kb_per_sec);
    
    /* Average and Maximum Latency per device */
    @avtime[interval, this->sn, this->p]  = avg(this->elapsed);
    @maxtime[interval, this->sn, this->p] = max(this->elapsed);
    /*
     * Latency histogram with buckets:
     * >1000ms, >50 to 100ms, >25 to 50ms, >10 to 25ms, >1 to 10ms, 1ms<
     * NOTE(review): latencies above 100ms but below 1000ms fall into no
     * bucket, and neither does exactly 1ms - confirm this is intended.
     */
    @timegt1000ms[interval, this->sn, this->p] = sum(
        this->elapsed >= 1 * NSEC_PER_SEC ? 1 : 0);
    @time100ms[interval, this->sn, this->p] = sum(
        this->elapsed > 50 * NSEC_PER_MSEC &&
        this->elapsed <= 100 * NSEC_PER_MSEC ? 1 : 0);
    @time50ms[interval, this->sn, this->p] = sum(
        this->elapsed > 25 * NSEC_PER_MSEC &&
        this->elapsed <= 50 * NSEC_PER_MSEC ? 1 : 0);
    @time25ms[interval, this->sn, this->p] = sum(
        this->elapsed > 10 * NSEC_PER_MSEC &&
        this->elapsed <= 25 * NSEC_PER_MSEC ? 1 : 0);
    @time10ms[interval, this->sn, this->p] = sum(
        this->elapsed > 1 * NSEC_PER_MSEC &&
        this->elapsed <= 10 * NSEC_PER_MSEC ? 1 : 0);
    @timelt1ms[interval, this->sn, this->p] = sum(
        this->elapsed < 1 * NSEC_PER_MSEC ? 1 : 0);

    /*
     * Collect minimum and maximum io size for later measurement
     * of range for this metric. A stored minimum of 0 means "not set yet".
     */
    miniosz[args[0]->b_edev] =
        (miniosz[args[0]->b_edev] == 0 ||
        args[0]->b_bcount < miniosz[args[0]->b_edev]) ?
        args[0]->b_bcount : miniosz[args[0]->b_edev];
    maxiosz[args[0]->b_edev] =
        args[0]->b_bcount > maxiosz[args[0]->b_edev] ?
        args[0]->b_bcount : maxiosz[args[0]->b_edev];

    /* Measure IO size range in Kilobytes */
    @rangeiosz[interval, this->sn, this->p] = 
        max((maxiosz[args[0]->b_edev] - miniosz[args[0]->b_edev]) >> 10);

    /* 
     * Convert from bytes and nanoseconds to KB/s with 976562 to obtain
     * avg. effective throughput, and maximum effective throughput.
     * Maximum throughput is likely measuring cache effects.
     */
    @tput[interval, this->sn, this->p] = avg(this->x / this->elapsed);
    @maxtput[interval, this->sn, this->p] = max(this->x / this->elapsed);
    iocnt[args[0]->b_edev, R] += args[0]->b_flags & B_READ ? 1 : 0;
    iocnt[args[0]->b_edev, W] += args[0]->b_flags & B_WRITE ? 1 : 0;

    /* Count number of IOs by IO-type */
    @ctrd[interval, this->sn, this->p]        = sum(args[0]->b_flags & B_READ ? 1 : 0);
    @ctwr[interval, this->sn, this->p]        = sum(args[0]->b_flags & B_WRITE ? 1 : 0);
    @iops[interval, this->sn, this->p]        = 
        count(); /* Normalized to per second in tick-X probe */
    
    /* Maximum sampled IOPS per device */
    @maxiops[interval, this->sn, this->p]     = 
        max(iocnt[args[0]->b_edev, R] + iocnt[args[0]->b_edev, W]);

    /* 
     * Collect minimum and maximum latency for later measurement
     * of range for this metric. A stored minimum of 0 means "not set yet".
     */
    minlati[args[0]->b_edev, R] =
        args[0]->b_flags & B_READ &&
        (minlati[args[0]->b_edev, R] == 0 ||
        this->elapsed < minlati[args[0]->b_edev, R]) ? this->elapsed :
        minlati[args[0]->b_edev, R];
    maxlati[args[0]->b_edev, R] =
        args[0]->b_flags & B_READ &&
        this->elapsed > maxlati[args[0]->b_edev, R] ? this->elapsed :
        maxlati[args[0]->b_edev, R];
    
    minlati[args[0]->b_edev, W] =
        (args[0]->b_flags & B_READ) == 0 &&
        (minlati[args[0]->b_edev, W] == 0 ||
        this->elapsed < minlati[args[0]->b_edev, W]) ? this->elapsed :
        minlati[args[0]->b_edev, W];
    maxlati[args[0]->b_edev, W] =
        (args[0]->b_flags & B_READ) == 0 &&
        this->elapsed > maxlati[args[0]->b_edev, W] ? this->elapsed :
        maxlati[args[0]->b_edev, W];
    
    /*
     * IOsize distribution not grouped by direction, i.e. no distinction
     * is made between reads and writes. IO buckets double in size from
     * previous bucket. i.e. 4, 8, 16, 32...
     */
     this->bs = args[0]->b_bcount ;
    /* 4K< */
    @iosztiny[interval, this->sn, this->p]    =
        sum(this->bs < 0x1000 ? 1 : 0);
    /* 4K to 8K< */
    @iosz4k[interval, this->sn, this->p]      =
        sum(this->bs >= 0x1000 && this->bs < 0x2000 ? 1 : 0);
    /* 8K to 16K< */
    @iosz8k[interval, this->sn, this->p]      =
        sum(this->bs >= 0x2000 && this->bs < 0x4000 ? 1 : 0);
    /* 16K to 32K< */
    @iosz16k[interval, this->sn, this->p]     =
        sum(this->bs >= 0x4000 && this->bs < 0x8000 ? 1 : 0);
    /* 32K to 64K< */
    @iosz32k[interval, this->sn, this->p]     =
        sum(this->bs >= 0x8000 && this->bs < 0x10000 ? 1 : 0);
    /* 64K to 128K< */
    @iosz64k[interval, this->sn, this->p]     =
        sum(this->bs >= 0x10000 && this->bs < 0x20000 ? 1 : 0);
    /* >128K */
    @ioszbig[interval, this->sn, this->p]     =
        sum(this->bs >= 0x20000 ? 1 : 0);

    /* Average IO size for given device */
    @aviosz[interval, this->sn, this->p]      = avg(this->bs);

    /* 
     * Each time we observe an error at completion through B_ERROR flag,
     * increment count of errors for given device. This should always
     * be zero, assuming healthy device.
     */
    @cterr[interval, this->sn, this->p] = sum(args[0]->b_flags & B_ERROR ? 1 : 0);
}

/* 
 * Entry controlled by timer. By design, each device will be registered
 * here about once per second when there is even a litte bit of IO.
 */
/*
 * Fires on an IO completion when at least one second has passed since the
 * per-device interval timer was last set. Publishes the range and pending
 * aggregations for the device, then resets its per-interval trackers.
 */
io:::done
/ival_timer[args[0]->b_edev] > 0 &&
timestamp >= ival_timer[args[0]->b_edev] + NSEC_PER_SEC/ {
    this->sn    = args[1]->dev_statname;
    /* 16 characters of the device path starting at offset 25. */
    this->p     = substr(args[1]->dev_pathname, 25, 16);
    this->p     = (strstr(this->p, "disk@") == 0 ||
                    strstr(this->p, "disk@") == "") ? this->p : "NA";

    /*
     * Measure operating latency range in uS for Reads and Writes,
     * storing largest observed difference.
     */
    @rangerlat[interval, this->sn, this->p]   =
        max((maxlati[args[0]->b_edev, R] -
            minlati[args[0]->b_edev, R]) / NSEC_PER_USEC);
    @rangewlat[interval, this->sn, this->p]   =
        max((maxlati[args[0]->b_edev, W] -
            minlati[args[0]->b_edev, W]) / NSEC_PER_USEC);

    /* Pending IO count is sampled here, once per expired interval. */
    @avpending[interval, this->sn, this->p]   = avg(pend[args[0]->b_edev]);
    @maxpending[interval, this->sn, this->p]  = max(pend[args[0]->b_edev]);

    /* Reset various counters for next measurement period */
    minlati[args[0]->b_edev, R] = 0;
    maxlati[args[0]->b_edev, R] = 0;
    minlati[args[0]->b_edev, W] = 0;
    maxlati[args[0]->b_edev, W] = 0;
    iocnt[args[0]->b_edev, R]   = 0;
    iocnt[args[0]->b_edev, W]   = 0;
    miniosz[args[0]->b_edev]    = 0;
    maxiosz[args[0]->b_edev]    = 0;
    minratei[args[0]->b_edev]   = 0;
    maxratei[args[0]->b_edev]   = 0;
    /* Restart the one-second interval for this device. */
    ival_timer[args[0]->b_edev]         = timestamp;
}

/* 
 * Count number of retries issued to a disk. These are a good
 * indicator of potentially failing, or borderline device.
 * Under normal circumstances we should not expect
 * this to be a positive value.
 */
/*
 * Count calls to sd_set_retry_bp per device. args[1] is translated to a
 * devinfo_t so the same device name and path fields as in the io provider
 * clauses can be used as keys. Devices whose path is "<nfs>" or empty are
 * skipped.
 */
::sd_set_retry_bp:entry
/xlate <devinfo_t *>(args[1])->dev_pathname != "<nfs>" &&
xlate <devinfo_t *>(args[1])->dev_pathname != "" / {
    this->sn    = xlate <devinfo_t *>(args[1])->dev_statname;
    /* 16 characters of the device path starting at offset 25. */
    this->p     = substr(xlate <devinfo_t *>(args[1])->dev_pathname, 25, 16);
    this->p     = (strstr(this->p, "disk@") == 0 ||
                    strstr(this->p, "disk@") == "") ? this->p : "NA";
    @ctretry[interval, this->sn, this->p] = count();
}

/*
 * Reporting clause: every 10 seconds normalize, print and clear all
 * aggregations, then start a new interval stamp.
 */
tick-10sec {
    /* First time we enter this clause, ticks will be 0, so we just assume 10 seconds */
    this->elapsed = ticks > 0 ? (timestamp - ticks) / NSEC_PER_SEC : 10 ;

    /* Normalize Data for correct per second reporting of rates, like IOPS */
    normalize(@actv_tm, NSEC_PER_MSEC); /* from nanoseconds to milliseconds */
    normalize(@iops, this->elapsed);

    /* One CSV row per (interval, device, path) key; column order matches the header. */
    printa("%ld,%s,%s,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d\n",
        @actv_tm, @rangerlat, @rangewlat, @totbytes, @tput, @maxtput, 
        @ctrd, @ctwr, @aviosz, @rangeiosz, @iops, @maxiops, @avKBps, @rangeKBps,
        @ratelt1MBps, @rate10MBps, @rate100MBps, @rate1GBps, @avtime, @maxtime,
        @timegt1000ms, @time100ms, @time50ms, @time25ms, @time10ms, @timelt1ms,
        @iosztiny, @iosz4k, @iosz8k, @iosz16k, @iosz32k, @iosz64k, @ioszbig,
        @avpending, @maxpending, @cterr, @ctretry);

    /* Clear every aggregation, grouped in the same order as the columns above. */
    trunc(@actv_tm); trunc(@rangerlat); trunc(@rangewlat);
    trunc(@totbytes); trunc(@tput); trunc(@maxtput);
    trunc(@ctrd); trunc(@ctwr);
    trunc(@aviosz); trunc(@rangeiosz);
    trunc(@iops); trunc(@maxiops);
    trunc(@avKBps); trunc(@rangeKBps);
    trunc(@ratelt1MBps); trunc(@rate10MBps);
    trunc(@rate100MBps); trunc(@rate1GBps);
    trunc(@avtime); trunc(@maxtime);
    trunc(@timegt1000ms); trunc(@time100ms); trunc(@time50ms);
    trunc(@time25ms); trunc(@time10ms); trunc(@timelt1ms);
    trunc(@iosztiny); trunc(@iosz4k); trunc(@iosz8k); trunc(@iosz16k);
    trunc(@iosz32k); trunc(@iosz64k); trunc(@ioszbig);
    trunc(@avpending); trunc(@maxpending);
    trunc(@cterr); trunc(@ctretry);

    /* Remember when this report ran and stamp the next interval. */
    ticks = timestamp ;
    interval = walltimestamp - (walltimestamp%1000000000);
}

d Dtrace IO错误计数脚本

io-retry-and-err-count-csv.d
#!/usr/sbin/dtrace -Cs
#pragma D option quiet

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright (c) 2017 Sam Zaydel / RackTop Systems.
 *
 * io-retry-and-err-count-csv.d
 *
 * Description:
 * Script collects a count of IOs that resulted in an error or retry.
 * The rate of error is multiplied by `multiplier` and reported with a
 * e-6, but without actually doing floating point arithmetic, which
 * dtrace does not have support for.
 * Expectation here is that if device is experiencing an IO error each
 * time it issues an IO, we should see 1000000e-6 in the output, meaning
 * rate of error is 1.0.
 */
/* Scale factor applied to the error rate so it can be printed as an integer with an "e-6" suffix. */
inline const int multiplier = 1000000;
/*
 * Per-device state: ioct counts completed IOs, errct counts completions with
 * B_ERROR set, timer holds the time the current rate window started.
 */
unsigned long ioct[dev_t], errct[dev_t], timer[dev_t];

/* Print the CSV header once. */
BEGIN {
    printf("sdname,mpxiowwn,ctretry,cterr,cteio,noxfer,rateerr\n");
}

/*
 * Count calls to sd_set_retry_bp per device, skipping devices whose
 * translated path is "<nfs>" or empty.
 */
::sd_set_retry_bp:entry
/ xlate <devinfo_t *>(args[1])->dev_pathname != "" &&
xlate <devinfo_t *>(args[1])->dev_pathname != "<nfs>" /
{
    this->xx    = xlate <devinfo_t *>(args[1])->dev_pathname;
    this->sn    = xlate <devinfo_t *>(args[1])->dev_statname;

    /* 16 characters of the device path starting at offset 25, upper-cased unless it contains "disk@". */
    this->p     = substr(this->xx, 25, 16);
    this->p     = (strstr(this->p, "disk@") == 0 ||
                    strstr(this->p, "disk@") == "") ? toupper(this->p) : "NA";

    @ctretry[this->sn, this->p] = count();
}

/* Start the rate window for a device the first time an IO is issued to it. */
io:::start
/ args[1]->dev_pathname != "<nfs>" && args[1]->dev_pathname != "" &&
timer[args[0]->b_edev] == 0/
{
    timer[args[0]->b_edev] = timestamp;
}

/* Count every completion per device, and separately those that carry B_ERROR. */
io:::done
/ args[1]->dev_pathname != "<nfs>" && args[1]->dev_pathname != "" /
{
    ioct[args[0]->b_edev]++;
    errct[args[0]->b_edev] += args[0]->b_flags & B_ERROR ? 1 : 0;
}

/*
 * Fires on a completion when more than 10 seconds (10000000000 ns) have
 * passed since the device window started. Computes the scaled error rate for
 * the window, keeps the largest value seen, and starts a new window.
 * The counting clause for io:::done appears earlier in the script, so ioct
 * already includes this completion when the division below runs.
 */
io:::done
/ args[1]->dev_pathname != "<nfs>" && args[1]->dev_pathname != "" &&
timer[args[0]->b_edev] != 0 &&
timestamp - 10000000000 > timer[args[0]->b_edev] /
{
    timer[args[0]->b_edev]  = timestamp;
    this->sn    = args[1]->dev_statname;
    /* 16 characters of the device path starting at offset 25. */
    this->p     = substr(args[1]->dev_pathname, 25, 16);
    this->p     = (strstr(this->p, "disk@") == 0 ||
                    strstr(this->p, "disk@") == "") ? toupper(this->p) : "NA";
    /* This is a hack to work around lack of floating-point support */
    this->rate  = (multiplier * errct[args[0]->b_edev]) / ioct[args[0]->b_edev];
    @maxrateerr[this->sn, this->p]  = max(this->rate);
    /* Reset the window counters. */
    ioct[args[0]->b_edev]   = 0;
    errct[args[0]->b_edev]  = 0;

}

/*
 * Runs only for completions that carry B_ERROR. Feeds the error count, the
 * EIO count and the sum of untransferred bytes (b_resid) per device.
 */
io:::done
/ args[1]->dev_pathname != "" && args[1]->dev_pathname != "<nfs>" &&
args[0]->b_flags & B_ERROR /
{
    this->sn    = args[1]->dev_statname;

    /* 16 characters of the device path starting at offset 25, upper-cased unless it contains "disk@". */
    this->p     = substr(args[1]->dev_pathname, 25, 16);
    this->p     = (strstr(this->p, "disk@") == 0 ||
                    strstr(this->p, "disk@") == "") ? toupper(this->p) : "NA";

    /* Any difference between cterr and cteio means not all errors are EIO. */
    @noxfer[this->sn, this->p]  = sum(args[0]->b_resid);
    @cteio[this->sn, this->p]   = sum(args[0]->b_error == EIO ? 1 : 0);
    @cterr[this->sn, this->p]   = sum(args[0]->b_flags & B_ERROR ? 1 : 0);
}

/* Once a minute: print one CSV row per device and clear all aggregations. */
tick-1min
{
    printa("%s,%s,%@d,%@d,%@d,%@d,%@de-6\n",
        @ctretry, @cterr, @cteio, @noxfer, @maxrateerr);

    trunc(@ctretry);
    trunc(@cterr);
    trunc(@cteio);
    trunc(@noxfer);
    trunc(@maxrateerr);
}

d 观察sqlite的Dtrace片段由BrickstorOS bsrapid服务和Golang标准库打开

golang-db-extra-open.d
dtrace -qn '
/* Remember, per thread, the last entered function whose name matches *NewSqliteDAL*. */
pid$target::*NewSqliteDAL*:entry {
    self->name = probefunc ;
}
/* Remember, per thread, the last entered function whose name matches *database*. */
pid$target::*database*:entry {
    self->name2 = probefunc ;
}
/*
 * Count sqlite3_open_v2 calls by remembered caller name and by the string
 * read from arg0 (labelled "path" in the output below).
 * NOTE(review): the remembered names are never cleared, so any later open on
 * the same thread is attributed to them - confirm this is acceptable.
 */
pid$target::sqlite3_open_v2:entry /self->name != ""/ {
    @a[self->name, copyinstr(arg0)] = count();
}
pid$target::sqlite3_open_v2:entry /self->name2 != ""/ {
    @b[self->name2, copyinstr(arg0)] = count();
}
/* On exit, print both aggregations in the same line format. */
END {
    printa("caller: %s path: %s count: %@d\n", @a);
    printa("caller: %s path: %s count: %@d\n", @b);
}
' -c bsrapid