d combination.d
combination.d
/**
 * Self-check for binomial coefficients modulo the prime 998244353.
 *
 * Builds factorial and inverse-factorial tables for 0 .. M-1 and verifies
 * the nested `comb` helper against a few known values with `assert`.
 */
void main(){
    import std.stdio, std.algorithm;

    const long mod=998244353;
    const int M=1_000_00; // table size: factorials of 0 .. M-1

    // fact[i] = i! mod `mod`
    auto fact=new long[](M);
    fact[0]=fact[1]=1;
    foreach(i; 2..M) fact[i]=i*fact[i-1]%mod;

    /// Returns a^x mod `mod` for x >= 0 (iterative square-and-multiply).
    long powmod(long a, long x){
        long result=1;
        a%=mod;
        while(x>0){
            if(x&1) result=result*a%mod;
            a=a*a%mod;
            x>>=1;
        }
        return result;
    }

    // inv_fact[i] = (i!)^-1 mod `mod`.
    // Only the last entry needs a modular exponentiation (Fermat inverse);
    // the rest follow from (i-1)!^-1 = i!^-1 * i.
    auto inv_fact=new long[](M);
    inv_fact[M-1]=powmod(fact[M-1], mod-2);
    foreach_reverse(i; 1..M) inv_fact[i-1]=inv_fact[i]*i%mod;

    /// Returns C(nn, rr) mod `mod`; 0 when rr is negative or exceeds nn.
    /// nn must be below M, the size of the precomputed tables.
    long comb(long nn, long rr){
        if(rr<0 || nn<rr) return 0L;
        long ret=fact[nn]%mod;
        (ret*=inv_fact[rr])%=mod;
        (ret*=inv_fact[nn-rr])%=mod;
        return ret;
    }

    assert(comb(4, 0)==1);
    assert(comb(4, 2)==6);
    assert(comb(4, 4)==1);
    assert(comb(4, 5)==0);
    assert(comb(4, -1)==0);
    assert(comb(0, 0)==1);
}
/*
https://beta.atcoder.jp/contests/agc025/submissions/2611132
*/
d Dtrace片段用于进程内存分配测量
按大小,花费的时间等测量内存分配事件。
vmem-sbrk-change.d
// Example output:
// 1921 libumem.so.1`vmem_xalloc
// value ------------- Distribution ------------- count
// 268435456 | 0
// 536870912 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1
// 1073741824 | 0
//
// 2300 libumem.so.1`vmem_xalloc
// value ------------- Distribution ------------- count
// 536870912 | 0
// 1073741824 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1
// 2147483648 | 0
//
dtrace -qn '
/* Remember when tracing began so each key carries a relative time. */
BEGIN
{
    t0 = timestamp;
}

/*
 * Fire only when arg1 exceeds 268435456 (the value of 2<<27).
 * Key: milliseconds since BEGIN, function containing the caller, user stack.
 * Value: power-of-two distribution of arg1.
 */
pid$target::*vmem_sbrk_alloc*:entry
/arg1 > 268435456/
{
    @[(timestamp - t0) / 1000000, ufunc(ucaller), ustack()] = quantize(arg1);
}' -c '/path/to/process'
d Dtrace ARC访问类型代码段
用于识别进入ARC的请求类型的片段,即按需(Demand)访问与预取(Prefetch)访问。
arc-dem-or-pref.d
dtrace -qn '
/* Print the CSV header and round the wall clock down to a whole second. */
BEGIN
{
    printf("timestamp,pool,type,count\n");
    ts = walltimestamp - (walltimestamp % 1000000000);
}

/*
 * Classify every arc_read call by bit 5 (0x20) of the flags word that
 * args[7] points to, and count per (interval timestamp, pool name, type).
 */
::arc_read:entry
{
    this->spa = args[1];
    this->pd = (*args[7] & 0x20) != 0 ? "Prefetch" : "Demand";
    @c[ts,
        this->spa->spa_name != NULL ? this->spa->spa_name : "NA",
        this->pd] = count();
}

/* Every 5 seconds: emit the counts, clear them, start a new interval. */
tick-5sec
{
    printa("%d,%s,%s,%@d\n", @c);
    trunc(@c);
    ts = walltimestamp - (walltimestamp % 1000000000);
}'
d 用于监视sd驱动程序sd_ready_and_valid函数的Dtrace脚本
在某些情况下,驱动器发生故障,但仍然看似在线,它报告它没有准备好。这是呼叫者为确保设备可用而进行的检查。这里的非零结果意味着给定设备存在问题。
sd-ready-valid-csv.d
#!/usr/sbin/dtrace -Cs
#pragma D option quiet
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* Copyright (c) 2018 Sam Zaydel / RackTop Systems.
*
* sd-ready-valid-csv.d
*
* Description:
* Script tracks return code from sd_ready_and_valid function, which tells
* the caller (sdopen or sdioctl) whether a given drive is usable.
* When a device fails this test a non-zero value is returned and depending
* on state of drive we should see messages in the kernel log along the lines
* of `drive offline`.
*/
/* Map an sd soft-state pointer to its dev_info node via un_sd->sd_dev. */
#define SD_TO_DEVINFO(un) ((struct dev_info *)((un)->un_sd->sd_dev))
/* Driver name looked up in devnamesp by major number (not used below). */
#define DEV_NAME(un) \
stringof(`devnamesp[SD_TO_DEVINFO(un)->devi_major].dn_name) /* ` */
/* Instance number of the device node. */
#define DEV_INST(un) (SD_TO_DEVINFO(un)->devi_instance)
/* Stash the ssc_un pointer of the first argument for the return probe. */
::sd_ready_and_valid:entry {
self->un = args[0]->ssc_un;
}
/*
 * Count calls per (devid string, instance, return code); args[1] is the
 * return value of the function. The thread-local is cleared afterwards.
 */
::sd_ready_and_valid:return /self->un/ {
@[stringof(SD_TO_DEVINFO(self->un)->devi_devid_str),
DEV_INST(self->un), args[1]] = count();
self->un = NULL;
}
/* Emit the CSV header followed by one row per aggregation key. */
END {
printf("device,instance,retcode,count\n");
printa("%s,sd%d,%d,%@d\n", @);
}
d httf2018qual.d
httf2018qual.d
import std.stdio, std.string, std.conv, std.algorithm;
import std.exception, std.random, std.typecons, std.math;
import std.datetime;
// Module-level stopwatch, created stopped: main() starts and stops it, and
// Problem.solve polls it to respect the time limit.
auto sw=StopWatch(AutoStart.no);
/**
 * Randomized hill-climbing solver.
 *
 * `a` is the N x N target grid read from input. `b` is the grid produced by
 * the current Q queries, where each query (x, y, h) adds h at its centre and
 * h - d at Manhattan distance d < h. The score is 2e8 minus the sum of
 * absolute cell differences between `a` and `b`.
 */
class Problem{
    const int N=100;
    const int Q=1000;
    const int TL=6000; // ms
    int[N][N] a, b;
    alias Query=Tuple!(int, "x", int, "y", int, "h");
    Query[Q] ans;
    int bestScore=0, tmpScore=0, itr=0;
    auto rnd=Random(0);

    /// Reads the N rows of the target grid from standard input.
    void stdInput(){
        for(int row=0; row<N; row++){
            a[row]=readln.split.to!(int[]);
        }
    }

    /// Reads the N rows of the target grid from "in.txt"; the file must
    /// contain nothing after those rows.
    void fileInput(){
        auto rows=File("in.txt").byLine.map!split;
        int row=0;
        while(row<N){
            a[row]=rows.front.to!(int[]);
            row++;
            rows.popFront;
        }
        enforce(rows.empty);
    }

    /// Repeatedly replaces one random query with a random candidate and
    /// keeps the change only when the score strictly improves. Stops once
    /// the global stopwatch reaches TL-50 milliseconds.
    void solve(){
        init();
        while(sw.peek.msecs<(TL-50)){
            itr++;
            // Draw order (slot, x, y, h) fixes the random sequence.
            auto q=uniform(0, Q, rnd);
            auto x=uniform(0, N, rnd);
            auto y=uniform(0, N, rnd);
            auto h=uniform!"[]"(1, N, rnd);
            auto old=ans[q];
            add(old.y, old.x, old.h, -1);
            add(y, x, h);
            tmpScore=calc();
            if(tmpScore<=bestScore){
                // Not an improvement: undo the candidate, restore the old query.
                add(y, x, h, -1);
                add(old.y, old.x, old.h);
                continue;
            }
            ans[q]=Query(x, y, h);
            bestScore=tmpScore;
        }
    }

    /// Fills `ans` with random queries, rebuilds `b` from them and records
    /// the resulting score as the initial best.
    void init(){
        foreach(ref slot; ans){
            slot.x=uniform(0, N, rnd);
            slot.y=uniform(0, N, rnd);
            slot.h=uniform!"[]"(1, N, rnd);
        }
        foreach(ref row; b) row[]=0;
        foreach(e; ans) add(e.y, e.x, e.h);
        bestScore=calc();
    }

    /// Adds (sign=1) or removes (sign=-1) the contribution of one query
    /// centred at (y, x) with height h to `b`, skipping off-grid cells.
    void add(int y, int x, int h, int sign=1){
        b[y][x]+=h*sign;
        for(int z=1; z<h; z++){
            const int delta=(h-z)*sign;
            // Walk the ring at Manhattan distance z; dx==0 is a single cell.
            foreach(dy; -z..z+1){
                const int dx=z-abs(dy);
                const int yy=y+dy;
                if(ok(yy, x+dx)) b[yy][x+dx]+=delta;
                if(ok(yy, x-dx) && dx!=0) b[yy][x-dx]+=delta;
            }
        }
    }

    /// True when (y, x) lies inside the N x N grid.
    bool ok(int y, int x){
        if(y<0 || y>=N) return false;
        return 0<=x && x<N;
    }

    /// Returns 2e8 minus the sum of |a - b| over all cells.
    int calc(){
        int penalty=0;
        foreach(i; 0..N){
            foreach(j; 0..N){
                penalty+=abs(a[i][j]-b[i][j]);
            }
        }
        return 2e8.to!(int)-penalty;
    }

    /// Writes Q and then one "x y h" line per query to standard output.
    void stdOutput(){
        writeln(Q);
        foreach(query; ans){
            writeln(query.x, " ", query.y, " ", query.h);
        }
    }

    /// Writes Q and then one "x y h" line per query to "out.txt".
    void fileOutput(){
        auto sink=File("out.txt", "w");
        sink.writeln(Q);
        foreach(query; ans){
            sink.writeln(query.x, " ", query.y, " ", query.h);
        }
    }

    /// Reports the iteration count and best score on standard error.
    void show(){
        stderr.writeln("itr = ", itr);
        stderr.writeln("score = ", bestScore);
    }
}
/// Entry point: times the whole run, solves one instance and reports stats.
void main(){
    sw.start;
    auto p=new Problem;

    // Flip to false to read "in.txt" and write "out.txt" instead.
    const bool useStdStreams=true;

    if(useStdStreams) p.stdInput;
    else p.fileInput;

    p.solve;

    if(useStdStreams) p.stdOutput;
    else p.fileOutput;

    sw.stop;
    p.show;
}
d 用于观察内核内存回收(reap)的Dtrace脚本
记录内核内存回收(reap)活动。
kmem-reap-details.d
#!/usr/sbin/dtrace -s
#pragma D option quiet
#pragma D option destructive
/*
 * Log entry to and return from arc_kmem_reap_now with the wall-clock second,
 * the high-resolution timestamp and the current value of freemem.
 */
arc_kmem_reap_now:entry
{
	printf("%d ts=%d freemem=%d -> arc_kmem_reap_now\n",
	    walltimestamp / 1000000000,
	    timestamp,
	    `freemem);
}

arc_kmem_reap_now:return
{
	printf("%d ts=%d freemem=%d <- arc_kmem_reap_now\n",
	    walltimestamp / 1000000000,
	    timestamp,
	    `freemem);
}
/*
 * Log each cache reap. The cache pointer is kept in a thread-local so the
 * return probe can print the same cache name; it is cleared afterwards.
 */
kmem_cache_reap:entry
{
	self->cp = args[0];
	printf("%d freemem=%d -> cache reap %s\n",
	    walltimestamp / 1000000000,
	    `freemem,
	    stringof(args[0]->cache_name));
}

kmem_cache_reap:return
/self->cp != NULL/
{
	printf("%d freemem=%d <- cache reap %s\n",
	    walltimestamp / 1000000000,
	    `freemem,
	    stringof(self->cp->cache_name));
	self->cp = NULL;
}
/*
 * Log each magazine (depot working-set) reap. The cache pointer is kept in
 * a thread-local so the return probe can print the same cache name.
 */
kmem_depot_ws_reap:entry
{
	self->depot = args[0];
	printf("%d freemem=%d -> mag reap %s\n", walltimestamp / 1000000000,
	    `freemem, stringof(args[0]->cache_name));
}

/*
 * The exit line must come from the return probe. Attached to entry, it
 * fired right after the clause above and reported the reap as already
 * finished.
 */
kmem_depot_ws_reap:return
/self->depot != NULL/
{
	printf("%d freemem=%d <- mag reap %s\n", walltimestamp / 1000000000,
	    `freemem, stringof(self->depot->cache_name));
	self->depot = NULL;
}
/* Running count of calls to htable_steal_active (compiler-suffixed symbol). */
htable_steal_active.isra.2:entry
{
	steals++;
}

/*
 * Once per second, if the sample landed in kernel code (arg0 != 0) on the
 * kmem_taskq thread, print the program counter and up to 60 stack frames.
 */
profile-1hz
/arg0 != 0 && curthread == `kmem_taskq->tq_thr._tq_thread/
{
	printf("%d kmem_taskq cpu=%d pc=%a",
	    walltimestamp / 1000000000, cpu, arg0);
	stack(60);
}

/* One summary line per second built from kernel globals and the counter. */
tick-1sec
{
	printf("%d freemem=%d needfree=%d steals=%d arc_size=%d arc_c=%d waiters=%d load=%d tq=%d\n",
	    walltimestamp / 1000000000,
	    `freemem,
	    `needfree,
	    steals,
	    `arc_stats.arcstat_size.value.ui64,
	    `arc_stats.arcstat_c.value.ui64,
	    ((condvar_impl_t *)&`arc_reclaim_waiters_cv)->cv_waiters,
	    `loadavg.lg_total,
	    `kmem_taskq->tq_tasks - `kmem_taskq->tq_executed);
}
d 用于观察ARC内存不足与回收的Dtrace片段
arc-kmem-reap-time-pagecnt.d
#!/usr/sbin/dtrace -qCs
/*
* Measure how much time is spent in arc_kmem_reap_now function.
* There are known issues with spending a long time in this call.
* We want to know how much time was spent and how much was freed.
*/
/* CSV header: one row is printed per completed arc_kmem_reap_now call. */
BEGIN {
	printf("timestamp,callDuration,pagesFree,pagesReclaimed\n");
}

/*
 * Record the entry time and freemem at entry. Both are thread-local:
 * clause-local (this->) values do not carry over from the entry probe to
 * the return probe.
 */
::arc_kmem_reap_now:entry {
	self->in = timestamp;
	self->free1 = `freemem;
}

::arc_kmem_reap_now:return /self->in/ {
	this->free2 = `freemem;
	/* How much we freed since entry into this function, could be negative! */
	this->freed = this->free2 - self->free1;
	/* Duration in microseconds; computed before self->in is cleared. */
	this->dur = (timestamp - self->in) / 1000;
	printf("%ld,%u,%lu,%ld\n", walltimestamp,
	    this->dur, self->free1, (long)this->freed);
	self->in = 0;
	self->free1 = 0;
}
arc-avail-min-max.d
#!/usr/sbin/dtrace -qCs
/*
* Snippet collects minimum and maximum available bytes to ARC.
* Measurement is coming from a periodic call to arc_available_memory,
* which returns a signed value, where anything below zero signals
* memory shortfall, and will result in reclaim activity.
*/
/* Slots of l[]: smallest and largest return value seen this interval. */
inline const char MIN = 0;
inline const char MAX = 1;

int shortfall;	/* number of negative return values this interval */
int x;		/* non-zero once the interval has its first sample */
char reclaim;	/* 1 once the interval minimum has gone below zero */
long l[char];

BEGIN
{
	printf("timestamp,minAvail,maxAvail,reclaim,shortCnt\n");
}

/* First sample of an interval seeds both extremes and clears the flags. */
::arc_available_memory:return
/!x/
{
	l[MAX] = args[1];
	l[MIN] = l[MAX];
	x++;
	reclaim = 0;
	shortfall = 0;
}

/* Record minimum and maximum values */
::arc_available_memory:return
/x/
{
	l[MIN] = args[1] < l[MIN] ? args[1] : l[MIN];
	l[MAX] = args[1] > l[MAX] ? args[1] : l[MAX];
	/* reclaim needed? */
	reclaim = (reclaim == 1 || l[MIN] < 0) ? 1 : 0;
	shortfall += args[1] < 0 ? 1 : 0;
}

/* Emit one CSV row every 5 seconds and begin a new interval. */
tick-5sec
{
	printf("%ld,%ld,%ld,%d,%d\n",
	    walltimestamp, l[MIN], l[MAX], reclaim, shortfall);
	x = 0;
}
arc-avail-min-max-1liner.d
dtrace -qn '
/*
 * Snippet collects minimum and maximum available bytes to ARC.
 * Measurement is coming from a periodic call to arc_available_memory,
 * which returns a signed value, where anything below zero signals
 * memory shortfall, and will result in reclaim activity.
 *
 * Output is CSV with five columns; shortCnt is the number of negative
 * return values seen during the 5 second interval.
 */
inline const char MIN = 0;
inline const char MAX = 1;
int shortfall; int x; char reclaim; long l[char];
/* Header lists all five printed columns, including shortCnt. */
BEGIN { printf("timestamp,minAvail,maxAvail,reclaim,shortCnt\n"); }
/* First sample of an interval seeds both extremes and clears the flags. */
::arc_available_memory:return /!x/ {
    l[MIN] = l[MAX] = args[1] ; x++ ;
    reclaim = 0 ; shortfall = 0 ;
}
::arc_available_memory:return /x/ { /* Record minimum and maximum values */
    l[MIN] = args[1] < l[MIN] ? args[1] : l[MIN] ;
    l[MAX] = args[1] > l[MAX] ? args[1] : l[MAX] ;
    reclaim = reclaim == 1 ? 1 : l[MIN] < 0 ? 1 : 0 ; /* reclaim needed? */
    shortfall += args[1] < 0 ? 1 : 0 ;
}
/* Emit one CSV row every 5 seconds and begin a new interval. */
tick-5sec {
    printf("%ld,%ld,%ld,%d,%d\n",
        walltimestamp, l[MIN], l[MAX], reclaim, shortfall) ;
    x = 0 ;
}'
d 用于在BrickstorOS上获取详细IO指标的Dtrace脚本
利用io:::start和io:::done探针收集较低级别的IO指标。进行各种测量,包括延迟范围、处理的字节数、吞吐量、各种速率、按IO类型统计的IO计数等。
bw-tput-iops-actv-time-hist-with-ts-csv.d
#!/usr/sbin/dtrace -Cs
#pragma D option quiet
#pragma D option dynvarsize=16M
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* Copyright (c) 2017 Sam Zaydel / RackTop Systems.
*
* bw-tput-iops-actv-time-hist-with-ts-csv.d
* This script is meant to be a more informative replacement for the
* iostat utility. It measures some features that iostat in my opinion
* is not useful for. For example, iostat does not offer distributions
* at all, whereas here we plot distributions of IO rates as observed
* for each disk. Also, iostat does not offer operating ranges for
* measurements, which here we define as max - min of some measurement.
*
* Currently, script is limited to scsi_vhci devices, because they are
* ones we normally use for actual data storage. Anything else is either
* a locally attached device, or something not used by system, i.e.
* USB storage media, etc.
*/
/* Latency min and max, keyed by device and direction (R or W) */
unsigned long minlati[dev_t, int];
unsigned long maxlati[dev_t, int];
/* IO Rate min and max */
unsigned long minratei[dev_t];
unsigned long maxratei[dev_t];
/* IO size min and max */
unsigned long miniosz[dev_t];
unsigned long maxiosz[dev_t];
/* IO count per device and direction */
unsigned long iocnt[dev_t, int];
/* IOs started but not yet completed, per device */
int pend[dev_t];
/* Start time of an IO, keyed by device and block number */
hrtime_t start[dev_t, uint64_t];
/* Start of the current roughly one second window, per device */
hrtime_t ival_timer[dev_t];
/* Timestamp of the previous tick-10sec firing */
hrtime_t ticks;

/* Unit conversion constants */
inline const int NSEC_PER_SEC = 1000000000;
inline const int NSEC_PER_MSEC = 1000000;
inline const int NSEC_PER_USEC = 1000;
inline const int SPA_MAXBLOCKSIZE = 128 << 10;

/* Direction indexes used with minlati, maxlati and iocnt */
inline const int R = 0;
inline const int W = 1;
/*
* These are the Output Parameter definitions for collected metrics
*
* sdname == Name of device, i.e. sd0
* mpxiowwn == mpxio device, like: 5000cca24500f698
* actvtm == amount of real, busy time spent processing IO
* rangerlat, rangewlat == latency range: max - min for Reads and Writes
* totbytes == total number of bytes Read and Written during sample interval
* tput,maxtput == mean and maximum or burst throughput
* ctrd,ctwr == Count of Reads and Writes over interval set in tick-Xsec clause
* aviosz,rangeiosz == IO size mean and range: max - min
* iops,maxiops == normalized IOPS mean and maximum
* avKBps,rangeKBps == normalized IO rate and IO rate range: max - min
* // Histogram of IO Rate distribution with 4 buckets //
* [ ratelt1MBps ] ==> 1 second interval with < 1,000KB/s bw
* [ rate10MBps ] ==> 1 second interval with < 10,000KB/s bw
* [ rate100MBps ] ==> 1 second interval with < 100,000KB/s bw
* [ rate1GBps ] ==> 1 second interval with < 1,000,000KB/s bw
* avtime,maxtime == average and maximum IO completion latency
* // Histogram of latency distribution with 6 buckets //
* [ timegt1000ms ] ==> >1 second (SLOW!)
* [ time100ms ] ==> >50ms && 100ms<= (not acceptable)
* [ time50ms ] ==> >25ms && 50ms<= (acceptable)
* [ time25ms ] ==> >10ms && 25ms<= (ok)
* [ time10ms ] ==> >1ms && 10ms<= (good)
* [ timelt1ms ] ==> 1ms< (probably cache)
* // Histogram of IO size distribution with 7 buckets //
* [ iosztiny ] ==> 4K<
* [ iosz4k ] ==> between 4K and 8K
* [ iosz8k ] ==> between 8K and 16K
* [ iosz16k ] ==> between 16K and 32K
* [ iosz32k ] ==> between 32K and 64K
* [ iosz64k ] ==> between 64K and 128K
* [ ioszbig ] ==> anything 128K or above
* avpending == Average measured IOs in processing per sample interval
* maxpending == Maximum measured IOs in processing per sample interval
* cterr == Counter tracking number of errors per sample interval
* ctretry == Counter tracking number of retries per sample interval
*/
/*
 * Print the CSV header once and set the first reporting interval key to
 * the current wall-clock time rounded down to a whole second.
 */
BEGIN
{
	printf("timestamp,sdname,mpxiowwn,actvtm,rangerlat,rangewlat,totbytes,tput,maxtput,ctrd,ctwr,aviosz,rangeiosz,iops,maxiops,avKBps,rangeKBps,ratelt1MBps,rate10MBps,rate100MBps,rate1GBps,avtime,maxtime,timegt1000ms,time100ms,time50ms,time25ms,time10ms,timelt1ms,iosztiny,iosz4k,iosz8k,iosz16k,iosz32k,iosz64k,ioszbig,avpending,maxpending,cterr,ctretry\n");
	interval = walltimestamp - (walltimestamp % 1000000000);
}
/*
* Set interval timer initial value once for a given device.
* Each subsequent update will happen roughly once per second,
* or if no IO, whenever there is some IO that triggers io:::done
* probe, at which point we determine that timer expired.
*/
/* Arm the per-device interval timer the first time the device is seen. */
io:::start
/ival_timer[args[0]->b_edev] == 0/
{
	ival_timer[args[0]->b_edev] = timestamp;
}

/* Stamp the start of every IO, keyed by device and block number. */
io:::start
{
	start[args[0]->b_edev, args[0]->b_blkno] = timestamp;

	/* Increment pending IOs by 1 */
	pend[args[0]->b_edev]++;
}
/*
 * Per-IO accounting at completion. Fires only for IOs whose start was
 * recorded by the io:::start clause. Feeds every per-interval aggregation
 * and maintains the per-device min/max trackers that the interval-timer
 * clause turns into ranges.
 *
 * In the min trackers a value of 0 means no sample yet: that is both the
 * initial value and the value written at reset. Without that convention a
 * minimum could never move away from 0 and each range would equal the max.
 */
io:::done
/start[args[0]->b_edev, args[0]->b_blkno]/ {
	this->sn = args[1]->dev_statname;
	this->p = substr(args[1]->dev_pathname, 25, 16);
	this->p = (strstr(this->p, "disk@") == 0 ||
	    strstr(this->p, "disk@") == "") ? this->p : "NA";
	this->x = args[0]->b_bcount * 976562;
	this->elapsed = timestamp - start[args[0]->b_edev, args[0]->b_blkno];
	start[args[0]->b_edev, args[0]->b_blkno] = 0;

	/* Decrement pending IOs by 1, set to 0 if value < 0 */
	pend[args[0]->b_edev]--;
	pend[args[0]->b_edev] =
	    pend[args[0]->b_edev] > 0 ?
	    pend[args[0]->b_edev] : 0; /* avoid underflow */

	/* Total Number of bytes per device */
	@totbytes[interval, this->sn, this->p] = sum(args[0]->b_bcount);

	/* Total nanoseconds of active time per device */
	@actv_tm[interval, this->sn, this->p] = sum(this->elapsed);

	/*
	 * Instead of converting nanoseconds to seconds we multiply
	 * the top by NSEC_PER_SEC and the divide by delta in nanoseconds.
	 * In extreme observations, i.e. stalled IO, we may have huge
	 * this->elapsed values, in which case result will be 0 KB/s, even
	 * if there in fact was a non-zero value of bytes transferred.
	 */
	this->b_nsec = args[0]->b_bcount * NSEC_PER_SEC;
	this->kb_per_sec = (this->b_nsec / this->elapsed) >> 10;

	/* Measure IO rate range in KB/s */
	@rangeKBps[interval, this->sn, this->p] =
	    max(maxratei[args[0]->b_edev] - minratei[args[0]->b_edev]);

	/* IO Rate histogram base10, limited at 1000000 KB/s | 976 MB/s */
	/* 1000KB/s <= */
	@ratelt1MBps[interval, this->sn, this->p] = sum(this->kb_per_sec <= 0x3e8 ? 1 : 0);
	/* > 1000KB/s && 10,000KB/s <= */
	@rate10MBps[interval, this->sn, this->p] = sum(this->kb_per_sec > 0x3e8 &&
	    this->kb_per_sec <= 0x2710 ? 1 : 0);
	/* > 10,000KB/s && 100,000KB/s <= */
	@rate100MBps[interval, this->sn, this->p] = sum(this->kb_per_sec > 0x2710 &&
	    this->kb_per_sec <= 0x186a0 ? 1 : 0);
	/* > 100,000KB/s && 1,000,000 KB/s <= */
	@rate1GBps[interval, this->sn, this->p] = sum(this->kb_per_sec > 0x186a0 &&
	    this->kb_per_sec <= 0xf4240 ? 1 : 0);

	/*
	 * Collect minimum and maximum observed rate for later measurement
	 * of range for this metric.
	 */
	minratei[args[0]->b_edev] = minratei[args[0]->b_edev] == 0 ?
	    this->kb_per_sec : minratei[args[0]->b_edev] < this->kb_per_sec ?
	    minratei[args[0]->b_edev] : this->kb_per_sec;
	maxratei[args[0]->b_edev] = maxratei[args[0]->b_edev] == 0 ?
	    this->kb_per_sec : maxratei[args[0]->b_edev] > this->kb_per_sec ?
	    maxratei[args[0]->b_edev] : this->kb_per_sec;

	/* Actual Kbytes/sec per device */
	@avKBps[interval, this->sn, this->p] = avg(this->kb_per_sec);

	/* Average and Maximum Latency per device */
	@avtime[interval, this->sn, this->p] = avg(this->elapsed);
	@maxtime[interval, this->sn, this->p] = max(this->elapsed);

	/*
	 * Latency histogram with buckets:
	 * >1000ms, >50 to 100ms, >25 to 50ms, >10 to 25ms, >1 to 10ms, 1ms<
	 */
	@timegt1000ms[interval, this->sn, this->p] = sum(
	    this->elapsed >= 1 * NSEC_PER_SEC ? 1 : 0);
	@time100ms[interval, this->sn, this->p] = sum(
	    this->elapsed > 50 * NSEC_PER_MSEC &&
	    this->elapsed <= 100 * NSEC_PER_MSEC ? 1 : 0);
	@time50ms[interval, this->sn, this->p] = sum(
	    this->elapsed > 25 * NSEC_PER_MSEC &&
	    this->elapsed <= 50 * NSEC_PER_MSEC ? 1 : 0);
	@time25ms[interval, this->sn, this->p] = sum(
	    this->elapsed > 10 * NSEC_PER_MSEC &&
	    this->elapsed <= 25 * NSEC_PER_MSEC ? 1 : 0);
	@time10ms[interval, this->sn, this->p] = sum(
	    this->elapsed > 1 * NSEC_PER_MSEC &&
	    this->elapsed <= 10 * NSEC_PER_MSEC ? 1 : 0);
	@timelt1ms[interval, this->sn, this->p] = sum(
	    this->elapsed < 1 * NSEC_PER_MSEC ? 1 : 0);

	/*
	 * Collect minimum and maximum io size for later measurement
	 * of range for this metric. A stored minimum of 0 means unset,
	 * so the first IO of a period seeds it.
	 */
	miniosz[args[0]->b_edev] =
	    miniosz[args[0]->b_edev] == 0 ? args[0]->b_bcount :
	    args[0]->b_bcount < miniosz[args[0]->b_edev] ?
	    args[0]->b_bcount : miniosz[args[0]->b_edev];
	maxiosz[args[0]->b_edev] =
	    args[0]->b_bcount > maxiosz[args[0]->b_edev] ?
	    args[0]->b_bcount : maxiosz[args[0]->b_edev];

	/* Measure IO size range in Kilobytes */
	@rangeiosz[interval, this->sn, this->p] =
	    max((maxiosz[args[0]->b_edev] - miniosz[args[0]->b_edev]) >> 10);

	/*
	 * Convert from bytes and nanoseconds to KB/s with 976562 to obtain
	 * avg. effective throughput, and maximum effective throughput.
	 * Maximum throughput is likely measuring cache effects.
	 */
	@tput[interval, this->sn, this->p] = avg(this->x / this->elapsed);
	@maxtput[interval, this->sn, this->p] = max(this->x / this->elapsed);

	iocnt[args[0]->b_edev, R] += args[0]->b_flags & B_READ ? 1 : 0;
	iocnt[args[0]->b_edev, W] += args[0]->b_flags & B_WRITE ? 1 : 0;

	/* Count number of IOs by IO-type */
	@ctrd[interval, this->sn, this->p] = sum(args[0]->b_flags & B_READ ? 1 : 0);
	@ctwr[interval, this->sn, this->p] = sum(args[0]->b_flags & B_WRITE ? 1 : 0);
	@iops[interval, this->sn, this->p] =
	    count(); /* Normalized to per second in tick-X probe */

	/* Maximum sampled IOPS per device */
	@maxiops[interval, this->sn, this->p] =
	    max(iocnt[args[0]->b_edev, R] + iocnt[args[0]->b_edev, W]);

	/*
	 * Collect minimum and maximum latency for later measurement
	 * of range for this metric. Reads update the R slot, everything
	 * else updates the W slot. A stored minimum of 0 means unset, so
	 * the first matching IO of a period seeds it.
	 */
	minlati[args[0]->b_edev, R] =
	    (args[0]->b_flags & B_READ) == 0 ? minlati[args[0]->b_edev, R] :
	    minlati[args[0]->b_edev, R] == 0 ? this->elapsed :
	    this->elapsed < minlati[args[0]->b_edev, R] ? this->elapsed :
	    minlati[args[0]->b_edev, R];
	maxlati[args[0]->b_edev, R] =
	    args[0]->b_flags & B_READ &&
	    this->elapsed > maxlati[args[0]->b_edev, R] ? this->elapsed :
	    maxlati[args[0]->b_edev, R];
	minlati[args[0]->b_edev, W] =
	    (args[0]->b_flags & B_READ) != 0 ? minlati[args[0]->b_edev, W] :
	    minlati[args[0]->b_edev, W] == 0 ? this->elapsed :
	    this->elapsed < minlati[args[0]->b_edev, W] ? this->elapsed :
	    minlati[args[0]->b_edev, W];
	maxlati[args[0]->b_edev, W] =
	    (args[0]->b_flags & B_READ) == 0 &&
	    this->elapsed > maxlati[args[0]->b_edev, W] ? this->elapsed :
	    maxlati[args[0]->b_edev, W];

	/*
	 * IOsize distribution not grouped by direction, i.e. no distinction
	 * is made between reads and writes. IO buckets double in size from
	 * previous bucket. i.e. 4, 8, 16, 32...
	 */
	this->bs = args[0]->b_bcount ;
	/* 4K< */
	@iosztiny[interval, this->sn, this->p] =
	    sum(this->bs < 0x1000 ? 1 : 0);
	/* 4K to 8K< */
	@iosz4k[interval, this->sn, this->p] =
	    sum(this->bs >= 0x1000 && this->bs < 0x2000 ? 1 : 0);
	/* 8K to 16K< */
	@iosz8k[interval, this->sn, this->p] =
	    sum(this->bs >= 0x2000 && this->bs < 0x4000 ? 1 : 0);
	/* 16K to 32K< */
	@iosz16k[interval, this->sn, this->p] =
	    sum(this->bs >= 0x4000 && this->bs < 0x8000 ? 1 : 0);
	/* 32K to 64K< */
	@iosz32k[interval, this->sn, this->p] =
	    sum(this->bs >= 0x8000 && this->bs < 0x10000 ? 1 : 0);
	/* 64K to 128K< */
	@iosz64k[interval, this->sn, this->p] =
	    sum(this->bs >= 0x10000 && this->bs < 0x20000 ? 1 : 0);
	/* >128K */
	@ioszbig[interval, this->sn, this->p] =
	    sum(this->bs >= 0x20000 ? 1 : 0);

	/* Average IO size for given device */
	@aviosz[interval, this->sn, this->p] = avg(this->bs);

	/*
	 * Each time we observe an error at completion through B_ERROR flag,
	 * increment count of errors for given device. This should always
	 * be zero, assuming healthy device.
	 */
	@cterr[interval, this->sn, this->p] = sum(args[0]->b_flags & B_ERROR ? 1 : 0);
}
/*
* Entry controlled by timer. By design, each device will be registered
* here about once per second when there is even a little bit of IO.
*/
/*
 * Fires on an IO completion once at least one second has passed since the
 * device timer was last set. Folds the per-device trackers into the range
 * and pending aggregations, then clears them and restarts the timer.
 */
io:::done
/ival_timer[args[0]->b_edev] > 0 &&
    timestamp >= ival_timer[args[0]->b_edev] + NSEC_PER_SEC/
{
	this->sn = args[1]->dev_statname;
	this->p = substr(args[1]->dev_pathname, 25, 16);
	this->p = (strstr(this->p, "disk@") == 0 ||
	    strstr(this->p, "disk@") == "") ? this->p : "NA";

	/*
	 * Measure operating latency range in uS for Reads and Writes,
	 * storing largest observed difference.
	 */
	@rangerlat[interval, this->sn, this->p] = max(
	    (maxlati[args[0]->b_edev, R] - minlati[args[0]->b_edev, R]) /
	    NSEC_PER_USEC);
	@rangewlat[interval, this->sn, this->p] = max(
	    (maxlati[args[0]->b_edev, W] - minlati[args[0]->b_edev, W]) /
	    NSEC_PER_USEC);

	/* Pending IO depth as seen at this moment */
	@avpending[interval, this->sn, this->p] = avg(pend[args[0]->b_edev]);
	@maxpending[interval, this->sn, this->p] = max(pend[args[0]->b_edev]);

	/* Reset various counters for next measurement period */
	minlati[args[0]->b_edev, R] = 0;
	minlati[args[0]->b_edev, W] = 0;
	maxlati[args[0]->b_edev, R] = 0;
	maxlati[args[0]->b_edev, W] = 0;
	iocnt[args[0]->b_edev, R] = 0;
	iocnt[args[0]->b_edev, W] = 0;
	miniosz[args[0]->b_edev] = 0;
	maxiosz[args[0]->b_edev] = 0;
	minratei[args[0]->b_edev] = 0;
	maxratei[args[0]->b_edev] = 0;
	ival_timer[args[0]->b_edev] = timestamp;
}
/*
* Count number of retries issued to a disk. These are a good
* indicator of potentially failing, or borderline device.
* Under normal circumstances we should not expect
* this to be a positive value.
*/
/*
 * Count one retry per call, keyed like the io:::done aggregations.
 * Calls whose translated pathname is "<nfs>" or empty are skipped.
 */
::sd_set_retry_bp:entry
/xlate <devinfo_t *>(args[1])->dev_pathname != "<nfs>" &&
    xlate <devinfo_t *>(args[1])->dev_pathname != "" /
{
	this->sn = xlate <devinfo_t *>(args[1])->dev_statname;
	this->p = substr(xlate <devinfo_t *>(args[1])->dev_pathname, 25, 16);
	this->p = (strstr(this->p, "disk@") == 0 ||
	    strstr(this->p, "disk@") == "") ? this->p : "NA";

	@ctretry[interval, this->sn, this->p] = count();
}
/*
 * Every 10 seconds: normalize, print one CSV row per key, clear every
 * aggregation and start a new reporting interval.
 */
tick-10sec
{
	/* First time we enter this clause, ticks will be 0, so we just assume 10 seconds */
	this->elapsed = ticks > 0 ? (timestamp - ticks) / NSEC_PER_SEC : 10 ;

	/* Normalize Data for correct per second reporting of rates, like IOPS */
	normalize(@actv_tm, NSEC_PER_MSEC); /* from nanoseconds to milliseconds */
	normalize(@iops, this->elapsed);

	printa("%ld,%s,%s,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d,%@d\n",
	    @actv_tm, @rangerlat, @rangewlat, @totbytes, @tput, @maxtput,
	    @ctrd, @ctwr, @aviosz, @rangeiosz, @iops, @maxiops, @avKBps, @rangeKBps,
	    @ratelt1MBps, @rate10MBps, @rate100MBps, @rate1GBps, @avtime, @maxtime,
	    @timegt1000ms, @time100ms, @time50ms, @time25ms, @time10ms, @timelt1ms,
	    @iosztiny, @iosz4k, @iosz8k, @iosz16k, @iosz32k, @iosz64k, @ioszbig,
	    @avpending, @maxpending, @cterr, @ctretry);

	/* Drop all keys so the next interval starts empty */
	trunc(@actv_tm);
	trunc(@rangerlat);
	trunc(@rangewlat);
	trunc(@totbytes);
	trunc(@tput);
	trunc(@maxtput);
	trunc(@ctrd);
	trunc(@ctwr);
	trunc(@aviosz);
	trunc(@rangeiosz);
	trunc(@iops);
	trunc(@maxiops);
	trunc(@avKBps);
	trunc(@rangeKBps);
	trunc(@ratelt1MBps);
	trunc(@rate10MBps);
	trunc(@rate100MBps);
	trunc(@rate1GBps);
	trunc(@avtime);
	trunc(@maxtime);
	trunc(@iosztiny);
	trunc(@iosz4k);
	trunc(@iosz8k);
	trunc(@iosz16k);
	trunc(@iosz32k);
	trunc(@iosz64k);
	trunc(@ioszbig);
	trunc(@avpending);
	trunc(@maxpending);
	trunc(@cterr);
	trunc(@ctretry);
	trunc(@timegt1000ms);
	trunc(@time100ms);
	trunc(@time50ms);
	trunc(@time25ms);
	trunc(@time10ms);
	trunc(@timelt1ms);

	ticks = timestamp ;
	interval = walltimestamp - (walltimestamp % 1000000000);
}
d Dtrace IO错误计数脚本
io-retry-and-err-count-csv.d
#!/usr/sbin/dtrace -Cs
#pragma D option quiet
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* Copyright (c) 2017 Sam Zaydel / RackTop Systems.
*
* io-retry-and-err-count-csv.d
*
* Description:
* Script collects a count of IOs that resulted in an error or retry.
* The rate of error is multiplied by `multiplier` and reported with a
* e-6, but without actually doing floating point arithmetic, which
* dtrace does not have support for.
* Expectation here is that if device is experiencing an IO error each
* time it issues an IO, we should see 1000000e-6 in the output, meaning
* rate of error is 1.0.
*/
/* Scale factor standing in for floating point: rates are reported as e-6. */
inline const int multiplier = 1000000;

/* Per-device IO count, error count and start of the sampling window. */
unsigned long ioct[dev_t];
unsigned long errct[dev_t];
unsigned long timer[dev_t];

BEGIN
{
	printf("sdname,mpxiowwn,ctretry,cterr,cteio,noxfer,rateerr\n");
}
/*
 * Count one retry per call, keyed by device stat name and a 16 character
 * slice of the device path. Calls whose translated pathname is "<nfs>"
 * or empty are skipped.
 */
::sd_set_retry_bp:entry
/xlate <devinfo_t *>(args[1])->dev_pathname != "<nfs>" &&
    xlate <devinfo_t *>(args[1])->dev_pathname != "" /
{
	this->xx = xlate <devinfo_t *>(args[1])->dev_pathname;
	this->sn = xlate <devinfo_t *>(args[1])->dev_statname;
	this->p = substr(this->xx, 25, 16);
	this->p = (strstr(this->p, "disk@") == 0 ||
	    strstr(this->p, "disk@") == "") ? toupper(this->p) : "NA";

	@ctretry[this->sn, this->p] = count();
}
/* Arm the per-device sampling timer on the first IO seen for the device. */
io:::start
/args[1]->dev_pathname != "<nfs>" && args[1]->dev_pathname != "" &&
    timer[args[0]->b_edev] == 0/
{
	timer[args[0]->b_edev] = timestamp;
}

/* Count every completed IO, and those completed with B_ERROR set. */
io:::done
/args[1]->dev_pathname != "<nfs>" && args[1]->dev_pathname != ""/
{
	ioct[args[0]->b_edev]++;
	errct[args[0]->b_edev] += args[0]->b_flags & B_ERROR ? 1 : 0;
}
/*
 * Once more than 10 seconds have passed since the device timer was set,
 * turn the counters into a scaled error rate, keep the largest rate seen,
 * and start a new window.
 */
io:::done
/args[1]->dev_pathname != "<nfs>" && args[1]->dev_pathname != "" &&
    timer[args[0]->b_edev] != 0 &&
    timestamp - 10000000000 > timer[args[0]->b_edev]/
{
	timer[args[0]->b_edev] = timestamp;

	this->sn = args[1]->dev_statname;
	this->p = substr(args[1]->dev_pathname, 25, 16);
	this->p = (strstr(this->p, "disk@") == 0 ||
	    strstr(this->p, "disk@") == "") ? toupper(this->p) : "NA";

	/* This is a hack to work around lack of floating-point support */
	this->rate =
	    (multiplier * errct[args[0]->b_edev]) / ioct[args[0]->b_edev];
	@maxrateerr[this->sn, this->p] = max(this->rate);

	errct[args[0]->b_edev] = 0;
	ioct[args[0]->b_edev] = 0;
}
/* Per-device details for completions that carry the B_ERROR flag. */
io:::done
/args[1]->dev_pathname != "<nfs>" && args[1]->dev_pathname != "" &&
    args[0]->b_flags & B_ERROR/
{
	this->sn = args[1]->dev_statname;
	this->p = substr(args[1]->dev_pathname, 25, 16);
	this->p = (strstr(this->p, "disk@") == 0 ||
	    strstr(this->p, "disk@") == "") ? toupper(this->p) : "NA";

	/* Any difference between cterr and cteio means not all errors are EIO. */
	@cterr[this->sn, this->p] = sum(args[0]->b_flags & B_ERROR ? 1 : 0);
	@cteio[this->sn, this->p] = sum(args[0]->b_error == EIO ? 1 : 0);

	/* Sum of b_resid over the failed IOs */
	@noxfer[this->sn, this->p] = sum(args[0]->b_resid);
}
/* Once a minute: print one CSV row per device, then clear everything. */
tick-1min
{
	printa("%s,%s,%@d,%@d,%@d,%@d,%@de-6\n",
	    @ctretry, @cterr, @cteio, @noxfer, @maxrateerr);

	trunc(@ctretry);
	trunc(@cterr);
	trunc(@cteio);
	trunc(@noxfer);
	trunc(@maxrateerr);
}
d 观察sqlite的Dtrace片段由BrickstorOS bsrapid服务和Golang标准库打开
golang-db-extra-open.d
dtrace -qn '
/* Remember, per thread, the last matching function that was entered. */
pid$target::*NewSqliteDAL*:entry
{
    self->name = probefunc;
}

pid$target::*database*:entry
{
    self->name2 = probefunc;
}

/*
 * Count sqlite3_open_v2 calls by remembered function name and by the
 * string that arg0 points to.
 */
pid$target::sqlite3_open_v2:entry
/self->name != ""/
{
    @a[self->name, copyinstr(arg0)] = count();
}

pid$target::sqlite3_open_v2:entry
/self->name2 != ""/
{
    @b[self->name2, copyinstr(arg0)] = count();
}

/* Print both tables when tracing ends. */
END
{
    printa("caller: %s path: %s count: %@d\n", @a);
    printa("caller: %s path: %s count: %@d\n", @b);
}
' -c bsrapid