协同演示源 [英] Coroutine demo source

查看:142
本文介绍了协同演示源的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

这是一个程序的例子,其中协程真的有助于简化
的算法 - 其几乎不可能实现否则。
我也试图为演示选择一个有用的任务 - 这个实用程序将
a二进制文件转换为一系列AZ符号(和背面),没有任何显着的
冗余,它有一个能力与任何指定的字母表
(参见M.Init行)。基本上它的东西像泛化base64。
该代码已经过测试并与MSC,IntelC和gcc / mingw一起使用。

Here's an example of a program, where coroutines really help to simplify the algorithm - imho its hardly possible to implement otherwise. I also tried to choose a useful task for the demo - this utility converts a binary file to a sequence of A-Z symbols (and back), without any significant redundancy, and it has an ability to work with any specified alphabet (see M.Init line). Basically its something like generalized base64. The code is tested and worked with MSC,IntelC and gcc/mingw.

更新:该算法基于精确的算术编码,因此它不是一个-liner。
可以通过使用putc / getc文件i / o
(因此只有一个修改的rangecoder类和do_process()将保留),
,然后它将是一半非常有限(例如,将不适用于解码
存储器块或网络流)。
实际上协程在这里用作速度优化,其
是这篇文章的要点。
不幸的是,我没有任何更简单的应用程序来正确地演示这个
我可以写一个上下文建模压缩器,但这将像100
行更多。

Update: The algorithm is based on precise arithmetic coding, so its not a one-liner by default. It may be possible to cut it in half by using putc/getc file i/o (thus only a modified rangecoder class and do_process() would remain), but then it would be very limited (eg. won't be applicable to decode a memory block or network stream). Actually coroutines are used as a speed optimization here, and its the point of this post. Unfortunately I don't have any simpler application to properly demonstrate this - I could write a context modelling compressor instead, but that would be like 100 lines more.

问题:

1)如何用合适的C ++代码替换INCLUDE_PROCESS_TEMPLATE宏?

2)有没有办法实现这个没有协程? br>
(但仍具有内存中编码和缓冲文件i / o)

3)任何修复/改进?

Questions:
1) How to replace INCLUDE_PROCESS_TEMPLATE macro with proper C++ code?
2) Is there a way to implement this without coroutines?
(but still with in-memory encoding and buffered file i/o)
3) Any fixes/improvements?

#include <io.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <setjmp.h>
#define NOINLINE __declspec(noinline)

typedef unsigned int   uint;
typedef unsigned char  byte;
typedef unsigned long long int qword;

enum{ STKPAD=1<<16 };
struct coroutine {
  volatile int   state;
  volatile byte* inpptr;
  volatile byte* inpbeg;
  volatile byte* inpend;
  volatile byte* outptr;
  volatile byte* outbeg;
  volatile byte* outend;
  jmp_buf PointA, PointB;
  void yield( int value ) { if( setjmp(PointB)==0 ) { state=value; longjmp(PointA,value); } }
  void chkinp( void ) { if( inpptr>=inpend ) yield(1), inpptr=*&inpptr; }
  void chkout( void ) { if( outptr>=outend ) yield(2); }
  template <int f> byte get( void ) { if( f ) chkinp(); return *inpptr++; }
  template <int f> void put( uint c ) { *outptr++ = c; if( f ) chkout(); }
  void coro_init( void ) { inpptr=inpbeg=inpend=0; outptr=outbeg=outend=0; state=0; }
  void addinp( byte* inp,uint inplen ) { inpbeg=inpptr=inp; inpend=&inp[inplen]; }
  void addout( byte* out,uint outlen ) { outbeg=outptr=out; outend=&out[outlen]; }
};

#define INCLUDE_PROCESS_TEMPLATE \
NOINLINE void call_do_process() { byte stktmp[STKPAD]; state=ptrdiff_t(stktmp); do_process(); } \
int coro_process( void ) { if( setjmp(PointA)==0 ) if( state ) longjmp(PointB,3); else call_do_process(); return state; } 

struct Rangecoder_SH1x : coroutine {
  enum { SCALElog=15, SCALE=1<<SCALElog };
  enum { NUM=4, sTOP=0x01000000U, Thres=0xFF000000U };
  uint f_decode; // 0=encode, 1=decode;
  uint range, Cache, FFNum;
  union {
    struct { uint low; uint Carry; };
    qword lowc;
    uint  code; 
  };
  uint rc_BProcess( uint freq, uint bit ) { 
    uint rnew = (range>>SCALElog)*freq;
    if( f_decode ) bit = (code>=rnew);
    range = ((range-rnew-rnew)&(-bit)) + rnew;
    rnew &= -bit;
    if( f_decode ) code-=rnew; else lowc+=rnew;
    if( f_decode ) while( range<sTOP ) range<<=8, (code<<=8)+=get<1>();
    else while( range<sTOP ) range<<=8, ShiftLow();
    return bit;
  }
  void ShiftLow( void ) {
    if( low<Thres || Carry ) {
      put<1>( Cache+Carry );
      for(; FFNum!=0; FFNum-- ) put<1>( Carry-1 );
      Cache=low>>24; Carry=0;
    } else FFNum++;
    low<<=8;
  }
  void rc_Init( int DECODE ) {
    f_decode=DECODE; range=-1; lowc=FFNum=Cache=0;
    if( f_decode ) for(int _=0; _<NUM+0; _++) (code<<=8)+=get<1>(); 
  }
};

struct Model : Rangecoder_SH1x {
  uint DECODE, f_quit;
  enum{ lCNUM=8, CNUM=1<<lCNUM, ROWSIZE=80 };
  uint count[2*CNUM];
  enum{ inpbufsize=1<<16, outbufsize=1<<16 };
  byte inpbuf[inpbufsize], outbuf[outbufsize];

  void Init( const char* s ) {
    uint i,j;
    uint (&p)[CNUM] = (uint(&)[CNUM])count[CNUM];
    for( i=0; i<2*CNUM; i++) count[i]=0;
    for( i=0; s[i]; i++ ) p[byte(s[i])]++;
    for( j=0; j<lCNUM; j++ ) for( i=(CNUM>>j); i<((CNUM+CNUM)>>j); i++ ) count[i>>1] += count[i];
  }

  INCLUDE_PROCESS_TEMPLATE
  void do_process( void ) {
    uint i,j;
    rc_Init(1-DECODE);
    for( i=0; !f_quit; i++ ) {
      uint c=0, ctx=1;
      if( DECODE ) do c=get<1>(); while( c==10 );
      for( j=lCNUM-1; j!=-1; j-- ) {
        uint freq = count[ctx*2+0]*SCALE/(count[ctx*2+0]+count[ctx*2+1]);
        ctx += ctx + ((freq==0) ? 1 : (freq==SCALE) ? 0 : rc_BProcess(freq,(c>>j)&1));
      }
      if( !DECODE ) put<1>(ctx), (((i+1)%ROWSIZE==0)?put<1>(10),0:0);
    }
    yield(0);
  }

  void ProcessFile( uint Mode, FILE* f, FILE* g ) {
    volatile uint r; volatile qword g_len=0; uint f_len=0;
    DECODE=Mode; f_quit=0;
    if( DECODE ) addout( (byte*)&g_len, sizeof(f_len)+1 ), r=1;
    else f_len=filelength(fileno(f)), addinp( (byte*)&f_len, sizeof(f_len) ),addout(0,0), r=2;
    do {
      if( r==1 ) {
        uint l = fread( inpbuf, 1, inpbufsize, f );
        if( l>0 ) {
          addinp( inpbuf, l );
        } else {
          if( inpbeg==inpbuf+1 ) f_quit=1;
          memset( inpbuf, 0x80, inpbufsize );
          addinp( inpbuf+1, 5 ); 
        }
      } else if( r==2 ) {
        if( outbeg==outbuf ) fwrite( outbuf, 1, outptr-outbeg, g ); else g_len>>=8;
        addout( outbuf, outbufsize );
      }
      r = coro_process(); 
    } while(r);
    fwrite( outbuf, 1,outptr-outbuf, g ); // flush
    if( DECODE==0 ) fputc( 10, g ); else fflush(g), chsize( fileno(g), g_len );
  }

} M;

int main( int argc, char** argv ) {
  if( argc<4 ) return 1;
  int DECODE = argv[1][0]=='d';
  FILE* f = fopen( argv[2], "rb" ); if( f==0 ) return 2;
  FILE* g = fopen( argv[3], "wb" ); if( g==0 ) return 3;
  M.Init( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" );
  M.ProcessFile( DECODE, f, g );
}


推荐答案

询问(参见[1]) - 从子类静态调用
a函数的诀窍。 tl; dr显然是一个强大的力量,所以这里是你的
可读标准协同fibonacci发电机这次。有一个小的区别,虽然 - 我们不真的需要coroutines生成
这些数字,但它真的很难(如果可能的话),使一个更快的实现
我的第一个程序没有协同。

Ok, here's what I actually asked about (see [1]) - the trick to statically call a function from child class. tl;dr is apparently a mighty power, so here's your readable standard coroutine fibonacci generator this time. There's a small difference though - we don't really need coroutines to generate these numbers, but its really hard (if possible) to make a faster implementation of my first program without coroutines.

#include <stdio.h>
#include <stddef.h>
#include <setjmp.h>

// without noinline some compilers tend to allocate the array before setjmp()
#ifdef __GNUC__
 #define NOINLINE __attribute__((noinline))
#else
 #define NOINLINE __declspec(noinline)
#endif

enum{ STKPAD=1<<16 };
struct coroutine {
  volatile unsigned state;
  jmp_buf PointA, PointB;
  void yield( int value ) { if( setjmp(PointB)==0 ) { state=value; longjmp(PointA,value); } }
  template <typename T> NOINLINE void call_do_process() {
    char stktmp[STKPAD]; state=ptrdiff_t(stktmp); ((T*)this)->do_process();
  }
  template <typename T> unsigned coro_process( T* ) {
    if( setjmp(PointA)==0 ) if( state ) longjmp(PointB,3); else call_do_process<T>();
    return state;
  }
};

struct fibonacci : coroutine {

  void do_process( void ) {
    unsigned a=0,b=1;
    while(1) {
      yield( b );
      b = b + a;
      a = b - a;
    }
  }

  unsigned get( void ) {
    return coro_process(this); 
  }

} F;

int main( int argc, char** argv ) {

  for( int i=0; i<20; i++ ) {
    printf( "%i ", F.get() );
  } printf( "\n" );

  return 0;
}

由于Jerry Coffin的替代版本仍然无法产生合理的结果,
这里是一些更简单的流基准。它很可惜,因为我预计它甚至
慢的迭代器。

And since Jerry Coffin's alternative version still fails to produce sensible results, here're some simpler stream benchmarks. Its a pity, as I'd expect it to be even slower with iterators.

事实上,我已经测试了各种方法与算术编码器 - plain getc / putc,
虚方法,纯函数指针,类迭代器类,并清楚地表明
有一个很大的区别。现在,协程被证明是最好的方法 -
没有复杂的逻辑封装到字节i / o调用(不像迭代器),和
处理不必关心i / o细节。当然,还有更多的
优化,但我真的只是试图证明coroutine
方法的好处在这里...

In fact I've tested all kinds of approaches with arithmetic coders - plain getc/putc, virtual methods, plain functions pointers, iterator-like classes, and its clear that there's a large difference. For now, coroutines proved to be the best way for this - there's no complex logic encapsulated into byte i/o calls (unlike iterators), and the processing doesn't have to care about i/o details. Sure, there're even further optimizations, but I really only tried to demonstrate the benefits of coroutine approach here...

#define _CRT_SECURE_NO_DEPRECATE
#define _CRT_DISABLE_PERFCRIT_LOCKS

#include <stdio.h>
#include <time.h>
#include <fstream>

int main( int argc, char** argv ) {
  if( argc<3 ) return 1;

  { 
    clock_t start = clock();
    FILE* f = fopen( argv[1], "rb" ); if( f==0 ) return 2;
    FILE* g = fopen( argv[2], "wb" ); if( g==0 ) return 3;
    while(1) {
      int c = getc(f);
      if( c<0 ) break;
      putc(c,g);
    }
    fclose(f);
    fclose(g);
    clock_t stop = clock();
    printf( "            File copy via stdio getc/putc - %7.3fs\n", float(stop-start)/CLOCKS_PER_SEC );
  }

  {
    clock_t start = clock();
    FILE* f = fopen( argv[1], "rb" ); if( f==0 ) return 2;
    FILE* g = fopen( argv[2], "wb" ); if( g==0 ) return 3;
    while(1) {
      static char buf[1<<16];
      int l = fread( buf, 1,sizeof(buf), f ); if( l<=0 ) break;
      fwrite( buf, 1,l, g ); if( l<sizeof(buf) ) break;
    }
    fclose(f);
    fclose(g);
    clock_t stop = clock();
    printf( "     File copy via stdio 64k fread/fwrite - %7.3fs\n", float(stop-start)/CLOCKS_PER_SEC );
  }

  {
    clock_t start = clock();
    std::ifstream f(argv[1],std::ios::in|std::ios::binary); if( !f.is_open() ) return 2;
    std::ofstream g(argv[2],std::ios::out|std::ios::binary); if( !g.is_open() ) return 3;
    while(1) {
      int c = f.get();
      if( c<0 ) break;
      g.put(c);
    }
    f.close();
    g.close();
    clock_t stop = clock();
    printf( "File copy via ifstream::get/ofstream::put - %.3fs\n", float(stop-start)/CLOCKS_PER_SEC );
  }

}




----- 100,000,000 byte file ----- 
[ GCC 4.5 ]
            File copy via stdio getc/putc -   0.546s
     File copy via stdio 64k fread/fwrite -   0.188s
File copy via ifstream::get/ofstream::put -  10.578s

[ IntelC 11.1 / VS 2005 ]
            File copy via stdio getc/putc -   0.500s
     File copy via stdio 64k fread/fwrite -   0.156s
File copy via ifstream::get/ofstream::put -  14.656s

[ MSC 14.0 / VS 2005 ]
            File copy via stdio getc/putc -   0.609s
     File copy via stdio 64k fread/fwrite -   0.156s
File copy via ifstream::get/ofstream::put -  19.063s

----- 1,000,000,000 byte file ----- 
[ GCC 4.5 ]
            File copy via stdio getc/putc -   7.468s
     File copy via stdio 64k fread/fwrite -   1.828s
File copy via ifstream::get/ofstream::put - 109.891s

[ IntelC 11.1 / VS 2005 ]
            File copy via stdio getc/putc -   6.718s
     File copy via stdio 64k fread/fwrite -   1.672s
File copy via ifstream::get/ofstream::put - 145.500s

[ MSC 14.0 / VS 2005 ]
            File copy via stdio getc/putc -   6.453s
     File copy via stdio 64k fread/fwrite -   1.609s
File copy via ifstream::get/ofstream::put - 191.031s

这篇关于协同演示源的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆