如何在Android上更快地将RGB565转换为YUV420SP? [英] How to convert RGB565 to YUV420SP faster on android?

查看:455
本文介绍了如何在Android上更快地将RGB565转换为YUV420SP的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我需要显示一张JPEG图片,并将其转换为YUV420SP。首先,我用SkBitmap解析JPEG并显示它,然后用下面的代码在Android上把RGB565转换为YUV420SP,但转换一幅640*480的RGB565图片要花费75ms。有没有人知道在Android上把RGB565转换为YUV420SP的更快方法?或者在Android上把JPEG文件转换为YUV420SP的更快方法?

  //从RGB转换为YUV420
INT RGB2YUV_YR [256],RGB2YUV_YG [256],RGB2YUV_YB [256];
INT RGB2YUV_UR [256],RGB2YUV_UG [256],RGB2YUV_UBVR [256];
INT RGB2YUV_VG [256],RGB2YUV_VB [256];

//
//用于RGB到YUV420转换表
//
无效InitLookupTable()
{
    静态布尔hasInited = FALSE;
    如果(hasInited)
        返回 ;
    hasInited = TRUE;
    INT I;
    对于(i = 0; I< 256;我++)
        RGB2YUV_YR [I] =(浮点)65.481 *(I<< 8);
    对于(i = 0; I< 256;我++)
        RGB2YUV_YG [I] =(浮点)128.553 *(I<< 8);
    对于(i = 0; I< 256;我++)
        RGB2YUV_YB [I] =(浮点)24.966 *(I<< 8);
    对于(i = 0; I< 256;我++)
        RGB2YUV_UR [I] =(浮点)37.797 *(I<< 8);
    对于(i = 0; I< 256;我++)
        RGB2YUV_UG [I] =(浮点)74.203 *(I<< 8);
    对于(i = 0; I< 256;我++)
        RGB2YUV_VG [I] =(浮点)93.786 *(I<< 8);
    对于(i = 0; I< 256;我++)
        RGB2YUV_VB [I] =(浮点)18.214 *(I<< 8);
    对于(i = 0; I< 256;我++)
        RGB2YUV_UBVR [I] =(浮点)112×(I<< 8);
}

INT ConvertRGB5652YUV420SP(INT W,INT小时,无符号字符* BMP,无符号字符* YUV)
{
    无符号字符* U,* V,* Y * UU,* VV;
    无符号字符* PU1,* PU2,* PU3,* PU4;
    无符号字符* PV1,* PV2,PV3 *,* PV4;
    unsigned char型右值= 0,g值= 0,bValue = 0;
    uint16_t * bmpPtr;
    INT I,J;
    的printf(ConvertRGB5652YUV420SP开始,W =%D,H =%D,BMP =%P,YUV =%P \ N,W,H,BMP,YUV);

    timeval结构tpstart,tpend;
    函数gettimeofday(安培; tpstart,NULL);

    InitLookupTable();

    函数gettimeofday(安培; tpend,NULL);
    浮timeuse = 1000000 *(tpend.tv_sec-tpstart.tv_sec)+ tpend.tv_usec-tpstart.tv_usec;
    timeuse / = 1000;
    的printf(InitLookupTable使用时间=%F \ N,timeuse);
    函数gettimeofday(安培; tpstart,NULL);

    UU =新的无符号的char [W * H]。
    VV =新的无符号的char [W * H]。
    如果(UU == NULL || VV == NULL || YUV == NULL)
        返回0;
    Y = YUV;
    U = UU;
    V = VV;
    //获取R,G,从BMP图像数据B指针....
    bmpPtr =(uint16_t *)BMP;

    //获取YUV值RGB值...
    对于(i = 0; I< H;我++){
        为(J = 0; J<瓦; J ++){
            uint16_t颜色= * bmpPtr;
            unsigned int类型R =(彩色>> 11)及0x1F的;
            无符号整型G =(彩色>> 5)及0x3F的;
            unsigned int类型B =(彩色)及0x1F的;
            右值=(为r 3;)| (R>&→2);
            g值=(克&其中; 2)| (g取代;→4);
            bValue =(B< 3;)| (B个→2);

            * Y + =(RGB2YUV_YR [右值] + RGB2YUV_YG [g值] + RGB2YUV_YB [bValue] +
                1048576)>> 16;
            * U + =(-RGB2YUV_UR [右值]  -  RGB2YUV_UG [g值] + RGB2YUV_UBVR [bValue] +
                8388608)>> 16;
            * V + =(RGB2YUV_UBVR [右值]  -  RGB2YUV_VG [g值]  -  RGB2YUV_VB [bValue] +
                8388608)>> 16;
            bmpPtr ++;
        }
    }

    函数gettimeofday(安培; tpend,NULL);
    timeuse = 1000000 *(tpend.tv_sec-tpstart.tv_sec)+ tpend.tv_usec-tpstart.tv_usec;
    timeuse / = 1000;
    的printf(获取YUV值使用时间=%F \ N,timeuse);
    函数gettimeofday(安培; tpstart,NULL);

    //现在品尝U&放大器; V至获得的YUV 4:2:0格式
    //获取正确的指针...
    U = YUV + W * H;
    V = U + 1;
    // 为了你
    PU1 = UU;
    PU2 = PU1 + 1;
    PU3 = PU1 + W;
    PU4 = PU3 + 1;
    //当V
    PV1 = VV;
    PV2 = PV1 + 1;
    PV3 = PV1 + W;
    PV4 = PV3 + 1;
    //做抽样....
    对于(I = 0; I&所述; H; I + = 2){
        为(J = 0; J<瓦; J + = 2){
            * U =(* PU1 + * PU2 + * PU3 + * PU4)>> 2;
            U + = 2;
            * V =(* PV1 + * PV2 + * + PV3 PV4 *)>> 2;
            V + = 2;
            PU1 + = 2;
            PU2 + = 2;
            PU3 + = 2;
            PU4 + = 2;
            PV1 + = 2;
            PV2 + = 2;
            PV3 + = 2;
            PV4 + = 2;
        }

        PU1 + = W;
        PU2 + = W;
        PU3 + = W;
        PU4 + = W;
        PV1 + = W;
        PV2 + = W;
        PV3 + = W;
        PV4 + = W;
    }

    函数gettimeofday(安培; tpend,NULL);
    timeuse = 1000000 *(tpend.tv_sec-tpstart.tv_sec)+ tpend.tv_usec-tpstart.tv_usec;
    timeuse / = 1000;
    的printf(做抽样使用时间=%F \ N,timeuse);
    函数gettimeofday(安培; tpstart,NULL);

    删除UU;
    删除VV;
    返回1;
}

INT主(INT ARGC,字符** argv的){
    unsigned char型BMP [640 * 480 * 2] = {0};
    unsigned char型YUV [(640 * 480 * 3)/ 2] = {0};
    timeval结构tpstart,tpend;
    函数gettimeofday(安培; tpstart,NULL);

    ConvertRGB5652YUV420SP(640,480,BMP,YUV);

    函数gettimeofday(安培; tpend,NULL);
    浮timeuse = 1000000 *(tpend.tv_sec-tpstart.tv_sec)+ tpend.tv_usec-tpstart.tv_usec;
    timeuse / = 1000;
    的printf(ConvertARGB2YUV420SP使用时间=%F \ N,timeuse);
    返回0;
}
 

在Android(ARMv6)上的输出:

  ConvertRGB5652YUV420SP begin,w=640,h=480,bmp=0xbe7314fc,yuv=0xbe7c74fc
InitLookupTable used time=0.383000
Get YUV values used  time=61.394001
Do sampling used time=11.918000
ConvertARGB2YUV420SP used time=74.596001
 

CPU信息:

  $ cat /proc/cpuinfo
cat /proc/cpuinfo
Processor       : ARMv6-compatible processor rev 5 (v6l)
BogoMIPS        : 791.34
Features        : swp half thumb fastmult vfp edsp java
CPU implementer : 0x41
CPU architecture: 6TEJ
CPU variant     : 0x1
CPU part        : 0xb36
CPU revision    : 5

Hardware        : IMAPX200
Revision        : 0000
Serial          : 0000000000000000
 

解决方案

在ARMv7上,请使用NEON。它能在不到1毫秒内完成这项工作(VGA分辨率)。

如果你只能使用ARMv6,就用ARM汇编来优化它(VGA分辨率下大约8毫秒)。

使用定点运算代替查找表,把查找表去掉。

准备两个掩码:

  • 0x001f001f:mask1
  • 0x003f003f:mask2

然后一次把两个像素加载到一个32位寄存器中(这比16位读取快得多)

 and red, mask1, pixel, lsr #11
and grn, mask2, pixel, lsr #5
and blu, mask1, pixel
 

现在你有三个寄存器,每个寄存器包含两个值——一个在低16位,另一个在高16位。

从这里开始,smulxy指令(16位乘法)会发挥奇效。

祝你好运。

PS:你的查找表也不太好。为什么它们的长度都是256?你可以把它们缩减为32(与R和B相关的表)和64(与G相关的表),这会提高缓存命中率。也许仅凭这一点,不借助汇编就能达到40ms的目标。是的,缓存未命中就是这么痛苦。

I need to display a JPEG picture and convert it to YUV420SP. First I use SkBitmap to parse the JPEG and display it, then I use the code below to convert RGB565 to YUV420SP on Android, but it takes 75 ms to convert a 640*480 RGB565 picture. Does anybody know a faster way to convert RGB565 to YUV420SP on Android, or a faster way to convert a JPEG file to YUV420SP on Android?

// Convert from RGB to YUV420
// Per-channel contribution tables for the RGB -> YUV conversion, indexed by
// an 8-bit channel level. Entry [i] holds coefficient * (i << 8), truncated
// to int when stored. All entries are zero until InitLookupTable() has run.
int RGB2YUV_YR[256];    // R contribution to Y
int RGB2YUV_YG[256];    // G contribution to Y
int RGB2YUV_YB[256];    // B contribution to Y
int RGB2YUV_UR[256];    // R contribution to U
int RGB2YUV_UG[256];    // G contribution to U
int RGB2YUV_UBVR[256];  // shared table: B contribution to U and R contribution to V
int RGB2YUV_VG[256];    // G contribution to V
int RGB2YUV_VB[256];    // B contribution to V

// Populates every RGB2YUV_* table above.
// Only the first call does any work; later calls return immediately, so
// entries modified after initialisation are left untouched.
// The guard is a plain static flag with no synchronisation.
void InitLookupTable()
{
    static bool tablesReady = false;

    if (!tablesReady) {
        tablesReady = true;

        for (int level = 0; level < 256; ++level) {
            // Channel level pre-scaled by 256; the product is evaluated in
            // float and truncated on assignment to the int table entry.
            const int scaled = level << 8;

            RGB2YUV_YR[level]   = (float) 65.481 * scaled;
            RGB2YUV_YG[level]   = (float) 128.553 * scaled;
            RGB2YUV_YB[level]   = (float) 24.966 * scaled;
            RGB2YUV_UR[level]   = (float) 37.797 * scaled;
            RGB2YUV_UG[level]   = (float) 74.203 * scaled;
            RGB2YUV_VG[level]   = (float) 93.786 * scaled;
            RGB2YUV_VB[level]   = (float) 18.214 * scaled;
            RGB2YUV_UBVR[level] = (float) 112 * scaled;
        }
    }
}

// Milliseconds elapsed between two gettimeofday() samples.
static float ElapsedMillis(const struct timeval *start, const struct timeval *end)
{
    float elapsed = 1000000 * (end->tv_sec - start->tv_sec) + end->tv_usec - start->tv_usec;
    elapsed /= 1000;
    return elapsed;
}

/*
 * Converts a w x h RGB565 image to semi-planar YUV 4:2:0.
 *
 * Parameters:
 *   w, h - image size in pixels; both must be positive and even, because
 *          chroma is produced by averaging 2x2 pixel blocks.
 *   bmp  - w*h 16-bit RGB565 pixels, read through a uint16_t pointer in
 *          native byte order (assumes the buffer is 2-byte aligned).
 *   yuv  - output buffer of at least w*h*3/2 bytes: w*h luma bytes followed
 *          by w*h/2 bytes of interleaved chroma, U first then V.
 *
 * Returns 1 on success, 0 when an argument is rejected or the scratch
 * buffer could not be obtained. Nothing is written to yuv on failure.
 *
 * Side effects: calls InitLookupTable() and prints timing lines to stdout.
 */
int ConvertRGB5652YUV420SP(int w, int h, unsigned char *bmp, unsigned char *yuv)
{
    struct timeval tpstart, tpend;

    // %p requires void* arguments.
    printf("ConvertRGB5652YUV420SP begin,w=%d,h=%d,bmp=%p,yuv=%p\n", w, h, (void *)bmp, (void *)yuv);

    gettimeofday(&tpstart, NULL);
    InitLookupTable();
    gettimeofday(&tpend, NULL);
    printf("InitLookupTable used time=%f\n", ElapsedMillis(&tpstart, &tpend));
    gettimeofday(&tpstart, NULL);

    // Validate before allocating so a rejected call cannot leak.
    // Odd sizes are refused: the 2x2 averaging below would otherwise read
    // past the scratch planes and write past the chroma area of yuv.
    if (bmp == NULL || yuv == NULL || w <= 0 || h <= 0 || (w & 1) || (h & 1))
        return 0;

    // Guard the size computation (2 * w * h) against wrap-around.
    if ((size_t)w > ((size_t)-1) / 2 / (size_t)h)
        return 0;
    const size_t pixelCount = (size_t)w * (size_t)h;

    // One allocation holds both full-resolution chroma planes, so there is
    // no partially-allocated state to clean up.
    unsigned char *planes = new unsigned char[2 * pixelCount];
    if (planes == NULL)  // only reachable where new[] reports failure by returning NULL
        return 0;
    unsigned char *uu = planes;
    unsigned char *vv = planes + pixelCount;

    // Pass 1: full-resolution Y, U and V for every pixel.
    const uint16_t *src = (const uint16_t *)bmp;
    for (size_t n = 0; n < pixelCount; n++) {
        const uint16_t color = src[n];
        const unsigned int r5 = (color >> 11) & 0x1f;
        const unsigned int g6 = (color >> 5) & 0x3f;
        const unsigned int b5 = color & 0x1f;

        // Expand 5/6-bit channels to 8 bits by replicating the top bits.
        const unsigned char rValue = (unsigned char)((r5 << 3) | (r5 >> 2));
        const unsigned char gValue = (unsigned char)((g6 << 2) | (g6 >> 4));
        const unsigned char bValue = (unsigned char)((b5 << 3) | (b5 >> 2));

        // Table entries are scaled by 2^16; 1048576 is 16 << 16 and
        // 8388608 is 128 << 16, i.e. the luma and chroma offsets.
        yuv[n] = (unsigned char)((RGB2YUV_YR[rValue] + RGB2YUV_YG[gValue] + RGB2YUV_YB[bValue] +
            1048576) >> 16);
        uu[n] = (unsigned char)((-RGB2YUV_UR[rValue] - RGB2YUV_UG[gValue] + RGB2YUV_UBVR[bValue] +
            8388608) >> 16);
        vv[n] = (unsigned char)((RGB2YUV_UBVR[rValue] - RGB2YUV_VG[gValue] - RGB2YUV_VB[bValue] +
            8388608) >> 16);
    }

    gettimeofday(&tpend, NULL);
    printf("Get YUV values used  time=%f\n", ElapsedMillis(&tpstart, &tpend));
    gettimeofday(&tpstart, NULL);

    // Pass 2: average each 2x2 block and store the result as a U,V pair
    // directly after the luma plane.
    unsigned char *uv = yuv + pixelCount;
    for (int row = 0; row < h; row += 2) {
        const unsigned char *uTop = uu + (size_t)row * (size_t)w;
        const unsigned char *uBottom = uTop + w;
        const unsigned char *vTop = vv + (size_t)row * (size_t)w;
        const unsigned char *vBottom = vTop + w;

        for (int col = 0; col < w; col += 2) {
            *uv++ = (unsigned char)((uTop[col] + uTop[col + 1] + uBottom[col] + uBottom[col + 1]) >> 2);
            *uv++ = (unsigned char)((vTop[col] + vTop[col + 1] + vBottom[col] + vBottom[col + 1]) >> 2);
        }
    }

    gettimeofday(&tpend, NULL);
    printf("Do sampling used time=%f\n", ElapsedMillis(&tpstart, &tpend));

    // Array form is required for memory obtained with new[].
    delete[] planes;
    return 1;
}

// Benchmark driver: converts one all-zero 640x480 RGB565 frame and prints
// the total time taken by ConvertRGB5652YUV420SP() in milliseconds.
// Exits with 0 when the conversion reports success and 1 otherwise.
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    enum { kWidth = 640, kHeight = 480 };

    // Static storage keeps roughly 1 MB of frame data off the stack;
    // objects with static storage duration start out zero-filled.
    static unsigned char bmp[kWidth * kHeight * 2];
    static unsigned char yuv[(kWidth * kHeight * 3) / 2];

    struct timeval tpstart, tpend;
    gettimeofday(&tpstart, NULL);

    const int converted = ConvertRGB5652YUV420SP(kWidth, kHeight, bmp, yuv);

    gettimeofday(&tpend, NULL);
    float timeuse = 1000000 * (tpend.tv_sec - tpstart.tv_sec) + tpend.tv_usec - tpstart.tv_usec;
    timeuse /= 1000;
    printf("ConvertARGB2YUV420SP used time=%f\n", timeuse);

    // Surface a failed conversion through the exit status.
    return converted ? 0 : 1;
}

output on android(armv6):

ConvertRGB5652YUV420SP begin,w=640,h=480,bmp=0xbe7314fc,yuv=0xbe7c74fc
InitLookupTable used time=0.383000
Get YUV values used  time=61.394001
Do sampling used time=11.918000
ConvertARGB2YUV420SP used time=74.596001

cpu info:

$ cat /proc/cpuinfo
cat /proc/cpuinfo
Processor       : ARMv6-compatible processor rev 5 (v6l)
BogoMIPS        : 791.34
Features        : swp half thumb fastmult vfp edsp java
CPU implementer : 0x41
CPU architecture: 6TEJ
CPU variant     : 0x1
CPU part        : 0xb36
CPU revision    : 5

Hardware        : IMAPX200
Revision        : 0000
Serial          : 0000000000000000

解决方案

On ARMv7, use NEON. It will do the job in less than 1ms. (VGA)

If you are stuck with ARMv6, optimize it in ARM assembly. (about 8ms on VGA)

Use fixed-point arithmetic instead of the lookup tables. Get rid of them.

make two masks :

  • 0x001f001f : mask1
  • 0x003f003f : mask2

then load two pixels at once into a 32bit register (which is a lot faster than 16bit read)

and red, mask1, pixel, lsr #11
and grn, mask2, pixel, lsr #5
and blu, mask1, pixel

now you have three registers, each containing two values - one in the lower, and the other in the upper 16 bits.

smulxy instructions will do some miracles from here on. (16bit multiply)

Good luck.

PS : your lookup table isn't that good either. Why are they all in length of 256? You could reduce them to 32 (r and b related) and 64 (g related) Which will increase the cache hit rate. Probably that will just do for the targeted 40ms without resorting to assembly. Yes, cache-misses are THAT painful.

这篇关于如何在Android上更快地将RGB565转换为YUV420SP的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆