文件分块和缓冲? [英] File chunking and buffering?

查看:120
本文介绍了文件分块和缓冲?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

归根结底,我只是想把一个二进制文件切分成大小不超过 X 的若干片段,不对它做其他任何处理。如果输入文件是 21MB,我希望得到 3 个 7MB 的片段,之后可以用 cat 之类的工具重新拼接起来。在下面这个能够工作的简单示例中,我使用了一个 7MB 大小的缓冲区。为了得到 7MB 的文件块,我是否必须这样做?如果块大小是 2GB,我显然不想把它全部放进内存。那么,我到底需不需要创建缓冲区?

我确实读过本站和其他网站上关于这个问题的几篇帖子,但它们似乎都使用了某种由 malloc 或数组创建的缓冲区;而查找非缓冲的方式,又把我引向了套接字和 TCP/IP 等远超我目前知识水平的主题。

难道我注定要写一大堆 if/while 语句吗?

P.S. 哪里可以找到讲解 C 语言 I/O 流的书籍?我能找到很多关于 C++ 的,但找不到关于 C 的。

ifp = fopen(ifile, "rb"); // ifile is a 25MB sound file
ofp = fopen(ofile, "w");  // Omitted error checking.

setvbuf( ifp, NULL, _IOFBF, 1024); // Are these on
setvbuf( ofp, NULL, _IOFBF, 1024); // by default?

size_t CHUNK = 7000000;  // 7MB Chunk sizes
size_t result = 0;
size_t *buffer = malloc(CHUNK);

if (buffer == NULL) {fputs ("Could not allocate memory",stderr); exit (1);}
// Read 1 btye at a time?
result = fread(buffer, 1, CHUNK, ifp);
if (result != CHUNK) {fputs ("ERROR: Buffer/read mismatch.",stderr); exit (1);}

fwrite(buffer, CHUNK, 1, ofp);

free(buffer);


解决方案

下面是一个名为 bsplit 的程序,我最初写于 1991 年。它可以把文件分割成任意大小的块;默认大小以千字节为单位指定(确切地说是 kibibyte,即 1024 字节)。

/*
@(#)File:           $RCSfile: bsplit.c,v $
@(#)Version:        $Revision: 1.11 $
@(#)Last changed:   $Date: 2008/08/09 05:54:55 $
@(#)Purpose:        Split file into blocks -- binary
@(#)Author:         J Leffler
*/

#if __STDC_VERSION__ >= 199901L
#define _XOPEN_SOURCE 600
#else
#define _XOPEN_SOURCE 500
#endif /* __STDC_VERSION__ */

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include "stderr.h"
#include "filter.h"

#define MAXFILENAMELEN  256
#define KILOBYTE 1024
#define MEGABYTE (KILOBYTE*KILOBYTE)
#define GIGABYTE (MEGABYTE*KILOBYTE)
#define NIL(x)  ((x)0)
#define MIN(a,b)    (((a) < (b)) ? (a) : (b))

char    *prefix = "bsplit.";
size_t   blocksize = 64;
size_t   nblocks = 0;
size_t   skipblocks = 0;
char     buffer[64*KILOBYTE];
long     counter = 0;

static int  bsplit(FILE *ifp, const char *fn)
{
    size_t   n;         /* Bytes read this time */
    size_t   bsize;     /* Size written for current block */
    size_t   tsize;     /* Size written for current file */
    size_t   rsize;     /* Amount to read */
    FILE    *op;        /* Output file stream */
    char     file[MAXFILENAMELEN];  /* Output file name */

    tsize = 0;
    bsize = 0;
    op = NIL(FILE *);
    rsize = MIN(sizeof(buffer), blocksize);
    while ((n = fread(buffer, sizeof(char), rsize, ifp)) > 0)
    {
        tsize += n;
        if (tsize > skipblocks)
        {
            if (bsize == 0)
            {
                sprintf(file, "%s%03ld", prefix, counter++);
                if ((op = fopen(file, "w")) == NIL(FILE *))
                {
                    err_sysrem2("failed to open file", file);
                    return(-1);
                }
                printf("%s\n", file);
            }
            bsize += n;
            if (fwrite(buffer, sizeof(char), n, op) != n)
            {
                err_sysrem2("failed to write to file", file);
                return(-1);
            }
            if (bsize >= blocksize)
            {
                fclose(op);
                bsize = 0;
            }
            if (nblocks > 0 && tsize >= nblocks)
                break;
        }
    }
    return 0;
}

int main(int argc, char **argv)
{
    int opt;
    size_t multiplier = KILOBYTE;
    char *p;
    char  c;
    int   rc;

    opterr = 0;
    err_setarg0(argv[0]);

    while ((opt = getopt(argc, argv, "s:n:p:b:V")) != -1)
    {
        switch (opt)
        {
        case 'p':
            prefix = optarg;
            if (strlen(prefix) > MAXFILENAMELEN - sizeof("000"))
                err_error("file name prefix (%s) is too long (max %d)", prefix,
                          (int)(MAXFILENAMELEN-sizeof("000")));
            break;
        case 's':
            skipblocks = atoi(optarg);
            break;
        case 'n':
            nblocks = atoi(optarg);
            break;
        case 'b':
            blocksize = atoi(optarg);
            p = optarg + strspn(optarg, "0123456789");
            if (*p != '\0')
            {
                c = tolower((unsigned char)*p);
                if (c == 'c')
                    multiplier = 1;
                else if (c == 'b')
                    multiplier = KILOBYTE/2;
                else if (c == 'k')
                    multiplier = KILOBYTE;
                else if (c == 'm')
                    multiplier = MEGABYTE;
                else if (c == 'g')
                    multiplier = GIGABYTE;
                else
                    err_error("unknown size multiplier suffix %s\n", p);
                if (p[1] != '\0')
                    err_error("unknown size multiplier suffix %s\n", p);
            }
            break;
        case 'V':
            err_version("BSPLIT", &"@(#)$Revision: 1.11 $ ($Date: 2008/08/09 05:54:55 $)"[4]);
            break;
        default:
            err_usage("[-b blocksize][-p prefix][-s skipblocks][-n blocks][file [...]]");
            break;
        }
    }

    /* Convert sizes to bytes */
    blocksize  *= multiplier;
    skipblocks *= blocksize;
    if (nblocks > 0)
        nblocks = skipblocks + nblocks * blocksize;

    rc = filter_stdout(argc, argv, optind, bsplit);
    return(rc);
}

头文件 stderr.h 声明了一系列错误报告函数;我在自己的大多数程序中都会用到它。头文件 filter.h 声明了函数 filter_stdout(),它会遍历参数列表,打开各个文件以供读取,并调用一个函数(这里是 bsplit())依次处理每个文件。它还会自动处理“没有参数就读取标准输入”等情况。(如需代码请与我联系——见我的个人资料。)

请注意,乘数后缀 c 表示“字符”,b 表示 512 字节的块,而 k、m 和 g 分别表示 KiB、MiB 和 GiB。

Ultimately I'm just looking to cut a binary file into pieces no greater than X in size, and do nothing else with it. If the input file is 21MB, I want 3 pieces of 7MB that I could join with cat or whatnot. In the simple example below, which works, I'm using a 7MB chunk-size buffer. Do I have to use that at all to get 7MB file chunks? If the chunk size were, say, 2GB, that is obviously not something I want to put in memory. So do I need to create a buffer at all?

I did read several posts on here and other sites about this, but they all seem to use some sort of buffer created by malloc or arrays, and looking up non-buffered ways led me too far ahead of my knowledge into sockets and TCP/IP related topics.

Am I doomed to a whole lot of if/while statements?

P.S. Where can I find books on I/O streams in C? I can find a LOT for C++, but not C.

ifp = fopen(ifile, "rb"); // ifile is a 25MB sound file
ofp = fopen(ofile, "w");  // Omitted error checking.

setvbuf( ifp, NULL, _IOFBF, 1024); // Are these on
setvbuf( ofp, NULL, _IOFBF, 1024); // by default?

size_t CHUNK = 7000000;  // 7MB Chunk sizes
size_t result = 0;
size_t *buffer = malloc(CHUNK);

if (buffer == NULL) {fputs ("Could not allocate memory",stderr); exit (1);}
// Read 1 btye at a time?
result = fread(buffer, 1, CHUNK, ifp);
if (result != CHUNK) {fputs ("ERROR: Buffer/read mismatch.",stderr); exit (1);}

fwrite(buffer, CHUNK, 1, ofp);

free(buffer);

解决方案

Here's a program, bsplit, that I originally wrote in 1991. It splits a file up into arbitrary sized chunks; the default size is specified in kilobytes (well, kibibytes — 1024 bytes).

/*
@(#)File:           $RCSfile: bsplit.c,v $
@(#)Version:        $Revision: 1.11 $
@(#)Last changed:   $Date: 2008/08/09 05:54:55 $
@(#)Purpose:        Split file into blocks -- binary
@(#)Author:         J Leffler
*/

#if __STDC_VERSION__ >= 199901L
#define _XOPEN_SOURCE 600
#else
#define _XOPEN_SOURCE 500
#endif /* __STDC_VERSION__ */

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include "stderr.h"
#include "filter.h"

/* Size of the buffer that holds a generated output file name. */
#define MAXFILENAMELEN  256

/* Binary size units, expressed in bytes. */
#define KILOBYTE        1024
#define MEGABYTE        (KILOBYTE * KILOBYTE)
#define GIGABYTE        (KILOBYTE * MEGABYTE)

/* Null pointer of type x, e.g. NIL(FILE *). */
#define NIL(x)          ((x)0)

/* Smaller of a and b; note that an argument may be evaluated twice. */
#define MIN(a, b)       (((a) < (b)) ? (a) : (b))

/* Settings adjusted by the command-line options in main(). */
char    *prefix     = "bsplit.";    /* Leading part of each output file name */
size_t   blocksize  = 64;           /* Size of each output block */
size_t   nblocks    = 0;            /* Limit on the amount to split; 0 means no limit */
size_t   skipblocks = 0;            /* Amount of input to skip before splitting */

/* State shared by every call of bsplit(). */
char     buffer[64 * KILOBYTE];     /* I/O transfer buffer */
long     counter    = 0;            /* Number appended to the next output file name */

/*
** Split the data read from ifp into consecutively numbered output files.
**
** Each output file is named with the global prefix followed by the value
** of the global counter (at least three digits) and receives at most
** blocksize bytes.  The first skipblocks bytes of input are read and
** discarded.  If nblocks is non-zero, splitting stops once the total
** number of input bytes consumed (skipped bytes included) reaches
** nblocks.  The name of every output file is printed on standard output
** when the file is created.
**
** ifp - input stream to split
** fn  - not used by this function (presumably the input name supplied
**       by the caller -- confirm against filter.h)
**
** Returns 0 on success, or -1 if an output file cannot be named, opened,
** written or closed, or if reading the input fails.
*/
static int  bsplit(FILE *ifp, const char *fn)
{
    size_t   n;                     /* Bytes read this time */
    size_t   bsize = 0;             /* Size written for current block */
    size_t   tsize = 0;             /* Size read from current input */
    size_t   rsize;                 /* Amount to read */
    FILE    *op = NIL(FILE *);      /* Output file stream */
    char     file[MAXFILENAMELEN];  /* Output file name */
    int      len;                   /* Length of formatted file name */

    (void)fn;

    for (;;)
    {
        /*
        ** Never read across the end of the skipped region or the end
        ** of the current block; otherwise skipped bytes could be
        ** written and a block could grow beyond blocksize whenever
        ** blocksize is not a multiple of the buffer size.
        */
        if (tsize < skipblocks)
            rsize = MIN(sizeof(buffer), skipblocks - tsize);
        else
            rsize = MIN(sizeof(buffer), blocksize - bsize);

        if ((n = fread(buffer, sizeof(char), rsize, ifp)) == 0)
            break;
        tsize += n;
        if (tsize <= skipblocks)
            continue;               /* Still inside the skipped region */

        if (bsize == 0)
        {
            /* Start a new output file; the name must fit in file[] */
            len = snprintf(file, sizeof(file), "%s%03ld", prefix, counter++);
            if (len < 0 || (size_t)len >= sizeof(file))
            {
                fprintf(stderr, "output file name too long for prefix %s\n", prefix);
                return(-1);
            }
            /* Binary mode: the data must be copied byte for byte */
            if ((op = fopen(file, "wb")) == NIL(FILE *))
            {
                err_sysrem2("failed to open file", file);
                return(-1);
            }
            printf("%s\n", file);
        }
        bsize += n;
        if (fwrite(buffer, sizeof(char), n, op) != n)
        {
            /* Report first so that fclose() cannot disturb errno */
            err_sysrem2("failed to write to file", file);
            fclose(op);
            return(-1);
        }
        if (bsize >= blocksize)
        {
            /* Buffered data is flushed here, so a failure is a write error */
            if (fclose(op) != 0)
            {
                err_sysrem2("failed to close file", file);
                return(-1);
            }
            op = NIL(FILE *);
            bsize = 0;
        }
        if (nblocks > 0 && tsize >= nblocks)
            break;
    }

    /* Close a final, partially filled block */
    if (op != NIL(FILE *) && fclose(op) != 0)
    {
        err_sysrem2("failed to close file", file);
        return(-1);
    }
    /* fread() returns 0 for both end of file and error; tell them apart */
    if (ferror(ifp))
    {
        fprintf(stderr, "read error on input\n");
        return(-1);
    }
    return 0;
}

/*
** Convert the leading decimal digits of str into a size_t.
**
** On success the value is stored in *value, *end is set to the first
** character after the digits, and 0 is returned.  Returns -1, leaving
** *value and *end untouched, if str does not start with a digit or the
** number does not fit in a size_t.
*/
static int str_to_size(const char *str, size_t *value, const char **end)
{
    size_t result = 0;
    const char *s = str;

    if (*s < '0' || *s > '9')
        return -1;
    for ( ; *s >= '0' && *s <= '9'; s++)
    {
        size_t digit = (size_t)(*s - '0');
        if (result > ((size_t)-1 - digit) / 10)
            return -1;
        result = result * 10 + digit;
    }
    *value = result;
    *end = s;
    return 0;
}

/*
** Multiply a by b, storing the product in *product.
** Returns 0 on success, or -1 (leaving *product untouched) if the
** product does not fit in a size_t.
*/
static int mul_size(size_t a, size_t b, size_t *product)
{
    if (b != 0 && a > (size_t)-1 / b)
        return -1;
    *product = a * b;
    return 0;
}

/*
** Parse the command line, convert the block counts to byte counts and
** hand the remaining arguments to filter_stdout() with bsplit() as the
** per-file function.
**
** Options:
**   -b size    block size; an optional one-letter suffix selects the
**              unit: c = 1, b = 512, k = 1024 (default), m, g
**   -p prefix  prefix for the output file names
**   -s blocks  number of blocks to skip at the start of each input
**   -n blocks  number of blocks to write
**   -V         report the version
**
** Returns the value returned by filter_stdout().
**
** NOTE(review): the code is written on the assumption that err_error()
** does not return; confirm against stderr.h.
*/
int main(int argc, char **argv)
{
    int opt;
    size_t multiplier = KILOBYTE;
    const char *p;
    char  c;
    int   rc;

    opterr = 0;
    err_setarg0(argv[0]);

    while ((opt = getopt(argc, argv, "s:n:p:b:V")) != -1)
    {
        switch (opt)
        {
        case 'p':
            prefix = optarg;
            if (strlen(prefix) > MAXFILENAMELEN - sizeof("000"))
                err_error("file name prefix (%s) is too long (max %d)", prefix,
                          (int)(MAXFILENAMELEN-sizeof("000")));
            break;
        case 's':
            /* Reject signs, garbage and overflow instead of guessing */
            if (str_to_size(optarg, &skipblocks, &p) != 0 || *p != '\0')
                err_error("invalid number of blocks to skip %s\n", optarg);
            break;
        case 'n':
            if (str_to_size(optarg, &nblocks, &p) != 0 || *p != '\0')
                err_error("invalid number of blocks %s\n", optarg);
            break;
        case 'b':
            /* A suffix given with an earlier -b must not carry over */
            multiplier = KILOBYTE;
            if (str_to_size(optarg, &blocksize, &p) != 0)
                err_error("invalid block size %s\n", optarg);
            else if (*p != '\0')
            {
                c = tolower((unsigned char)*p);
                if (c == 'c')
                    multiplier = 1;
                else if (c == 'b')
                    multiplier = KILOBYTE/2;
                else if (c == 'k')
                    multiplier = KILOBYTE;
                else if (c == 'm')
                    multiplier = MEGABYTE;
                else if (c == 'g')
                    multiplier = GIGABYTE;
                else
                    err_error("unknown size multiplier suffix %s\n", p);
                if (p[1] != '\0')
                    err_error("unknown size multiplier suffix %s\n", p);
            }
            break;
        case 'V':
            err_version("BSPLIT", &"@(#)$Revision: 1.11 $ ($Date: 2008/08/09 05:54:55 $)"[4]);
            break;
        default:
            err_usage("[-b blocksize][-p prefix][-s skipblocks][-n blocks][file [...]]");
            break;
        }
    }

    /* A zero block size would silently produce no output at all */
    if (blocksize == 0)
        err_error("block size must be greater than zero\n");

    /* Convert sizes to bytes, refusing values that overflow size_t */
    if (mul_size(blocksize, multiplier, &blocksize) != 0 ||
        mul_size(skipblocks, blocksize, &skipblocks) != 0)
        err_error("block size or skip count is too large\n");
    if (nblocks > 0)
    {
        /* nblocks becomes an absolute byte offset that includes the skip */
        if (mul_size(nblocks, blocksize, &nblocks) != 0 ||
            nblocks > (size_t)-1 - skipblocks)
            err_error("number of blocks is too large\n");
        else
            nblocks += skipblocks;
    }

    rc = filter_stdout(argc, argv, optind, bsplit);
    return(rc);
}

The header stderr.h declares a series of error reporting routines; I use it in most of my programs. The header filter.h declares the function filter_stdout() which steps through an argument list, opening the files for reading and calling a function — in this case bsplit() — to process each file in turn. It handles 'no arguments means read standard input' etc automatically. (Contact me for the code — see my profile.)

Note that the multiplier c means 'characters', b means 512-byte blocks, and k, m, and g mean KiB, MiB and GiB respectively.

这篇关于文件分块和缓冲?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆