在大文本文件(Delphi XE5)中查找和替换文本 [英] Find and Replace Text in a Large TextFile (Delphi XE5)

查看:256
本文介绍了在大文本文件(Delphi XE5)中查找和替换文本的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在尝试查找和替换文本文件中的文本。我过去可以使用以下方法来做到这一点:

I am trying to find and replace text in a text file. I have been able to do this in the past with methods like:

procedure SmallFileFindAndReplace(FileName, Find, ReplaceWith: string);
begin
  with TStringList.Create do
    begin
    LoadFromFile(FileName);
    Text := StringReplace(Text, Find, ReplaceWith, [rfReplaceAll, rfIgnoreCase]);
    SaveToFile(FileName);
    Free;
  end;
end;

但是,当文件较小时,上述方法可以正常工作;当文件大小约为170 Mb时,以上代码将导致以下错误:
EOutOfMemory,并显示消息内存不足

The above works fine when a file is relatively small, however; when the the file size is something like 170 Mb the above code will cause the following error: EOutOfMemory with message 'Out of memory'

我已经成功尝试了以下操作,但是运行时间很长:

I have tried the following with success, however it takes a long time to run:

procedure Tfrm_Main.button_MakeReplacementClick(Sender: TObject);
var
  fs : TFileStream;
  s  : AnsiString;
  //s  : string;
begin
  fs := TFileStream.Create(edit_SQLFile.Text, fmOpenread or fmShareDenyNone);
  try
    SetLength(S, fs.Size);
    fs.ReadBuffer(S[1], fs.Size);
  finally
    fs.Free;
  end;
  s := StringReplace(s, edit_Find.Text, edit_Replace.Text, [rfReplaceAll, rfIgnoreCase]);
  fs := TFileStream.Create(edit_SQLFile.Text, fmCreate);
  try
    fs.WriteBuffer(S[1], Length(S));
  finally
    fs.Free;
  end;
end;

我是流的新手,正在使用缓冲区。

I am new to "Streams" and working with buffers.

还有更好的方法吗?

谢谢。

推荐答案

在第一个代码示例中有两个错误,在第二个示例中有三个-

You have two mistakes in first code example and three - in second example:


  1. 请勿将整个大文件加载到内存中,尤其是在32位应用程序中。如果文件大小超过〜1 Gb,您总是会得到内存不足

  2. StringReplace 对于大字符串,由于重复操作会变慢内存重新分配

  3. 在第二个代码中,您没有在文件中使用文本编码,因此(对于Windows)您的代码认为该文件具有UCS2编码(每个字符两个字节)。但是如果文件编码是Ansi(每个字符一个字节)或UTF8(char的可变大小),您会得到什么呢?

  1. Do not load whole large file in memory, especially in 32bit application. If file size more than ~1 Gb, you always get "Out of memory"
  2. StringReplace slows with large strings, because of repeated memory reallocation
  3. In second code you don`t use text encoding in file, so (for Windows) your code "think" that file has UCS2 encoding (two bytes per character). But what you get, if file encoding is Ansi (one byte per character) or UTF8 (variable size of char)?

因此,如要正确查找和替换,必须使用文件编码和文件的读/写部分,如LU RD所述:

Thus, for correct find&replace you must use file encoding and read/write parts of file, as LU RD said:

interface

uses
  System.Classes,
  System.SysUtils;

type
  TFileSearchReplace = class(TObject)
  private
    FSourceFile: TFileStream;
    FtmpFile: TFileStream;
    FEncoding: TEncoding;
  public
    constructor Create(const AFileName: string);
    destructor Destroy; override;

    procedure Replace(const AFrom, ATo: string; ReplaceFlags: TReplaceFlags);
  end;

implementation

uses
  System.IOUtils,
  System.StrUtils;

function Max(const A, B: Integer): Integer;
begin
  if A > B then
    Result := A
  else
    Result := B;
end;

{ TFileSearchReplace }

constructor TFileSearchReplace.Create(const AFileName: string);
begin
  inherited Create;

  FSourceFile := TFileStream.Create(AFileName, fmOpenReadWrite);
  FtmpFile := TFileStream.Create(ChangeFileExt(AFileName, '.tmp'), fmCreate);
end;

destructor TFileSearchReplace.Destroy;
var
  tmpFileName: string;
begin
  if Assigned(FtmpFile) then
    tmpFileName := FtmpFile.FileName;

  FreeAndNil(FtmpFile);
  FreeAndNil(FSourceFile);

  TFile.Delete(tmpFileName);

  inherited;
end;

procedure TFileSearchReplace.Replace(const AFrom, ATo: string;
  ReplaceFlags: TReplaceFlags);
  procedure CopyPreamble;
  var
    PreambleSize: Integer;
    PreambleBuf: TBytes;
  begin
    // Copy Encoding preamble
    SetLength(PreambleBuf, 100);
    FSourceFile.Read(PreambleBuf, Length(PreambleBuf));
    FSourceFile.Seek(0, soBeginning);

    PreambleSize := TEncoding.GetBufferEncoding(PreambleBuf, FEncoding);
    if PreambleSize <> 0 then
      FtmpFile.CopyFrom(FSourceFile, PreambleSize);
  end;

  function GetLastIndex(const Str, SubStr: string): Integer;
  var
    i: Integer;
    tmpSubStr, tmpStr: string;
  begin
    if not(rfIgnoreCase in ReplaceFlags) then
      begin
        i := Pos(SubStr, Str);
        Result := i;
        while i > 0 do
          begin
            i := PosEx(SubStr, Str, i + 1);
            if i > 0 then
              Result := i;
          end;
        if Result > 0 then
          Inc(Result, Length(SubStr) - 1);
      end
    else
      begin
        tmpStr := UpperCase(Str);
        tmpSubStr := UpperCase(SubStr);
        i := Pos(tmpSubStr, tmpStr);
        Result := i;
        while i > 0 do
          begin
            i := PosEx(tmpSubStr, tmpStr, i + 1);
            if i > 0 then
              Result := i;
          end;
        if Result > 0 then
          Inc(Result, Length(tmpSubStr) - 1);
      end;
  end;

var
  SourceSize: int64;

  procedure ParseBuffer(Buf: TBytes; var IsReplaced: Boolean);
  var
    i: Integer;
    ReadedBufLen: Integer;
    BufStr: string;
    DestBytes: TBytes;
    LastIndex: Integer;
  begin
    if IsReplaced and (not(rfReplaceAll in ReplaceFlags)) then
      begin
        FtmpFile.Write(Buf, Length(Buf));
        Exit;
      end;

    // 1. Get chars from buffer
    ReadedBufLen := 0;
    for i := Length(Buf) downto 0 do
      if FEncoding.GetCharCount(Buf, 0, i) <> 0 then
        begin
          ReadedBufLen := i;
          Break;
        end;
    if ReadedBufLen = 0 then
      raise EEncodingError.Create('Cant convert bytes to str');

    FSourceFile.Seek(ReadedBufLen - Length(Buf), soCurrent);

    BufStr := FEncoding.GetString(Buf, 0, ReadedBufLen);
    if rfIgnoreCase in ReplaceFlags then
      IsReplaced := ContainsText(BufStr, AFrom)
    else
      IsReplaced := ContainsStr(BufStr, AFrom);

    if IsReplaced then
      begin
        LastIndex := GetLastIndex(BufStr, AFrom);
        LastIndex := Max(LastIndex, Length(BufStr) - Length(AFrom) + 1);
      end
    else
      LastIndex := Length(BufStr);

    SetLength(BufStr, LastIndex);
    FSourceFile.Seek(FEncoding.GetByteCount(BufStr) - ReadedBufLen, soCurrent);

    BufStr := StringReplace(BufStr, AFrom, ATo, ReplaceFlags);
    DestBytes := FEncoding.GetBytes(BufStr);
    FtmpFile.Write(DestBytes, Length(DestBytes));
  end;

var
  Buf: TBytes;
  BufLen: Integer;
  bReplaced: Boolean;
begin
  FSourceFile.Seek(0, soBeginning);
  FtmpFile.Size := 0;
  CopyPreamble;

  SourceSize := FSourceFile.Size;
  BufLen := Max(FEncoding.GetByteCount(AFrom) * 5, 2048);
  BufLen := Max(FEncoding.GetByteCount(ATo) * 5, BufLen);
  SetLength(Buf, BufLen);

  bReplaced := False;
  while FSourceFile.Position < SourceSize do
    begin
      BufLen := FSourceFile.Read(Buf, Length(Buf));
      SetLength(Buf, BufLen);
      ParseBuffer(Buf, bReplaced);
    end;

  FSourceFile.Size := 0;
  FSourceFile.CopyFrom(FtmpFile, 0);
end;

使用方法:

procedure TForm2.btn1Click(Sender: TObject);
var
  Replacer: TFileSearchReplace;
  StartTime: TDateTime;
begin
  StartTime:=Now;
  Replacer:=TFileSearchReplace.Create('c:\Temp\123.txt');
  try
    Replacer.Replace('some текст', 'some', [rfReplaceAll, rfIgnoreCase]);
  finally
    Replacer.Free;
  end;

  Caption:=FormatDateTime('nn:ss.zzz', Now - StartTime);
end;

这篇关于在大文本文件(Delphi XE5)中查找和替换文本的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆