TDictionary保存到文件 [英] TDictionary save to file

查看:295
本文介绍了TDictionary保存到文件的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我有很多文件(大约16万个),我需要了解关于文件中各个单词的位置的信息(全文)。所以我使用这样的词典:

$ pre $ t $ t $ t $ t $ t $ t

现在我知道WORD1在FILE1,FILE3和FILE100中,并且在每个文件< 3,5>,< 2,8,35>等
我可以填充它,我可以使用它 - 它非常快。但我不知道如何有效地存储字典文件。



编辑:有效 - 我的意思是快速和小尺寸的文件



 类型
TFilePos = TArray< Integer>;
TFileDict = TDictionary< string,TFilePos>;
TWordDict = class(TDictionary< string,TFileDict>)
private
procedure LoadFromStream(stream:TStream);
程序SaveToStream(stream:TStream);
public
过程LoadFromZip(const AFileName:string);
过程LoadFromFile(const AFileName:string);
程序SaveToZip(const AFileName:string);
过程SaveToFile(const AFileName:string);
end;

过程TWordDict.LoadFromZip(const AFileName:string);
var
stream:TStream;
localHeader:TZipHeader;
zipFile:TZipFile;
begin
zipFile:= TZipFile.Create;
尝试
zipFIle.Open(AFIleName,zmRead);
zipFile.Read('worddict',stream,localHeader);
尝试
LoadFromStream(stream);
终于
stream.Free;
end;
zipFile.Close;
终于
zipFile.Free;
end;
end;

过程TWordDict.SaveToZip(const AFileName:string);
var
stream:TStream;
zipFile:TZipFile;
begin
stream:= TMemoryStream.Create;
尝试
SaveToStream(流);
stream.Position:= 0;
zipFile:= TZipFile.Create;
尝试
zipFile.Open(AFileName,zmWrite);
zipFile.Add(stream,'worddict');
zipFile.Close;
终于
zipFile.Free;
end;
终于
stream.Free;
end;
end;

过程TWordDict.SaveToStream(stream:TStream);
var
posi:System.Generics.Collections.TPair< string,TFilePos> ;;
我:整数;
pair:System.Generics.Collections.TPair< string,TFileDict>;
作家:TWriter;
begin
writer:= TWriter.Create(stream,4096);
尝试
writer.WriteListBegin;
对于自己做
begin
writer.WriteString(pair.Key);
writer.WriteListBegin;
for posi in pair.Value do
begin
writer.WriteString(posi.Key);
writer.WriteInteger(Length(posi.Value));
for i in posi.Value do
begin
writer.WriteInteger(i);
end;
end;
writer.WriteListEnd;
end;
writer.WriteListEnd;
终于
writer.Free;
end;
end;

过程TWordDict.LoadFromStream(stream:TStream);
var
sFiles:TFileDict;
aPosi:TFilePos;
size:Integer;
我:整数;
sWord:string;
读者:TReader;
sFile:string;
开始
清除;
reader:= TReader.Create(stream,1024);
尝试
reader.ReadListBegin;
而不是reader.EndOfList do
begin
sWord:= reader.ReadString;
sFiles:= TFileDict.Create;
reader.ReadListBegin;
而不是reader.EndOfList do
begin
sFile:= reader.ReadString;
size:= reader.ReadInteger;
SetLength(aPosi,size);
for I:= 0 to size - 1 do
begin
aPosi [I]:= reader.ReadInteger;
end;
sFiles.Add(sFile,Copy(aPosi));
end;
reader.ReadListEnd;
Add(sWord,sFiles);
end;
reader.ReadListEnd;
终于
reader.Free;
end;
end;

过程TWordDict.LoadFromFile(const AFileName:string);
var
stream:TStream;
begin
stream:= TFileStream.Create(AFileName,fmOpenRead);
尝试
LoadFromStream(stream);
终于
stream.Free;
end;
end;

过程TWordDict.SaveToFile(const AFileName:string);
var
stream:TStream;
begin
stream:= TFileStream.Create(AFileName,fmCreate);
尝试
SaveToStream(流);
终于
stream.Free;
end;
end;


I have a lot of files(about 160 000) and I need to have information about position of individual words in files(fulltext). So I used Dictionary like this:

WordDict : TDictionary<string, TDictionary<string, TIntegerDynArray>>;

Now I know that WORD1 is in FILE1,FILE3 and FILE100 and positions in each file <1,3,5>,<2,8,35> etc. I can fill it, I can use it - its very fast. But I don't know how effectively store dictionary to file.

EDIT: effectively - I mean quickly and small size of file

解决方案

You can use the streaming system of Delphi to write a proprietary stream format. If size matters (contrary to speed) you can zip the stream. Here is some code:

type
  TFilePos = TArray<Integer>;
  TFileDict = TDictionary<string, TFilePos>;
  TWordDict = class (TDictionary<string, TFileDict>)
  private
    procedure LoadFromStream(stream: TStream);
    procedure SaveToStream(stream: TStream);
  public
    procedure LoadFromZip(const AFileName: string);
    procedure LoadFromFile(const AFileName: string);
    procedure SaveToZip(const AFileName: string);
    procedure SaveToFile(const AFileName: string);
  end;

procedure TWordDict.LoadFromZip(const AFileName: string);
var
  stream: TStream;
  localHeader: TZipHeader;
  zipFile: TZipFile;
begin
  zipFile := TZipFile.Create;
  try
    zipFIle.Open(AFIleName, zmRead);
    zipFile.Read('worddict', stream, localHeader);
    try
      LoadFromStream(stream);
    finally
      stream.Free;
    end;
    zipFile.Close;
  finally
    zipFile.Free;
  end;
end;

procedure TWordDict.SaveToZip(const AFileName: string);
var
  stream: TStream;
  zipFile: TZipFile;
begin
  stream := TMemoryStream.Create;
  try
    SaveToStream(stream);
    stream.Position := 0;
    zipFile := TZipFile.Create;
    try
      zipFile.Open(AFileName, zmWrite);
      zipFile.Add(stream, 'worddict');
      zipFile.Close;
    finally
      zipFile.Free;
    end;
  finally
    stream.Free;
  end;
end;

procedure TWordDict.SaveToStream(stream: TStream);
var
  posi: System.Generics.Collections.TPair<string, TFilePos>;
  i: Integer;
  pair: System.Generics.Collections.TPair<string, TFileDict>;
  writer: TWriter;
begin
  writer := TWriter.Create(stream, 4096);
  try
    writer.WriteListBegin;
    for pair in Self do
    begin
      writer.WriteString(pair.Key);
      writer.WriteListBegin;
      for posi in pair.Value do
      begin
        writer.WriteString(posi.Key);
        writer.WriteInteger(Length(posi.Value));
        for i in posi.Value do
        begin
          writer.WriteInteger(i);
        end;
      end;
      writer.WriteListEnd;
    end;
    writer.WriteListEnd;
  finally
    writer.Free;
  end;
end;

procedure TWordDict.LoadFromStream(stream: TStream);
var
  sFiles: TFileDict;
  aPosi: TFilePos;
  size: Integer;
  i: Integer;
  sWord: string;
  reader: TReader;
  sFile: string;
begin
  Clear;
  reader := TReader.Create(stream, 1024);
  try
    reader.ReadListBegin;
    while not reader.EndOfList do
    begin
      sWord := reader.ReadString;
      sFiles := TFileDict.Create;
      reader.ReadListBegin;
      while not reader.EndOfList do
      begin
        sFile := reader.ReadString;
        size := reader.ReadInteger;
        SetLength(aPosi, size);
        for I := 0 to size - 1 do
        begin
          aPosi[I] := reader.ReadInteger;
        end;
        sFiles.Add(sFile, Copy(aPosi));
      end;
      reader.ReadListEnd;
      Add(sWord, sFiles);
    end;
    reader.ReadListEnd;
  finally
    reader.Free;
  end;
end;

procedure TWordDict.LoadFromFile(const AFileName: string);
var
  stream: TStream;
begin
  stream := TFileStream.Create(AFileName, fmOpenRead);
  try
    LoadFromStream(stream);
  finally
    stream.Free;
  end;
end;

procedure TWordDict.SaveToFile(const AFileName: string);
var
  stream: TStream;
begin
  stream := TFileStream.Create(AFileName, fmCreate);
  try
    SaveToStream(stream);
  finally
    stream.Free;
  end;
end;

这篇关于TDictionary保存到文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆