我有很多文件(大约 160 000 个),需要记录每个单词在各个文件中出现的位置(用于全文检索)。因此我使用了如下的字典结构:
WordDict : TDictionary<string, TDictionary<string, TIntegerDynArray>>;
这样我就知道 WORD1 出现在 FILE1、FILE3 和 FILE100 中,并且知道它在每个文件中出现的各个位置(每个文件对应一个整数位置数组)。
我可以填满它,我可以使用它——速度非常快。但我不知道如何有效地将字典存储到文件中。
编辑:所谓"有效",我的意思是读写速度快,并且生成的文件体积小。
您可以使用 Delphi 的流(stream)机制来编写自定义的二进制流格式。如果文件大小比速度更重要,还可以对流进行压缩。下面是一些示例代码:
type
// Dynamic array of integers; per the surrounding description, the positions
// of one word inside one file.
TFilePos = TArray<Integer>;
// Maps a file name to the position array for that file.
TFileDict = TDictionary<string, TFilePos>;
// Maps a word to its per-file dictionary and adds persistence helpers.
// NOTE(review): this is a plain TDictionary, so the TFileDict values are not
// owned by the container - confirm who is responsible for freeing them.
TWordDict = class (TDictionary<string, TFileDict>)
private
// Stream-level (de)serialization shared by the file and zip variants below.
procedure LoadFromStream(stream: TStream);
procedure SaveToStream(stream: TStream);
public
// Load/save through the 'worddict' entry of a zip archive.
procedure LoadFromZip(const AFileName: string);
// Load/save through a plain file.
procedure LoadFromFile(const AFileName: string);
procedure SaveToZip(const AFileName: string);
procedure SaveToFile(const AFileName: string);
end;
// Replaces the dictionary contents with the data stored in the 'worddict'
// entry of the zip archive AFileName.
procedure TWordDict.LoadFromZip(const AFileName: string);
var
  archive: TZipFile;
  entryData: TStream;
  entryHeader: TZipHeader;
begin
  archive := TZipFile.Create;
  try
    archive.Open(AFileName, zmRead);
    // Read supplies the entry's stream through its out parameter; the stream
    // is released here once loading has finished (or failed).
    archive.Read('worddict', entryData, entryHeader);
    try
      LoadFromStream(entryData);
    finally
      entryData.Free;
    end;
    archive.Close;
  finally
    archive.Free;
  end;
end;
// Serializes the dictionary into an in-memory buffer and stores that buffer
// as the 'worddict' entry of the zip archive AFileName (opened with zmWrite).
procedure TWordDict.SaveToZip(const AFileName: string);
var
  buffer: TStream;
  archive: TZipFile;
begin
  buffer := TMemoryStream.Create;
  try
    SaveToStream(buffer);
    // Rewind so the archive picks the data up from the first byte.
    buffer.Position := 0;

    archive := TZipFile.Create;
    try
      archive.Open(AFileName, zmWrite);
      archive.Add(buffer, 'worddict');
      archive.Close;
    finally
      archive.Free;
    end;
  finally
    buffer.Free;
  end;
end;
// Writes the whole dictionary to `stream` through a TWriter.
//
// Layout produced:
//   ListBegin
//     { word: string
//       ListBegin
//         { file: string, count: integer, count * position: integer }
//       ListEnd }
//   ListEnd
procedure TWordDict.SaveToStream(stream: TStream);
var
  wordEntry: System.Generics.Collections.TPair<string, TFileDict>;
  fileEntry: System.Generics.Collections.TPair<string, TFilePos>;
  positions: TFilePos;
  idx: Integer;
  sink: TWriter;
begin
  sink := TWriter.Create(stream, 4096);
  try
    sink.WriteListBegin;
    for wordEntry in Self do
    begin
      sink.WriteString(wordEntry.Key);
      sink.WriteListBegin;
      for fileEntry in wordEntry.Value do
      begin
        positions := fileEntry.Value;
        sink.WriteString(fileEntry.Key);
        // The element count precedes the elements so the loader can size
        // its array before reading them.
        sink.WriteInteger(Length(positions));
        for idx := 0 to High(positions) do
          sink.WriteInteger(positions[idx]);
      end;
      sink.WriteListEnd;
    end;
    sink.WriteListEnd;
  finally
    sink.Free;
  end;
end;
// Replaces the dictionary contents with the data read from `stream`.
//
// Expected layout (as written by SaveToStream):
//   ListBegin
//     { word: string
//       ListBegin
//         { file: string, count: integer, count * position: integer }
//       ListEnd }
//   ListEnd
//
// Any exception raised by the reader or by Add (e.g. a duplicate key in a
// damaged file) is propagated to the caller; the per-word dictionary being
// built at that moment is freed first so it does not leak.
procedure TWordDict.LoadFromStream(stream: TStream);
var
  reader: TReader;
  sWord: string;
  sFile: string;
  sFiles: TFileDict;
  aPosi: TFilePos;
  size: Integer;
  i: Integer;
begin
  Clear;
  reader := TReader.Create(stream, 1024);
  try
    reader.ReadListBegin;
    while not reader.EndOfList do
    begin
      sWord := reader.ReadString;
      sFiles := TFileDict.Create;
      try
        reader.ReadListBegin;
        while not reader.EndOfList do
        begin
          sFile := reader.ReadString;
          size := reader.ReadInteger;
          // Drop the reference to the previous entry's array so SetLength
          // allocates a fresh one; this makes an extra Copy unnecessary.
          aPosi := nil;
          SetLength(aPosi, size);
          for i := 0 to size - 1 do
            aPosi[i] := reader.ReadInteger;
          sFiles.Add(sFile, aPosi);
        end;
        reader.ReadListEnd;
        Add(sWord, sFiles);
      except
        // sFiles has not been handed over to the outer dictionary yet, so
        // nothing else references it: release it before re-raising.
        sFiles.Free;
        raise;
      end;
    end;
    reader.ReadListEnd;
  finally
    reader.Free;
  end;
end;
// Replaces the dictionary contents with the data stored in the plain
// (uncompressed) file AFileName.
//
// The file is opened read-only with fmShareDenyWrite so that other readers
// may have it open at the same time; fmOpenRead on its own requests no
// sharing at all on Windows, which makes a read-only load fail needlessly
// when another process is reading the same file.
procedure TWordDict.LoadFromFile(const AFileName: string);
var
  stream: TStream;
begin
  stream := TFileStream.Create(AFileName, fmOpenRead or fmShareDenyWrite);
  try
    LoadFromStream(stream);
  finally
    stream.Free;
  end;
end;
// Writes the dictionary, uncompressed, to the file AFileName, which is
// opened with fmCreate.
procedure TWordDict.SaveToFile(const AFileName: string);
var
  target: TStream;
begin
  target := TFileStream.Create(AFileName, fmCreate);
  try
    SaveToStream(target);
  finally
    // Free the stream whether or not serialization succeeded.
    target.Free;
  end;
end;
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)