您可以尝试改善此列表操作的性能吗? [英] Can you try improving the performance of this list operation?
问题描述
您可以尝试改善此列表操作的性能吗?您将轻松了解我要实现的目标.
但基本上有一个文件列表和与该文件关联的块列表.块可能是重复的(块的主键是FileId和Start).因此,我想要另一个仅包含块的文件列表,这些文件最后添加到块列表中.
Can you try improving the performance of this list operation? You will easily understand what I am trying to achieve.
But basically there is a list of files and a list of chunks associated with each file. Chunks may be duplicated (the primary key for a chunk is the pair FileId and Start). So I want another list of files containing only the chunks that were added last to the chunk list.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using System.Collections.ObjectModel;
using System.IO;
namespace ListPerformance
{
class Program
{
    /// <summary>
    /// Builds a random set of files and (deliberately duplicated) chunks, then
    /// collects, per FileId, only the last-added chunk for each (FileId, Start)
    /// key, timing each phase and appending the result to log.txt.
    /// </summary>
    static void Main(string[] args)
    {
        Random rnd = new Random();
        int fileCount = 1000; int chunkCount = 1000;
        Stopwatch w = new Stopwatch(); w.Start();
        List<HVFile> files = new List<HVFile>();
        List<HVChunk> chunks = new List<HVChunk>();
        for (int i = 0; i < fileCount; i++)
        {
            int fileId = rnd.Next();
            files.Add(new HVFile() { FileId = fileId });
            // Up to chunkCount iterations of 4 chunks each; Start is drawn from
            // a small range, so duplicate (FileId, Start) pairs are common.
            int chunksToAdd = rnd.Next(chunkCount);
            for (int j = 0; j < chunksToAdd; j++)
            {
                chunks.Add(new HVChunk() { FileId = fileId, Start = rnd.Next(chunksToAdd) });
                chunks.Add(new HVChunk() { FileId = fileId, Start = rnd.Next(chunksToAdd) });
                chunks.Add(new HVChunk() { FileId = fileId, Start = rnd.Next(chunksToAdd) });
                chunks.Add(new HVChunk() { FileId = fileId, Start = rnd.Next(chunksToAdd) });
            }
        }
        Console.WriteLine("Total file Count={0}, Total chunks Count={1}", files.Count, chunks.Count);
        Console.WriteLine("List creation took: {0}", w.Elapsed);
        Console.WriteLine("======== Generating New Dictionary ============ \n"); w.Restart();
        // FIX: the generic type arguments were mangled in the original post
        // ("Dictionary<int,>>" / "List<hvchunk>"); restored to the intended types.
        Dictionary<int, List<HVChunk>> fileChunks = new Dictionary<int, List<HVChunk>>();
        for (int i = 0; i < chunks.Count; i++)
        {
            HVChunk c = chunks[i];
            // FIX: TryGetValue replaces ContainsKey + indexer (one hash lookup).
            List<HVChunk> perFile;
            if (!fileChunks.TryGetValue(c.FileId, out perFile))
            {
                perFile = new List<HVChunk>();
                fileChunks.Add(c.FileId, perFile);
            }
            // Linear duplicate scan on every insert: this is the O(n^2) hot spot
            // the question asks about. When a duplicate key exists, the newer
            // chunk replaces the older one ("last added wins").
            HVChunk existing = perFile.FirstOrDefault(x => x.FileId == c.FileId && x.Start == c.Start);
            if (existing != null)
                perFile.Remove(existing);
            perFile.Add(c);
        }
        //foreach (var kv in fileChunks)
        //    Console.WriteLine("FileId={0}, chunkCount={1}", kv.Key, kv.Value.Count);
        Console.WriteLine("Total files in the dictionary = {0}", fileChunks.Count);
        Console.WriteLine("Time taken to Create Dictionary = {0} \n", w.Elapsed); w.Restart();
        Console.WriteLine("======== Operation Complete ============");
        //Console.Read();
        // FIX: File.ReadAllText already returns a string; dropped the redundant ToString().
        string oldLog = File.ReadAllText("log.txt");
        string log = oldLog + Environment.NewLine + "Total files: " + fileChunks.Count + " ; Total Chunks: " + chunks.Count + " ; Total time taken: " + w.Elapsed;
        File.WriteAllText("log.txt", log);
    }
}
/// <summary>A file entry; identified solely by its numeric id.</summary>
public class HVFile
{
    /// <summary>Identifier that links HVChunk records to this file.</summary>
    public int FileId { get; set; }
}
/// <summary>
/// A chunk of a file. Per the problem statement, the logical primary key is the
/// pair (FileId, Start), so value equality is defined over exactly those two
/// properties. Implementing IEquatable lets Distinct(), HashSet and Dictionary
/// keys de-duplicate chunks directly instead of a per-insert linear scan.
/// </summary>
public class HVChunk : IEquatable<HVChunk>
{
    public int FileId { get; set; }
    public int Start { get; set; }

    public bool Equals(HVChunk other)
    {
        if (Object.ReferenceEquals(other, null)) return false;
        if (Object.ReferenceEquals(this, other)) return true;
        return FileId == other.FileId && Start == other.Start;
    }

    // Keep Object.Equals consistent with the strongly-typed overload.
    public override bool Equals(object obj)
    {
        return Equals(obj as HVChunk);
    }

    public override int GetHashCode()
    {
        // Multiply-then-xor mix so (a, b) and (b, a) usually hash differently.
        unchecked
        {
            return (FileId * 397) ^ Start;
        }
    }
}
}
推荐答案
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using System.Collections.ObjectModel;
using System.IO;
namespace ListPerformance
{
class Program
{
    /// <summary>
    /// Benchmarks two ways of building a FileId -> chunks dictionary with
    /// duplicate (FileId, Start) chunks removed:
    ///   Method 1 - bucket all chunks by FileId, then Distinct() once per
    ///              bucket (uses HVChunk value equality) - roughly O(n).
    ///   Method 2 - FirstOrDefault scan on every insert - O(n^2); this is the
    ///              original slow approach, kept unmodified for comparison.
    /// Finally validates that both dictionaries have the same shape (key count
    /// and per-key list sizes only).
    /// </summary>
    static void Main(string[] args)
    {
        Random rnd = new Random();
        int fileCount = 1000; int chunkCount = 1000;
        Stopwatch w = new Stopwatch(); w.Start();
        List<HVFile> files = new List<HVFile>();
        List<HVChunk> chunks = new List<HVChunk>();
        for (int i = 0; i < fileCount; i++)
        {
            int fileId = rnd.Next();
            files.Add(new HVFile() { FileId = fileId });
            int chunksToAdd = rnd.Next(chunkCount);
            for (int j = 0; j < chunksToAdd; j++)
            {
                chunks.Add(new HVChunk() { FileId = fileId, Start = rnd.Next(chunksToAdd) });
                chunks.Add(new HVChunk() { FileId = fileId, Start = rnd.Next(chunksToAdd) });
                chunks.Add(new HVChunk() { FileId = fileId, Start = rnd.Next(chunksToAdd) });
                chunks.Add(new HVChunk() { FileId = fileId, Start = rnd.Next(chunksToAdd) });
            }
        }
        Console.WriteLine("Total file Count={0}, Total chunks Count={1}", files.Count, chunks.Count);
        Console.WriteLine("List creation took: {0}", w.Elapsed);
        Console.WriteLine("======== Generating New Dictionary ============ \n"); w.Restart();
        // Method 1: group first, de-duplicate once per bucket via Distinct().
        // NOTE(review): Distinct keeps the first occurrence of each key while
        // Method 2 keeps the last; the size-only comparison below cannot see
        // that difference.
        Dictionary<int, List<HVChunk>> fileChunks = new Dictionary<int, List<HVChunk>>();
        for (int i = 0; i < chunks.Count; i++)
        {
            // FIX: TryGetValue replaces ContainsKey + indexer (one lookup, not three).
            List<HVChunk> bucket;
            if (!fileChunks.TryGetValue(chunks[i].FileId, out bucket))
            {
                bucket = new List<HVChunk>();
                fileChunks.Add(chunks[i].FileId, bucket);
            }
            bucket.Add(chunks[i]);
        }
        List<int> keys = new List<int>(fileChunks.Keys);
        foreach (int pair in keys)
            fileChunks[pair] = fileChunks[pair].Distinct().ToList();
        Console.WriteLine("Total files in the dictionary = {0}", fileChunks.Count);
        Console.WriteLine("Time taken to Create Dictionary = {0} \n", w.Elapsed);
        Console.WriteLine("======== Operation 1 Complete ============");
        w.Restart();
        // Method 2: the original O(n^2) approach, intentionally left unchanged
        // so the timing comparison stays honest.
        Dictionary<int, List<HVChunk>> fileChunks1 = new Dictionary<int, List<HVChunk>>();
        for (int i = 0; i < chunks.Count; i++)
        {
            HVChunk c = chunks[i];
            if (!fileChunks1.ContainsKey(c.FileId))
                fileChunks1.Add(c.FileId, new List<HVChunk>());
            HVChunk newChunk = fileChunks1[c.FileId].FirstOrDefault(x => x.FileId == c.FileId && x.Start == c.Start);
            if (newChunk != null)
            {
                fileChunks1[c.FileId].Remove(newChunk);
                fileChunks1[c.FileId].Add(c);
            }
            else
            {
                fileChunks1[c.FileId].Add(chunks[i]);
            }
        }
        Console.WriteLine("Total files in the dictionary = {0}", fileChunks1.Count);
        Console.WriteLine("Time taken to Create Dictionary = {0} \n", w.Elapsed); w.Restart();
        Console.WriteLine("======== Operation 2 Complete ============");
        // Validation: compare key counts, then per-key list sizes.
        bool dictionariesMatch = fileChunks1.Keys.Count == fileChunks.Keys.Count;
        if (dictionariesMatch)
        {
            foreach (int key in fileChunks.Keys)
            {
                if (fileChunks1[key].Count != fileChunks[key].Count)
                {
                    dictionariesMatch = false;
                    break;
                }
            }
        }
        // FIX: the original printed "EQUAL" unconditionally - even immediately
        // after printing "NOT EQUAL", and whenever the key counts differed no
        // verdict was reached at all. Emit exactly one verdict.
        if (dictionariesMatch)
            Console.WriteLine("======== BOTH DICTIONARY ARE EQUAL ============");
        else
            Console.WriteLine("======== BOTH DICTIONARY ARE NOT EQUAL ============");
    }
}
/// <summary>Represents a single file in the benchmark data set.</summary>
public class HVFile
{
    /// <summary>Numeric id; chunks reference their owning file through it.</summary>
    public int FileId { get; set; }
}
/// <summary>
/// A chunk of a file with value equality over its logical primary key
/// (FileId, Start), so Distinct(), HashSet and Dictionary keys treat two
/// chunks with the same key as the same chunk.
/// </summary>
public class HVChunk : IEquatable<HVChunk>
{
    public int FileId { get; set; }
    public int Start { get; set; }

    public bool Equals(HVChunk other)
    {
        // Check whether the compared object is null.
        if (Object.ReferenceEquals(other, null)) return false;
        // Check whether the compared object references the same data.
        if (Object.ReferenceEquals(this, other)) return true;
        // Check whether the HVChunk's key properties are equal.
        return FileId.Equals(other.FileId) && Start.Equals(other.Start);
    }

    // FIX: the original overrode GetHashCode without overriding
    // Object.Equals(object), so untyped comparisons (e.g. non-generic
    // collections) fell back to reference equality. Keep both consistent.
    public override bool Equals(object obj)
    {
        return Equals(obj as HVChunk);
    }

    // If Equals() returns true for a pair of objects
    // then GetHashCode() must return the same value for these objects.
    public override int GetHashCode()
    {
        // Multiply-then-xor mix: a plain FileId ^ Start is symmetric and maps
        // every (x, x) pair to 0, causing avoidable hash collisions.
        unchecked
        {
            return (FileId.GetHashCode() * 397) ^ Start.GetHashCode();
        }
    }
}
}
Total file Count=1000, Total chunks Count=1956352
List creation took: 00:00:00.2618299
======== Generating New Dictionary ============
Total files in the dictionary = 999
Time taken to Create Dictionary = 00:00:00.4972550
======== Operation 1 Complete ============
Total files in the dictionary = 999
Time taken to Create Dictionary = 00:00:35.8820654
======== Operation 2 Complete ============
======== BOTH DICTIONARY ARE EQUAL ============
Press any key to continue . . .
方法1 [00:00:00.4972550]是我的修改方法,方法2 [00:00:35.8820654]是您的原始方法.请验证输出字典.我在其中有验证码,该代码仅比较fileId和块.
Method 1 [00:00:00.4972550] is my modified method and Method 2 [00:00:35.8820654] is your original one. Please validate the output dictionary. I have the validation code in there, which just compares the fileId counts and chunk counts of both.
这篇关于您可以尝试改善此列表操作的性能吗?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!