如何提高性能。文件比较器。 [英] How to increase performance. File comparer.

查看:67
本文介绍了如何提高性能。文件比较器。的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

各位大家好。下面的代码非常缓慢,每次迭代大约10秒。如何改善他?



Hello, everybody. The code below, works very slowly, about 10 seconds per iteration. How to improve him?

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;

namespace Compare
{
    public class Comparer
    {
        public static List<string> CompareLength(string file, List<string> arr)
        {
            List<string> result = new List<string>();
            foreach (string item in arr)
                if (new FileInfo(item).Length == new FileInfo(file).Length)
                    result.Add(item);
            return result;
        }
        public static void CompareFilesRec(List<string> array)
        {
            List<string> outp = new List<string>();

            int i = 0;
            while (i < array.Count)
            {
                List<string> fromAll = Comparer.CompareLength(array[i], array);
                string chosen = array[i];
                if (fromAll.Count != 0)
                    foreach (var f2 in fromAll)
                    {
                        if (Checksum.CompareHash(chosen, f2))
                        {
                            outp.Add(f2);
                            array.Remove(f2);
                        }
                        else { i++; }

                    }
                Comparer.ShowResults(outp);
                outp.Clear();
            }
        }
        public static void ShowResults(List<string> results)
        {
            if (results.Count >= 2)
            {
                foreach (var element in results)
                {
                    Console.WriteLine(element);
                }
                Console.WriteLine();
            }
        }
    }
}





Checksum.CompareHash



Checksum.CompareHash

static public bool CompareHash(string file1, string file2)
{
    long FirstSize = new FileInfo(file1).Length;
    long SecondSize = new FileInfo(file2).Length;

    byte[] buffer1 = new byte[FirstSize];
    byte[] buffer2 = new byte[SecondSize];

    try
    {
        using (var mmf1 = MemoryMappedFile.CreateFromFile(file1, FileMode.OpenOrCreate, null, 0))
        using (var mmf2 = MemoryMappedFile.CreateFromFile(file2, FileMode.OpenOrCreate, null, 0))
        using (var reader1 = mmf1.CreateViewStream())
        using (var reader2 = mmf2.CreateViewStream())
        {
            try
            {
                reader1.Read(buffer1, 0, (int)FirstSize);
                reader2.Read(buffer2, 0, (int)SecondSize);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Error: {0}", ex.Message);
            }


            if (buffer1.Length != buffer2.Length || Crc8.ComputeChecksum(buffer1) != Crc8.ComputeChecksum(buffer2))
                return false;
        }
    }
    catch (IOException) { }
    catch (UnauthorizedAccessException) { }
    return true;
}







所以,我意识到了解决方案,检查一下:




So, I realized that solution, check it:

namespace Compare
{
    public class Comparer
    {
        public static List<string> CompareLength(string file, List<string> arr)
        {
            List<string> result = new List<string>();
            foreach (string item in arr)
                if (new FileInfo(item).Length == new FileInfo(file).Length && Checksum.CompareHash(file,item))//here I added hash check. To avoid another loop or something.
                    result.Add(item);
            return result;
        }
        public static void CompareFilesRec(List<string> array)
        {
            Dictionary<long, List<string>> di = new Dictionary<long,List<string>>();

            int i = 0;
            while (i < array.Count)
            {
                long size = new FileInfo(array[i]).Length;

                List<string> fromAll = Comparer.CompareLength(array[i], array);

                if (di.ContainsKey(size))
                {
                    fromAll.Clear();
                    array.Remove(array[i]);
                }
                else
                {
                    di.Add(size, fromAll);
                    i++;
                }
            }
            ShowResults(di);
        }
        public static void ShowResults(Dictionary<long, List<string>> dic)
        {
            foreach(var element in dic)
            {
                foreach (var outer in element.Value)
                {
                    Console.WriteLine(outer);
                }
                Console.WriteLine();
            }
        }
    }
}





我也更改了散列函数:



Also i changed the hashing function:

namespace Compare
{
    public class Checksum
    {
        static public bool CompareHash(string file1, string file2)
        {
            long FirstSize = new FileInfo(file1).Length;
            long SecondSize = new FileInfo(file2).Length;

            SHA256 sha1 = SHA256Managed.Create();
            SHA256 sha2 = SHA256Managed.Create();

            byte[] buffer1 = new byte[FirstSize];
            byte[] buffer2 = new byte[SecondSize];

            try
            {
                using (var mmf1 = MemoryMappedFile.CreateFromFile(file1, FileMode.OpenOrCreate, null, 0))
                using (var mmf2 = MemoryMappedFile.CreateFromFile(file2, FileMode.OpenOrCreate, null, 0))
                using (var reader1 = mmf1.CreateViewStream())
                using (var reader2 = mmf2.CreateViewStream())
                {
                    try
                    {
                        reader1.Read(buffer1, 0, (int)FirstSize);
                        reader2.Read(buffer2, 0, (int)SecondSize);
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine("Error: {0}", ex.Message);
                    }


                    if (buffer1.Length != buffer2.Length && sha1.ComputeHash(buffer1) != sha2.ComputeHash(buffer2))
                        return false;
                }
            }
            catch (IOException) { }
            catch (UnauthorizedAccessException) { }
            return true;
        }
    }
}





请告诉我上面代码中的错误...



Please, tell me my errors in code above...

推荐答案

没有代码,只是建议/提示。

好​​的,首先删除多余的工作。 (这里有很多。)



创建一个字典< ulong,List< string>>

在所有文件名列表中进行一次传递。

对于每个文件名,获取文件长度。

使用.TryGetValue获取具有该长度的文件列表。

如果没有这样的列表,则创建新列表< string> 并将其插入字典

将文件名添加到列表中。



所有文件名后已被处理​​。

浏览字典中的所有.Values。

如果长度是> 1然后对列表中的每个文件名执行类似的过程,校验和/散列为 Dictionary 键而不是长度(使用不同的字典)。

字典中的所有列表,包含多个条目是一组相同的文件。



长度ch每个文件只执行一次eck,每个文件只计算一次校验和/哈希(并且仅针对可能匹配的文件)。
No code, just suggestions/hints.
Ok, first remove redundant work. (There''s plenty of it.)

Create a Dictionary<ulong, List<string>>
Make one pass through the list of all filenames.
For each filename, get the file length.
Use the .TryGetValue to get the list of files with that length.
If there is no such list, then create a new List<string> and insert it into the Dictionary.
Add the filename to the list.

After all of the filenames have been "processed".
Go through all of the .Values in the Dictionary.
If the length is >1 then perform a similar process on each filename in the list, with the checksum/hash as the Dictionary key instead of the length (use a different Dictionary).
All of the lists in this Dictionary with more than one entry are sets of identical files.

Length checking is done only once per file and the checksum/hash is computed only once per file (and only for the files that could possibly be matched).


这篇关于如何提高性能。文件比较器。的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆