如何进一步优化此代码和程序.我该怎么办? [英] How to more optimize this code and program. What i should to do?

查看:73
本文介绍了如何进一步优化此代码和程序.我该怎么办?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

资源使用率几乎很好(10-20%CPU,RAM从8Mb开始并增长),文件比较器是正常的,但是在7000+文件上程序运行缓慢.大约20-30分钟.如何优化?也许我不应该在函数中使用递归方法?也许结构没有优化?请帮忙.

The recource using is almost good(10-20% CPU, RAM starts from 8Mb and grows), file comparer is normal, but program is slow on 7000+ files. About 20-30 minutes. How optimize? Maybe i shouldn''t use recursive method in functions? Maybe the structure is not optimized? Please, help.

using System;
using System.Collections.Generic;
using System.IO;
using System.Diagnostics;

namespace ConsoleApplication3
{
    class Comparer
    {
        static void Main(string[] args)
        {
            Stopwatch s1 = new Stopwatch();
            s1.Start();
            switch (args.Length)
            {
                case 1:
                    {
                        CompareFilesRec(LookIn(args[0]));
                        s1.Stop();
                        break;
                    }
                default:
                    {
                        Console.WriteLine("Type only one argument.");
                        break;
                    }

            }
            
            Console.WriteLine("{0} ms", s1.ElapsedMilliseconds);
        }

        static bool CheckFile(string file)
        {
            FileInfo someFileInfo = new FileInfo(file);//берем інфу про файл в змінну someFileInfo
            if (someFileInfo.Length >= 2147483648 || someFileInfo.Length < 1)//перевірка
                return false;
            else return true;
        }
        static bool CheckDirEmpty(string dir)
        {
            DirectoryInfo someDir = new DirectoryInfo(dir);
            if (someDir.GetFiles().Length > 0)
                return false;
            else return true;
        }

        static List<string> LookIn(string path)
        {
            Stopwatch s1 = new Stopwatch();
            s1.Start();
            /*Ініціалізую і об*являю два списки:для файлів і папок*/
            List<string> files = new List<string>();
            List<string> dirs = new List<string>();

            /*Шукаєм всі доступні файли*/
            try
            {
                /*Добавляю знайдені папки і файли в список*/
                files.AddRange(Directory.GetFiles(path));
                dirs.AddRange(Directory.GetDirectories(path));    
            }
            catch (UnauthorizedAccessException) { }
            catch (DirectoryNotFoundException) { }
            catch (ArgumentOutOfRangeException) { }
            catch (IOException) { }

            for (int i = files.Count - 1; i >= 0; i--)
            {
                if (!CheckFile(files[i]))
                    files.RemoveAt(i);
            }
            for (int k = dirs.Count - 1; k >= 0; k--)
            {
                if(CheckDirEmpty(dirs[k]))
                    dirs.RemoveAt(k);
            }
            
            /*"Заглядаєм" за файлами в кожну директорію...*/
            foreach (string dir in dirs)
            {
                files.AddRange(LookIn(dir));//...і додаєм до списку
            }
            s1.Stop();
            Console.WriteLine("LookIN  = {0} ms", s1.ElapsedMilliseconds);
            return files;//повертаєм повний список знайдених файлів
        }

        static void CompareFilesRec(List<string> array)
        {
            Stopwatch s1 = new Stopwatch();
            s1.Start();
            List<KeyValuePair<long, string>> yeah = new List<KeyValuePair<long, string>>();
            List<string> Trash = new List<string>();
            List<string> outp = new List<string>();

            for (int j = array.Count-1; j >= 0; j--)
            {
                FileInfo fii = new FileInfo(array[j]);
                yeah.Add(new KeyValuePair<long, string>(fii.Length, array[j]));
            }
            array.Clear();

            foreach (var el in yeah)
            {
                if (!array.Contains(el.Value) && !Trash.Contains(el.Value))
                {
                    foreach (var ele in yeah)
                    {
                        if (el.Key == ele.Key && !array.Contains(ele.Value))
                        {
                            array.Add(ele.Value);
                        }
                        else
                        {
                            if (!Trash.Contains(ele.Value))
                            {
                                Trash.Add(ele.Value);
                            }
                        }
                    }
                }
            }
            yeah.Clear();
            /*TODO*/
            //byte crc = Crc8.ComputeChecksum(1, 2);
            int Arr = ComputeByteChecksum(array[0]);
            foreach (string f in array)
            {
                int File = ComputeByteChecksum(f);
                if (f != array[0] && Arr == File)
                    outp.Add(f);
                if (Arr != File)
                    if (!Trash.Contains(f))
                         Trash.Add(f);
            }
            outp.Add(array[0]);
            array.Clear();
            /*TODO end*/
            if (outp.Count > 1)
            {
                foreach (string fi in outp)
                {
                    Console.WriteLine(fi);
                }
                outp.Clear();
                Console.WriteLine();
            }
            
            if (Trash.Count > 1)
            {
                CompareFilesRec(Trash);
            }
            Trash.Clear();
            s1.Stop();
            Console.WriteLine("Comparing = {0} ms", s1.ElapsedMilliseconds);
        }
        static int ComputeByteChecksum(string path)
        {
            using (var reader = new BinaryReader(File.OpenRead(path)))
            {
                byte b1 = reader.ReadByte();
                reader.BaseStream.Position = reader.BaseStream.Position = reader.BaseStream.Length >> 1;
                byte b2 = reader.ReadByte();
                reader.BaseStream.Position = reader.BaseStream.Length - 1;
                byte b3 = reader.ReadByte();
                reader.Close();
                byte crc = Crc8.ComputeChecksum(1, 2);
                return Crc8.ComputeChecksum(b1, crc) + Crc8.ComputeChecksum(b2, crc) + Crc8.ComputeChecksum(b3, crc); 
            }
        }
    }
    public static class Crc8
    {
        static byte[] table = new byte[256];

        const byte poly = 0xd5;

        public static byte ComputeChecksum(params byte[] bytes)
        {
            byte crc = 0;
            if (bytes != null && bytes.Length > 0)
            {
                foreach (byte b in bytes)
                {
                    crc = table[crc ^ b];
                }
            }
            return crc;
        }

        static Crc8()
        {
            for (int i = 0; i < 256; ++i)
            {
                int temp = i;
                for (int j = 0; j < 8; ++j)
                {
                    if ((temp & 0x80) != 0)
                    {
                        temp = (temp << 1) ^ poly;
                    }
                    else
                    {
                        temp <<= 1;
                    }
                }
                table[i] = (byte)temp;
            }
        }
    }
}

推荐答案

您可以看看几种方法;

但是首先,您在比较什么?您在滚动自己的CRC时,最好使用一些优化的.Net方法来构建CRC或使用更大的散列来降低冲突风险.

您可以使用多线程方法,首先构建和比较要索引的文件,然后获取工作线程,然后并行散列文件等.限制因素可能是磁盘吞吐量,因此您可以一直增加线程数量,直到之前成为HDD子系统的瓶颈.

多线程并不总是有帮助,因此对不同的方法进行基准测试将是有益的;
例如1个线程-一次1个文件
例如多线程-parrallel中的多个文件
例如您自己的crc和.Net类(可能更优化)
You could look at a couple of approaches;

But first, what are you comparing? You are rolling your own CRCs, it maybe better to used some of the optimised .Net methods for building the CRCs or use larger hashes to reduce collission risk.

You could use a multithreaded approach, first build and index of files you are comparing, then get worker threads to then parrallel hash the files etc. the limiting factor is likely to be disk throughput, so you could keep increasing the thread count until just before you bottlekneck on the HDD subsystem.

Multithreading doesn''t always help, so it would be beneficial to benchmark different approachs;
e.g. 1 thread - 1 file at a time
e.g. multiple threads - multiple files in parrallel
e.g. your own crc vs .Net classes (probably more optimised)


这篇关于如何进一步优化此代码和程序.我该怎么办?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆