请使此代码高效 [英] please make this code efficient

查看:71
本文介绍了请使此代码高效的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

它是一个相似性匹配代码,匹配相似性b / w作者论文标题&集群的标题(qurries)。但是这段代码运行得很慢。请帮助我优化这段代码。只有main()函数需要更改。 plzzzzzzzzzzzzzzzz帮助

it is a similarity matching code that matches the similarity b/w authors papers titles & titles of clusters(qurries ).but this code is running very slow.so kindly help me to optimize this code. only main () function needs to be change. plzzzzzzzzzzzzzzzz help

using System;
using System.IO;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
using System.Linq;
namespace VectorSpaceModel
{
    class Program
    {
        static Hashtable DTVector = new Hashtable(); //Hashtable to hold Document Term Vector
        static List<string> wordlist = new List<string>(); //List of terms found in documents
        static Dictionary<double,> sortedList1 = new Dictionary<double,>(); //Documents ranked by VSM with angle value
        static Dictionary<string,> sortedList = new Dictionary<string,>();
        static string[] docs = new string[37406];
    

        static void Main(string[] args)
        {
           // string fileName = @"D:\FYP\new fyp\fnlfyp\OSIM\vsm2\AuthorsList.txt";
            string fileName2 = @"F:\fyp\fnlfyp\vsm2\OSIM\vsm2\output_titles.txt";
            string fileName3 = @"F:\fyp\fnlfyp\vsm2\OSIM\vsm2\queries.txt";
            string fileName4 = @"F:\fyp\fnlfyp\vsm2\OSIM\vsm2\output_vsm.txt";

            int num = 0;
            string[] authors = new string[37406];

            using (System.IO.StreamReader read_author = new System.IO.StreamReader(fileName2))
            {
                String line;
                int j = 0;
                j++;
                while ((line = read_author.ReadLine()) != null)
                {
                    //if (File.Exists(@"D:\FYP\new fyp\fnlfyp\OSIM\output_authorTitles\" + line + ".txt"))
                    //{
                        //using (System.IO.StreamReader read_authFile = new System.IO.StreamReader(@"D:\FYP\new fyp\fnlfyp\OSIM\output_authorTitles\" + line + ".txt"))
                        //{
                           // String line1;
                            //while ((line1 = read_authFile.ReadLine()) != null)
                            //{
                                string[] array = line.Split('=');
                                Console.WriteLine(j);
                                authors[j] = array[0];
                                docs[j] = array[1];
                                j++;
                           // }
                       // }
                    //}
                   /* if (line.StartsWith("Topic"))
                    { }
                    else
                    {
                        String line1 = "";
                        for (int i = 0; i < 10; i++)
                        {
                            string[] words = line.Split(' ');
                            line = words[0];
                            line = line.Trim();
                            line1 = line1 + line + " ";
                            line = sr.ReadLine();
                        }
                        docs[j] = line1;
                        j++;
                    }*/
                }
            }

            using (System.IO.StreamReader sr1 = new System.IO.StreamReader(fileName3))
            {
                String line2 = "";
                while ((line2 = sr1.ReadLine()) != null)
                {
                    docs[0] = line2;
                    num++;
                    createWordList();
                    createVector();
                    classify();
                    var dict = sortedList;
                    using (System.IO.StreamWriter writer = new System.IO.StreamWriter(fileName4, true))
                    {
                        writer.WriteLine(num);
                        writer.WriteLine(line2);
                        foreach (var x in dict.Reverse())
                        {
                            Console.WriteLine("Doc{1} -> {0}", x.Value, x.Key);
                            writer.WriteLine("Doc{1} -> {0}", x.Value, x.Key);
                        }
                        writer.WriteLine("");
                       
                    }
                   // Console.ReadLine();
                    docs.LastOrDefault();
                    DTVector.Clear();
                    wordlist.Clear();
                    sortedList.Clear();

                }
            }
        }


        public static void createWordList()
        {
            foreach (string doc in docs)
            {
                wordlist = getWordList(wordlist, doc);
            }
        }

        public static List<string> getWordList(List<string> wordlist, string query)
        {
            Regex exp = new Regex("\\w+", RegexOptions.IgnoreCase);
            MatchCollection MCollection = exp.Matches(query);

            foreach (Match match in MCollection)
            {
                if (!wordlist.Contains(match.Value))
                {
                    wordlist.Add(match.Value);
                }
            }

            return wordlist;
        }

        public static void createVector()
        {
            double[] queryvector;

            for (int j = 0; j < docs.Length; j++)
            {
                queryvector = new double[wordlist.Count];

                for (int i = 0; i < wordlist.Count; i++)
                {

                    double tfIDF = getTF(docs[j], wordlist[i]) * getIDF(wordlist[i]);
                    queryvector[i] = tfIDF;
                }

                if (j == 0) //is it a query?
                {
                    DTVector.Add("Query", queryvector);

                }
                else
                {

                    DTVector.Add(j.ToString(), queryvector);
                }
            }
        }

        public static void classify()
        {
            double temp = 0.0;

            IDictionaryEnumerator _enumerator = DTVector.GetEnumerator();

            double[] queryvector = new double[wordlist.Count];

            Array.Copy((double[])DTVector["Query"], queryvector, wordlist.Count);

            while (_enumerator.MoveNext())
            {
                if (_enumerator.Key.ToString() != "Query")
                {
                    temp = cosinetheta(queryvector, (double[])_enumerator.Value);
                    if(temp != 0)
                    {
                        sortedList.Add(_enumerator.Key.ToString(), temp);
                    }
                }
            }
        }

        public static double dotproduct(double[] v1, double[] v2)
        {
            double product = 0.0;
            if (v1.Length == v2.Length)
            {
                for (int i = 0; i < v1.Length; i++)
                {
                    product += v1[i] * v2[i];
                }
            }
            return product;
        }

        public static double vectorlength(double[] vector)
        {
            double length = 0.0;
            for (int i = 0; i < vector.Length; i++)
            {
                length += Math.Pow(vector[i], 2);
            }

            return Math.Sqrt(length);
        }
        private static double getTF(string document, string term)
        {
            string[] queryTerms = Regex.Split(document, "\\s");
            double count = 0;


            foreach (string t in queryTerms)
            {
                if (t == term)
                {
                    count++;
                }
            }
            return count;

        }

        private static double getIDF(string term)
        {
            double df = 0.0;
            //get term frequency of all of the sentences except for the query
            for (int i = 1; i < docs.Length; i++)
            {
                if (docs[i].Contains(term))
                {
                    df++;
                }
            }

            //Get sentence count
            double D = docs.Length - 1; //excluding the query 

            double IDF = 0.0;

            if (df > 0)
            {
                IDF = Math.Log(D / df);
            }

            return IDF;
        }

        public static double cosinetheta(double[] v1, double[] v2)
        {
            double lengthV1 = vectorlength(v1);
            double lengthV2 = vectorlength(v2);

            double dotprod = dotproduct(v1, v2);
            if (lengthV1 != 0)
                return dotprod / (lengthV1 * lengthV2);
            else
                return 0;

        }
    }
}

推荐答案

说真的吗?你在网站上打了一堆乱糟糟的垃圾,你希望我们为你解决这个问题吗?当你甚至无法摆脱多余的代码,所以我们可以看到有什么?或者评论一下你的代码,让它对我们来说简单,或者甚至给我们运行它需要什么?





我们不是做你的功课:这是有原因的。它就是为了让你思考你被告知的事情,并试着理解它。它也在那里,以便您的导师可以识别您身体虚弱的区域,并将更多的注意力集中在补救措施上。



亲自尝试,或学习魔术词: 你想要炸薯条吗?
Seriously? You slap a pile of messy rubbish on a website and you expect us to sort it out for you? When you can't even be bothered to get rid of redundant code so we can see what is there? Or comment your code to make it simple for us, or even give us what we would need to run it?


We do not do your homework: it is set for a reason. It is there so that you think about what you have been told, and try to understand it. It is also there so that your tutor can identify areas where you are weak, and focus more attention on remedial action.

Try it yourself, or learn the Magic Words: "Do you want fries with that?"


这篇关于请使此代码高效的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆