请使此代码高效 [英] please make this code efficient

查看：71 发布时间：2019/6/13 17:12:36 C#

本文介绍了请使此代码高效的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

它是一个相似性匹配代码，匹配相似性b / w作者论文标题&集群的标题（qurries）。但是这段代码运行得很慢。请帮助我优化这段代码。只有main（）函数需要更改。 plzzzzzzzzzzzzzzzz帮助

This is similarity-matching code that measures the similarity between authors' paper titles and the titles of clusters (queries), but it runs very slowly. Kindly help me optimize it; only the Main() function needs to change. Please help.

using System;
using System.IO;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
using System.Linq;
namespace VectorSpaceModel
{
    class Program
    {
        // Document-term vectors for the query being processed, keyed by "Query" or by the document index as a string.
        static Hashtable DTVector = new Hashtable();
        // Distinct terms found in the query and the documents; a term's position is its vector component.
        static List<string> wordlist = new List<string>();
        // Not referenced by the code in this file.
        // NOTE(review): the type arguments were missing in the original; <double, string> is assumed - confirm.
        static Dictionary<double, string> sortedList1 = new Dictionary<double, string>();
        // Non-zero cosine similarity per document key for the query being processed (filled by classify).
        static Dictionary<string, double> sortedList = new Dictionary<string, double>();
        // Slot 0 holds the current query; slots 1 and up hold the document titles.
        static string[] docs = new string[37406];
    

        /// <summary>
        /// Loads the document titles, then scores every query line against them and
        /// appends the ranked results to the output file (and echoes them to the console).
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string fileName2 = @"F:\fyp\fnlfyp\vsm2\OSIM\vsm2\output_titles.txt";
            string fileName3 = @"F:\fyp\fnlfyp\vsm2\OSIM\vsm2\queries.txt";
            string fileName4 = @"F:\fyp\fnlfyp\vsm2\OSIM\vsm2\output_vsm.txt";

            // Slot 0 is reserved for the current query; documents start at index 1.
            // A list sized by the file avoids both overflowing a fixed array and leaving null slots.
            List<string> loaded = new List<string>();
            loaded.Add(string.Empty);

            using (StreamReader titleReader = new StreamReader(fileName2))
            {
                string line;
                while ((line = titleReader.ReadLine()) != null)
                {
                    // Each line is "<author>=<title>"; only the title part is scored.
                    string[] parts = line.Split('=');
                    Console.WriteLine(loaded.Count);
                    // A line without '=' is kept as an empty title so document numbers stay aligned with line numbers.
                    loaded.Add(parts.Length > 1 ? parts[1] : string.Empty);
                }
            }

            docs = loaded.ToArray();

            int num = 0;

            // The output file is opened once in append mode instead of being reopened for every query.
            using (StreamReader queryReader = new StreamReader(fileName3))
            using (StreamWriter writer = new StreamWriter(fileName4, true))
            {
                string query;
                while ((query = queryReader.ReadLine()) != null)
                {
                    docs[0] = query;
                    num++;

                    // The word list and all vectors are rebuilt for each query by these three calls.
                    createWordList();
                    createVector();
                    classify();

                    writer.WriteLine(num);
                    writer.WriteLine(query);

                    // Dictionary enumeration order is unspecified, so rank explicitly: highest similarity first.
                    foreach (var x in sortedList.OrderByDescending(entry => entry.Value))
                    {
                        Console.WriteLine("Doc{1} -> {0}", x.Value, x.Key);
                        writer.WriteLine("Doc{1} -> {0}", x.Value, x.Key);
                    }

                    writer.WriteLine("");
                    // Persist each query's results even if a later query fails.
                    writer.Flush();

                    // Reset the per-query state.
                    DTVector.Clear();
                    wordlist.Clear();
                    sortedList.Clear();
                }
            }
        }


        /// <summary>
        /// Adds every distinct word found in <c>docs</c> (query and documents) to <c>wordlist</c>,
        /// preserving first-occurrence order.
        /// </summary>
        public static void createWordList()
        {
            // Set membership replaces a linear List.Contains scan of the whole vocabulary for every word.
            HashSet<string> known = new HashSet<string>(wordlist);

            foreach (string doc in docs)
            {
                // Unfilled slots in docs are null; there is nothing to tokenise in them.
                if (string.IsNullOrEmpty(doc))
                {
                    continue;
                }

                // getWordList on a fresh list yields this document's distinct words in order of appearance.
                foreach (string word in getWordList(new List<string>(), doc))
                {
                    // HashSet.Add returns false when the word is already known.
                    if (known.Add(word))
                    {
                        wordlist.Add(word);
                    }
                }
            }
        }

        /// <summary>
        /// Appends to <paramref name="wordlist"/> every word (<c>\w+</c> match) of
        /// <paramref name="query"/> that the list does not already contain.
        /// </summary>
        /// <param name="wordlist">List to extend; modified in place.</param>
        /// <param name="query">Text to tokenise; null or empty adds nothing.</param>
        /// <returns>The same <paramref name="wordlist"/> instance.</returns>
        public static List<string> getWordList(List<string> wordlist, string query)
        {
            // A null query would make Regex.Matches throw; there is nothing to add either way.
            if (string.IsNullOrEmpty(query))
            {
                return wordlist;
            }

            // The static Regex.Matches overload uses the framework's regex cache,
            // so the pattern is not re-parsed on every call.
            foreach (Match match in Regex.Matches(query, "\\w+", RegexOptions.IgnoreCase))
            {
                if (!wordlist.Contains(match.Value))
                {
                    wordlist.Add(match.Value);
                }
            }

            return wordlist;
        }

        /// <summary>
        /// Builds one TF-IDF vector per entry of <c>docs</c> and stores it in <c>DTVector</c>:
        /// index 0 under the key "Query", every other index under its number as a string.
        /// Component i of a vector is (occurrences of wordlist[i] in the entry) * getIDF(wordlist[i]).
        /// </summary>
        public static void createVector()
        {
            int termCount = wordlist.Count;

            // getIDF depends only on the term, so it is evaluated once per term
            // instead of once per (document, term) pair.
            double[] idf = new double[termCount];

            // Term -> vector component. Relies on wordlist holding each term once
            // (getWordList only adds words that are not already present).
            Dictionary<string, int> termIndex = new Dictionary<string, int>(termCount);

            for (int i = 0; i < termCount; i++)
            {
                idf[i] = getIDF(wordlist[i]);
                termIndex[wordlist[i]] = i;
            }

            for (int j = 0; j < docs.Length; j++)
            {
                double[] vector = new double[termCount];
                string document = docs[j];

                // Null or empty entries keep an all-zero vector.
                if (!string.IsNullOrEmpty(document))
                {
                    // Split the document once, using the same whitespace split and exact
                    // token comparison that getTF applies, and count occurrences per component.
                    Dictionary<int, int> counts = new Dictionary<int, int>();
                    foreach (string token in Regex.Split(document, "\\s"))
                    {
                        int index;
                        if (termIndex.TryGetValue(token, out index))
                        {
                            int seen;
                            counts.TryGetValue(index, out seen); // seen stays 0 when absent
                            counts[index] = seen + 1;
                        }
                    }

                    foreach (KeyValuePair<int, int> entry in counts)
                    {
                        vector[entry.Key] = entry.Value * idf[entry.Key];
                    }
                }

                // Index 0 is the query; every other index is a document.
                DTVector.Add(j == 0 ? "Query" : j.ToString(), vector);
            }
        }

        /// <summary>
        /// Compares the "Query" vector in <c>DTVector</c> with every other stored vector and
        /// records each non-zero cosine value in <c>sortedList</c> under the vector's key.
        /// </summary>
        public static void classify()
        {
            // Work on a private copy of the query vector.
            double[] queryvector = new double[wordlist.Count];
            Array.Copy((double[])DTVector["Query"], queryvector, wordlist.Count);

            foreach (DictionaryEntry entry in DTVector)
            {
                string key = entry.Key.ToString();
                if (key == "Query")
                {
                    continue; // the query is not compared with itself
                }

                double similarity = cosinetheta(queryvector, (double[])entry.Value);
                if (similarity != 0)
                {
                    sortedList.Add(key, similarity);
                }
            }
        }

        /// <summary>
        /// Returns the dot product of two vectors, or 0 when their lengths differ.
        /// </summary>
        /// <param name="v1">First vector.</param>
        /// <param name="v2">Second vector.</param>
        /// <returns>Sum of the component-wise products; 0.0 for vectors of unequal length.</returns>
        public static double dotproduct(double[] v1, double[] v2)
        {
            if (v1.Length != v2.Length)
            {
                return 0.0;
            }

            double sum = 0.0;
            int index = 0;
            foreach (double component in v1)
            {
                sum += component * v2[index];
                index++;
            }

            return sum;
        }

        /// <summary>
        /// Returns the Euclidean length of a vector (square root of the sum of squared components).
        /// </summary>
        /// <param name="vector">Vector to measure.</param>
        /// <returns>The vector's length; 0 for an empty vector.</returns>
        public static double vectorlength(double[] vector)
        {
            double sumOfSquares = 0.0;
            foreach (double component in vector)
            {
                sumOfSquares += Math.Pow(component, 2);
            }

            return Math.Sqrt(sumOfSquares);
        }
        /// <summary>
        /// Counts how many whitespace-separated tokens of <paramref name="document"/>
        /// are exactly equal (case-sensitive) to <paramref name="term"/>.
        /// </summary>
        /// <param name="document">Text to search.</param>
        /// <param name="term">Term to count.</param>
        /// <returns>The number of matching tokens.</returns>
        private static double getTF(string document, string term)
        {
            string[] tokens = Regex.Split(document, "\\s");
            double occurrences = 0;

            for (int i = 0; i < tokens.Length; i++)
            {
                if (string.Equals(tokens[i], term))
                {
                    occurrences++;
                }
            }

            return occurrences;
        }

        /// <summary>
        /// Returns the inverse document frequency of <paramref name="term"/> over the documents
        /// in <c>docs</c> (index 0, the query, is excluded): log(document count / documents containing the term).
        /// </summary>
        /// <param name="term">Term to look up.</param>
        /// <returns>The IDF value, or 0 when no document contains the term.</returns>
        private static double getIDF(string term)
        {
            double df = 0.0;            // documents that contain the term
            double documentCount = 0.0; // documents actually present (null slots are not documents)

            // Start at 1: slot 0 is the query.
            for (int i = 1; i < docs.Length; i++)
            {
                string document = docs[i];
                if (document == null)
                {
                    continue; // unfilled slot
                }

                documentCount++;

                // NOTE(review): this is a substring test, whereas getTF compares whole tokens - confirm that is intended.
                if (document.Contains(term))
                {
                    df++;
                }
            }

            if (df > 0)
            {
                return Math.Log(documentCount / df);
            }

            return 0.0;
        }

        /// <summary>
        /// Returns the cosine of the angle between two vectors:
        /// dot product divided by the product of their lengths.
        /// </summary>
        /// <param name="v1">First vector.</param>
        /// <param name="v2">Second vector.</param>
        /// <returns>The cosine value, or 0 when either vector has zero length.</returns>
        public static double cosinetheta(double[] v1, double[] v2)
        {
            double lengthV1 = vectorlength(v1);
            if (lengthV1 == 0)
            {
                return 0;
            }

            // A zero-length second vector would give 0/0 = NaN, and NaN != 0 is true,
            // so callers filtering on "!= 0" would treat it as a match.
            double lengthV2 = vectorlength(v2);
            if (lengthV2 == 0)
            {
                return 0;
            }

            return dotproduct(v1, v2) / (lengthV1 * lengthV2);
        }
    }
}

请使此代码高效 [英] please make this code efficient

问题描述

推荐答案

相关文章

其他开发语言最新文章

热门教程

热门工具

登录关闭

请使此代码高效 [英] please make this code efficient

问题描述

推荐答案

相关文章

其他开发语言最新文章

热门教程

热门工具

登录 关闭

登录关闭