任何人都可以告诉我我能做些什么来加快速度吗? [英] Can anyone tell me of any optimations I could do to this to make it faster?

查看:87
本文介绍了任何人都可以告诉我我能做些什么来加快速度吗?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我知道有很多方法可以让它快得多。任何

新闻阅读器都能在几秒钟内完成。我不知道他们是怎么做的,而且我对c#很新。如果有人知道更快的方式

,请告诉我。我正在做的就是查询数据库以获取某个组的所有标题,然后通过

查找每个帖子的所有部分。我只想要完成的那些

。意味着那个文件的所有段都是

张贴的。


使用System;

使用System.Collections;

使用System.Text;

使用MySql.Data;

使用System.Text.RegularExpressions;


命名空间createfiles

{

class program

{

static MySql.Data.MySqlClient.MySqlConnection conn
= new MySql.Data.MySqlClient.MySqlConnection();

static MySql.Data.MySqlClient.MySqlCommand cmd =

new MySql.Data.MySqlClient。 MySqlCommand();

静态字符串myConnectionString =" server =

127.0.0.1; uid = root; pwd = password; database = test;" ;;

静态ArrayList master;

静态字符串组;

静态字符串表;

static string [] groups = {

" alt.binaries.games.xbox"," alt.binaries.games.xbox360",

" alt.binaries.vcd" };

static Regex reg = new Regex(" \\。");

static Regex seg = new Regex(" \\( [0-9] * / [0-9] *

\\)",RegexOptions.IgnoreCase);

struct Header

{

公共字符串麻木;

公共字符串主题;

公共字符串日期;

公共字符串来自;

公共字符串msg_id;

公共字符串字节;

}


static void Main( string [] args)

{

for(int x = 1; x< 2; x ++)

{

table = reg.Replace(groups [x],"");

group = groups [x];

getheaders();

Console.WriteLine(有这么多标题

{0},master.Count);

标题一=(标题)主[ 0];

Console.WriteLine(" first one {0} {1}",

one.numb,one.subject);

find();

master.Clear();

}


}

st tic void getheaders()

{

conn.ConnectionString = myConnectionString;

conn.Open();

cmd.Connection = conn;

cmd.CommandText =" select * from" + table +"

其中主题如''%(%/%)%''";

MySql.Data.MySqlClient.MySqlDataReader reader;

reader = cmd.ExecuteReader();

标题h =新标题();

master = new ArrayList();

while(reader.Read())

{

h.numb = reader.GetValue(0).ToString();

h.subject = reader.GetValue(1).ToString();

h.from = reader.GetValue(2).ToString();

h.date = reader.GetValue(3).ToString();

h.msg_id = reader.GetValue(4).ToString();

h.bytes = reader.GetValue( 5).ToString();

master.Add(h);

}

reader.Close();

conn.Close();


}

static void find()

{

while(master.Count> 0)

{

标题开始=(标题)主文件[0];

master.RemoveAt (0);

匹配m = seg.Match(start.subject);

字符串segsplit = m.ToString();

segsplit = se gsplit.Replace("(","");

segsplit = segsplit.Replace(")","");

string [] segments = segsplit.Split(''/'');

int max = int.Parse(segments [1]);

max + = 1;

int counter = 1;

Header [] found = new Header [max];

string testsubject = seg.Replace

(start.subject,"");

int index = int.Parse(segments [0]);

// int temp = master .Count;

if(index< max)

{

found [index] = start;

for(int x = 0; x< master.Count; x ++)

{

页眉测试=(页眉)大师[x];

if(test.subject.Contains

(testsubject))

{

//master.Remove(test);

master.RemoveAt(x);

x = x - 1;

匹配t = seg.Match

(test.subject);

string tsplit = t。 ToString();

string tsegsplit =

tsplit.Replace("(","");

tsegsplit = tsegsplit .Replace

(")","");

string [] tsegments =

tsegsplit.Split('' /'');

index = int.Parse(tsegments

[0]);

//Console.WriteLine(counter);

if(index< max)

{

found [index] = test;

counter ++; < br $>
}

}


}

//Console.WriteLine("counter = {0 }",

counter);

in t testmax = max-1;

if(counter == testmax)

{

master.TrimToSize();

Console.WriteLine(我们有一个匹配

{0},找到[1] .subject);

}

}

}

}


}

}

-

--------------------------------------- -------

发布于NewsLeecher v3.0 Final

* Binary Usenet Leeching Made

* http://www.newsleecher.com/?usenet

- --------------------------------------------

I know there are ways to make this a lot faster. Any
newsreader does this in seconds. I don''t know how they do
it and I am very new to c#. If anyone knows a faster way
please let me know. All I am doing is quering the db for
all the headers for a certain group and then going through
them to find all the parts of each post. I only want ones
that are complete. Meaning all segments for that one file
posted are there.

using System;
using System.Collections;
using System.Text;
using MySql.Data;
using System.Text.RegularExpressions;

namespace createfiles
{
class Program
{
static MySql.Data.MySqlClient.MySqlConnection conn
= new MySql.Data.MySqlClient.MySqlConnection();
static MySql.Data.MySqlClient.MySqlCommand cmd =
new MySql.Data.MySqlClient.MySqlCommand();
static string myConnectionString = "server=
127.0.0.1;uid=root;pwd=password;database=test;";
static ArrayList master;
static string group;
static string table;
static string[] groups = {
"alt.binaries.games.xbox", "alt.binaries.games.xbox360",
"alt.binaries.vcd" };
static Regex reg = new Regex("\\.");
static Regex seg = new Regex("\\([0-9]*/[0-9]*
\\)",RegexOptions.IgnoreCase);
struct Header
{
public string numb;
public string subject;
public string date;
public string from;
public string msg_id;
public string bytes;
}

static void Main(string[] args)
{
for (int x = 1; x < 2; x++)
{
table = reg.Replace(groups[x], "");
group = groups[x];
getheaders();
Console.WriteLine("Have this many headers
{0}", master.Count);
Header one = (Header)master[0];
Console.WriteLine("first one {0} {1}",
one.numb, one.subject);
find();
master.Clear();
}

}
static void getheaders()
{
conn.ConnectionString = myConnectionString;
conn.Open();
cmd.Connection = conn;
cmd.CommandText = "select * from " + table + "
where subject like ''%(%/%)%''";
MySql.Data.MySqlClient.MySqlDataReader reader;
reader = cmd.ExecuteReader();
Header h = new Header();
master = new ArrayList();
while (reader.Read())
{
h.numb = reader.GetValue(0).ToString();
h.subject = reader.GetValue(1).ToString();
h.from = reader.GetValue(2).ToString();
h.date = reader.GetValue(3).ToString();
h.msg_id = reader.GetValue(4).ToString();
h.bytes = reader.GetValue(5).ToString();
master.Add(h);
}
reader.Close();
conn.Close();

}
static void find()
{
while (master.Count > 0)
{
Header start = (Header)master[0];
master.RemoveAt(0);
Match m = seg.Match(start.subject);
string segsplit = m.ToString();
segsplit = segsplit.Replace("(", "");
segsplit = segsplit.Replace(")", "");
string[] segments = segsplit.Split(''/'');
int max = int.Parse(segments[1]);
max += 1;
int counter = 1;
Header[] found = new Header[max];
string testsubject = seg.Replace
(start.subject, "");
int index = int.Parse(segments[0]);
//int temp = master.Count;
if (index < max)
{
found[index] = start;
for (int x = 0; x < master.Count; x++)
{
Header test = (Header)master[x];
if (test.subject.Contains
(testsubject))
{
//master.Remove(test);
master.RemoveAt(x);
x = x - 1;
Match t = seg.Match
(test.subject);
string tsplit = t.ToString();
string tsegsplit =
tsplit.Replace("(", "");
tsegsplit = tsegsplit.Replace
(")", "");
string[] tsegments =
tsegsplit.Split(''/'');
index = int.Parse(tsegments
[0]);
//Console.WriteLine(counter);
if (index < max)
{
found[index] = test;
counter++;
}
}

}
//Console.WriteLine("counter = {0}",
counter);
int testmax = max-1;
if (counter == testmax)
{
master.TrimToSize();
Console.WriteLine("We Have a Match
{0}", found[1].subject);
}
}
}
}

}
}
--
----------------------------------------------
Posted with NewsLeecher v3.0 Final
* Binary Usenet Leeching Made Easy
* http://www.newsleecher.com/?usenet
----------------------------------------------

推荐答案

最极端,


我可以看到你在这里做的一些事情。


首先,我不得不问你的数据库结构。你是

将不同的标题存储在不同的表中,并使用组的名称

作为表格。我不知道这一定是个好主意。

的原因是所有的表共享相同的结构,并且它们都是相关的,只有区别于消息的消息才是他们的消息。 b $ b在。


因此,我认为你应该有一个单独的表格,其中包含

消息,并添加一个列

消息所在的组的名称。当然,消息可以在多个组中(因为

的交叉发布)。在这种情况下,您将拥有另一个表格,其中包含一个组ID,以及该消息的表格名称为
in。执行此操作,然后你会在主表中有一条记录,其中包含

的消息详情,以及另一张表格,其中列出了

消息所在的组。


这样做也可以修复代码中的错误。您从表中的组名中删除了

的句点。这会带来

以下情况。假设你可以有两组:


alt.my.stuff

alt.mystuff


在你的算法,它们以相同的方式处理,并且在同一个

表中。在MySql中,您应该能够使用某种转义机制来允许表格中的
允许句点(类似于SQL中的方括号

Server)。


继续,我不会使用正则表达式执行基本的

替换功能。我会在

上使用Replace方法来执行此操作。我想你会发现这个更快。

同样适用于查找字符串(您在主题上匹配),以及

分割功能。所有这些都是在字符串类上提供的,并且因为你没有使用通配符或模式,因此没有理由使用

正则表达式类。


从数据阅读器读取时,您不必调用ToString。你

可以直接将结果转换成字符串。


最后,我建议你选择所有来自
$ b $的所有消息b组立即出组,然后按顺序处理它们。您可以按组名对

结果进行排序,然后对其进行处理。这将使您免于必须重复访问数据库。


希望有帮助。

-

- Nicholas Paldino [.NET / C#MVP]

- mv*@spam.guard .caspershouse.com


极端 <实例******* @ extremest.com>在消息中写道

news:mc ********************* @ fe01.usenetserver.com ...
Extremest,

There are a few things I can see you doing here.

First though, I have to ask about your database structure. You are
storing the different headers in different tables with the name of the group
as the table. I don''t know that this is necessarily a good idea. The
reason is that all of the tables share the same structure, and they are all
related, the only thing differentiating messages being the group that they
are in.

Because of that, I think that you should have one single table with
messages in them, and add a column which has the name of the group that the
message is in. Of course, the message could be in multiple groups (because
of crossposting). In this case, you would have another table which would
have a group id in it, as well as the name of the table that the message was
in. Doing this, you would then have a record in the main table which had
the message details, as well as another table saying which groups the
message was in.

Doing it like this also fixes an error in your code. You were removing
the periods from the group names in your tables. This brings up the
following situation. Hypothetically, you could have two groups:

alt.my.stuff
alt.mystuff

In your algorithm, they are treated the same way, and are in the same
table. In MySql, you should be able to use some sort of escape mechanism to
allow periods in your table names (something like square brackets in SQL
Server).

Moving on, I would not use regular expressions to perform basic
replacement functions as you are doing. I would use the Replace method on
the string class to do this. I think you will find this MUCH faster. The
same goes for the finding of a string (you match on the subject), as well as
the split functionality. All of this is offered on the string class, and
since you are not using wildcards or patterns, there is no reason to use the
regular expression classes.

When reading from the data reader, you don''t have to call ToString. You
can cast the results to string directly.

Finally, I would recommend selecting out all of the messages from all of
the groups out at once, then processing them in order. You can sort the
results by group name, and then process them. This will save you from
having to make repeat trips to the database.

Hope ths helps.
--
- Nicholas Paldino [.NET/C# MVP]
- mv*@spam.guard.caspershouse.com

"Extremest" <Ex*******@extremest.com> wrote in message
news:mc*********************@fe01.usenetserver.com ...
我知道有很多方法可以让它快得多。任何
新闻阅读器都能在几秒钟内完成。我不知道他们是怎么做的,而且我对c#很新。如果有人知道更快的方式
请告诉我。我正在做的就是查询数据库以获取某个组的所有标题,然后通过
查找每个帖子的所有部分。我只想要那些完整的。意味着发布了那个文件的所有部分


使用System;
使用System.Collections;
使用System.Text;
使用MySql .Data;
使用System.Text.RegularExpressions;

命名空间createfiles
{
类程序
{静态MySql.Data.MySqlClient .MySqlConnection conn
= new MySql.Data.MySqlClient.MySqlConnection();
static MySql.Data.MySqlClient.MySqlCommand cmd =
new MySql.Data.MySqlClient.MySqlCommand();
static string myConnectionString =" server =
127.0.0.1; uid = root; pwd = password; database = test;" ;;
static ArrayList master;
静态字符串组;
静态字符串表;
static string [] groups = {
" alt.binaries.games.xbox"," alt.binaries.games.xbox360",
" alt.binaries.vcd"静态正则表达式reg =新正则表达式(" \\。");
静态正则表达式seg =新正则表达式(" \\([0-9] * / [ 0-9] *
\\)",RegexOptions.IgnoreCase);
struct Header
{
公共字符串麻木;
公共字符串主题;
公共字符串日期;
公共字符串来自;
公共字符串msg_id;
公共字符串字节;
}

static void Main(string [ ] args)
{
for(int x = 1; x< 2; x ++)
{
table = reg.Replace(groups [x],"" );
group = groups [x];
getheaders();
Console.WriteLine(有这么多标题
{0},master.Count);
标题一=(标题)主[0];
Console.WriteLine(第一个{0} {1},
one.numb,o ne.subject);
find();
master.Clear();
}

}
静态void getheaders()
{
conn.ConnectionString = myConnectionString;
conn.Open();
cmd.Connection = conn;
cmd.CommandText =" select * from" + table +"
其中主题如''%(%/%)%''";
MySql.Data.MySqlClient.MySqlDataReader reader;
reader = cmd.ExecuteReader() ;
标题h =新标题();
master = new ArrayList();
while(reader.Read())
{
h.numb = reader .GetValue(0).ToString();
h.subject = reader.GetValue(1).ToString();
h.from = reader.GetValue(2).ToString();
h.date = reader.GetValue(3).ToString();
h.msg_id = reader.GetValue(4).ToString();
h.bytes = reader.GetValue(5) .ToString();
master.Add(h);
}
reader.Close();
conn.Close();

}
static void find()
{while /(master.Count> 0)
{
Header start =(Header)master [0];
master.RemoveAt(0);
匹配m = seg.Match(start.subject);
字符串segsplit = m.ToString();
segsplit = segsplit.Replace( "(","");
segsplit = segsplit.Replace(")","");
string [] segments = segsplit.Split(''/ '');
int max = int.Parse(segments [1]);
max + = 1;
int counter = 1;
Header [] found = new Header [max];
string testsubject = seg.Replace
(start.subject,"");
int index = int.Parse(segments [0]);
// int temp = master.Count;
if(index< max)
{
发现[index] = start;
for(int x = 0; x< master.Count; x ++)
{
标题测试= (Header)master [x];
if(test.subject.Contains
(testsubject))
//
//master.Remove(test);
master .RemoveAt(x);
x = x - 1;
匹配t = seg.Match
(test.subject);
string tsplit = t.ToString();
string tsegsplit =
tsplit.Replace("(","");
tsegsplit = tsegsplit.Replace
(")","" );
string [] tsegments =
tsegsplit.Split(''/'');
index = int.Parse(tsegments
[0]);
if(index< max)
{
发现[index] = test;
counter ++;
}
}

}
//控制台.WriteLine(" counter = {0}",
counter);
int testmax = max-1;
if(counter == testmax)
{
master.TrimToSize();
Console.WriteLine(我们有匹配
{0},找到[1] .subject);
}
}
}
}

}
}
-
----------------- -----------------------------
发布于NewsLeecher v3.0 Final
* Binary Usenet Leeching Made Easy
* http://www.newsleecher.com/?usenet
-------- --------------------------------------
I know there are ways to make this a lot faster. Any
newsreader does this in seconds. I don''t know how they do
it and I am very new to c#. If anyone knows a faster way
please let me know. All I am doing is quering the db for
all the headers for a certain group and then going through
them to find all the parts of each post. I only want ones
that are complete. Meaning all segments for that one file
posted are there.

using System;
using System.Collections;
using System.Text;
using MySql.Data;
using System.Text.RegularExpressions;

namespace createfiles
{
class Program
{
static MySql.Data.MySqlClient.MySqlConnection conn
= new MySql.Data.MySqlClient.MySqlConnection();
static MySql.Data.MySqlClient.MySqlCommand cmd =
new MySql.Data.MySqlClient.MySqlCommand();
static string myConnectionString = "server=
127.0.0.1;uid=root;pwd=password;database=test;";
static ArrayList master;
static string group;
static string table;
static string[] groups = {
"alt.binaries.games.xbox", "alt.binaries.games.xbox360",
"alt.binaries.vcd" };
static Regex reg = new Regex("\\.");
static Regex seg = new Regex("\\([0-9]*/[0-9]*
\\)",RegexOptions.IgnoreCase);
struct Header
{
public string numb;
public string subject;
public string date;
public string from;
public string msg_id;
public string bytes;
}

static void Main(string[] args)
{
for (int x = 1; x < 2; x++)
{
table = reg.Replace(groups[x], "");
group = groups[x];
getheaders();
Console.WriteLine("Have this many headers
{0}", master.Count);
Header one = (Header)master[0];
Console.WriteLine("first one {0} {1}",
one.numb, one.subject);
find();
master.Clear();
}

}
static void getheaders()
{
conn.ConnectionString = myConnectionString;
conn.Open();
cmd.Connection = conn;
cmd.CommandText = "select * from " + table + "
where subject like ''%(%/%)%''";
MySql.Data.MySqlClient.MySqlDataReader reader;
reader = cmd.ExecuteReader();
Header h = new Header();
master = new ArrayList();
while (reader.Read())
{
h.numb = reader.GetValue(0).ToString();
h.subject = reader.GetValue(1).ToString();
h.from = reader.GetValue(2).ToString();
h.date = reader.GetValue(3).ToString();
h.msg_id = reader.GetValue(4).ToString();
h.bytes = reader.GetValue(5).ToString();
master.Add(h);
}
reader.Close();
conn.Close();

}
static void find()
{
while (master.Count > 0)
{
Header start = (Header)master[0];
master.RemoveAt(0);
Match m = seg.Match(start.subject);
string segsplit = m.ToString();
segsplit = segsplit.Replace("(", "");
segsplit = segsplit.Replace(")", "");
string[] segments = segsplit.Split(''/'');
int max = int.Parse(segments[1]);
max += 1;
int counter = 1;
Header[] found = new Header[max];
string testsubject = seg.Replace
(start.subject, "");
int index = int.Parse(segments[0]);
//int temp = master.Count;
if (index < max)
{
found[index] = start;
for (int x = 0; x < master.Count; x++)
{
Header test = (Header)master[x];
if (test.subject.Contains
(testsubject))
{
//master.Remove(test);
master.RemoveAt(x);
x = x - 1;
Match t = seg.Match
(test.subject);
string tsplit = t.ToString();
string tsegsplit =
tsplit.Replace("(", "");
tsegsplit = tsegsplit.Replace
(")", "");
string[] tsegments =
tsegsplit.Split(''/'');
index = int.Parse(tsegments
[0]);
//Console.WriteLine(counter);
if (index < max)
{
found[index] = test;
counter++;
}
}

}
//Console.WriteLine("counter = {0}",
counter);
int testmax = max-1;
if (counter == testmax)
{
master.TrimToSize();
Console.WriteLine("We Have a Match
{0}", found[1].subject);
}
}
}
}

}
}
--
----------------------------------------------
Posted with NewsLeecher v3.0 Final
* Binary Usenet Leeching Made Easy
* http://www.newsleecher.com/?usenet
----------------------------------------------



它抓取标题的表是临时的。我还没有写完其余的编程。它会从db

中删除单个帖子完成的标题。此外,我只做特定的

组,因此这段时间的部分还不是问题。稍后重做那个

主要是想让这个目前更快地运行。现在每个表中至少有一百万个标题,如果我只从其中一个中拉出
它将需要大约500mems的ram和关于

与VM相同。就正则表达式而言,我不确定你的意思。这是

在主题中找到每个帖子独有的模式并且大小不一。

。如果有办法让它更好,请告诉我。


Nicholas Paldino [.NET / C#MVP]写道:
the tables that it grabs the headers from is temporary. I don''t have
the rest of the prog wrote yet. it will remove the headers from the db
that are complete for a single post. Also I am only doing specific
groups so that part on the periods is not an issue yet. Will redo that
later mainly just want to get this to work faster at the moment. There
are at least 1 million headers in each table right now if I just pull
from one of them it will take up around 500megs of ram and about the
same for VM. As far as the regex I am not sure what you mean. It is
finding a pattern in the subjects that are unique to each post and vary
in size. If there is a way to make that better please tell me.

Nicholas Paldino [.NET/C# MVP] wrote:
最极端,

我可以看到你在这里做的一些事情。

首先,我要询问你的数据库结构。您将不同的标题存储在不同的表中,并将组的名称作为表格存储。我不知道这一定是个好主意。原因是所有表都具有相同的结构,并且它们都是相关的,唯一区分消息的是它们所属的组。
消息的单个表,并添加一个具有
消息所在组的名称的列。当然,消息可能是多个组(因为交叉插入)。在这种情况下,您将拥有另一个表格,其中包含一个组ID,以及该消息所在的表格的名称。这样做,您将有一个记录主表有消息详细信息,另外还有一个表格,说明
消息所在的组。

这样做也可以修复代码中的错误。您正在从表中的组名中删除
期间。这带来了以下情况。假设你可以有两组:

alt.my.stuff
alt.mystuff

在你的算法中,它们的处理方式相同,并且在相同的表。在MySql中,您应该能够使用某种转义机制来允许表名中的句点(如SQL
Server中的方括号)。

继续,我不会像你一样使用正则表达式来执行基本的
替换功能。我会在字符串类上使用Replace方法来执行此操作。我想你会发现这个更快。
同样适用于查找字符串(您在主题上匹配),以及分割功能。所有这些都是在字符串类中提供的,并且由于您没有使用通配符或模式,因此没有理由使用
正则表达式类。

数据阅读器,你不必调用ToString。您可以直接将结果转换为字符串。

最后,我建议立即从所有组中选出所有消息,然后按顺序处理它们。您可以按组名对结果进行排序,然后对其进行处理。这将使您免于重复访问数据库。

希望有所帮助。

-
- Nicholas Paldino [.NET / C#MVP]
- mv*@spam.guard.caspershouse.com

"最极端" <实例******* @ extremest.com>在消息中写道
新闻:mc ********************* @ fe01.usenetserver.com ...
Extremest,

There are a few things I can see you doing here.

First though, I have to ask about your database structure. You are
storing the different headers in different tables with the name of the group
as the table. I don''t know that this is necessarily a good idea. The
reason is that all of the tables share the same structure, and they are all
related, the only thing differentiating messages being the group that they
are in.

Because of that, I think that you should have one single table with
messages in them, and add a column which has the name of the group that the
message is in. Of course, the message could be in multiple groups (because
of crossposting). In this case, you would have another table which would
have a group id in it, as well as the name of the table that the message was
in. Doing this, you would then have a record in the main table which had
the message details, as well as another table saying which groups the
message was in.

Doing it like this also fixes an error in your code. You were removing
the periods from the group names in your tables. This brings up the
following situation. Hypothetically, you could have two groups:

alt.my.stuff
alt.mystuff

In your algorithm, they are treated the same way, and are in the same
table. In MySql, you should be able to use some sort of escape mechanism to
allow periods in your table names (something like square brackets in SQL
Server).

Moving on, I would not use regular expressions to perform basic
replacement functions as you are doing. I would use the Replace method on
the string class to do this. I think you will find this MUCH faster. The
same goes for the finding of a string (you match on the subject), as well as
the split functionality. All of this is offered on the string class, and
since you are not using wildcards or patterns, there is no reason to use the
regular expression classes.

When reading from the data reader, you don''t have to call ToString. You
can cast the results to string directly.

Finally, I would recommend selecting out all of the messages from all of
the groups out at once, then processing them in order. You can sort the
results by group name, and then process them. This will save you from
having to make repeat trips to the database.

Hope ths helps.
--
- Nicholas Paldino [.NET/C# MVP]
- mv*@spam.guard.caspershouse.com

"Extremest" <Ex*******@extremest.com> wrote in message
news:mc*********************@fe01.usenetserver.com ...
我知道有很多方法可以让它快得多。任何
新闻阅读器都能在几秒钟内完成。我不知道他们是怎么做的,而且我对c#很新。如果有人知道更快的方式
请告诉我。我正在做的就是查询数据库以获取某个组的所有标题,然后通过
查找每个帖子的所有部分。我只想要那些完整的。意味着发布了那个文件的所有部分


使用System;
使用System.Collections;
使用System.Text;
使用MySql .Data;
使用System.Text.RegularExpressions;

命名空间createfiles
{
类程序
{静态MySql.Data.MySqlClient .MySqlConnection conn
= new MySql.Data.MySqlClient.MySqlConnection();
static MySql.Data.MySqlClient.MySqlCommand cmd =
new MySql.Data.MySqlClient.MySqlCommand();
static string myConnectionString =" server =
127.0.0.1; uid = root; pwd = password; database = test;" ;;
static ArrayList master;
静态字符串组;
静态字符串表;
static string [] groups = {
" alt.binaries.games.xbox"," alt.binaries.games.xbox360",
" alt.binaries.vcd"静态正则表达式reg =新正则表达式(" \\。");
静态正则表达式seg =新正则表达式(" \\([0-9] * / [ 0-9] *
\\)",RegexOptions.IgnoreCase);
struct Header
{
公共字符串麻木;
公共字符串主题;
公共字符串日期;
公共字符串来自;
公共字符串msg_id;
公共字符串字节;
}

static void Main(string [ ] args)
{
for(int x = 1; x< 2; x ++)
{
table = reg.Replace(groups [x],"" );
group = groups [x];
getheaders();
Console.WriteLine(有这么多标题
{0},master.Count);
标题一=(标题)主[0];
Console.WriteLine(第一个{0} {1},
one.numb,o ne.subject);
find();
master.Clear();
}

}
静态void getheaders()
{
conn.ConnectionString = myConnectionString;
conn.Open();
cmd.Connection = conn;
cmd.CommandText =" select * from" + table +"
其中主题如''%(%/%)%''";
MySql.Data.MySqlClient.MySqlDataReader reader;
reader = cmd.ExecuteReader() ;
标题h =新标题();
master = new ArrayList();
while(reader.Read())
{
h.numb = reader .GetValue(0).ToString();
h.subject = reader.GetValue(1).ToString();
h.from = reader.GetValue(2).ToString();
h.date = reader.GetValue(3).ToString();
h.msg_id = reader.GetValue(4).ToString();
h.bytes = reader.GetValue(5) .ToString();
master.Add(h);
}
reader.Close();
conn.Close();

}
static void find()
{while /(master.Count> 0)
{
Header start =(Header)master [0];
master.RemoveAt(0);
匹配m = seg.Match(start.subject);
字符串segsplit = m.ToString();
segsplit = segsplit.Replace( "(","");
segsplit = segsplit.Replace(")","");
string [] segments = segsplit.Split(''/ '');
int max = int.Parse(segments [1]);
max + = 1;
int counter = 1;
Header [] found = new Header [max];
string testsubject = seg.Replace
(start.subject,"");
int index = int.Parse(segments [0]);
// int temp = master.Count;
if(index< max)
{
发现[index] = start;
for(int x = 0; x< master.Count; x ++)
{
标题测试= (Header)master [x];
if(test.subject.Contains
(testsubject))
//
//master.Remove(test);
master .RemoveAt(x);
x = x - 1;
匹配t = seg.Match
(test.subject);
string tsplit = t.ToString();
string tsegsplit =
tsplit.Replace("(","");
tsegsplit = tsegsplit.Replace
(")","" );
string [] tsegments =
tsegsplit.Split(''/'');
index = int.Parse(tsegments
[0]);
if(index< max)
{
发现[index] = test;
counter ++;
}
}

}
//控制台.WriteLine(" counter = {0}",
counter);
int testmax = max-1;
if(counter == testmax)
{
master.TrimToSize();
Console.WriteLine(我们有匹配
{0},找到[1] .subject);
}
}
}
}

}
}
-
----------------- -----------------------------
发布于NewsLeecher v3.0 Final
* Binary Usenet Leeching Made Easy
* http://www.newsleecher.com/?usenet
-------- --------------------------------------
I know there are ways to make this a lot faster. Any
newsreader does this in seconds. I don''t know how they do
it and I am very new to c#. If anyone knows a faster way
please let me know. All I am doing is quering the db for
all the headers for a certain group and then going through
them to find all the parts of each post. I only want ones
that are complete. Meaning all segments for that one file
posted are there.

using System;
using System.Collections;
using System.Text;
using MySql.Data;
using System.Text.RegularExpressions;

namespace createfiles
{
class Program
{
static MySql.Data.MySqlClient.MySqlConnection conn
= new MySql.Data.MySqlClient.MySqlConnection();
static MySql.Data.MySqlClient.MySqlCommand cmd =
new MySql.Data.MySqlClient.MySqlCommand();
static string myConnectionString = "server=
127.0.0.1;uid=root;pwd=password;database=test;";
static ArrayList master;
static string group;
static string table;
static string[] groups = {
"alt.binaries.games.xbox", "alt.binaries.games.xbox360",
"alt.binaries.vcd" };
static Regex reg = new Regex("\\.");
static Regex seg = new Regex("\\([0-9]*/[0-9]*
\\)",RegexOptions.IgnoreCase);
struct Header
{
public string numb;
public string subject;
public string date;
public string from;
public string msg_id;
public string bytes;
}

static void Main(string[] args)
{
for (int x = 1; x < 2; x++)
{
table = reg.Replace(groups[x], "");
group = groups[x];
getheaders();
Console.WriteLine("Have this many headers
{0}", master.Count);
Header one = (Header)master[0];
Console.WriteLine("first one {0} {1}",
one.numb, one.subject);
find();
master.Clear();
}

}
static void getheaders()
{
conn.ConnectionString = myConnectionString;
conn.Open();
cmd.Connection = conn;
cmd.CommandText = "select * from " + table + "
where subject like ''%(%/%)%''";
MySql.Data.MySqlClient.MySqlDataReader reader;
reader = cmd.ExecuteReader();
Header h = new Header();
master = new ArrayList();
while (reader.Read())
{
h.numb = reader.GetValue(0).ToString();
h.subject = reader.GetValue(1).ToString();
h.from = reader.GetValue(2).ToString();
h.date = reader.GetValue(3).ToString();
h.msg_id = reader.GetValue(4).ToString();
h.bytes = reader.GetValue(5).ToString();
master.Add(h);
}
reader.Close();
conn.Close();

}
static void find()
{
while (master.Count > 0)
{
Header start = (Header)master[0];
master.RemoveAt(0);
Match m = seg.Match(start.subject);
string segsplit = m.ToString();
segsplit = segsplit.Replace("(", "");
segsplit = segsplit.Replace(")", "");
string[] segments = segsplit.Split(''/'');
int max = int.Parse(segments[1]);
max += 1;
int counter = 1;
Header[] found = new Header[max];
string testsubject = seg.Replace
(start.subject, "");
int index = int.Parse(segments[0]);
//int temp = master.Count;
if (index < max)
{
found[index] = start;
for (int x = 0; x < master.Count; x++)
{
Header test = (Header)master[x];
if (test.subject.Contains
(testsubject))
{
//master.Remove(test);
master.RemoveAt(x);
x = x - 1;
Match t = seg.Match
(test.subject);
string tsplit = t.ToString();
string tsegsplit =
tsplit.Replace("(", "");
tsegsplit = tsegsplit.Replace
(")", "");
string[] tsegments =
tsegsplit.Split(''/'');
index = int.Parse(tsegments
[0]);
//Console.WriteLine(counter);
if (index < max)
{
found[index] = test;
counter++;
}
}

}
//Console.WriteLine("counter = {0}",
counter);
int testmax = max-1;
if (counter == testmax)
{
master.TrimToSize();
Console.WriteLine("We Have a Match
{0}", found[1].subject);
}
}
}
}

}
}
--
----------------------------------------------
Posted with NewsLeecher v3.0 Final
* Binary Usenet Leeching Made Easy
* http://www.newsleecher.com/?usenet
----------------------------------------------






关于正则表达式,为什么不在

字符串类上使用IndexOf方法呢?你使用正则表达式获得了什么?正则表达式

表现无疑会更慢(以及分割

操作)。


-

- Nicholas Paldino [.NET / C#MVP]

- mv * @ spam .guard.caspershouse.com


< dn ********** @ charter.net>在消息中写道

news:11 ********************* @ c74g2000cwc.googlegro ups.com ...
In regards to the regex, why not just use the IndexOf method on the
string class? What are you gaining from using a regex? The regex
performance is undoubtedly going to be slower (as well as the split
operation as well).

--
- Nicholas Paldino [.NET/C# MVP]
- mv*@spam.guard.caspershouse.com

<dn**********@charter.net> wrote in message
news:11*********************@c74g2000cwc.googlegro ups.com...
它抓取标题的表是临时的。我没有
其余的编写了。它将删除db
中为单个帖子完成的标题。此外,我只是在做特定的
小组,所以这个时期的部分还不是问题。稍后重做,主要是想让这个更快地工作。现在每个表中至少有100万个标题如果我从其中一个中拉出它将占用大约500mems的RAM并且对于VM来说也是如此。就正则表达式而言,我不确定你的意思。它是在主题中找到每个帖子独有的模式并且大小不一。如果有办法让它更好,请告诉我。

Nicholas Paldino [.NET / C#MVP]写道:
the tables that it grabs the headers from is temporary. I don''t have
the rest of the prog wrote yet. it will remove the headers from the db
that are complete for a single post. Also I am only doing specific
groups so that part on the periods is not an issue yet. Will redo that
later mainly just want to get this to work faster at the moment. There
are at least 1 million headers in each table right now if I just pull
from one of them it will take up around 500megs of ram and about the
same for VM. As far as the regex I am not sure what you mean. It is
finding a pattern in the subjects that are unique to each post and vary
in size. If there is a way to make that better please tell me.

Nicholas Paldino [.NET/C# MVP] wrote:
最极端,

我可以看到你在这里做的一些事情。

首先,我不得不询问你的数据库结构。您将不同的标题存储在不同的表中,并将
组的名称作为表格存储。我不知道这一定是个好主意。原因是所有表都具有相同的结构,并且它们都是相关的,唯一区分消息的是它们是
的组。因此,我认为你应该有一个单独的表格,其中包含
消息,并添加一个具有该组名称的列
消息是在。当然,消息可以在多个组中
(因为交叉发布)。在这种情况下,您将拥有另一个表格,其中包含一个组ID,以及该消息
所在表格的名称。这样做,您就可以了在主表中有一条记录,其中有消息详细信息,还有另一张表说明了
消息所在的组。

这样做也可以修复错误在你的代码中。您从表中的组名中删除了句点。这带来了以下情况。假设你可以有两组:

alt.my.stuff
alt.mystuff

在你的算法中,它们的处理方式相同,并且在相同的表。在MySql中,您应该能够使用某种转义机制来确定表名中的句点(如SQL
Server中的方括号)。
继续,我不会像你一样使用正则表达式来执行基本的
替换功能。我会在字符串类上使用Replace方法
来执行此操作。我想你会发现这个更快。

同样适用于找到一个字符串(你在主题上匹配),以及
分裂功能。所有这些都是在字符串类中提供的,并且因为你没有使用通配符或模式,所以没有理由使用

正则表达式类。
从数据读取器读取时,您不必调用ToString。
您可以直接将结果转换为字符串。

最后,我建议选择所有来自所有组的消息,然后按顺序处理它们。您可以按组名对结果进行排序,然后对其进行处理。这将使您免于重复访问数据库。

希望有所帮助。

-
- Nicholas Paldino [.NET / C#MVP]
- mv*@spam.guard.caspershouse.com

"最极端" <实例******* @ extremest.com>在消息中写道
新闻:mc ********************* @ fe01.usenetserver.com ...
Extremest,

There are a few things I can see you doing here.

First though, I have to ask about your database structure. You are
storing the different headers in different tables with the name of the
group
as the table. I don''t know that this is necessarily a good idea. The
reason is that all of the tables share the same structure, and they are
all
related, the only thing differentiating messages being the group that
they
are in.

Because of that, I think that you should have one single table with
messages in them, and add a column which has the name of the group that
the
message is in. Of course, the message could be in multiple groups
(because
of crossposting). In this case, you would have another table which would
have a group id in it, as well as the name of the table that the message
was
in. Doing this, you would then have a record in the main table which had
the message details, as well as another table saying which groups the
message was in.

Doing it like this also fixes an error in your code. You were
removing
the periods from the group names in your tables. This brings up the
following situation. Hypothetically, you could have two groups:

alt.my.stuff
alt.mystuff

In your algorithm, they are treated the same way, and are in the same
table. In MySql, you should be able to use some sort of escape mechanism
to
allow periods in your table names (something like square brackets in SQL
Server).

Moving on, I would not use regular expressions to perform basic
replacement functions as you are doing. I would use the Replace method
on
the string class to do this. I think you will find this MUCH faster.
The
same goes for the finding of a string (you match on the subject), as well
as
the split functionality. All of this is offered on the string class, and
since you are not using wildcards or patterns, there is no reason to use
the
regular expression classes.

When reading from the data reader, you don''t have to call ToString.
You
can cast the results to string directly.

Finally, I would recommend selecting out all of the messages from all
of
the groups out at once, then processing them in order. You can sort the
results by group name, and then process them. This will save you from
having to make repeat trips to the database.

Hope ths helps.
--
- Nicholas Paldino [.NET/C# MVP]
- mv*@spam.guard.caspershouse.com

"Extremest" <Ex*******@extremest.com> wrote in message
news:mc*********************@fe01.usenetserver.com ...
>我知道有很多方法可以让它快得多。任何
>新闻阅读器在几秒钟内完成。我不知道他们是怎么做的
>它和我对c#很新。如果有人知道更快的方式
>请告诉我。我正在做的就是查询数据库
>某个组的所有标题然后通过
>他们找到每个帖子的所有部分。我只想要一些
>这是完整的。 Meaning all segments for that one file
> posted are there.
>
> using System;
> using System.Collections;
> using System.Text;
> using MySql.Data;
> using System.Text.RegularExpressions;
>
> namespace createfiles
> {
> class Program
> {
> static MySql.Data.MySqlClient.MySqlConnection conn
> = new MySql.Data.MySqlClient.MySqlConnection();
> static MySql.Data.MySqlClient.MySqlCommand cmd =
> new MySql.Data.MySqlClient.MySqlCommand();
> static string myConnectionString = "server=
> 127.0.0.1;uid=root;pwd=password;database=test;";
> static ArrayList master;
> static string group;
> static string table;
> static string[] groups = {
> "alt.binaries.games.xbox", "alt.binaries.games.xbox360",
> "alt.binaries.vcd" };
> static Regex reg = new Regex("\\.");
> static Regex seg = new Regex("\\([0-9]*/[0-9]*
> \\)",RegexOptions.IgnoreCase);
> struct Header
> {
> public string numb;
> public string subject;
> public string date;
> public string from;
> public string msg_id;
> public string bytes;
> }
>
> static void Main(string[] args)
> {
> for (int x = 1; x < 2; x++)
> {
> table = reg.Replace(groups[x], "");
> group = groups[x];
> getheaders();
> Console.WriteLine("Have this many headers
> {0}", master.Count);
> Header one = (Header)master[0];
> Console.WriteLine("first one {0} {1}",
> one.numb, one.subject);
> find();
> master.Clear();
> }
>
> }
> static void getheaders()
> {
> conn.ConnectionString = myConnectionString;
> conn.Open();
> cmd.Connection = conn;
> cmd.CommandText = "select * from " + table + "
> where subject like ’’%(%/%)%’’";
> MySql.Data.MySqlClient.MySqlDataReader reader;
> reader = cmd.ExecuteReader();
> Header h = new Header();
> master = new ArrayList();
> while (reader.Read())
> {
> h.numb = reader.GetValue(0).ToString();
> h.subject = reader.GetValue(1).ToString();
> h.from = reader.GetValue(2).ToString();
> h.date = reader.GetValue(3).ToString();
> h.msg_id = reader.GetValue(4).ToString();
> h.bytes = reader.GetValue(5).ToString();
> master.Add(h);
> }
> reader.Close();
> conn.Close();
>
> }
> static void find()
> {
> while (master.Count > 0)
> {
> Header start = (Header)master[0];
> master.RemoveAt(0);
> Match m = seg.Match(start.subject);
> string segsplit = m.ToString();
> segsplit = segsplit.Replace("(", "");
> segsplit = segsplit.Replace(")", "");
> string[] segments = segsplit.Split(’’/’’);
> int max = int.Parse(segments[1]);
> max += 1;
> int counter = 1;
> Header[] found = new Header[max];
> string testsubject = seg.Replace
> (start.subject, "");
> int index = int.Parse(segments[0]);
> //int temp = master.Count;
> if (index < max)
> {
> found[index] = start;
> for (int x = 0; x < master.Count; x++)
> {
> Header test = (Header)master[x];
> if (test.subject.Contains
> (testsubject))
> {
> //master.Remove(test);
> master.RemoveAt(x);
> x = x - 1;
> Match t = seg.Match
> (test.subject);
> string tsplit = t.ToString();
> string tsegsplit =
> tsplit.Replace("(", "");
> tsegsplit = tsegsplit.Replace
> (")", "");
> string[] tsegments =
> tsegsplit.Split(’’/’’);
> index = int.Parse(tsegments
> [0]);
> //Console.WriteLine(counter);
> if (index < max)
> {
> found[index] = test;
> counter++;
> }
> }
>
> }
> //Console.WriteLine("counter = {0}",
> counter);
> int testmax = max-1;
> if (counter == testmax)
> {
> master.TrimToSize();
> Console.WriteLine("We Have a Match
> {0}", found[1].subject);
> }
> }
> }
> }
>
> }
> }
> -
> ----------------------------------------------
> Posted with NewsLeecher v3.0 Final
> * Binary Usenet Leeching Made Easy
> * http://www.newsleecher.com/?usenet
> ----------------------------------------------
>
>I know there are ways to make this a lot faster. Any
> newsreader does this in seconds. I don''t know how they do
> it and I am very new to c#. If anyone knows a faster way
> please let me know. All I am doing is quering the db for
> all the headers for a certain group and then going through
> them to find all the parts of each post. I only want ones
> that are complete. Meaning all segments for that one file
> posted are there.
>
> using System;
> using System.Collections;
> using System.Text;
> using MySql.Data;
> using System.Text.RegularExpressions;
>
> namespace createfiles
> {
> class Program
> {
> static MySql.Data.MySqlClient.MySqlConnection conn
> = new MySql.Data.MySqlClient.MySqlConnection();
> static MySql.Data.MySqlClient.MySqlCommand cmd =
> new MySql.Data.MySqlClient.MySqlCommand();
> static string myConnectionString = "server=
> 127.0.0.1;uid=root;pwd=password;database=test;";
> static ArrayList master;
> static string group;
> static string table;
> static string[] groups = {
> "alt.binaries.games.xbox", "alt.binaries.games.xbox360",
> "alt.binaries.vcd" };
> static Regex reg = new Regex("\\.");
> static Regex seg = new Regex("\\([0-9]*/[0-9]*
> \\)",RegexOptions.IgnoreCase);
> struct Header
> {
> public string numb;
> public string subject;
> public string date;
> public string from;
> public string msg_id;
> public string bytes;
> }
>
> static void Main(string[] args)
> {
> for (int x = 1; x < 2; x++)
> {
> table = reg.Replace(groups[x], "");
> group = groups[x];
> getheaders();
> Console.WriteLine("Have this many headers
> {0}", master.Count);
> Header one = (Header)master[0];
> Console.WriteLine("first one {0} {1}",
> one.numb, one.subject);
> find();
> master.Clear();
> }
>
> }
> static void getheaders()
> {
> conn.ConnectionString = myConnectionString;
> conn.Open();
> cmd.Connection = conn;
> cmd.CommandText = "select * from " + table + "
> where subject like ''%(%/%)%''";
> MySql.Data.MySqlClient.MySqlDataReader reader;
> reader = cmd.ExecuteReader();
> Header h = new Header();
> master = new ArrayList();
> while (reader.Read())
> {
> h.numb = reader.GetValue(0).ToString();
> h.subject = reader.GetValue(1).ToString();
> h.from = reader.GetValue(2).ToString();
> h.date = reader.GetValue(3).ToString();
> h.msg_id = reader.GetValue(4).ToString();
> h.bytes = reader.GetValue(5).ToString();
> master.Add(h);
> }
> reader.Close();
> conn.Close();
>
> }
> static void find()
> {
> while (master.Count > 0)
> {
> Header start = (Header)master[0];
> master.RemoveAt(0);
> Match m = seg.Match(start.subject);
> string segsplit = m.ToString();
> segsplit = segsplit.Replace("(", "");
> segsplit = segsplit.Replace(")", "");
> string[] segments = segsplit.Split(''/'');
> int max = int.Parse(segments[1]);
> max += 1;
> int counter = 1;
> Header[] found = new Header[max];
> string testsubject = seg.Replace
> (start.subject, "");
> int index = int.Parse(segments[0]);
> //int temp = master.Count;
> if (index < max)
> {
> found[index] = start;
> for (int x = 0; x < master.Count; x++)
> {
> Header test = (Header)master[x];
> if (test.subject.Contains
> (testsubject))
> {
> //master.Remove(test);
> master.RemoveAt(x);
> x = x - 1;
> Match t = seg.Match
> (test.subject);
> string tsplit = t.ToString();
> string tsegsplit =
> tsplit.Replace("(", "");
> tsegsplit = tsegsplit.Replace
> (")", "");
> string[] tsegments =
> tsegsplit.Split(''/'');
> index = int.Parse(tsegments
> [0]);
> //Console.WriteLine(counter);
> if (index < max)
> {
> found[index] = test;
> counter++;
> }
> }
>
> }
> //Console.WriteLine("counter = {0}",
> counter);
> int testmax = max-1;
> if (counter == testmax)
> {
> master.TrimToSize();
> Console.WriteLine("We Have a Match
> {0}", found[1].subject);
> }
> }
> }
> }
>
> }
> }
> --
> ----------------------------------------------
> Posted with NewsLeecher v3.0 Final
> * Binary Usenet Leeching Made Easy
> * http://www.newsleecher.com/?usenet
> ----------------------------------------------
>



这篇关于任何人都可以告诉我我能做些什么来加快速度吗?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
相关文章
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆