如何使用 HttpWebRequest 从网站下载文件(请求被重定向到带有“同意”按钮的页面) [英] How do I download a file from a website using httpwebrequest that has been redirected to a page with an "agree" button
本文介绍了如何使用 HttpWebRequest 从会重定向到带有“同意”按钮页面的网站下载文件,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
// Question code: requests the FedACH directory URL and parses the response body
// one line per record into rnList. Returns null (after logging a warning) on any failure.
// NOTE(review): this only follows automatic HTTP redirects; per the question, the site
// redirects to a page with an "agree" button, so the lines read below are presumably
// that page's content rather than the directory file.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(ACHJobsConfig.FederalReserveBankDirectoryURL);

// Set some reasonable limits on resources used by this request
request.MaximumAutomaticRedirections = 4;
request.MaximumResponseHeadersLength = 4;

// Set credentials to use for this request.
request.Credentials = CredentialCache.DefaultCredentials;

HttpWebResponse response = null;
try
{
    // This does the actual downloading of the file
    response = (HttpWebResponse)request.GetResponse();
}
catch (Exception ex)
{
    Logger.Warn(ex.Message);
    return null;
}

if (response == null)
{
    Logger.Warn("Attempt to download ACH directory file did not return a response.");
    return null;
}

// Here we are going to read the contents of the downloaded file
// one line/record at a time. Each line/record defines a bank routing number.
// The format of the record is documented in the FedACHDirectoryRecord class.
StreamReader readStream = null;
List<FedACHDirectoryRecord> rnList = new List<FedACHDirectoryRecord>();
try
{
    // Get the stream associated with the response.
    Stream receiveStream = response.GetResponseStream();

    // Pipe the stream to a higher level stream reader with the required encoding format.
    readStream = new StreamReader(receiveStream, Encoding.UTF8);

    // Read all the lines out of the downloaded file stream.
    // Each line defines a bank routing number (a banking institution).
    string text = null;
    while ((text = readStream.ReadLine()) != null)
    {
        // Wrap the line in a record instance (provides access to fields)
        FedACHDirectoryRecord rec = new FedACHDirectoryRecord(text);

        // Build the list, record by record
        rnList.Add(rec);
    }
}
catch (Exception ex)
{
    Logger.Warn(ex.Message);
    return null;
}
finally
{
    // Clean up: the reader may not have been created if GetResponseStream threw.
    if (readStream != null)
    {
        readStream.Close();
    }
    response.Close();
}

// Here's what we downloaded
Logger.InfoFormat("{0} records downloaded from {1}", rnList.Count, ACHJobsConfig.FederalReserveBankDirectoryURL);
解决方案
// Solution code: downloads the FedACH directory file from a site that first redirects
// to an agreement page. It requests the file, POSTs the "Agree" form relative to the
// page it was redirected to, and parses the POST response one line per record into rnList.
// Both requests share one CookieContainer so the session carries over to the POST.
CookieContainer cookies = new CookieContainer();
List<FedACHDirectoryRecord> rnList = new List<FedACHDirectoryRecord>();
try
{
    // Could not use https as there is a certificate issue.
    // Fell back to http.
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(ACHJobsConfig.FederalReserveBankDirectoryURL);

    // Cookies for the session (the container is still empty at this point)
    request.CookieContainer = cookies;

    // Set some reasonable limits on resources used by this request
    request.MaximumAutomaticRedirections = 4;
    request.MaximumResponseHeadersLength = 4;

    // Set credentials to use for this request.
    request.Credentials = CredentialCache.DefaultCredentials;

    // Get the response on the request
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        // The page we ended up on after any redirects.
        Uri agreementPage = response.ResponseUri;

        // Copy the cookies that were sent in the request's "Cookie" header into the
        // container used by the form post. Look the header up by name: enumerating
        // request.Headers yields header-name strings, not WebHeaderCollection instances.
        string cookieHeader = request.Headers["Cookie"];
        if (!string.IsNullOrEmpty(cookieHeader))
        {
            // The header may carry several "name=value" pairs separated by ';'.
            foreach (string pair in cookieHeader.Split(';'))
            {
                // Split on the first '=' only; the value itself may contain '='.
                int separatorIndex = pair.IndexOf('=');
                if (separatorIndex <= 0)
                {
                    continue;
                }

                string cookieName = pair.Substring(0, separatorIndex).Trim();
                if (cookieName.Length == 0)
                {
                    // Cookie.Name rejects an empty name, so skip such a pair.
                    continue;
                }

                Cookie cookie = new Cookie();
                cookie.Name = cookieName;
                cookie.Value = pair.Substring(separatorIndex + 1).Trim();
                // Host, not Authority: Authority can include a port number.
                cookie.Domain = agreementPage.Host;
                cookie.HttpOnly = true;
                // Only mark the cookie Secure when the page really is https;
                // otherwise it would not be sent with an http form post.
                cookie.Secure = string.Equals(agreementPage.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase);
                cookies.Add(cookie);
            }
        }

        // Value to post and the url to post it to: submitAgreement.do next to the
        // page we were redirected to (relative resolution ignores any query string).
        string postData = "agreementValue=Agree";
        Uri submitUri = new Uri(agreementPage, "submitAgreement.do");

        // Post to the form on the redirected page
        HttpWebRequest formPostRequest = (HttpWebRequest)WebRequest.Create(submitUri);
        formPostRequest.CookieContainer = cookies;
        formPostRequest.Credentials = CredentialCache.DefaultCredentials;
        formPostRequest.Method = WebRequestMethods.Http.Post;
        formPostRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2";
        formPostRequest.AllowWriteStreamBuffering = true;
        formPostRequest.ProtocolVersion = HttpVersion.Version11;
        formPostRequest.Referer = agreementPage.AbsoluteUri;
        formPostRequest.ContentType = "application/x-www-form-urlencoded";

        byte[] byteArray = Encoding.ASCII.GetBytes(postData);
        formPostRequest.ContentLength = byteArray.Length;

        // Send the form data; the using block closes the request stream even if Write throws.
        using (Stream newStream = formPostRequest.GetRequestStream())
        {
            newStream.Write(byteArray, 0, byteArray.Length);
        }

        // The response to the form post carries the directory (.txt) data.
        using (HttpWebResponse formPostResponse = (HttpWebResponse)formPostRequest.GetResponse())
        using (StreamReader readStream = new StreamReader(formPostResponse.GetResponseStream()))
        {
            // Read the downloaded content one line/record at a time.
            // Each line/record defines a bank routing number; the record format is
            // documented in the FedACHDirectoryRecord class.
            string text = null;
            while ((text = readStream.ReadLine()) != null)
            {
                // Wrap the line in a record instance (provides access to fields)
                FedACHDirectoryRecord rec = new FedACHDirectoryRecord(text);

                // Build the list, record by record
                rnList.Add(rec);
            }
        }
    }
}
catch (Exception ex)
{
    // The snippet as posted never closes its try block. Log the failure and
    // return null, as the question's code does.
    Logger.Warn(ex.Message);
    return null;
}
我提交了上述代码,以防万一其他人需要帮助
// Question code: requests the FedACH directory URL and parses the response body
// one line per record into rnList. Returns null (after logging a warning) on any failure.
// NOTE(review): this only follows automatic HTTP redirects; per the question, the site
// redirects to a page with an "agree" button, so the lines read below are presumably
// that page's content rather than the directory file.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(ACHJobsConfig.FederalReserveBankDirectoryURL);

// Set some reasonable limits on resources used by this request
request.MaximumAutomaticRedirections = 4;
request.MaximumResponseHeadersLength = 4;

// Set credentials to use for this request.
request.Credentials = CredentialCache.DefaultCredentials;

HttpWebResponse response = null;
try
{
    // This does the actual downloading of the file
    response = (HttpWebResponse)request.GetResponse();
}
catch (Exception ex)
{
    Logger.Warn(ex.Message);
    return null;
}

if (response == null)
{
    Logger.Warn("Attempt to download ACH directory file did not return a response.");
    return null;
}

// Here we are going to read the contents of the downloaded file
// one line/record at a time. Each line/record defines a bank routing number.
// The format of the record is documented in the FedACHDirectoryRecord class.
StreamReader readStream = null;
List<FedACHDirectoryRecord> rnList = new List<FedACHDirectoryRecord>();
try
{
    // Get the stream associated with the response.
    Stream receiveStream = response.GetResponseStream();

    // Pipe the stream to a higher level stream reader with the required encoding format.
    readStream = new StreamReader(receiveStream, Encoding.UTF8);

    // Read all the lines out of the downloaded file stream.
    // Each line defines a bank routing number (a banking institution).
    string text = null;
    while ((text = readStream.ReadLine()) != null)
    {
        // Wrap the line in a record instance (provides access to fields)
        FedACHDirectoryRecord rec = new FedACHDirectoryRecord(text);

        // Build the list, record by record
        rnList.Add(rec);
    }
}
catch (Exception ex)
{
    Logger.Warn(ex.Message);
    return null;
}
finally
{
    // Clean up: the reader may not have been created if GetResponseStream threw.
    if (readStream != null)
    {
        readStream.Close();
    }
    response.Close();
}

// Here's what we downloaded
Logger.InfoFormat("{0} records downloaded from {1}", rnList.Count, ACHJobsConfig.FederalReserveBankDirectoryURL);
解决方案
// Solution code: downloads the FedACH directory file from a site that first redirects
// to an agreement page. It requests the file, POSTs the "Agree" form relative to the
// page it was redirected to, and parses the POST response one line per record into rnList.
// Both requests share one CookieContainer so the session carries over to the POST.
CookieContainer cookies = new CookieContainer();
List<FedACHDirectoryRecord> rnList = new List<FedACHDirectoryRecord>();
try
{
    // Could not use https as there is a certificate issue.
    // Fell back to http.
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(ACHJobsConfig.FederalReserveBankDirectoryURL);

    // Cookies for the session (the container is still empty at this point)
    request.CookieContainer = cookies;

    // Set some reasonable limits on resources used by this request
    request.MaximumAutomaticRedirections = 4;
    request.MaximumResponseHeadersLength = 4;

    // Set credentials to use for this request.
    request.Credentials = CredentialCache.DefaultCredentials;

    // Get the response on the request
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        // The page we ended up on after any redirects.
        Uri agreementPage = response.ResponseUri;

        // Copy the cookies that were sent in the request's "Cookie" header into the
        // container used by the form post. Look the header up by name: enumerating
        // request.Headers yields header-name strings, not WebHeaderCollection instances.
        string cookieHeader = request.Headers["Cookie"];
        if (!string.IsNullOrEmpty(cookieHeader))
        {
            // The header may carry several "name=value" pairs separated by ';'.
            foreach (string pair in cookieHeader.Split(';'))
            {
                // Split on the first '=' only; the value itself may contain '='.
                int separatorIndex = pair.IndexOf('=');
                if (separatorIndex <= 0)
                {
                    continue;
                }

                string cookieName = pair.Substring(0, separatorIndex).Trim();
                if (cookieName.Length == 0)
                {
                    // Cookie.Name rejects an empty name, so skip such a pair.
                    continue;
                }

                Cookie cookie = new Cookie();
                cookie.Name = cookieName;
                cookie.Value = pair.Substring(separatorIndex + 1).Trim();
                // Host, not Authority: Authority can include a port number.
                cookie.Domain = agreementPage.Host;
                cookie.HttpOnly = true;
                // Only mark the cookie Secure when the page really is https;
                // otherwise it would not be sent with an http form post.
                cookie.Secure = string.Equals(agreementPage.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase);
                cookies.Add(cookie);
            }
        }

        // Value to post and the url to post it to: submitAgreement.do next to the
        // page we were redirected to (relative resolution ignores any query string).
        string postData = "agreementValue=Agree";
        Uri submitUri = new Uri(agreementPage, "submitAgreement.do");

        // Post to the form on the redirected page
        HttpWebRequest formPostRequest = (HttpWebRequest)WebRequest.Create(submitUri);
        formPostRequest.CookieContainer = cookies;
        formPostRequest.Credentials = CredentialCache.DefaultCredentials;
        formPostRequest.Method = WebRequestMethods.Http.Post;
        formPostRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2";
        formPostRequest.AllowWriteStreamBuffering = true;
        formPostRequest.ProtocolVersion = HttpVersion.Version11;
        formPostRequest.Referer = agreementPage.AbsoluteUri;
        formPostRequest.ContentType = "application/x-www-form-urlencoded";

        byte[] byteArray = Encoding.ASCII.GetBytes(postData);
        formPostRequest.ContentLength = byteArray.Length;

        // Send the form data; the using block closes the request stream even if Write throws.
        using (Stream newStream = formPostRequest.GetRequestStream())
        {
            newStream.Write(byteArray, 0, byteArray.Length);
        }

        // The response to the form post carries the directory (.txt) data.
        using (HttpWebResponse formPostResponse = (HttpWebResponse)formPostRequest.GetResponse())
        using (StreamReader readStream = new StreamReader(formPostResponse.GetResponseStream()))
        {
            // Read the downloaded content one line/record at a time.
            // Each line/record defines a bank routing number; the record format is
            // documented in the FedACHDirectoryRecord class.
            string text = null;
            while ((text = readStream.ReadLine()) != null)
            {
                // Wrap the line in a record instance (provides access to fields)
                FedACHDirectoryRecord rec = new FedACHDirectoryRecord(text);

                // Build the list, record by record
                rnList.Add(rec);
            }
        }
    }
}
catch (Exception ex)
{
    // The snippet as posted never closes its try block. Log the failure and
    // return null, as the question's code does.
    Logger.Warn(ex.Message);
    return null;
}
I submitted the above code, just in case anyone else needs help.
这篇关于如何使用 HttpWebRequest 从会重定向到带有“同意”按钮页面的网站下载文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!
查看全文