如何使用 HttpWebRequest 从网站下载文件(请求被重定向到带有“同意”按钮的页面) [英] How do I download a file from a website using httpwebrequest that has been redirected to a page with an "agree" button

查看:90
本文介绍了如何使用 HttpWebRequest 从网站下载文件(请求被重定向到带有“同意”按钮的页面)的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

 HttpWebRequest request = (HttpWebRequest)WebRequest.Create(ACHJobsConfig.FederalReserveBankDirectoryURL);

// Set some reasonable limits on resources used by this request
request.MaximumAutomaticRedirections = 4;
request.MaximumResponseHeadersLength = 4;
// Set credentials to use for this request.
request.Credentials = CredentialCache.DefaultCredentials;

HttpWebResponse response = null;
try
{
    // This does the actual downloading of the file
    response = (HttpWebResponse)request.GetResponse();
}
catch (Exception ex)
{
    Logger.Warn(ex.Message);
    return null;
}

if (response == null)
{
    Logger.Warn("Attempt to download ACH directory file did not return a response.");
    return null;
}

// Here we are going to read the contents of the downloaded file
// one line/record at a time. Each line/record defines a bank routing number.
// The format of the record is documented in the FedACHDirectoryRecord class.
StreamReader readStream = null;
List<FedACHDirectoryRecord> rnList = new List<FedACHDirectoryRecord>();
try
{
    // Get the stream associated with the response.
    Stream receiveStream = response.GetResponseStream();

    // Pipes the stream to a higher level stream reader with the required encoding format.
    readStream = new StreamReader(receiveStream, Encoding.UTF8);

    // Read all the lines out of the downloaded file stream.
    // Each line defines a bank routing number (a banking institution).
    string text = null;
    while ((text = readStream.ReadLine()) != null)
    {
        // Wrap the line in a record instance (provides access to fields)
        FedACHDirectoryRecord rec = new FedACHDirectoryRecord(text);
        // Build the list, record by record
        rnList.Add(rec);
    }
}
catch (Exception ex)
{
    Logger.Warn(ex.Message);
    return null;
}
finally
{
    // Clean up time: close the reader first, then the response it was reading from
    if (readStream != null)
    {
        readStream.Close();
    }
    response.Close();
}

// Here's what we downloaded
Logger.InfoFormat("{0} records downloaded from {1}", rnList.Count, ACHJobsConfig.FederalReserveBankDirectoryURL);

解决方案

 // The agreement is tracked per session, so both requests must share one cookie container.
CookieContainer cookies = new CookieContainer();
List<FedACHDirectoryRecord> rnList = new List<FedACHDirectoryRecord>();
try
{
    // Could not use https as there is a certificate issue.
    // Fell back to http.
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(ACHJobsConfig.FederalReserveBankDirectoryURL);
    // Cookies set by the server during the redirect chain are collected in this container.
    request.CookieContainer = cookies;
    // Set some reasonable limits on resources used by this request
    request.MaximumAutomaticRedirections = 4;
    // Note: this limit is expressed in kilobytes.
    request.MaximumResponseHeadersLength = 4;
    // Set credentials to use for this request.
    request.Credentials = CredentialCache.DefaultCredentials;

    // The first request is redirected to the page carrying the "Agree" form.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        // No manual cookie copying is needed: the "Cookie" request header is generated
        // from the container itself, so every cookie it carries is already in 'cookies'.
        // (Enumerating request.Headers yields string keys, not WebHeaderCollection
        // instances, so the previous foreach failed with InvalidCastException.)

        // values and url to post on page
        string postData = "agreementValue=Agree";
        string agreementPage = response.ResponseUri.AbsoluteUri;
        // Replace the last path segment of the agreement page with the form's action.
        string redirectedTo = agreementPage.Substring(0, agreementPage.LastIndexOf('/')) + "/submitAgreement.do";

        // post to the form on redirected page
        HttpWebRequest formPostRequest = (HttpWebRequest)WebRequest.Create(redirectedTo);
        formPostRequest.CookieContainer = cookies;
        formPostRequest.Credentials = CredentialCache.DefaultCredentials;
        formPostRequest.Method = WebRequestMethods.Http.Post;
        formPostRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2";
        formPostRequest.AllowWriteStreamBuffering = true;
        formPostRequest.ProtocolVersion = HttpVersion.Version11;
        formPostRequest.Referer = agreementPage;
        formPostRequest.ContentType = "application/x-www-form-urlencoded";
        byte[] byteArray = Encoding.ASCII.GetBytes(postData);
        formPostRequest.ContentLength = byteArray.Length;

        // Send the form body; 'using' releases the request stream even if the write fails.
        using (Stream requestStream = formPostRequest.GetRequestStream())
        {
            requestStream.Write(byteArray, 0, byteArray.Length);
        }

        // will return data .txt url data
        using (HttpWebResponse formPostResponse = (HttpWebResponse)formPostRequest.GetResponse())
        using (StreamReader readStream = new StreamReader(formPostResponse.GetResponseStream()))
        {
            // Read the downloaded file one line/record at a time.
            // Each line/record defines a bank routing number (a banking institution).
            // The format of the record is documented in the FedACHDirectoryRecord class.
            string text = null;
            while ((text = readStream.ReadLine()) != null)
            {
                // Wrap the line in a record instance (provides access to fields)
                // and build the list, record by record
                rnList.Add(new FedACHDirectoryRecord(text));
            }
        }
    }
}
catch (Exception ex)
{
    // The snippet as posted never closed its try block; log the failure and report it with null.
    Logger.Warn(ex.Message);
    return null;
}


我提交了上述代码,以防万一其他人需要帮助

// Build the request for the directory file, authenticating with the default system credentials.
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(ACHJobsConfig.FederalReserveBankDirectoryURL);
            request.Credentials = CredentialCache.DefaultCredentials;
            // Bound the resources this request is allowed to consume.
            request.MaximumAutomaticRedirections = 4;
            request.MaximumResponseHeadersLength = 4;

            // Issuing the request is what actually downloads the file.
            HttpWebResponse response = null;
            try
            {
                response = (HttpWebResponse)request.GetResponse();
            }
            catch (Exception downloadError)
            {
                Logger.Warn(downloadError.Message);
                return null;
            }

            if (response == null)
            {
                Logger.Warn("Attempt to download ACH directory file did not return a response.");
                return null;
            }

            // The body is consumed line by line; each line is one record describing a
            // bank routing number, in the format documented on FedACHDirectoryRecord.
            List<FedACHDirectoryRecord> rnList = new List<FedACHDirectoryRecord>();
            StreamReader readStream = null;
            try
            {
                // Decode the raw response stream as UTF-8 text.
                readStream = new StreamReader(response.GetResponseStream(), Encoding.UTF8);

                for (string line = readStream.ReadLine(); line != null; line = readStream.ReadLine())
                {
                    // Wrap the raw line in a record (gives access to its fields) and collect it.
                    rnList.Add(new FedACHDirectoryRecord(line));
                }
            }
            catch (Exception readError)
            {
                Logger.Warn(readError.Message);
                return null;
            }
            finally
            {
                // Release the reader first, then the response it was reading from.
                if (readStream != null)
                {
                    readStream.Close();
                }
                response.Close();
            }

            // Report how many records came back and from where.
            Logger.InfoFormat("{0} records downloaded from {1}", rnList.Count, ACHJobsConfig.FederalReserveBankDirectoryURL);

解决方案

// The agreement is tracked per session, so both requests must share one cookie container.
            CookieContainer cookies = new CookieContainer();
            List<FedACHDirectoryRecord> rnList = new List<FedACHDirectoryRecord>();
            try
            {
                // Could not use https as there is a certificate issue.
                // Fell back to http.
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(ACHJobsConfig.FederalReserveBankDirectoryURL);
                // Cookies set by the server during the redirect chain are collected in this container.
                request.CookieContainer = cookies;
                // Set some reasonable limits on resources used by this request
                request.MaximumAutomaticRedirections = 4;
                // Note: this limit is expressed in kilobytes.
                request.MaximumResponseHeadersLength = 4;
                // Set credentials to use for this request.
                request.Credentials = CredentialCache.DefaultCredentials;

                // The first request is redirected to the page carrying the "Agree" form.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    // No manual cookie copying is needed: the "Cookie" request header is generated
                    // from the container itself, so every cookie it carries is already in 'cookies'.
                    // (Enumerating request.Headers yields string keys, not WebHeaderCollection
                    // instances, so the previous foreach failed with InvalidCastException.)

                    // values and url to post on page
                    string postData = "agreementValue=Agree";
                    string agreementPage = response.ResponseUri.AbsoluteUri;
                    // Replace the last path segment of the agreement page with the form's action.
                    string redirectedTo = agreementPage.Substring(0, agreementPage.LastIndexOf('/')) + "/submitAgreement.do";

                    // post to the form on redirected page
                    HttpWebRequest formPostRequest = (HttpWebRequest)WebRequest.Create(redirectedTo);
                    formPostRequest.CookieContainer = cookies;
                    formPostRequest.Credentials = CredentialCache.DefaultCredentials;
                    formPostRequest.Method = WebRequestMethods.Http.Post;
                    formPostRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2";
                    formPostRequest.AllowWriteStreamBuffering = true;
                    formPostRequest.ProtocolVersion = HttpVersion.Version11;
                    formPostRequest.Referer = agreementPage;
                    formPostRequest.ContentType = "application/x-www-form-urlencoded";
                    byte[] byteArray = Encoding.ASCII.GetBytes(postData);
                    formPostRequest.ContentLength = byteArray.Length;

                    // Send the form body; 'using' releases the request stream even if the write fails.
                    using (Stream requestStream = formPostRequest.GetRequestStream())
                    {
                        requestStream.Write(byteArray, 0, byteArray.Length);
                    }

                    // will return data .txt url data
                    using (HttpWebResponse formPostResponse = (HttpWebResponse)formPostRequest.GetResponse())
                    using (StreamReader readStream = new StreamReader(formPostResponse.GetResponseStream()))
                    {
                        // Read the downloaded file one line/record at a time.
                        // Each line/record defines a bank routing number (a banking institution).
                        // The format of the record is documented in the FedACHDirectoryRecord class.
                        string text = null;
                        while ((text = readStream.ReadLine()) != null)
                        {
                            // Wrap the line in a record instance (provides access to fields)
                            // and build the list, record by record
                            rnList.Add(new FedACHDirectoryRecord(text));
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // The snippet as posted never closed its try block; log the failure and report it with null.
                Logger.Warn(ex.Message);
                return null;
            }


I submitted the above code, just in case anyone else needs help.


这篇关于如何使用 HttpWebRequest 从网站下载文件(请求被重定向到带有“同意”按钮的页面)的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆