试图通过卷曲后后从页面抓取数据 [英] trying to grab data from a page after post via curl
本文介绍了试图通过卷曲后后从页面抓取数据的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
我试图抓住从这里的数据: http://mediaforest.biz/mobile/nowplaying.aspx
在页面中选择一个站,张贴,那么你得到新的页面数据。但我不能抓住它,我再次得到相同的页面。
我用这个code:
< PHP
标题(内容类型:text / html的;字符集= utf-8');$ URL =http://mediaforest.biz/mobile/nowplaying.aspx;
$引荐=;//头
$头[] =主持人:.parse_url($网址,PHP_URL_HOST);
$头[] =的User-Agent:Mozilla的/ 5.0(视窗; U; Windows NT的6.1;他; RV:1.9.2.3)的Gecko / 20100401火狐/ 3.6.3
$头[] =接受:text / html的,是application / xhtml + xml的,应用/ XML; Q = 0.9 * / *; Q = 0.8;
$头[] =接收语言:他,EN-US; Q = 0.7,连接; Q = 0.3;
$头[] =接受编码:gzip,紧缩;
$头[] =接受字符集:windows-1255,UTF-8,Q = 0.7 *; Q = 0.7;
$头[] =保持活动:115;
$头[] =连接:保持活动;$饼干=cookie.txt;
$计划生育=的fopen($饼干,W +);$ CH = curl_init();
curl_setopt($ CH,CURLOPT_URL,$网址);
curl_setopt($ CH,CURLOPT_REFERER,$引用者);
curl_setopt($ CH,CURLOPT_TIMEOUT,900);
curl_setopt($ CH,CURLOPT_FAILONERROR,FALSE);
curl_setopt($ CH,CURLOPT_SSL_VERIFYHOST,0);
curl_setopt($ CH,CURLOPT_SSL_VERIFYPEER,0);
curl_setopt($ CH,CURLOPT_FOLLOWLOCATION,FALSE);
curl_setopt($ CH,CURLOPT_RETURNTRANSFER,真正的);
curl_setopt($ CH,CURLOPT_HTTPHEADER,$头);
curl_setopt($ CH,CURLOPT_HEADER,真正的);
curl_setopt($ CH,CURLOPT_COOKIEFILE,$饼干);
curl_setopt($ CH,CURLOPT_COOKIEJAR,$饼干);
curl_setopt($ CH,CURLOPT_VERBOSE,0);$内容= curl_exec($ CH);
回声$内容; 如果(stristr($内容,__ EVENTTARGET)){
$数组1 =爆炸('__ EVENTTARGETVALUE =,$内容);
$内容1 = $ ARRAY1 [1];
$数组2 =爆炸('>
<输入类型=隐藏的名字=__ EVENTARGUMENT,$内容1);
$内容2 = $数组2 [0];
$ EVENTTARGET = urlen code($内容2);
} 如果(stristr($内容,__ EVENTARGUMENT)){
$数组1 =爆炸('__ EVENTARGUMENTVALUE =,$内容);
$内容1 = $ ARRAY1 [1];
$数组2 =爆炸('>
<的脚本语言',$内容1);
$内容2 = $数组2 [0];
$ EVENTARGUMENT = urlen code($内容2);
} 如果(stristr($内容,formNowPlaying)){
$数组1 =爆炸('方法=邮报行动=,$内容);
$内容1 = $ ARRAY1 [1];
$数组2 =爆炸('>
<输入类型=隐藏的名字=__ EVENTTARGET,$内容1);
$内容2 = $数组2 [0];
$ nexturl = $内容2;
}//回声$ EVENTTARGET$ nexturl。$ EVENTARGUMENT。。$ URL =http://mediaforest.biz/mobile/\".$nexturl;$栏= \"EVENTTARGET=\".$EVENTTARGET.\"&__EVENTARGUMENT=\".$EVENTARGUMENT.\"&MyChannels=0&ViewChannel_Button=Show\";curl_setopt($ CH,CURLOPT_URL,$网址);
curl_setopt($ CH,CURLOPT_POST,真正的);
curl_setopt($ CH,CURLOPT_POSTFIELDS,$领域);
curl_setopt($ CH,CURLOPT_URL,$网址);
curl_setopt($ CH,CURLOPT_REFERER,$引用者);
curl_setopt($ CH,CURLOPT_TIMEOUT,900);
curl_setopt($ CH,CURLOPT_FAILONERROR,FALSE);
curl_setopt($ CH,CURLOPT_SSL_VERIFYHOST,0);
curl_setopt($ CH,CURLOPT_SSL_VERIFYPEER,0);
curl_setopt($ CH,CURLOPT_FOLLOWLOCATION,FALSE);
curl_setopt($ CH,CURLOPT_RETURNTRANSFER,真正的);
curl_setopt($ CH,CURLOPT_HTTPHEADER,$头);
curl_setopt($ CH,CURLOPT_HEADER,真正的);
curl_setopt($ CH,CURLOPT_COOKIEFILE,$饼干);
curl_setopt($ CH,CURLOPT_COOKIEJAR,$饼干);
curl_setopt($ CH,CURLOPT_VERBOSE,1);$ content_stage2 = curl_exec($ CH);
回声$ content_stage2;
?>
解决方案
如果您是第一次请求获得数据,请尝试关闭之后(每个请求)。
$内容= curl_exec($ CH);
curl_close($ CH);
回声$内容;
和
$ content_stage2 = curl_exec($ CH);
curl_close($ CH);
回声$ content_stage2;
i am trying to grab data from here : http://mediaforest.biz/mobile/nowplaying.aspx in the page you select a station and post it then you get new page with data. but i cant grab it, i get the same page again.
i used this code:
<?php
header ('Content-type: text/html; charset=utf-8');
$url = "http://mediaforest.biz/mobile/nowplaying.aspx";
$referer = "";
// headers
$header[] = "Host: ".parse_url($url, PHP_URL_HOST);
$header[] = "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.1; he; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3";
$header[] = "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
$header[] = "Accept-Language: he,en-us;q=0.7,en;q=0.3";
$header[] = "Accept-Encoding: gzip,deflate";
$header[] = "Accept-Charset: windows-1255,utf-8;q=0.7,*;q=0.7";
$header[] = "Keep-Alive: 115";
$header[] = "Connection: keep-alive";
$cookie="cookie.txt";
$fp=fopen($cookie,"w+");
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch,CURLOPT_REFERER,$referer);
curl_setopt($ch, CURLOPT_TIMEOUT, 900);
curl_setopt($ch, CURLOPT_FAILONERROR, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
curl_setopt($ch, CURLOPT_HEADER,true);
curl_setopt($ch, CURLOPT_COOKIEFILE,$cookie);
curl_setopt($ch, CURLOPT_COOKIEJAR,$cookie);
curl_setopt($ch, CURLOPT_VERBOSE, 0);
$content=curl_exec($ch);
echo $content;
if(stristr($content,"__EVENTTARGET")){
$array1=explode('__EVENTTARGET" value="',$content);
$content1=$array1[1];
$array2=explode('">
<input type="hidden" name="__EVENTARGUMENT"',$content1);
$content2=$array2[0];
$EVENTTARGET=urlencode($content2);
}
if(stristr($content,"__EVENTARGUMENT")){
$array1=explode('__EVENTARGUMENT" value="',$content);
$content1=$array1[1];
$array2=explode('">
<script language',$content1);
$content2=$array2[0];
$EVENTARGUMENT=urlencode($content2);
}
if(stristr($content,"formNowPlaying")){
$array1=explode('method="post" action="',$content);
$content1=$array1[1];
$array2=explode('">
<input type="hidden" name="__EVENTTARGET"',$content1);
$content2=$array2[0];
$nexturl=$content2;
}
//echo $EVENTTARGET." ".$EVENTARGUMENT." ".$nexturl;
$url = "http://mediaforest.biz/mobile/".$nexturl;
$fields = "EVENTTARGET=".$EVENTTARGET."&__EVENTARGUMENT=".$EVENTARGUMENT."&MyChannels=0&ViewChannel_Button=Show";
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $fields);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch,CURLOPT_REFERER,$referer);
curl_setopt($ch, CURLOPT_TIMEOUT, 900);
curl_setopt($ch, CURLOPT_FAILONERROR, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
curl_setopt($ch, CURLOPT_HEADER,true);
curl_setopt($ch, CURLOPT_COOKIEFILE,$cookie);
curl_setopt($ch, CURLOPT_COOKIEJAR,$cookie);
curl_setopt($ch, CURLOPT_VERBOSE, 1);
$content_stage2=curl_exec($ch);
echo $content_stage2;
?>
解决方案
If you're getting data from the first request, try closing afterwards (for each request).
$content=curl_exec($ch);
curl_close($ch);
echo $content;
and
$content_stage2=curl_exec($ch);
curl_close($ch);
echo $content_stage2;
这篇关于试图通过卷曲后后从页面抓取数据的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!
查看全文