解析HTML文件和返回值作为PHP变量 [英] Parse HTML file and return values as php variables
问题描述
我有一段来自公司网站的 HTML 代码。由于我无法访问数据库,我想解析这个 HTML 文件并返回其中的值。代码如下:
I have this HTML code from my company site. Since I do not have access to the database, I want to parse through an HTML file and return the values. The code is like this:
<?php
// Sample report markup. A nowdoc is used so the attribute quotes need no
// escaping: inside a single-quoted literal PHP keeps \" as a backslash plus a
// quote, which put stray backslashes into every attribute value.
$string = <<<'ORDER_HTML'
<p> <b>HEADER INFO</b>
<table width=100% cellspacing=0>
<tr align=left>
<td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2"> </font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>View Object:</b> 6600422</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>BPO:</b> G37147359-000000</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Ack Date:</b> 2012-05-28</font></td>
</tr>
<tr align=left>
<td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2"> </font></td>
<td valign=top colspan=3><font face="verdana, arial, helvetica" size="-2"><b>Operation(s):</b> PPS_Queue, PPS_Build, PPS_BoxAll, JPN_End</font></td>
</tr>
</table>
</p>
<hr>
<p> <b>EXTERNAL ORDER NUMBER REFERENCE</b>
<table width=100% cellspacing=0>
<tr align=left>
<td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2"> </font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>SAP Sales Order Number</b></font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Customer P.O. Number</b></font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Legacy Order Number</b></font></td>
</tr>
<tr align=left>
<td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2"> </font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">0310363858</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">77340892008-120413</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">89FF09378001</font></td>
</tr>
</table>
</p>
<hr>
<p> <b>PRODUCTS FOR THIS WORK OBJECT/OPERATION(S)</b>
<table width=100% cellspacing=0>
<tr align=left>
<td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2"> </font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>PL</b></font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Product #</b></font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Qty</b></font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Options</b></font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Serial #</b></font></td>
</tr>
<tr align=left>
<td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2"> </font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">3C</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">AP703B</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">1</font></td>
<td valign=top colspan=1>  </td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">2S6219000G</font></td>
</tr>
</table>
</p>
<hr>
<p> <b>Station Info</b>
<table width=100% cellspacing=0>
<tr align=left>
<td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2"> </font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Start Station:</b> JPN_End</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Location:</b> Done</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Station:</b> </font></td>
</tr>
<tr align=left>
<td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2"> </font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Birth Date/Time:</b> 2012-05-23 14:20:32 SGT</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Power Cord:</b></font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Voltage:</b></font></td>
</tr>
</table>
</p>
<hr>
<p> <b>MATERIAL LIST FOR THIS WORK OBJECT/OPERATION(S)</b>
<table width=100% cellspacing=0>
<tr align=left>
<td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2"> </font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Part Number</b></font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Qty</b></font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Description</b></font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>BB Type</b></font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Material Location</b></font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Serial Number</b></font></td>
</tr>
<tr align=left>
<td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2"> </font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">AP703B@@</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">1</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">OEM Generic 1U SAS Enclosure</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">BOM</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">ASSY</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">2S6219000G</font></td>
</tr>
</table>
</p>
ORDER_HTML;

// Parse the markup and turn each key of the returned map into a variable of
// the same name in the current scope, then print the values one per line.
$result = parse_data($string);
extract($result);
echo $headertext.'<br />';
echo $sapSON.'<br />';
echo $custPON.'<br />';
echo $legacyON.'<br />';
echo $pl.'<br />';
echo $pn.'<br />';
/**
 * Extract the heading and the first five digit-leading cell values from an HTML fragment.
 *
 * The heading is the text of the last non-empty <p> element. Values are taken
 * from <td> cells in document order, keeping only cells whose trimmed text
 * starts with a digit; cells starting with a letter (labels, or values such as
 * a product code beginning with a letter) are not collected.
 *
 * @param string $string HTML fragment to parse.
 * @return array Map with the keys headertext, sapSON, custPON, legacyON, pl and pn.
 *               A key is null when the markup supplies no matching content.
 */
function parse_data($string){
    $keys = array('headertext', 'sapSON', 'custPON', 'legacyON', 'pl', 'pn');

    // Remove non-breaking-space placeholders (entity form or raw UTF-8 bytes).
    // Ordinary spaces must be kept: stripping them fuses tag names with their
    // attributes ("<tdwidth=...") and removes the spaces inside real values.
    $string = str_replace(array('&nbsp;', "\xC2\xA0"), '', (string) $string);
    if (trim($string) === '') {
        return array_fill_keys($keys, null);
    }

    // Legacy markup triggers libxml warnings; collect them internally instead
    // of hiding every error with the @ operator, then restore the old setting.
    $previous = libxml_use_internal_errors(true);
    $xml = new DOMDocument();
    $xml->loadHTML($string);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $header = null;
    foreach ($xml->getElementsByTagName('p') as $p) {
        $text = trim($p->nodeValue);
        // Empty paragraphs are skipped so they cannot blank out a real heading.
        if ($text !== '') {
            $header = $text;
        }
    }

    $values = array();
    foreach ($xml->getElementsByTagName('td') as $td) {
        $value = trim($td->nodeValue);
        // Compare against '' rather than empty(): empty('0') is true and would
        // drop a legitimate cell containing just "0".
        if ($value !== '' && is_numeric($value[0])) {
            $values[] = $value;
        }
    }

    // Exactly five value slots follow the heading; missing ones become null.
    $values = array_pad(array_slice($values, 0, 5), 5, null);

    return array_combine($keys, array_merge(array($header), $values));
}
?>
现在我想把标题“External Order Number Reference”保存到一个变量中,以便稍后调用。
Now I want to save the header "External Order Number Reference" into a variable which I can call later on.
另外,第一行的第二,第三和第四列分别对应于第二行的第二,第三和第四列的值。我也希望将这些值保存到变量。因此,基本上,我需要一个PHP脚本,将解析这个HTML文件,并返回我下面的:
Also, the second, third and fourth column of the first row correspond to the value of the second, third and fourth column of the second row respectively. I also want to save these values to variables. So basically, I need a PHP script which will parse this HTML file and return me the following:
$header1 = "HEADER INFO";
$viewObject = "6600422";
$BPO = "G37147359-000000";
$AckDate = "2012-05-28";
$Operations = "PPS_Queue, PPS_Build, PPS_BoxAll, JPN_End";
$header2 = "EXTERNAL ORDER NUMBER REFERENCE";
$sapSON = "0310363858";
$custPON = "77340892008-120413";
$legacyON = "89FF09378001";
$header3 = "PRODUCTS FOR THIS WORK OBJECT/OPERATION(S)";
$pl = "3C";
$pn = "AP703B";
$qty = "1";
$options = " ";
$serialNo = "2S6219000G";
等等……基本上,我需要把表格中的所有内容都保存到变量中,因为稍后我会把它们存入我的数据库,据此生成报告,并为部分明细生成条形码。
ETC... Basically, I need all the table contents saved into variables because I will later save them to my database and create a report out of it and generate barcodes for some details
感谢您的帮助!
供参考:我无法访问数据库,所以我只能解析这个 HTML 文件,并把值保存到变量中,以便稍后存入我自己的数据库。另外请注意,表头是固定不变的,变化的只有各个订单对应的数值。
FYI: I do not have access to the database so all I can do is parse thru this HTML file and save the values to variables which I can store to my database later on. Also, do note that the headers are constant, the only changing values are the numbers which are for different orders.
推荐答案
试试下面的代码,可以查看它的实际运行效果:
<?php
// Sample markup for a single section, kept verbatim in a nowdoc (the blank line
// before the closing marker preserves the literal's trailing newline).
$string = <<<'ORDER_HTML'
<p> <b>EXTERNAL ORDER NUMBER REFERENCE</b>
<table width=100% cellspacing=0>
<tr align=left>
<td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2"> </font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>SAP Sales Order Number</b></font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Customer P.O. Number</b></font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Legacy Order Number</b></font></td>
</tr>
<tr align=left>
<td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2"> </font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">0310363858</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">77340892008-120413</font></td>
<td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">89FF09378001</font></td>
</tr>
</table>
</p>

ORDER_HTML;

// Import the parsed map into the current scope as variables, then print the
// heading and the three order numbers, each followed by a line break.
$result = parse_data($string);
extract($result);
echo $headertext, '<br />',
     $sapSON, '<br />',
     $custPON, '<br />',
     $legacyON, '<br />';
/**
 * Extract the heading and the first three digit-leading cell values from an HTML fragment.
 *
 * The heading is the text of the last non-empty <p> element. Values come from
 * <td> cells in document order, keeping only cells whose trimmed text starts
 * with a digit, so label cells are ignored.
 *
 * @param string $string HTML fragment to parse.
 * @return array Map with the keys headertext, sapSON, custPON and legacyON.
 *               A key is null when the markup supplies no matching content.
 */
function parse_data($string){
    $keys = array('headertext', 'sapSON', 'custPON', 'legacyON');

    // Remove non-breaking-space placeholders (entity form or raw UTF-8 bytes).
    // Ordinary spaces must be kept: stripping them fuses tag names with their
    // attributes ("<tdwidth=...") and removes the spaces inside real values.
    $string = str_replace(array('&nbsp;', "\xC2\xA0"), '', (string) $string);
    if (trim($string) === '') {
        return array_fill_keys($keys, null);
    }

    // Legacy markup triggers libxml warnings; collect them internally instead
    // of hiding every error with the @ operator, then restore the old setting.
    $previous = libxml_use_internal_errors(true);
    $xml = new DOMDocument();
    $xml->loadHTML($string);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $header = null;
    foreach ($xml->getElementsByTagName('p') as $p) {
        $text = trim($p->nodeValue);
        // Empty paragraphs are skipped so they cannot blank out a real heading.
        if ($text !== '') {
            $header = $text;
        }
    }

    $values = array();
    foreach ($xml->getElementsByTagName('td') as $td) {
        $value = trim($td->nodeValue);
        // Compare against '' rather than empty(): empty('0') is true and would
        // drop a legitimate cell containing just "0".
        if ($value !== '' && is_numeric($value[0])) {
            $values[] = $value;
        }
    }

    // Exactly three value slots follow the heading; missing ones become null.
    $values = array_pad(array_slice($values, 0, 3), 3, null);

    return array_combine($keys, array_merge(array($header), $values));
}
?>
修改版本2(多行):
由于你的每个表格结构都不一样,处理起来会相当复杂,不过我喜欢挑战。给你,希望对你有帮助……
Edit version 2 (Multiple rows):
As your table is different for each iteration it becomes quite complex, but I like a challenge. Here you go, hope it helps...
<?php
// Parse the report markup into one array per section.
// $string is not assigned in this snippet; it must already hold the HTML.
$result = parse_data($string);
// Create variables from values: every key of every section becomes a variable
// of that name in the current scope. The loop variables live in the same
// scope, so a parsed key named "key", "value", "key_b" or "value_b" would
// overwrite them, and a key that occurs in several sections (e.g. Qty) ends up
// holding the value from the last section that defines it.
foreach($result as $key=>$value){
foreach($value as $key_b=>$value_b){
$$key_b = $value_b;
}
}
/* --New Available Variables--
(values shown are for the sample report used in the question)
$header0 = HEADER INFO
$ViewObject = 6600422
$BPO = G37147359-000000
$AckDate = 2012-05-28
$Operations = PPS_Queue, PPS_Build, PPS_BoxAll, JPN_End
$header1 = EXTERNAL ORDER NUMBER REFERENCE
$SAPSalesOrderNumber = 0310363858
$CustomerPONumber = 77340892008-120413
$LegacyOrderNumber = 89FF09378001
$header2 = PRODUCTS FOR THIS WORK OBJECT/OPERATION(S)
$PL = 3C
$Product = AP703B
$Qty = 1
$Options =
$Serial = 2S6219000G
$header3 = Station Info
$StartStation = JPN_End
$Location = Done
$Station =
$BirthDateTime = 2012-05-23 14
    (cut at the first ':' of the time whenever the parser splits the cell on
     every ':'; the cell itself reads "2012-05-23 14:20:32 SGT")
$PowerCord =
$Voltage =
$header4 = MATERIAL LIST FOR THIS WORK OBJECT/OPERATION(S)
$PartNumber = AP703B@@
$Description = OEM Generic 1U SAS Enclosure
$BBType = BOM
$MaterialLocation = ASSY
$SerialNumber = 2S6219000G
*/
/**
 * Parse a report made of <hr>-separated sections into one key/value map per section.
 *
 * For every non-blank section the result holds an array whose first entry is
 * 'header<N>' (text of the last non-empty <p>), followed by one entry per
 * field. Field names are the label text reduced to ASCII letters
 * ("Birth Date/Time:" becomes "BirthDateTime"). Two cell layouts are understood:
 *
 *  - "label: value" cells, i.e. a cell containing a <b> label and a colon; the
 *    text is split at the FIRST colon only, so values that contain colons
 *    themselves (times) are kept whole;
 *  - a row of <b> column headings followed by rows of plain cells; a plain
 *    cell is stored under the heading found at the same cell position, which
 *    keeps columns aligned even when a value cell is blank. If several value
 *    rows follow one heading row, later rows overwrite earlier ones.
 *
 * @param string $string HTML of the whole report.
 * @return array List of sections, each an array of 'header<N>' and field => value pairs.
 */
function parse_data($string){
    // Remove non-breaking-space placeholders (entity form or raw UTF-8 bytes).
    // Ordinary spaces must be kept: stripping them fuses tag names with their
    // attributes and removes the spaces inside real values.
    $string = str_replace(array('&nbsp;', "\xC2\xA0"), '', (string) $string);

    $ret = array();
    $entry = 0;

    // Legacy markup triggers libxml warnings; collect them internally instead
    // of hiding every error with the @ operator.
    $previous = libxml_use_internal_errors(true);

    foreach (explode('<hr>', $string) as $part) {
        // loadHTML() cannot load an empty document, so blank parts (empty
        // input, trailing <hr>) are skipped without using up a section index.
        if (trim($part) === '') {
            continue;
        }

        // A fresh document per section so nothing carries over between parts.
        $html = new DOMDocument();
        $html->loadHTML($part);
        libxml_clear_errors();

        $ret[$entry] = array();

        //Get Header
        foreach ($html->getElementsByTagName('p') as $p) {
            $text = trim($p->nodeValue);
            if ($text !== '') {
                $ret[$entry]['header'.$entry] = $text;
            }
        }

        // Cell position => field name, filled from rows of column headings.
        $columns = array();
        foreach ($html->getElementsByTagName('tr') as $tr) {
            $position = 0;
            foreach ($tr->getElementsByTagName('td') as $td) {
                $column = $position++;
                $value = trim($td->nodeValue);
                $isLabel = $td->getElementsByTagName('b')->length > 0;

                if ($isLabel && strpos($value, ':') !== false) {
                    // "Label: value" cell. Limit 2 keeps colons in the value.
                    $split = explode(':', $value, 2);
                    $ret[$entry][preg_replace('/[^a-zA-Z]/s', '', $split[0])] = trim($split[1]);
                } elseif ($isLabel) {
                    // Column heading; remember which position it labels.
                    if ($value !== '') {
                        $columns[$column] = preg_replace('/[^a-zA-Z]/s', '', $value);
                    }
                } elseif (isset($columns[$column])) {
                    // Plain value cell under a known heading (may be blank).
                    $ret[$entry][$columns[$column]] = $value;
                }
                // Anything else is a spacer cell and carries no data.
            }
        }

        $entry++;
    }

    libxml_use_internal_errors($previous);

    return $ret;
}
?>
这篇关于解析HTML文件和返回值作为PHP变量的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!