php获取远程网页源码的程序代码
有时我们做采集时需要把远程网页的源码下载到本地,在这里我们整理了几种用php获取远程网页源码的代码,希望对各位有所帮助.
php的curl函数,基本例子,代码如下:
<?php
// Basic cURL example: fetch a remote page into a string and dump it.
// (The statements must be on separate lines: a `//` comment runs to the end
// of the line, so a one-line version comments out all the code after it.)

// Initialise a cURL handle
$curl = curl_init();
// URL to fetch
curl_setopt($curl, CURLOPT_URL, 'http://www.phprm.com');
// Include the response headers in the returned data
curl_setopt($curl, CURLOPT_HEADER, 1);
// Return the response from curl_exec() as a string instead of printing it
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
// Perform the request
$data = curl_exec($curl);
// curl_exec() returns false on failure; report the reason before closing the handle
if ($data === false) {
    print('cURL error: ' . curl_error($curl));
}
// Close the cURL handle
curl_close($curl);
// Show the fetched data (false if the request failed)
var_dump($data);
php fopen函数,代码如下:
<?php
/**
 * Print every line of a file or URL with HTML/PHP tags stripped, each
 * followed by " <BR>" and a newline.
 *
 * Prints "file could not be opened" and terminates the script if the
 * source cannot be opened.
 *
 * @param string $source Local path or URL understood by fopen()
 *                       (remote URLs need allow_url_fopen enabled).
 * @return void
 */
function printStrippedLines($source)
{
    if (!($myFile = fopen($source, "r"))) {
        print("file could not be opened");
        exit;
    }
    // fgets() returns false at end of file, so the loop stops without
    // printing the spurious empty line a `while (!feof())` loop produces.
    while (($myLine = fgets($myFile, 255)) !== false) {
        // fgetss() was removed in PHP 8.0; strip the tags from each chunk instead.
        // Unlike fgetss(), this keeps no state between reads, so a tag split
        // across two reads is not recognised as one tag.
        $myLine = strip_tags($myLine);
        print("$myLine <BR>\n");
    }
    // close the file
    fclose($myFile);
}

print("<H1>HTTP</H1>\n");
// open a file using http protocol
printStrippedLines("http://www.phprm.com/");

print("<H1>FTP</H1>\n");
print("<HR>\n");
// open a file using ftp protocol
printStrippedLines("ftp://ftp.php.net/welcome.msg");

print("<H1>Local</H1>\n");
print("<HR>\n");
// open a local file
printStrippedLines("data.txt");
file_get_contents函数,代码如下:
<?php
// Fetch the whole page into a string in one call
// (http:// URLs need allow_url_fopen enabled).
$html = file_get_contents('http://www.phprm.com/');
// file_get_contents() returns false on failure, so compare strictly:
// an empty page ('') is a valid result.
if ($html === false) {
    print("file could not be opened");
}
抓取远程网页源码类,代码如下:
<?php
/**
 * Minimal HTTP/HTTPS GET client built on a raw socket.
 *
 * The constructor splits the URL into protocol, host, port and request URI;
 * DownloadToString() sends an HTTP/1.0 GET request and returns the response
 * body, following redirects announced through a Location header.
 */
class HTTPRequest
{
    public $_fp;       // HTTP socket
    public $_url;      // full URL
    public $_host;     // HTTP host
    public $_protocol; // protocol (http/https), lower-cased
    public $_uri;      // request URI
    public $_port;     // port (integer)

    /**
     * Split $this->_url into $_protocol, $_host, $_port and $_uri.
     *
     * A URL without a "scheme://" prefix is treated as plain HTTP. When no
     * usable port is given, 443 is used for https and 80 otherwise.
     *
     * @return void
     */
    public function _scan_url()
    {
        $req = $this->_url;
        $pos = strpos($req, '://');
        if ($pos === false) {
            // No scheme: default to http instead of cutting characters off the host
            $this->_protocol = 'http';
        } else {
            $this->_protocol = strtolower(substr($req, 0, $pos));
            $req = (string) substr($req, $pos + 3);
        }
        $defaultPort = ($this->_protocol === 'https') ? 443 : 80;

        // Everything up to the first "/" is the host[:port] part
        $pos = strpos($req, '/');
        if ($pos === false) {
            $pos = strlen($req);
        }
        $host = (string) substr($req, 0, $pos);
        if (strpos($host, ':') !== false) {
            $parts = explode(':', $host, 2);
            $this->_host = $parts[0];
            // fsockopen() expects an integer port; fall back when it is empty or invalid
            $port = (int) $parts[1];
            $this->_port = ($port > 0) ? $port : $defaultPort;
        } else {
            $this->_host = $host;
            $this->_port = $defaultPort;
        }

        // The cast covers older PHP versions where substr() returns false at end of string
        $this->_uri = (string) substr($req, $pos);
        if ($this->_uri === '') {
            $this->_uri = '/';
        }
    }

    /**
     * Store the URL and parse it.
     *
     * Declared as __construct(): a method named after its class is no longer
     * treated as a constructor since PHP 8.0.
     *
     * @param string $url Full URL to request.
     */
    public function __construct($url)
    {
        $this->_url = $url;
        $this->_scan_url();
    }

    /**
     * Turn a Location header value into an absolute URL, resolving it
     * against this request's protocol, host, port and URI when needed.
     *
     * @param string $location Raw Location header value.
     * @return string Absolute URL.
     */
    private function _resolve_location($location)
    {
        // Already absolute (starts with "scheme://")
        if (preg_match('#^[a-z][a-z0-9+.\-]*://#i', $location)) {
            return $location;
        }
        // Protocol-relative: "//host/path"
        if (substr($location, 0, 2) === '//') {
            return $this->_protocol . ':' . $location;
        }
        $origin = $this->_protocol . '://' . $this->_host . ':' . $this->_port;
        // Host-relative: "/path"
        if (substr($location, 0, 1) === '/') {
            return $origin . $location;
        }
        // Path-relative: resolve against the directory of the current URI
        $path = $this->_uri;
        $query = strpos($path, '?');
        if ($query !== false) {
            $path = substr($path, 0, $query);
        }
        $dir = substr($path, 0, strrpos($path, '/') + 1);
        return $origin . $dir . $location;
    }

    /**
     * Download the URL and return the response body as a string.
     *
     * If the response has no blank line separating headers from body, the
     * raw response is returned unchanged. If it carries a Location header,
     * that URL is fetched instead, at most $maxRedirects times in a row.
     *
     * @param int $maxRedirects Number of redirects still allowed.
     * @return string Response body (or the raw response, see above).
     * @throws RuntimeException If the connection cannot be opened or the
     *                          redirect limit is exceeded.
     */
    public function DownloadToString($maxRedirects = 10)
    {
        $crlf = "\r\n";
        // generate request
        $req = 'GET ' . $this->_uri . ' HTTP/1.0' . $crlf
            . 'Host: ' . $this->_host . $crlf
            . $crlf;

        // fetch; the "ssl://" transport prefix makes fsockopen() negotiate TLS
        $target = ($this->_protocol === 'https' ? 'ssl://' : '') . $this->_host;
        $this->_fp = fsockopen($target, $this->_port, $errno, $errstr);
        if (!$this->_fp) {
            throw new RuntimeException(
                'Cannot connect to ' . $this->_host . ':' . $this->_port . ': ' . $errstr . ' (' . $errno . ')'
            );
        }
        fwrite($this->_fp, $req);
        $response = '';
        while (!feof($this->_fp)) {
            $chunk = fread($this->_fp, 1024);
            if ($chunk === false) {
                break; // read error: keep what was received so far
            }
            $response .= $chunk;
        }
        fclose($this->_fp);

        // split header and body
        $pos = strpos($response, $crlf . $crlf);
        if ($pos === false) {
            return $response;
        }
        $header = substr($response, 0, $pos);
        $body = (string) substr($response, $pos + 2 * strlen($crlf));

        // parse headers into a map keyed by lower-cased header name
        $headers = array();
        foreach (explode($crlf, $header) as $line) {
            if (($colon = strpos($line, ':')) !== false) {
                $headers[strtolower(trim(substr($line, 0, $colon)))] = trim(substr($line, $colon + 1));
            }
        }

        // redirection?
        if (isset($headers['location'])) {
            if ($maxRedirects <= 0) {
                // Guard against redirect loops
                throw new RuntimeException('Too many redirects while fetching ' . $this->_url);
            }
            $http = new HTTPRequest($this->_resolve_location($headers['location']));
            return $http->DownloadToString($maxRedirects - 1);
        }

        return $body;
    }
}
// Usage: build a request for the page, then download its body into $str
$r   = new HTTPRequest('http://www.phprm.com');
$str = $r->DownloadToString();
永久链接:http://www.phprm.com/develop/fs8985.html
转载随意!带上文章地址吧。