忽然之间的博客

Thoughts, stories and ideas.

PHP 读取网址相关总结

简单粗暴,直接上代码。


<?php
/**
 * Micro-benchmark comparing several ways of reading a URL from PHP:
 * cURL (single and multi), file_get_contents(), fpassthru() and fopen()/fread().
 *
 * Each funN() benchmark fetches READURL MAX_RUN times and prints the elapsed
 * wall-clock time through trace_start()/trace_end().
 */
error_reporting(E_ALL & ~E_NOTICE);
set_time_limit(0);
date_default_timezone_set("Asia/Shanghai");

// Line separator: a newline on the CLI, <br> when served over HTTP.
define("BR", PHP_SAPI == "cli" ? "\n" : "<br>");

// Target URL. The timestamp query parameter makes the URL differ between runs.
#define("READURL", "http://bbs.free9.net/forum.php?mod=post&action=newthread&fid=23&t=".time());
define("READURL", "http://localhost/test/1.php?t=" . time());
#define("READURL", "http://www.163.com/");

trace_start("ALL");

// Number of requests each benchmark issues.
define("MAX_RUN", 1);

// Runs fun0 .. fun{N-1}; raise the bound to include more of the benchmarks.
$i = 0;
while ($i < 1) {
    echo BR;
    call_user_func("fun{$i}");
    echo BR;
    $i++;
}

/**************1**************/

/** Benchmark: single cURL handle; also prints the last response. */
function fun0()
{
    trace_start("curl_geturl");
    $urls = '';
    for ($i = 0; $i < MAX_RUN; $i++) {
        $urls = geturl(READURL);
    }
    echo $urls;
    trace_end("curl_geturl");
}

/** Benchmark: file_get_contents(). */
function fun1()
{
    trace_start("curl_filegetcontents");
    for ($i = 0; $i < MAX_RUN; $i++) {
        $urls = file_get_contents(READURL);
    }
    trace_end("curl_filegetcontents");
}

/** Benchmark: file_get_contents() again (same body and label as fun1). */
function fun2()
{
    trace_start("curl_filegetcontents");
    for ($i = 0; $i < MAX_RUN; $i++) {
        $urls = file_get_contents(READURL);
    }
    trace_end("curl_filegetcontents");
}

/** Benchmark: fopen() + fpassthru() captured through an output buffer. */
function fun3()
{
    trace_start("fpassthru");
    for ($i = 0; $i < MAX_RUN; $i++) {
        $urls = fpassthru_read(READURL);
    }
    trace_end("fpassthru");
}

/** Benchmark: fopen() + chunked fread(). */
function fun4()
{
    trace_start("fopenurl");
    for ($i = 0; $i < MAX_RUN; $i++) {
        $urls = fopenurl(READURL);
    }
    trace_end("fopenurl");
}

// The original echoed the undefined variable $br here; the BR constant was meant.
echo BR . BR;
trace_end("ALL");

/**
 * Fetch a URL with a single cURL handle.
 *
 * CURLOPT_HEADER is enabled, so the returned string contains the response
 * headers followed by the body.
 *
 * @param string $url    URL to request.
 * @param array  $header Optional request header lines, e.g.
 *                       array("Cache-Control: max-age=0", "Accept-Language: en-us,en;q=0.5").
 *                       Empty (the default) sends cURL's default headers.
 * @return string|false  Response headers + body, or false on failure.
 */
function geturl($url, $header = array())
{
    $curl = curl_init($url);
    if ($curl === false) {
        return false;
    }

    if (!empty($header)) {
        curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    }
    curl_setopt($curl, CURLOPT_TIMEOUT, 10);
    #curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1; rv:18.0) Gecko/20100101 Firefox/18.0");
    // Include the response headers in the returned data.
    curl_setopt($curl, CURLOPT_HEADER, true);
    // Return the transfer as a string instead of printing it.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);

    $content = curl_exec($curl);
    curl_close($curl);

    return $content;
}

/**
 * Fetch several URLs concurrently with curl_multi.
 *
 * @param array $urls Map of key => URL.
 * @param int   $ttl  Per-request timeout in seconds.
 * @return array      Map of the same keys => response body.
 */
function multi_geturl($urls, $ttl = 3)
{
    $multi_ch = curl_multi_init();
    $curl     = array();
    $contents = array();

    foreach ($urls as $k => $url) {
        $curl[$k] = curl_init($url);
        curl_setopt_array(
            $curl[$k],
            array(
                CURLOPT_URL            => $url,
                CURLOPT_TIMEOUT        => $ttl,
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_HEADER         => false,
                CURLOPT_FOLLOWLOCATION => true,
                CURLOPT_MAXREDIRS      => 1,
            )
        );
        curl_multi_add_handle($multi_ch, $curl[$k]);
    }

    // Start the transfers.
    $active = null;
    do {
        $mrc = curl_multi_exec($multi_ch, $active);
    } while ($mrc == CURLM_CALL_MULTI_PERFORM);

    // Wait for socket activity instead of spinning on curl_multi_exec().
    while ($active && $mrc == CURLM_OK) {
        if (curl_multi_select($multi_ch) == -1) {
            // select() failed: back off briefly, then still drive the
            // transfers so the loop cannot spin forever without progress.
            usleep(100);
        }
        do {
            $mrc = curl_multi_exec($multi_ch, $active);
        } while ($mrc == CURLM_CALL_MULTI_PERFORM);
    }

    // Collect each response and release its handle.
    foreach ($urls as $k => $url) {
        $contents[$k] = curl_multi_getcontent($curl[$k]);
        curl_multi_remove_handle($multi_ch, $curl[$k]);
        curl_close($curl[$k]);
    }
    curl_multi_close($multi_ch);

    return $contents;
}

/**
 * Read a URL through fopen() + fpassthru(), capturing the output.
 *
 * fpassthru() writes the stream to the output buffer and returns only a byte
 * count, so the content is taken from the buffer with ob_get_clean().
 *
 * @param string $url URL (or path) to read.
 * @return string|false Content, or false when the stream cannot be opened.
 */
function fpassthru_read($url)
{
    $handle = fopen($url, "rb");
    if ($handle === false) {
        return false;
    }

    ob_start();
    fpassthru($handle);
    $res = ob_get_clean();
    fclose($handle);

    return $res;
}

/**
 * Read a URL through fopen() and fread() in 8 KiB chunks.
 *
 * @param string $url URL (or path) to read.
 * @return string|false Content, or false when the stream cannot be opened.
 */
function fopenurl($url)
{
    $handle = fopen($url, "rb");
    if ($handle === false) {
        return false;
    }

    $res = '';
    while (!feof($handle)) {
        $chunk = fread($handle, 8192);
        if ($chunk === false) {
            // Read error: stop rather than loop on a broken stream.
            break;
        }
        $res .= $chunk;
    }
    fclose($handle);

    return $res;
}

/**
 * Record the start time of a named measurement in $GLOBALS[$flag].
 *
 * @param string $flag Measurement name.
 */
function trace_start($flag)
{
    $GLOBALS[$flag]['_timeStart'] = microtime(true);
    //$GLOBALS[$flag]['_memStart'] = memory_get_usage();
}

/**
 * Record the end time of a named measurement and print the elapsed seconds.
 *
 * @param string $flag Measurement name previously passed to trace_start().
 */
function trace_end($flag)
{
    $GLOBALS[$flag]['_timeEnd'] = microtime(true);
    //$GLOBALS[$flag]['_memEnd'] = memory_get_usage();
    echo '[' . $flag . '] Process Times: '
        . number_format($GLOBALS[$flag]['_timeEnd'] - $GLOBALS[$flag]['_timeStart'], 6)
        . " S " . BR;
}

echo 'ok';
exit;

一些总结

1. fopen /file_get_contents 每次请求都会重新做DNS查询,并不对DNS信息进行缓存。 但是CURL会自动对DNS信息进行缓存。对同一域名下的网页或者图片的请求只需要一次DNS 查询。这大大减少了DNS查询的次数。所以CURL的性能比fopen /file_get_contents 好很多。 

2. fopen / file_get_contents 在请求 HTTP 时,使用的是 http_fopen_wrapper,不会 keep-alive,而 curl 却可以。这样在多次请求多个链接时,curl 效率会好一些。

3. fopen / file_get_contents 函数会受到 php.ini 文件中 allow_url_fopen 选项配置的影响。如果该配置关闭了,则这些函数就无法读取远程 URL 了。而 curl 不受该配置的影响。

4. curl 可以模拟多种请求,例如:POST 数据、表单提交等,用户可以按照自己的需求来定制请求。而 fopen / file_get_contents 默认只使用 GET 方式获取数据(如需 POST 等,必须借助 stream_context_create 传入上下文参数,不如 curl 灵活)。另外,file_get_contents 获取远程文件时会把结果全部存进一个字符串中,而 file 函数则会按行储存成数组形式。