PHP模拟百度蜘蛛,伪造IP爬行网站,附源代码

编程语言
0 1115

<?php
// Report every PHP error level except notices and warnings.
error_reporting(E_ALL & ~E_NOTICE & ~E_WARNING);
 
/**
 * Fetch a URL with cURL while presenting the request as Baiduspider.
 *
 * The request carries the Baiduspider user-agent string and a randomly
 * generated dotted-quad address in the X-FORWARDED-FOR and CLIENT-IP request
 * headers. Only those headers are set here; nothing else about the
 * connection is altered.
 *
 * @param string $url Address to request.
 *
 * @return string|false Response body on success, false when the transfer fails.
 */
function Go($url){
    // Random dotted-quad value advertised as the client address.
    $ip = rand(0,255).'.'.rand(0,255).'.'.rand(0,255).'.'.rand(0,255);
    // Seconds allowed for establishing the connection.
    $connectTimeout = 15;

    $ch = curl_init();
    curl_setopt_array($ch, array(
        CURLOPT_URL            => $url,
        // 0 disables the overall transfer time limit; only the connect phase is bounded below.
        CURLOPT_TIMEOUT        => 0,
        // Headers carrying the generated address.
        CURLOPT_HTTPHEADER     => array('X-FORWARDED-FOR:'.$ip, 'CLIENT-IP:'.$ip),
        // Baiduspider user-agent string.
        CURLOPT_USERAGENT      => "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
        // Hand the body back from curl_exec() instead of printing it.
        CURLOPT_RETURNTRANSFER => 1,
        // Do not include response headers in the returned body.
        CURLOPT_HEADER         => 0,
        CURLOPT_CONNECTTIMEOUT => $connectTimeout,
        // Peer certificate verification is turned off for HTTPS targets.
        CURLOPT_SSL_VERIFYPEER => false,
        // Resolve host names to IPv4 addresses only.
        CURLOPT_IPRESOLVE      => CURL_IPRESOLVE_V4,
    ));

    // The original discarded this value, so callers never received the page.
    $content = curl_exec($ch);

    return $content;
}