PHP 下通过 cURL 抓取 Yahoo BOSS 搜索结果的实现代码

  1.编写curl类,进行网页内容抓取

  

代码如下:

  /**
   * Small wrapper around one reusable cURL handle for fetching page content.
   *
   * The handle is configured once in the constructor and closed in the
   * destructor; each call to getWebPageContent() only changes the target URL.
   */
  class CurlUtil
  {
      /** cURL handle created in the constructor and reused for every request. */
      private $curl;

      /** Value passed to CURLOPT_CONNECTTIMEOUT (connection timeout, in seconds). */
      private $timeout = 10;

      /**
       * Create the cURL handle and apply the options shared by all requests.
       */
      public function __construct()
      {
          $this->curl = curl_init();

          // Options are applied one by one, in this order.
          $options = array(
              // Return the response from curl_exec() instead of printing it.
              CURLOPT_RETURNTRANSFER => 1,
              CURLOPT_USERAGENT      => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)",
              // Do not include response headers in the returned content.
              CURLOPT_HEADER         => false,
              // Keep the response body (i.e. do not issue a body-less request).
              CURLOPT_NOBODY         => false,
              CURLOPT_CONNECTTIMEOUT => $this->timeout,
              // Follow redirects and set the Referer header automatically when doing so.
              CURLOPT_FOLLOWLOCATION => true,
              CURLOPT_AUTOREFERER    => true,
          );

          foreach ($options as $option => $value) {
              curl_setopt($this->curl, $option, $value);
          }
      }

      /**
       * Close the cURL handle when the object is destroyed.
       */
      public function __destruct()
      {
          curl_close($this->curl);
      }

      /**
       * Fetch the content of a web page.
       *
       * @param string $url Address to request.
       *
       * @return mixed Whatever curl_exec() returns for the configured handle
       *               (the response body as a string, or false on failure).
       */
      public function getWebPageContent($url)
      {
          curl_setopt($this->curl, CURLOPT_URL, $url);
          $content = curl_exec($this->curl);

          return $content;
      }
  }

  2.创建curl对象

  

代码如下:

  // Create the CurlUtil instance that is later passed to getYahooSearch().
  $CurlUtil = new CurlUtil();

  3.抓取yahoo搜索结果

  

代码如下:

  /**
   * Query the Yahoo BOSS web-search endpoint and return a simplified result list.
   *
   * The keyword is URL-encoded and placed in the request path. The JSON
   * response is decoded and every entry of ysearchresponse.resultset_web is
   * reduced to four fields, with HTML tags stripped from title and abstract.
   *
   * NOTE(review): "你的雅虎appid" in the URL is a placeholder ("your Yahoo
   * appid") and must be replaced with a real application id.
   * NOTE(review): "count" appears twice in the query string (20 and 10); which
   * value the service honours cannot be determined from this code - confirm
   * and drop the other one.
   *
   * @param CurlUtil $curl HTTP helper used to fetch the search URL.
   * @param string   $key  Search keyword (not yet URL-encoded).
   *
   * @return array List of arrays with the keys "url", "date", "title" and
   *               "description". Empty when the request fails, the response
   *               is not valid JSON, or it contains no result set.
   */
  function getYahooSearch(CurlUtil $curl, $key)
  {
      $key = urlencode($key);

      // The region parameter must be spelled "&region=hk". It had been mangled
      // into the single character "®" (HTML entity "&reg"), which glued
      // "ion=hk" onto the lang value and dropped the region parameter.
      $searchUrl = "http://boss.yahooapis.com/ysearch/web/v1/$key?appid=你的雅虎appid&lang=tzh&region=hk&abstract=long&count=20&format=json&start=0&count=10";

      $returnArray = array();

      // getWebPageContent() yields false when the transfer fails.
      $jsonStr = $curl->getWebPageContent($searchUrl);
      if (!is_string($jsonStr) || $jsonStr === '') {
          return $returnArray;
      }

      // json_decode() returns null for invalid JSON; never index a non-array.
      $searchDataInfo = json_decode($jsonStr, true);
      if (!is_array($searchDataInfo)
          || !isset($searchDataInfo['ysearchresponse']['resultset_web'])
          || !is_array($searchDataInfo['ysearchresponse']['resultset_web'])
      ) {
          return $returnArray;
      }

      foreach ($searchDataInfo['ysearchresponse']['resultset_web'] as $data) {
          if (!is_array($data)) {
              continue;
          }

          // Absent fields fall back to the values the lookup would have
          // produced anyway (null / empty string), without raising notices.
          $returnArray[] = array(
              'url'         => isset($data['url']) ? $data['url'] : null,
              'date'        => isset($data['date']) ? $data['date'] : null,
              'title'       => isset($data['title']) ? strip_tags($data['title']) : '',
              'description' => isset($data['abstract']) ? strip_tags($data['abstract']) : '',
          );
      }

      return $returnArray;
  }

  4.测试结果

  // Run a sample search for the keyword "百度" and dump the returned array.
  var_dump(getYahooSearch($CurlUtil, "百度"));