示例:使用页面爬虫获取其他页面的 HTML,并加载到自己的页面中

代码如下:

  //前台

  <div id="showIframe"></div>

  $(document).ready(function() {

  var url = "@Url.Action("GetPageHtml","Catalog")";

  $.ajax({

  url: url,

  type: "POST",

  dataType:"json",

  data: { url: "http://www.baidu.com" },

  error: function () {

  alert("bbb");

  },

  success: function (data) {

  $("#showIframe").append(data);

  //$("#showIframe div").hide();

  //$("#showIframe>#container").show();

  //$("#showIframe>#container>#content").show();

  //$("#showIframe>#container>#content>.cmsPage").show();

  }

  });

  });

  //后台

  /// <summary>
  /// Core of the crawler: sends an HTTP request to <paramref name="url"/> from the
  /// server and returns the whole page's HTML wrapped in a JSON result.
  /// </summary>
  /// <param name="url">Absolute http or https address of the page to download.</param>
  /// <returns>
  /// The page HTML as a JSON string, or an empty JSON string when the address is
  /// not a valid http/https URL or the download fails.
  /// </returns>
  /// <remarks>
  /// NOTE(review): the address comes straight from the client, so this action can be
  /// used to make the server request arbitrary hosts (SSRF), and the caller injects
  /// the returned markup into its own page. Consider restricting the allowed hosts.
  /// </remarks>
  [HttpPost]
  public JsonResult GetPageHtml(string url)
  {
      // Reject null, relative and non-HTTP addresses up front instead of relying on
      // a failed cast to HttpWebRequest being swallowed by a catch-all.
      System.Uri target;
      bool isHttpUrl = System.Uri.TryCreate(url, System.UriKind.Absolute, out target)
          && (target.Scheme == System.Uri.UriSchemeHttp || target.Scheme == System.Uri.UriSchemeHttps);
      if (!isHttpUrl)
      {
          return Json("");
      }

      string pageinfo;
      try
      {
          HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(target);
          myReq.Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
          myReq.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)";

          // Dispose the response, stream and reader so the underlying connection
          // is released even when reading fails part-way.
          using (HttpWebResponse myRep = (HttpWebResponse)myReq.GetResponse())
          using (Stream myStream = myRep.GetResponseStream())
          using (StreamReader sr = new StreamReader(myStream, ResolveResponseEncoding(myRep), true))
          {
              pageinfo = sr.ReadToEnd();
          }
      }
      catch (WebException ex)
      {
          // Network or HTTP-level failure (DNS, timeout, 4xx/5xx): keep the
          // best-effort contract of returning an empty page, but leave a trace.
          System.Diagnostics.Trace.TraceWarning("GetPageHtml: failed to download '{0}': {1}", target, ex.Message);
          pageinfo = "";
      }
      catch (IOException ex)
      {
          // The connection dropped while the body was being read.
          System.Diagnostics.Trace.TraceWarning("GetPageHtml: failed to read '{0}': {1}", target, ex.Message);
          pageinfo = "";
      }

      return Json(pageinfo);
  }

  // Chooses the text encoding for a response body: the charset declared in the
  // Content-Type header when there is one and it is recognised, otherwise UTF-8.
  private static Encoding ResolveResponseEncoding(HttpWebResponse response)
  {
      string contentType = response.ContentType;

      // Only trust CharacterSet when the header really names a charset, so that a
      // library-supplied default is not mistaken for the page's declared encoding.
      bool declaresCharset = contentType != null
          && contentType.IndexOf("charset", System.StringComparison.OrdinalIgnoreCase) >= 0;

      if (declaresCharset && !string.IsNullOrEmpty(response.CharacterSet))
      {
          try
          {
              return Encoding.GetEncoding(response.CharacterSet);
          }
          catch (System.ArgumentException)
          {
              // Unknown or malformed charset name: fall through to UTF-8.
          }
      }

      return Encoding.UTF8;
  }