asp.net HttpWebRequest自动识别网页编码

  

复制代码 代码如下:

  /// <summary>
  /// Detects the character-set name of the page at <paramref name="url"/>.
  /// </summary>
  /// <remarks>
  /// The body is sniffed for a <c>charset=...</c> declaration first (covers both
  /// <c>&lt;meta charset="utf-8"&gt;</c> and
  /// <c>&lt;meta http-equiv="Content-Type" content="text/html; charset=utf-8"&gt;</c>);
  /// if none is found, the charset reported by the response headers is used.
  /// Redirects are not followed, and any failure yields the system default.
  /// </remarks>
  /// <param name="url">Address of the page to inspect.</param>
  /// <returns>
  /// The detected charset name, or <c>Encoding.Default.BodyName</c> when the page
  /// cannot be fetched, is not a 200 response, declares a Content-Length of 1 MiB
  /// or more, or carries no charset information.
  /// </returns>
  static string GetEncoding(string url)
  {
      const int TimeoutMilliseconds = 20000;
      const long MaxContentLength = 1024 * 1024;

      // Optional opening quote (single or double) followed by the charset token itself.
      // The token is restricted to characters that legally appear in charset names so the
      // match stops at the closing quote, '>' or whitespace instead of swallowing markup.
      const string CharsetPattern = @"charset\s*=\s*[""']?\s*(?<charset>[A-Za-z0-9][A-Za-z0-9._:\-]*)";

      try
      {
          HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
          request.Timeout = TimeoutMilliseconds;
          request.AllowAutoRedirect = false;

          using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
          {
              // ContentLength is -1 when the server sends no Content-Length header,
              // so responses of unknown size pass this check (as they always did).
              if (response.StatusCode != HttpStatusCode.OK || response.ContentLength >= MaxContentLength)
              {
                  return Encoding.Default.BodyName;
              }

              Stream content = response.GetResponseStream();
              if (string.Equals(response.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase))
              {
                  content = new GZipStream(content, CompressionMode.Decompress);
              }

              string html;
              // ASCII is sufficient for sniffing: the charset declaration itself is ASCII
              // in every ASCII-compatible encoding. Disposing the reader disposes the stream.
              using (StreamReader reader = new StreamReader(content, Encoding.ASCII))
              {
                  html = reader.ReadToEnd();
              }

              Match declared = Regex.Match(html, CharsetPattern, RegexOptions.IgnoreCase);
              if (declared.Success)
              {
                  return declared.Groups["charset"].Value;
              }

              // Read the header value while the response is still open.
              string headerCharset = response.CharacterSet;
              if (!string.IsNullOrEmpty(headerCharset))
              {
                  return headerCharset;
              }
          }
      }
      catch (Exception ex)
      {
          // Deliberate best-effort: callers rely on this method never throwing and
          // falling back to the default encoding. Surface the cause in debug builds.
          System.Diagnostics.Debug.WriteLine("GetEncoding failed for '" + url + "': " + ex.Message);
      }

      return Encoding.Default.BodyName;
  }

  /// <summary>
  /// Downloads the source of the page at <paramref name="url"/> and decodes it
  /// with <paramref name="encoding"/>.
  /// </summary>
  /// <remarks>
  /// Redirects are not followed. A gzip <c>Content-Encoding</c> is decompressed
  /// before decoding. Any failure yields an empty string rather than an exception.
  /// </remarks>
  /// <param name="url">Address of the page to download.</param>
  /// <param name="encoding">Encoding used to decode the response body.</param>
  /// <returns>
  /// The page source, or <c>string.Empty</c> when the page cannot be fetched,
  /// is not a 200 response, or declares a Content-Length of 1 MiB or more.
  /// </returns>
  static string GetHtml(string url, Encoding encoding)
  {
      const int TimeoutMilliseconds = 20000;
      const long MaxContentLength = 1024 * 1024;

      try
      {
          HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
          request.Timeout = TimeoutMilliseconds;
          request.AllowAutoRedirect = false;

          using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
          {
              // ContentLength is -1 when the server sends no Content-Length header,
              // so responses of unknown size pass this check (as they always did).
              if (response.StatusCode != HttpStatusCode.OK || response.ContentLength >= MaxContentLength)
              {
                  return string.Empty;
              }

              Stream content = response.GetResponseStream();
              if (string.Equals(response.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase))
              {
                  content = new GZipStream(content, CompressionMode.Decompress);
              }

              // Disposing the reader also disposes the (possibly wrapped) response stream.
              using (StreamReader reader = new StreamReader(content, encoding))
              {
                  return reader.ReadToEnd();
              }
          }
      }
      catch (Exception ex)
      {
          // Deliberate best-effort: callers rely on this method never throwing and
          // receiving an empty string on failure. Surface the cause in debug builds.
          System.Diagnostics.Debug.WriteLine("GetHtml failed for '" + url + "': " + ex.Message);
      }

      return string.Empty;
  }