php获取中文拼音首字母类和函数分享

  一、公司同事整理的类,挺实用的.相信拿出来分享下他不会介意的O(∩_∩)O.不过如果首字母是数字或英文会有些问题.

  

复制代码 代码如下:

  /**
   * Helper_Spell: utility class that derives pinyin initial letters for Chinese text.
   *
   * Input is converted to GBK and every double-byte character is looked up in a
   * table holding the first GB2312 code of each initial letter. Characters whose
   * code lies outside that table (the original author describes them as
   * "non level-1 hanzi") are dropped from the result.
   *
   * Known limitation: any byte below 160 is treated as a single-byte character,
   * so GBK characters whose lead byte is below 160 are not recognised as hanzi.
   *
   * @category Helper
   * @package  Helper_Spell
   * @author   Lancer <[email protected]>
   * @version  1.0
   * @see      Translation_Big2gb
   */
  class Helper_Spell {

      /**
       * Lower boundary code => initial letter, in ascending key order.
       * A code is computed as (first byte * 1000 + second byte) of the GBK
       * encoding. The last entry marks the upper end of the supported range.
       *
       * @var array
       * @access private
       */
      private $_pinyins = array(
          176161 => 'A',
          176197 => 'B',
          178193 => 'C',
          180238 => 'D',
          182234 => 'E',
          183162 => 'F',
          184193 => 'G',
          185254 => 'H',
          187247 => 'J',
          191166 => 'K',
          192172 => 'L',
          194232 => 'M',
          196195 => 'N',
          197182 => 'O',
          197190 => 'P',
          198218 => 'Q',
          200187 => 'R',
          200246 => 'S',
          203250 => 'T',
          205218 => 'W',
          206244 => 'X',
          209185 => 'Y',
          212209 => 'Z',
          215249 => 'Z',
      );

      /**
       * Encoding of the strings handed to this instance.
       *
       * @var string
       * @access private
       */
      private $_charset = null;

      /**
       * Constructor. Stores the encoding of the input strings.
       *
       * @param string $charset 'utf-8' (default) or 'gb2312'; only 'utf-8'
       *                        triggers a conversion to GBK before the lookup
       */
      public function __construct( $charset = 'utf-8' ) {
          $this->_charset = $charset;
      }

      /**
       * Returns the first A-Z letter among the initials of $str, or '*' when
       * there is none.
       *
       * ASCII letters are upper-cased before the scan, so 'abc' yields 'A'
       * exactly like 'Abc' does.
       *
       * @access public
       * @static
       * @param  string $str
       * @param  string $charset encoding of $str, forwarded to getInitials()
       * @return string a single upper-case letter, or '*'
       * @example Helper_Spell::getInitialsFirst('我的爱'); => W
       */
      public static function getInitialsFirst( $str, $charset = 'utf-8' ) {
          // Forward $charset: the original dropped it, so non-UTF-8 callers
          // were silently processed as UTF-8.
          $initials = strtoupper( self::getInitials( $str, $charset ) );
          $length   = strlen( $initials );
          for ( $i = 0; $i < $length; $i++ ) {
              $byte = ord( $initials[$i] );
              if ( $byte >= 65 && $byte <= 90 ) { // 'A'..'Z'
                  return $initials[$i];
              }
          }
          return '*';
      }

      /**
       * Returns the pinyin initials of every character in $str.
       *
       * @access public
       * @static
       * @param  string $str
       * @param  string $charset encoding of $str
       * @return string
       * @example Helper_Spell::getInitials('我的爱'); => WDA
       */
      public static function getInitials( $str, $charset = 'utf-8' ) {
          $instance = new Helper_Spell( $charset );
          return $instance->_getInitials( $str );
      }

      /**
       * Computes the pinyin initials of a string.
       *
       *   - pure ASCII input is returned unchanged      e.g. abc123 => abc123
       *   - Chinese input yields its initials           e.g. 王小明 => WXM
       *   - mixed input yields initials and ASCII chars e.g. 我i我j => WIWJ
       *
       * @access private
       * @param  string $str
       * @param  bool   $translation when true the string is first passed through
       *                             Translation_Big2gb::big2gb() (defined outside
       *                             this class; presumably Traditional-to-Simplified
       *                             conversion - confirm against that class)
       * @return string
       */
      private function _getInitials( $str, $translation = TRUE ) {
          // empty() would also swallow '0' / 0, which are documented to be
          // returned unchanged like any other digit string.
          if ( empty( $str ) && !is_numeric( $str ) ) {
              return '';
          }
          $str = (string) $str;

          // Only a string with no double-byte lead byte anywhere may skip the
          // conversion; checking just a prefix left e.g. 'ab我' unconverted.
          if ( $this->_isAsciis( $str ) ) {
              return $str;
          }

          if ( $translation ) {
              $str = Translation_Big2gb::big2gb( $str );
          }

          if ( strcasecmp( (string) $this->_charset, 'utf-8' ) === 0 ) {
              // IGNORE matters: it lets iconv() skip characters it cannot
              // convert instead of aborting.
              $str = iconv( 'utf-8', 'gbk//IGNORE', $str );
              if ( $str === false ) {
                  return '';
              }
          }

          $result = array();
          foreach ( $this->_cutWord( $str ) as $word ) {
              if ( $this->_isAscii( $word ) ) { // not Chinese: keep as is
                  $result[] = $word;
                  continue;
              }
              if ( strlen( $word ) < 2 ) {
                  // Truncated double-byte character at the end of the input.
                  continue;
              }
              $code = ord( $word[0] ) * 1000 + ord( $word[1] );
              $key  = $this->_search( $code );
              if ( $key !== -1 ) {
                  $result[] = $this->_pinyins[$key];
              }
          }
          return strtoupper( implode( '', $result ) );
      }

      /**
       * Extracts a substring from a double-byte encoded string. $start and
       * $len are doubled to obtain byte positions; a byte above 129 is taken
       * together with the byte that follows it.
       *
       * Not called by the other methods of this class.
       *
       * @access private
       * @param string $str
       * @param int    $start
       * @param int    $len
       * @return string
       */
      private function _msubstr( $str, $start, $len ) {
          $start  = $start * 2;
          $len    = $len * 2;
          $strlen = strlen( $str );
          $result = '';
          for ( $i = 0; $i < $strlen; $i++ ) {
              $isDouble = ord( substr( $str, $i, 1 ) ) > 129;
              if ( $i >= $start && $i < ( $start + $len ) ) {
                  $result .= substr( $str, $i, $isDouble ? 2 : 1 );
              }
              if ( $isDouble ) {
                  $i++; // skip the trail byte
              }
          }
          return $result;
      }

      /**
       * Splits a GBK string into an array of units: one byte for bytes below
       * 160, two bytes otherwise.
       *
       * @access private
       * @param string $str
       * @return array
       */
      private function _cutWord( $str ) {
          $words  = array();
          $length = strlen( $str );
          $offset = 0;
          // Single pass over the bytes instead of re-scanning the remaining
          // string for every character.
          while ( $offset < $length ) {
              $width   = $this->_isAscii( $str[$offset] ) ? 1 : 2;
              $words[] = substr( $str, $offset, $width );
              $offset += $width;
          }
          return $words;
      }

      /**
       * Tells whether the first byte of $char is below 160, which this class
       * treats as a single-byte (non-Chinese) character.
       *
       * @access private
       * @param  string $char
       * @return bool
       */
      private function _isAscii( $char ) {
          return ( ord( substr( $char, 0, 1 ) ) < 160 );
      }

      /**
       * Tells whether every byte of $str is below 160, i.e. the string holds
       * nothing this class would treat as a Chinese character.
       *
       * @access private
       * @param  string $str
       * @return bool
       */
      private function _isAsciis( $str ) {
          $length = strlen( $str );
          for ( $i = 0; $i < $length; $i++ ) {
              if ( !$this->_isAscii( $str[$i] ) ) {
                  return false;
              }
          }
          return true;
      }

      /**
       * Maps an ASCII code to a digit or an upper-case letter; any other code
       * yields '~'.
       *
       * Not called by the other methods of this class.
       *
       * @access private
       * @param  int $ascii
       * @return string
       */
      private function _getChar( $ascii ) {
          if ( $ascii >= 48 && $ascii <= 57 ) {
              return chr( $ascii );      // digit
          } elseif ( $ascii >= 65 && $ascii <= 90 ) {
              return chr( $ascii );      // A-Z
          } elseif ( $ascii >= 97 && $ascii <= 122 ) {
              return chr( $ascii - 32 ); // a-z, upper-cased
          } else {
              return '~';                // anything else
          }
      }

      /**
       * Binary search for the table key that covers $code: the largest
       * boundary that is less than or equal to $code.
       *
       * @access private
       * @param int $code first byte * 1000 + second byte of a GBK character
       * @return int the matching key of $_pinyins, or -1 when $code lies
       *             outside the table
       */
      private function _search( $code ) {
          $bounds = array_keys( $this->_pinyins );
          $low    = 0;
          $high   = count( $bounds ) - 1;

          // Reject everything outside the table (derived from the table size
          // rather than a hard-coded index).
          if ( $code < $bounds[$low] || $code > $bounds[$high] ) {
              return -1;
          }

          while ( $low < $high ) {
              // Round up so that $low = $middle always makes progress.
              $middle = ( $low + $high + 1 ) >> 1;
              if ( $bounds[$middle] <= $code ) {
                  $low = $middle;
              } else {
                  $high = $middle - 1;
              }
          }
          return $bounds[$low];
      }

  }

  二、用来得到中文的首字母

  这个类(charset)用来把中文转换为拼音首字母(每个汉字只返回其拼音的第一个字母):

  

复制代码 代码如下:

  <?php

  /**
   * Converts GBK/GB2312 encoded Chinese text to pinyin initial letters.
   *
   * Each double-byte character is folded into the code
   * (first byte * 256 + second byte - 65536) and looked up in a table of
   * pinyin syllables; only the first letter of the matching syllable is
   * returned. Input must already be GBK/GB2312 encoded.
   */
  class charset
  {
      /**
       * Pinyin syllable => lowest code that maps to it, in ascending code order.
       *
       * @var array
       */
      private $_code=array(
          array("a",'-20319'),
          array("ai",'-20317'),
          array("an",'-20304'),
          array("ang",'-20295'),
          array("ao",'-20292'),
          array("ba",'-20283'),
          array("bai",'-20265'),
          array("ban",'-20257'),
          array("bang",'-20242'),
          array("bao",'-20230'),
          array("bei",'-20051'),
          array("ben",'-20036'),
          array("beng",'-20032'),
          array("bi",'-20026'),
          array("bian",'-20002'),
          array("biao",'-19990'),
          array("bie",'-19986'),
          array("bin",'-19982'),
          array("bing",'-19976'),
          array("bo",'-19805'),
          array("bu",'-19784'),
          array("ca",'-19775'),
          array("cai",'-19774'),
          array("can",'-19763'),
          array("cang",'-19756'),
          array("cao",'-19751'),
          array("ce",'-19746'),
          array("ceng",'-19741'),
          array("cha",'-19739'),
          array("chai",'-19728'),
          array("chan",'-19725'),
          array("chang",'-19715'),
          array("chao",'-19540'),
          array("che",'-19531'),
          array("chen",'-19525'),
          array("cheng",'-19515'),
          array("chi",'-19500'),
          array("chong",'-19484'),
          array("chou",'-19479'),
          array("chu",'-19467'),
          array("chuai",'-19289'),
          array("chuan",'-19288'),
          array("chuang",'-19281'),
          array("chui",'-19275'),
          array("chun",'-19270'),
          array("chuo",'-19263'),
          array("ci",'-19261'),
          array("cong",'-19249'),
          array("cou",'-19243'),
          array("cu",'-19242'),
          array("cuan",'-19238'),
          array("cui",'-19235'),
          array("cun",'-19227'),
          array("cuo",'-19224'),
          array("da",'-19218'),
          array("dai",'-19212'),
          array("dan",'-19038'),
          array("dang",'-19023'),
          array("dao",'-19018'),
          array("de",'-19006'),
          array("deng",'-19003'),
          array("di",'-18996'),
          array("dian",'-18977'),
          array("diao",'-18961'),
          array("die",'-18952'),
          array("ding",'-18783'),
          array("diu",'-18774'),
          array("dong",'-18773'),
          array("dou",'-18763'),
          array("du",'-18756'),
          array("duan",'-18741'),
          array("dui",'-18735'),
          array("dun",'-18731'),
          array("duo",'-18722'),
          array("e",'-18710'),
          array("en",'-18697'),
          array("er",'-18696'),
          array("fa",'-18526'),
          array("fan",'-18518'),
          array("fang",'-18501'),
          array("fei",'-18490'),
          array("fen",'-18478'),
          array("feng",'-18463'),
          array("fo",'-18448'),
          array("fou",'-18447'),
          array("fu",'-18446'),
          array("ga",'-18239'),
          array("gai",'-18237'),
          array("gan",'-18231'),
          array("gang",'-18220'),
          array("gao",'-18211'),
          array("ge",'-18201'),
          array("gei",'-18184'),
          array("gen",'-18183'),
          array("geng",'-18181'),
          array("gong",'-18012'),
          array("gou",'-17997'),
          array("gu",'-17988'),
          array("gua",'-17970'),
          array("guai",'-17964'),
          array("guan",'-17961'),
          array("guang",'-17950'),
          array("gui",'-17947'),
          array("gun",'-17931'),
          array("guo",'-17928'),
          array("ha",'-17922'),
          array("hai",'-17759'),
          array("han",'-17752'),
          array("hang",'-17733'),
          array("hao",'-17730'),
          array("he",'-17721'),
          array("hei",'-17703'),
          array("hen",'-17701'),
          array("heng",'-17697'),
          array("hong",'-17692'),
          array("hou",'-17683'),
          array("hu",'-17676'),
          array("hua",'-17496'),
          array("huai",'-17487'),
          array("huan",'-17482'),
          array("huang",'-17468'),
          array("hui",'-17454'),
          array("hun",'-17433'),
          array("huo",'-17427'),
          array("ji",'-17417'),
          array("jia",'-17202'),
          array("jian",'-17185'),
          array("jiang",'-16983'),
          array("jiao",'-16970'),
          array("jie",'-16942'),
          array("jin",'-16915'),
          array("jing",'-16733'),
          array("jiong",'-16708'),
          array("jiu",'-16706'),
          array("ju",'-16689'),
          array("juan",'-16664'),
          array("jue",'-16657'),
          array("jun",'-16647'),
          array("ka",'-16474'),
          array("kai",'-16470'),
          array("kan",'-16465'),
          array("kang",'-16459'),
          array("kao",'-16452'),
          array("ke",'-16448'),
          array("ken",'-16433'),
          array("keng",'-16429'),
          array("kong",'-16427'),
          array("kou",'-16423'),
          array("ku",'-16419'),
          array("kua",'-16412'),
          array("kuai",'-16407'),
          array("kuan",'-16403'),
          array("kuang",'-16401'),
          array("kui",'-16393'),
          array("kun",'-16220'),
          array("kuo",'-16216'),
          array("la",'-16212'),
          array("lai",'-16205'),
          array("lan",'-16202'),
          array("lang",'-16187'),
          array("lao",'-16180'),
          array("le",'-16171'),
          array("lei",'-16169'),
          array("leng",'-16158'),
          array("li",'-16155'),
          array("lia",'-15959'),
          array("lian",'-15958'),
          array("liang",'-15944'),
          array("liao",'-15933'),
          array("lie",'-15920'),
          array("lin",'-15915'),
          array("ling",'-15903'),
          array("liu",'-15889'),
          array("long",'-15878'),
          array("lou",'-15707'),
          array("lu",'-15701'),
          array("lv",'-15681'),
          array("luan",'-15667'),
          array("lue",'-15661'),
          array("lun",'-15659'),
          array("luo",'-15652'),
          array("ma",'-15640'),
          array("mai",'-15631'),
          array("man",'-15625'),
          array("mang",'-15454'),
          array("mao",'-15448'),
          array("me",'-15436'),
          array("mei",'-15435'),
          array("men",'-15419'),
          array("meng",'-15416'),
          array("mi",'-15408'),
          array("mian",'-15394'),
          array("miao",'-15385'),
          array("mie",'-15377'),
          array("min",'-15375'),
          array("ming",'-15369'),
          array("miu",'-15363'),
          array("mo",'-15362'),
          array("mou",'-15183'),
          array("mu",'-15180'),
          array("na",'-15165'),
          array("nai",'-15158'),
          array("nan",'-15153'),
          array("nang",'-15150'),
          array("nao",'-15149'),
          array("ne",'-15144'),
          array("nei",'-15143'),
          array("nen",'-15141'),
          array("neng",'-15140'),
          array("ni",'-15139'),
          array("nian",'-15128'),
          array("niang",'-15121'),
          array("niao",'-15119'),
          array("nie",'-15117'),
          array("nin",'-15110'),
          array("ning",'-15109'),
          array("niu",'-14941'),
          array("nong",'-14937'),
          array("nu",'-14933'),
          array("nv",'-14930'),
          array("nuan",'-14929'),
          array("nue",'-14928'),
          array("nuo",'-14926'),
          array("o",'-14922'),
          array("ou",'-14921'),
          array("pa",'-14914'),
          array("pai",'-14908'),
          array("pan",'-14902'),
          array("pang",'-14894'),
          array("pao",'-14889'),
          array("pei",'-14882'),
          array("pen",'-14873'),
          array("peng",'-14871'),
          array("pi",'-14857'),
          array("pian",'-14678'),
          array("piao",'-14674'),
          array("pie",'-14670'),
          array("pin",'-14668'),
          array("ping",'-14663'),
          array("po",'-14654'),
          array("pu",'-14645'),
          array("qi",'-14630'),
          array("qia",'-14594'),
          array("qian",'-14429'),
          array("qiang",'-14407'),
          array("qiao",'-14399'),
          array("qie",'-14384'),
          array("qin",'-14379'),
          array("qing",'-14368'),
          array("qiong",'-14355'),
          array("qiu",'-14353'),
          array("qu",'-14345'),
          array("quan",'-14170'),
          array("que",'-14159'),
          array("qun",'-14151'),
          array("ran",'-14149'),
          array("rang",'-14145'),
          array("rao",'-14140'),
          array("re",'-14137'),
          array("ren",'-14135'),
          array("reng",'-14125'),
          array("ri",'-14123'),
          array("rong",'-14122'),
          array("rou",'-14112'),
          array("ru",'-14109'),
          array("ruan",'-14099'),
          array("rui",'-14097'),
          array("run",'-14094'),
          array("ruo",'-14092'),
          array("sa",'-14090'),
          array("sai",'-14087'),
          array("san",'-14083'),
          array("sang",'-13917'),
          array("sao",'-13914'),
          array("se",'-13910'),
          array("sen",'-13907'),
          array("seng",'-13906'),
          array("sha",'-13905'),
          array("shai",'-13896'),
          array("shan",'-13894'),
          array("shang",'-13878'),
          array("shao",'-13870'),
          array("she",'-13859'),
          array("shen",'-13847'),
          array("sheng",'-13831'),
          array("shi",'-13658'),
          array("shou",'-13611'),
          array("shu",'-13601'),
          array("shua",'-13406'),
          array("shuai",'-13404'),
          array("shuan",'-13400'),
          array("shuang",'-13398'),
          array("shui",'-13395'),
          array("shun",'-13391'),
          array("shuo",'-13387'),
          array("si",'-13383'),
          array("song",'-13367'),
          array("sou",'-13359'),
          array("su",'-13356'),
          array("suan",'-13343'),
          array("sui",'-13340'),
          array("sun",'-13329'),
          array("suo",'-13326'),
          array("ta",'-13318'),
          array("tai",'-13147'),
          array("tan",'-13138'),
          array("tang",'-13120'),
          array("tao",'-13107'),
          array("te",'-13096'),
          array("teng",'-13095'),
          array("ti",'-13091'),
          array("tian",'-13076'),
          array("tiao",'-13068'),
          array("tie",'-13063'),
          array("ting",'-13060'),
          array("tong",'-12888'),
          array("tou",'-12875'),
          array("tu",'-12871'),
          array("tuan",'-12860'),
          array("tui",'-12858'),
          array("tun",'-12852'),
          array("tuo",'-12849'),
          array("wa",'-12838'),
          array("wai",'-12831'),
          array("wan",'-12829'),
          array("wang",'-12812'),
          array("wei",'-12802'),
          array("wen",'-12607'),
          array("weng",'-12597'),
          array("wo",'-12594'),
          array("wu",'-12585'),
          array("xi",'-12556'),
          array("xia",'-12359'),
          array("xian",'-12346'),
          array("xiang",'-12320'),
          array("xiao",'-12300'),
          array("xie",'-12120'),
          array("xin",'-12099'),
          array("xing",'-12089'),
          array("xiong",'-12074'),
          array("xiu",'-12067'),
          array("xu",'-12058'),
          array("xuan",'-12039'),
          array("xue",'-11867'),
          array("xun",'-11861'),
          array("ya",'-11847'),
          array("yan",'-11831'),
          array("yang",'-11798'),
          array("yao",'-11781'),
          array("ye",'-11604'),
          array("yi",'-11589'),
          array("yin",'-11536'),
          array("ying",'-11358'),
          array("yo",'-11340'),
          array("yong",'-11339'),
          array("you",'-11324'),
          array("yu",'-11303'),
          array("yuan",'-11097'),
          array("yue",'-11077'),
          array("yun",'-11067'),
          array("za",'-11055'),
          array("zai",'-11052'),
          array("zan",'-11045'),
          array("zang",'-11041'),
          array("zao",'-11038'),
          array("ze",'-11024'),
          array("zei",'-11020'),
          array("zen",'-11019'),
          array("zeng",'-11018'),
          array("zha",'-11014'),
          array("zhai",'-10838'),
          array("zhan",'-10832'),
          array("zhang",'-10815'),
          array("zhao",'-10800'),
          array("zhe",'-10790'),
          array("zhen",'-10780'),
          array("zheng",'-10764'),
          array("zhi",'-10587'),
          array("zhong",'-10544'),
          array("zhou",'-10533'),
          array("zhu",'-10519'),
          array("zhua",'-10331'),
          array("zhuai",'-10329'),
          array("zhuan",'-10328'),
          array("zhuang",'-10322'),
          array("zhui",'-10315'),
          array("zhun",'-10309'),
          array("zhuo",'-10307'),
          array("zi",'-10296'),
          array("zong",'-10281'),
          array("zou",'-10274'),
          array("zu",'-10270'),
          array("zuan",'-10262'),
          array("zui",'-10260'),
          array("zun",'-10256'),
          array("zuo",'-10254')
      );

      /**
       * Converts a GBK/GB2312 encoded string to pinyin initials.
       *
       * Bytes 1..159 are copied through unchanged; a byte above 160 is read
       * together with the following byte as one Chinese character and replaced
       * by the first letter of its pinyin (lower case). Characters outside the
       * table contribute nothing.
       *
       * @param string $str GBK/GB2312 encoded text
       * @return string
       */
      public function PinYin($str){
          $str    = (string) $str;
          $length = strlen($str); // hoisted: the length never changes
          $ret    = "";
          for ($i = 0; $i < $length; $i++) {
              $p = ord($str[$i]);
              if ($p > 160) { // lead byte of a Chinese character
                  if ($i + 1 >= $length) {
                      // Truncated character at the end of the input: pairing
                      // the lead byte with a zero trail byte could land inside
                      // the table and emit a bogus letter, so stop here.
                      break;
                  }
                  $q = ord($str[++$i]);
                  $p = $p * 256 + $q - 65536;
              }
              $ret .= $this->convert($p);
          }
          return $ret;
      }

      /**
       * Maps one character code to its output.
       *
       * @param int $num a byte value (1..159), or the folded code
       *                 (first byte * 256 + second byte - 65536) of a
       *                 double-byte character
       * @return string the byte itself for 1..159, the first pinyin letter for
       *                codes from -20319 to -10247, otherwise an empty string
       */
      public function convert($num){
          if ($num > 0 && $num < 160) { // not a Chinese character: return it as is
              return chr($num);
          }
          if ($num < -20319 || $num > -10247) { // outside the table
              return "";
          }
          // Walk down from the end to the last syllable whose lower bound is
          // <= $num. The range check above guarantees that a match exists.
          for ($i = count($this->_code) - 1; $i >= 0; $i--) {
              if ((int) $this->_code[$i][1] <= $num) {
                  break;
              }
          }
          return substr($this->_code[$i][0], 0, 1);
      }
  }

  下面这个是用来测试代码的:

  

复制代码 代码如下:

  <?php

  // Demo: print the upper-case pinyin initials of a few sample words.
  include_once 'charset.class.php';
  header('Content-type: text/html;charset=utf-8');

  $charset = new charset();
  $samples = array('武汉','中国','上海');

  foreach ($samples as $sample) {
      // The class works on GBK bytes, so convert the UTF-8 literal first.
      $gbkText  = mb_convert_encoding($sample, 'gbk', 'utf-8');
      $initials = strtoupper($charset->PinYin($gbkText));
      // Convert the result back to UTF-8 to match the declared page charset.
      echo iconv('gbk', 'utf-8//IGNORE', $initials);
      echo '<br/>';
  }

  三、这是在网上找到的一个方法,经过测试可以正常使用,但对一些生僻字或者特殊字符会有问题.

  

复制代码 代码如下:

  /**
   * Returns the upper-case initial letter for the first character of $str.
   *
   * ASCII letters are returned upper-cased. A Chinese character is mapped to
   * the first letter of its pinyin through its GB2312 code. The input may be
   * UTF-8, or already GB2312/GBK encoded.
   *
   * @param string $str
   * @return string|null '' for empty input (note that empty() also treats '0'
   *                     as empty), a letter A-Z, or null when the first
   *                     character has no initial (digits, punctuation,
   *                     characters outside the table)
   */
  function getFirstCharter($str){
      if (empty($str)) {
          return '';
      }
      $str  = (string) $str;
      $lead = ord($str[0]);

      // Letters only: the previous 'A'..'z' range test also accepted the six
      // characters between 'Z' and 'a' ([ \ ] ^ _ `).
      if (($lead >= 65 && $lead <= 90) || ($lead >= 97 && $lead <= 122)) {
          return strtoupper($str[0]);
      }
      if ($lead < 0x80) {
          // Any other ASCII character can never fall inside the code table.
          return null;
      }

      // Convert only the first character, so that an unconvertible character
      // later in the string (emoji, accented Latin, ...) cannot spoil the result.
      // If the input is not valid UTF-8, or the conversion fails, the bytes are
      // assumed to be GB2312/GBK already and are used unchanged.
      $gb = $str;
      if (preg_match('/^./us', $str, $match) === 1) {
          $converted = iconv('UTF-8', 'gb2312', $match[0]);
          if ($converted !== false && $converted !== '') {
              $gb = $converted;
          }
      }
      if (strlen($gb) < 2) {
          return null; // no second byte to build a code from
      }

      $asc = ord($gb[0]) * 256 + ord($gb[1]) - 65536;
      if ($asc < -20319 || $asc > -10247) {
          return null;
      }

      // Lowest code of each initial letter; the ranges are contiguous, so the
      // last letter whose lower bound is <= $asc is the answer.
      $lowerBounds = array(
          'A' => -20319, 'B' => -20283, 'C' => -19775, 'D' => -19218,
          'E' => -18710, 'F' => -18526, 'G' => -18239, 'H' => -17922,
          'J' => -17417, 'K' => -16474, 'L' => -16212, 'M' => -15640,
          'N' => -15165, 'O' => -14922, 'P' => -14914, 'Q' => -14630,
          'R' => -14149, 'S' => -14090, 'T' => -13318, 'W' => -12838,
          'X' => -12556, 'Y' => -11847, 'Z' => -11055,
      );
      $letter = null;
      foreach ($lowerBounds as $candidate => $lowerBound) {
          if ($asc < $lowerBound) {
              break;
          }
          $letter = $candidate;
      }
      return $letter;
  }