php权重计算方法代码分享

  

代码如下:

  <?php

  /* vim: set expandtab tabstop=4 shiftwidth=4: */

  // +------------------------------------------------------------------------

  //  Name       :   权重计算

  //  Description:   稍加修改,亦可用于分词,词频统计,全文检索和垃圾检测

  //  Date       :   2013/12/16 08:51

  /**
   * Keyword weight calculator backed by a byte-level trie.
   *
   * A "table" is a list of lines, each line holding words separated by ',' or
   * '|'. Every word is inserted into the trie and tagged with the key of the
   * table line it came from. An "item" is a string that is scanned for those
   * words; for each item the class reports which table key(s) were hit most
   * often.
   *
   * Words are handled byte by byte (str_split / string offsets), so a
   * multi-byte character is simply treated as a sequence of bytes.
   */
  class weight {

      /**
       * Trie storage. Index 0 is the root node. Each node is an array mapping
       * a single byte to the index of its child node; the special key 'acc'
       * holds the list of table keys of every word that ends on that node.
       *
       * @var array
       */
      protected $aDict = array(array());

      /** @var array Items (strings) to be scanned, keyed as supplied by the caller. */
      protected $aItems = array();

      /** @var string|null Rule used to build the currently cached $aShow. */
      protected $sLastRule;

      /** @var array Per-item hit counts (table key => number of hits) from the last genShow(). */
      protected $aMatchs = array();

      /** @var array Cached result of the last genShow(). */
      protected $aShow = array();

      /**
       * Drop every cached result so the next getShow() recomputes from scratch.
       *
       * The properties are reset to empty values rather than unset(), so they
       * stay declared and typed as arrays.
       */
      private function init() {
          $this->aShow = array();
          $this->aMatchs = array();
          $this->sLastRule = null;
      }

      /**
       * Replace the list of items to scan.
       *
       * @param mixed $mItems Array of items, or a single item which is wrapped in an array.
       * @return void
       */
      public function newItems($mItems) {
          $this->aItems = is_array($mItems) ? $mItems : array($mItems);
          $this->init();
      }

      /**
       * Add the words of a lookup table to the dictionary.
       *
       * Words are added to the existing trie; earlier tables are not removed.
       *
       * @param array $aTable Table key => line of words separated by ',' or '|'.
       * @return void
       */
      public function newTable(array $aTable) {
          foreach ($aTable as $iTableKey => $sTableLine) {
              // '|' and ',' are interchangeable separators.
              $aWords = explode(',', str_replace('|', ',', $sTableLine));
              foreach ($aWords as $sWord) {
                  $this->genDict($sWord, $iTableKey);
              }
          }
          $this->init();
      }

      /**
       * Return the result for the given rule, reusing the cached result when
       * the rule has not changed since it was computed.
       *
       * @param string $sRule Selection rule; only 'max' is implemented.
       * @return array Item key => list of selected table keys. Empty when there
       *               are no items.
       */
      public function getShow($sRule = 'max') {
          if (empty($this->aItems) || empty($this->aDict)) {
              return array();
          }
          if (empty($this->aShow) || $sRule !== $this->sLastRule) {
              return $this->genShow($sRule);
          }
          return $this->aShow;
      }

      /**
       * Scan every item, count hits per table key and build the result.
       *
       * For rule 'max' each item maps to the table key(s) with the highest hit
       * count; an item without any hit maps to an empty array. For any other
       * rule the hit counts are still recorded in $aMatchs but the result is an
       * empty array.
       *
       * @param string $sRule Selection rule.
       * @return array Item key => list of selected table keys.
       */
      public function genShow($sRule) {
          $aShow = array();
          $aMatchs = array();
          foreach ($this->aItems as $mItemKey => $sItem) {
              $aCounts = array_count_values($this->matchWord($sItem));
              $aMatchs[] = $aCounts;
              switch ($sRule) {
                  case 'max':
                      // max() must not be called on an empty array: it warns on
                      // PHP < 8 and throws ValueError on PHP 8.
                      $aShow[$mItemKey] = empty($aCounts)
                          ? array()
                          : array_keys($aCounts, max($aCounts), true);
                      break;
              }
          }
          $this->aShow = $aShow;
          $this->aMatchs = $aMatchs;
          // Remember the rule so getShow() can actually reuse the cache.
          $this->sLastRule = $sRule;
          return $aShow;
      }

      /**
       * Insert one word into the trie and tag its final node with $iKey.
       *
       * @param string     $mWord Word to insert; an empty word is ignored.
       * @param int|string $iKey  Table key recorded on the word's final node.
       * @return void
       */
      private function genDict($mWord, $iKey = '') {
          $mWord = (string) $mWord;
          if ($mWord === '') {
              // e.g. produced by 'a,,b'; inserting it would tag the root node.
              return;
          }
          $iNextNode = count($this->aDict); // index the next new node will get
          $iCur = 0;                        // current node, starting at the root
          foreach (str_split($mWord) as $sChar) {
              if (!isset($this->aDict[$iCur][$sChar])) {
                  $this->aDict[$iNextNode] = array();
                  $this->aDict[$iCur][$sChar] = $iNextNode;
                  $iNextNode++;
              }
              $iCur = $this->aDict[$iCur][$sChar];
          }
          $this->aDict[$iCur]['acc'][] = $iKey;
      }

      /**
       * Scan a string and collect the table keys of every dictionary word found.
       *
       * Matching is greedy from left to right and accepts the shortest word at
       * each position: as soon as a word end is reached its keys are collected
       * and scanning restarts right after it. When a partial match fails,
       * scanning restarts one byte after the position where that attempt began.
       *
       * @param string $sLine Text to scan.
       * @return array List of table keys, one entry per (word, key) hit, in scan order.
       */
      public function matchWord($sLine) {
          $sLine = (string) $sLine . "\0";
          $iLen = strlen($sLine);
          $iCur = 0;       // current trie node
          $iOffset = 0;    // byte currently examined
          $iPosition = 0;  // byte where the current match attempt started
          $aReturn = array();
          while ($iOffset < $iLen) {
              $sChar = $sLine[$iOffset];
              if (isset($this->aDict[$iCur][$sChar])) {
                  $iCur = $this->aDict[$iCur][$sChar];
                  if (isset($this->aDict[$iCur]['acc'])) {
                      $aReturn = array_merge($aReturn, $this->aDict[$iCur]['acc']);
                      // Word complete: the next attempt starts right after it.
                      $iPosition = $iOffset + 1;
                      $iCur = 0;
                  }
                  ++$iOffset;
              } else {
                  // Dead end: retry from the byte after the attempt's start.
                  $iCur = 0;
                  ++$iPosition;
                  $iOffset = $iPosition;
              }
          }
          return $aReturn;
      }

  }

  ?>

  外部调用示例

  

代码如下:

  // Usage example: find, for each item, the table line(s) whose words occur
  // in it most often.

  // Strings to be scanned.
  $aItems = array(
      'chinaisbig',
      'whichisnot',
      'totalyrightforme',
  );

  // Lookup table: each line lists words separated by ',' or '|'; the array
  // index of the line is the key reported in the result.
  $aTable = array(
      'china,is|small',
      'china,big|me',
      'china,is|big,which|not,me',
      'totaly|right,for,me',
  );

  // The class defined above is named "weight".
  $oWeight = new weight;

  $oWeight->newItems($aItems);

  // newTable() only builds the dictionary and returns nothing.
  $oWeight->newTable($aTable);

  // The result comes from getShow(): item key => list of best table keys.
  $aResult = $oWeight->getShow();