php读取大文件示例分享(文件操作类)

  Lib_File2.php

  

复制代码 代码如下:

  <?php

  /**
   * Chunked reader for large line-oriented log files, built on SplFileObject.
   *
   * Files are resolved as {root}/{date}/{appid}.{op}{suffix}. Every line is
   * expected to look like "timestamp|mid|json"; lines that do not parse are
   * skipped. Handles are cached per date/appid/op until closeFile() is called.
   *
   * Every public reader returns an array that also carries:
   *   - diffTime: seconds elapsed for the call (float)
   *   - memory:   growth of memory_get_usage(true) since the last instance()
   *               call, in MiB, as a string ending in " M"
   *
   * Unrecoverable problems (missing file, bad limit, ...) terminate the script
   * with die(), which is the error convention of this class.
   */
  class Lib_File2
  {
      /** Base directory; contains one sub-directory per date. */
      private $root = '/data/wwwroot/kkpromo/data/';

      /** File name suffix appended after "{appid}.{op}". */
      private $suffix = '.log';

      /** Cache of open SplFileObject instances, keyed by "{date}_{appid}_{op}". */
      private $handle = array();

      /** Maximum number of lines read per readFile() call / per chunk. */
      private $limit = 40000;

      /** Maximum line length in bytes handed to SplFileObject::setMaxLineLen(). */
      private $length = 1024;

      /** microtime(true) of the most recent _openFile()/total_lineFile() call. */
      private $startTime = 0;

      /** memory_get_usage(true) captured by the most recent instance() call. */
      private static $startMemory = 0;

      /** Flags marking which cache keys currently hold an open file. */
      private $conn = array();

      /** Per cache key: true when the last readFile() ran into end of file. */
      private $exhausted = array();

      /** The singleton instance. */
      private static $init = null;

      /**
       * Returns the shared instance and resets the memory baseline used for
       * the "memory" figure of later results.
       *
       * @return Lib_File2
       */
      public static function instance()
      {
          self::$startMemory = memory_get_usage(true);

          if (!(self::$init instanceof self)) {
              self::$init = new self();
          }

          return self::$init;
      }

      /** Private: use instance(). */
      private function __construct()
      {
      }

      /**
       * Sets the base directory. Dies when it is not a directory.
       *
       * @param string $root
       */
      public function setRoot($root)
      {
          if (!is_dir($root)) {
              die($root.' ROOT DOES NOT EXIST');
          }

          $this->root = $root;
      }

      /**
       * Sets the file name suffix (for example ".log").
       *
       * @param string $suffix
       */
      public function setSuffix($suffix)
      {
          $this->suffix = $suffix;
      }

      /**
       * Sets the chunk size in lines. Must be numeric and within 1..1000000.
       *
       * @param int|string $limit
       */
      public function setLimit($limit)
      {
          if (!is_numeric($limit)) {
              die($limit.' SHOULD BE NUMBERIC');
          }
          if (intval($limit) > 1000000) {
              die($limit.' SHOULD BE LOWER THAN 1000000');
          }
          // A chunk size below 1 can never make progress through a file.
          if (intval($limit) < 1) {
              die($limit.' SHOULD BE GREATER THAN 0');
          }

          $this->limit = intval($limit);
      }

      /**
       * Builds the path of a log file and verifies that it is a readable
       * regular file. Dies otherwise.
       *
       * @param string $date  e.g. 20140327
       * @param int    $appid e.g. 1000
       * @param string $op    e.g. show, login, index
       * @return string absolute file name
       */
      public function _getFile($date, $appid, $op)
      {
          $filename = rtrim($this->root, '/').DIRECTORY_SEPARATOR.$date.DIRECTORY_SEPARATOR.$appid.'.'.$op.$this->suffix;

          if (!file_exists($filename)) {
              die($filename.' FILE DOES NOT EXISTS!');
          }
          if (!is_file($filename)) {
              die($filename.' FILE DOES NOT EXIST!');
          }
          if (!is_readable($filename)) {
              die($filename.'  FILE ACCESS DENY!');
          }

          return $filename;
      }

      /**
       * Drops one cached handle (when all three arguments are given) or all
       * of them. SplFileObject has no close(); the file is released once the
       * object is no longer referenced.
       *
       * @param string|null $date
       * @param int|null    $appid
       * @param string|null $op
       * @return bool always true
       */
      public function closeFile($date = null, $appid = null, $op = null)
      {
          if ($op && $date && $appid) {
              $key = $this->_key($date, $appid, $op);
              unset($this->handle[$key], $this->conn[$key], $this->exhausted[$key]);
          } else {
              $this->handle = array();
              $this->conn = array();
              $this->exhausted = array();
          }

          return true;
      }

      /** Cache key shared by the handle, conn and exhausted maps. */
      private function _key($date, $appid, $op)
      {
          return $date.'_'.$appid.'_'.$op;
      }

      /**
       * Returns the cached SplFileObject for the file, opening it on first
       * use. Also restarts the timer behind "diffTime".
       *
       * @return SplFileObject
       */
      private function _openFile($date, $appid, $op)
      {
          $this->startTime = microtime(true);
          $key = $this->_key($date, $appid, $op);

          if (!empty($this->conn[$key]) && isset($this->handle[$key])) {
              return $this->handle[$key];
          }

          $filename = $this->_getFile($date, $appid, $op);

          // The constructor reports failure by throwing, never by returning null.
          try {
              $this->handle[$key] = new SplFileObject($filename, 'r');
          } catch (Exception $e) {
              die('FILE OPEN FAILED!');
          }

          $this->conn[$key] = true;

          return $this->handle[$key];
      }

      /**
       * Parses one "timestamp|mid|json" line.
       *
       * @param string $data     raw line
       * @param bool   $jsonFlag true: decode the third field into 'data';
       *                         false: leave 'data' as an empty array
       * @return boolean|array false when the line is unusable, otherwise
       *                       array('timestamp' => ..., 'mid' => ..., 'data' => ...)
       */
      private function _parseData($data, $jsonFlag = true)
      {
          if (empty($data) || !is_string($data)) {
              return false;
          }

          $fields = explode('|', $data);
          if (count($fields) < 3) {
              return false;
          }
          if (empty($fields[0]) || empty($fields[1])) {
              return false;
          }

          $result = array(
              'timestamp' => $fields[0],
              'mid'       => $fields[1],
              'data'      => array(),
          );

          if ($jsonFlag) {
              $result['data'] = json_decode($fields[2], true);
          }

          return $result;
      }

      /** True when the decoded payload of $record contains $field. */
      private function _hasField($record, $field)
      {
          return is_array($record['data']) && isset($record['data'][$field]);
      }

      /** Name of the first condition key, or null when there is no usable condition. */
      private function _conditionField($condition)
      {
          if (!$condition || !is_array($condition)) {
              return null;
          }

          $names = array_keys($condition);

          return $names[0];
      }

      /** Appends the diffTime / memory figures to a result array. */
      private function _withUsage($data, $started)
      {
          $data['diffTime'] = doubleval(microtime(true)) - doubleval($started);
          $data['memory'] = doubleval((doubleval(memory_get_usage(true)) - doubleval(self::$startMemory)) / 1024 / 1024) . ' M';

          return $data;
      }

      /**
       * Reads a window of lines from one file.
       *
       * @param string $date      e.g. 20140327
       * @param int    $appid     e.g. 1000, 9000
       * @param string $op        e.g. show, login, index
       * @param number $startNum  zero-based line to start at (default 0)
       * @param number $length    number of lines to scan (default: the limit)
       * @param bool   $jsonFlag  true: decode the JSON payload into 'data';
       *                          false (default): 'data' stays an empty array
       * @param array  $condition array('field' => array(allowed values)); only the
       *                          FIRST key is used, and only when $jsonFlag is true
       * @return array array('data' => records keyed by line number,
       *               'count' => number of records, 'diffTime', 'memory')
       */
      public function readFile($date, $appid, $op, $startNum = 0, $length = 0, $jsonFlag = false, $condition = array())
      {
          $data = array('data' => array(), 'count' => 0);

          $startNum = max(0, intval($startNum));
          $length = empty($length) ? $this->limit : intval($length);

          // Without the decoded payload there is nothing to filter on.
          $field = $jsonFlag ? $this->_conditionField($condition) : null;

          $handle = $this->_openFile($date, $appid, $op);
          // Set before seeking so that line numbering is the same on every call.
          $handle->setMaxLineLen($this->length);
          $handle->seek($startNum);

          $scanned = 0;
          while ($scanned < $length && $handle->valid()) {
              $line = $handle->current();
              $lineNumber = $handle->key();
              // Always advance, whatever happens to this line below.
              $handle->next();
              $scanned++;

              $record = $this->_parseData($line, $jsonFlag);
              if ($record === false) {
                  continue;
              }

              if ($field !== null) {
                  if (!$this->_hasField($record, $field)) {
                      continue;
                  }
                  if (!in_array($record['data'][$field], (array) $condition[$field])) {
                      continue;
                  }
              }

              $data['data'][$lineNumber] = $record;
              $data['count']++;
          }

          // Lets countFileMID() know when to stop asking for further chunks.
          $this->exhausted[$this->_key($date, $appid, $op)] = ($scanned < $length);

          return $this->_withUsage($data, $this->startTime);
      }

      /**
       * Counts the lines of a file with the external "wc -l" command.
       *
       * @param string $date
       * @param int    $appid
       * @param string $op
       * @return array array('count' => int, 'diffTime', 'memory'); count is 0
       *               when the command produced no number
       */
      public function total_lineFile($date, $appid, $op)
      {
          $this->startTime = microtime(true);

          // escapeshellarg() keeps the path a single, inert shell argument.
          $file = escapeshellarg($this->_getFile($date, $appid, $op));
          $output = (string) shell_exec('wc -l  '.$file);

          $data = array('count' => 0);
          if (preg_match("/(\d{1,})/", $output, $ret)) {
              $data['count'] = intval($ret[1]);
          }

          return $this->_withUsage($data, $this->startTime);
      }

      /**
       * Counts the parseable records of a file and closes it afterwards.
       *
       * @param string $date
       * @param int    $appid
       * @param string $op
       * @param array  $condition array('field' => array(allowed values)); only the
       *                          first key is used. A record matches when the field
       *                          is present, non-empty and one of the allowed values.
       * @return array array('count' => int, 'diffTime', 'memory')
       */
      public function countFile($date, $appid, $op, $condition = array())
      {
          $data = array('count' => 0);
          $field = $this->_conditionField($condition);

          $handle = $this->_openFile($date, $appid, $op);
          $handle->setMaxLineLen($this->length);
          // The handle may be a cached one that was left somewhere mid-file.
          $handle->rewind();

          while ($handle->valid()) {
              $line = $handle->current();
              $handle->next();

              // The JSON payload is only needed when filtering.
              $record = $this->_parseData($line, $field !== null);
              if ($record === false) {
                  continue;
              }

              if ($field === null) {
                  $data['count']++;
                  continue;
              }

              if ($this->_hasField($record, $field)
                  && $record['data'][$field]
                  && in_array($record['data'][$field], (array) $condition[$field])
              ) {
                  $data['count']++;
              }
          }

          unset($handle);
          $this->closeFile($date, $appid, $op);

          return $this->_withUsage($data, $this->startTime);
      }

      /**
       * Counts the distinct users (mid values) of a file, reading it chunk by
       * chunk, and closes the file afterwards.
       *
       * @param string $date
       * @param int    $appid
       * @param string $op
       * @param bool   $midflag   true: include the list of mids under 'mid';
       *                          false (default): the 'mid' key is removed
       * @param bool   $jsonFlag  passed to readFile(); must be true for
       *                          $condition to match anything
       * @param array  $condition array('field' => array(allowed values)); only the
       *                          first key is used. An empty value list accepts
       *                          every record that has the field.
       * @return array e.g. {"mid":[],"count":2181,"diffTime":0.0397667884827,"memory":"3.75 M"}
       */
      public function countFileMID($date, $appid, $op, $midflag = false, $jsonFlag = false, $condition = array())
      {
          // Timed locally: _openFile() restarts the shared timer on every chunk.
          $started = microtime(true);

          $result = array('mid' => array(), 'count' => 0, 'diffTime' => 0, 'memory' => 0);
          $field = $this->_conditionField($condition);
          $key = $this->_key($date, $appid, $op);

          $chunk = 0;
          do {
              $data = $this->readFile($date, $appid, $op, $this->limit * $chunk, $this->limit, $jsonFlag);

              foreach ($data['data'] as $arr) {
                  if ($field !== null) {
                      if (!$this->_hasField($arr, $field)) {
                          continue;
                      }
                      $allowed = $condition[$field];
                      if (!empty($allowed) && !in_array($arr['data'][$field], (array) $allowed)) {
                          continue;
                      }
                  }

                  if (!isset($result['mid'][$arr['mid']])) {
                      $result['mid'][$arr['mid']] = 1;
                      $result['count']++;
                  }
              }

              unset($data);
              $chunk++;
          } while (empty($this->exhausted[$key]));

          $this->closeFile($date, $appid, $op);

          $result['mid'] = array_keys($result['mid']);
          if (empty($midflag)) {
              unset($result['mid']);
          }

          return $this->_withUsage($result, $started);
      }

      /**
       * Runs countFileMID() for $date and the $day - 1 calendar days before it.
       *
       * @param string $date    most recent date, e.g. 20140327
       * @param int    $appid
       * @param string $op
       * @param number $day     number of days to cover (default 1)
       * @param bool   $midflag see countFileMID()
       * @return array keyed by date (Ymd), e.g.
       *         ("20140326":{"count":4571,"diffTime":0.0806441307068,"memory":"3.75 M"},
       *          "20140325":{"count":2181,"diffTime":0.0397667884827,"memory":"3.75 M"})
       */
      public function getReturnUser($date, $appid, $op, $day = 1, $midflag = false)
      {
          $result = array();

          $base = strtotime($date);
          if ($base === false) {
              die($date.' INVALID DATE');
          }

          for ($i = 0; $i < $day; $i++) {
              // Calendar arithmetic; subtracting 86400 seconds drifts across DST changes.
              $date_i = date('Ymd', strtotime('-'.$i.' day', $base));
              $result[$date_i] = $this->countFileMID($date_i, $appid, $op, $midflag);
          }

          return $result;
      }
  }

  ?>

  Lib_File1.php

  

复制代码 代码如下:

  <?php

  /**
   * Chunked reader for large line-oriented log files, built on fopen()/fgets().
   *
   * Files are resolved as {root}/{date}/{appid}.{op}{suffix}. Every line is
   * expected to look like "timestamp|mid|json"; lines that do not parse are
   * skipped. Handles are cached per date/appid/op until closeFile() is called.
   *
   * Every public reader returns an array that also carries:
   *   - diffTime: seconds elapsed for the call (float)
   *   - memory:   growth of memory_get_usage(true) since the last instance()
   *               call, in MiB, as a string ending in " M"
   *
   * Unrecoverable problems (missing file, bad limit, ...) terminate the script
   * with die(), which is the error convention of this class.
   */
  class Lib_File1
  {
      /** Base directory; contains one sub-directory per date. */
      private $root = '/data/wwwroot/kkpromo/data/';

      /** File name suffix appended after "{appid}.{op}". */
      private $suffix = '.log';

      /** Cache of open stream resources, keyed by "{date}_{appid}_{op}". */
      private $handle = array();

      /** Maximum number of lines read per chunk. */
      private $limit = 40000;

      /** Length argument handed to fgets() for every line. */
      private $length = 1024;

      /** microtime(true) of the most recent _openFile()/total_lineFile() call. */
      private $startTime = 0;

      /** memory_get_usage(true) captured by the most recent instance() call. */
      private static $startMemory = 0;

      /** Flags marking which cache keys currently hold an open file. */
      private $conn = array();

      /** Per cache key: true when the last readFile() ran into end of file. */
      private $exhausted = array();

      /** The singleton instance. */
      private static $init = null;

      /**
       * Returns the shared instance and resets the memory baseline used for
       * the "memory" figure of later results.
       *
       * @return Lib_File1
       */
      public static function instance()
      {
          self::$startMemory = memory_get_usage(true);

          if (!(self::$init instanceof self)) {
              self::$init = new self();
          }

          return self::$init;
      }

      /** Private: use instance(). */
      private function __construct()
      {
      }

      /**
       * Sets the base directory. Dies when it is not a directory.
       *
       * @param string $root
       */
      public function setRoot($root)
      {
          if (!is_dir($root)) {
              die($root.' ROOT DOES NOT EXIST');
          }

          $this->root = $root;
      }

      /**
       * Sets the file name suffix (for example ".log").
       *
       * @param string $suffix
       */
      public function setSuffix($suffix)
      {
          $this->suffix = $suffix;
      }

      /**
       * Sets the chunk size in lines. Must be numeric and within 1..1000000.
       *
       * @param int|string $limit
       */
      public function setLimit($limit)
      {
          if (!is_numeric($limit)) {
              die($limit.' SHOULD BE NUMBERIC');
          }
          if (intval($limit) > 1000000) {
              die($limit.' SHOULD BE LOWER THAN 1000000');
          }
          // A chunk size below 1 can never make progress through a file.
          if (intval($limit) < 1) {
              die($limit.' SHOULD BE GREATER THAN 0');
          }

          $this->limit = intval($limit);
      }

      /**
       * Builds the path of a log file and verifies that it is a readable
       * regular file. Dies otherwise.
       *
       * @return string absolute file name
       */
      private function _getFile($date, $appid, $op)
      {
          $filename = rtrim($this->root, '/').DIRECTORY_SEPARATOR.$date.DIRECTORY_SEPARATOR.$appid.'.'.$op.$this->suffix;

          if (!file_exists($filename)) {
              die($filename.' FILE DOES NOT EXISTS!');
          }
          if (!is_file($filename)) {
              die($filename.' FILE DOES NOT EXIST!');
          }
          if (!is_readable($filename)) {
              die($filename.'  FILE ACCESS DENY!');
          }

          return $filename;
      }

      /**
       * Closes one cached handle (when all three arguments are given) or all
       * of them. Safe to call repeatedly.
       *
       * @param string|null $date
       * @param int|null    $appid
       * @param string|null $op
       * @return bool always true
       */
      public function closeFile($date = null, $appid = null, $op = null)
      {
          if ($op && $date && $appid) {
              $keys = array($this->_key($date, $appid, $op));
          } else {
              $keys = array_keys($this->handle);
          }

          foreach ($keys as $key) {
              // Only real, still-open streams may be handed to fclose().
              if (isset($this->handle[$key]) && is_resource($this->handle[$key])) {
                  fclose($this->handle[$key]);
              }
              unset($this->handle[$key], $this->conn[$key], $this->exhausted[$key]);
          }

          return true;
      }

      /** Cache key shared by the handle, conn and exhausted maps. */
      private function _key($date, $appid, $op)
      {
          return $date.'_'.$appid.'_'.$op;
      }

      /**
       * Returns the cached stream for the file, opening it on first use.
       * Also restarts the timer behind "diffTime".
       *
       * @return resource
       */
      private function _openFile($date, $appid, $op)
      {
          $this->startTime = microtime(true);
          $key = $this->_key($date, $appid, $op);

          if (!empty($this->conn[$key]) && isset($this->handle[$key]) && is_resource($this->handle[$key])) {
              return $this->handle[$key];
          }

          $filename = $this->_getFile($date, $appid, $op);

          $stream = fopen($filename, 'r');
          if ($stream === false) {
              die('FILE OPEN FAILED!');
          }

          $this->handle[$key] = $stream;
          $this->conn[$key] = true;

          return $stream;
      }

      /**
       * Parses one "timestamp|mid|json" line.
       *
       * @param string $data raw line
       * @return boolean|array false when the line is unusable, otherwise
       *                       array('timestamp' => ..., 'mid' => ..., 'data' => decoded JSON)
       */
      private function _parseData($data)
      {
          if (empty($data) || !is_string($data)) {
              return false;
          }

          $fields = explode('|', $data);
          if (count($fields) < 3) {
              return false;
          }
          if (empty($fields[0]) || empty($fields[1])) {
              return false;
          }

          return array(
              'timestamp' => $fields[0],
              'mid'       => $fields[1],
              'data'      => json_decode($fields[2], true),
          );
      }

      /**
       * True when the record satisfies at least one entry of $condition, i.e.
       * the field exists in the decoded payload and its value is listed.
       *
       * @param array $record         result of _parseData()
       * @param array $condition      array('field' => array(allowed values), ...)
       * @param bool  $allowEmptyList when true, an empty value list accepts any
       *                              record that has the field
       * @return bool
       */
      private function _matchesAny($record, $condition, $allowEmptyList = false)
      {
          if (!is_array($record['data'])) {
              return false;
          }

          foreach ($condition as $field => $allowed) {
              if (!isset($record['data'][$field])) {
                  continue;
              }
              if ($allowEmptyList && empty($allowed)) {
                  return true;
              }
              if (in_array($record['data'][$field], (array) $allowed)) {
                  return true;
              }
          }

          return false;
      }

      /** Appends the diffTime / memory figures to a result array. */
      private function _withUsage($data, $started)
      {
          $data['diffTime'] = doubleval(microtime(true)) - doubleval($started);
          $data['memory'] = doubleval((doubleval(memory_get_usage(true)) - doubleval(self::$startMemory)) / 1024 / 1024) . ' M';

          return $data;
      }

      /**
       * Reads the lines whose zero-based index lies in [$startNum, $endNum).
       *
       * @param string $date      e.g. 20140327
       * @param int    $appid     e.g. 1000, 9000
       * @param string $op        e.g. show, login, index
       * @param number $startNum  first line index to return (default 0)
       * @param number $endNum    line index to stop before (default: the limit)
       * @param bool   $jsonFlag  when false (default) $condition is ignored; the
       *                          decoded payload is returned under 'data' either way
       * @param array  $condition array('mid' => array(...), 'ip' => array(...), ...);
       *                          a record is kept when any entry matches
       * @return array array('data' => records indexed sequentially from $startNum,
       *               'count' => number of records, 'diffTime', 'memory')
       */
      public function readFile($date, $appid, $op, $startNum = 0, $endNum = 0, $jsonFlag = false, $condition = array())
      {
          $data = array('data' => array(), 'count' => 0);

          $startNum = max(0, intval($startNum));
          $endNum = empty($endNum) ? $this->limit : intval($endNum);
          $index = $startNum;

          if (!$jsonFlag || !is_array($condition)) {
              $condition = array();
          }

          $stream = $this->_openFile($date, $appid, $op);
          // The handle is cached between calls; line indexes are counted from
          // the start of the file, so every call has to begin there.
          rewind($stream);

          $hitEof = false;
          for ($lineNo = 0; $lineNo < $endNum; $lineNo++) {
              $line = fgets($stream, $this->length);
              if ($line === false) {
                  $hitEof = true;
                  break;
              }
              if ($lineNo < $startNum) {
                  continue;
              }

              $record = $this->_parseData($line);
              if ($record === false) {
                  continue;
              }
              if ($condition && !$this->_matchesAny($record, $condition)) {
                  continue;
              }

              $data['data'][$index] = $record;
              $index++;
              $data['count']++;
          }

          // Lets countFileMID() know when to stop asking for further chunks.
          $this->exhausted[$this->_key($date, $appid, $op)] = $hitEof;

          return $this->_withUsage($data, $this->startTime);
      }

      /**
       * Counts the lines of a file with the external "wc -l" command.
       *
       * @param string $date
       * @param int    $appid
       * @param string $op
       * @return array array('count' => int, 'diffTime', 'memory'); count is 0
       *               when the command produced no number
       */
      public function total_lineFile($date, $appid, $op)
      {
          $this->startTime = microtime(true);

          // escapeshellarg() keeps the path a single, inert shell argument.
          $file = escapeshellarg($this->_getFile($date, $appid, $op));
          $output = (string) shell_exec('wc -l  '.$file);

          $data = array('count' => 0);
          if (preg_match("/(\d{1,})/", $output, $ret)) {
              $data['count'] = intval($ret[1]);
          }

          return $this->_withUsage($data, $this->startTime);
      }

      /**
       * Counts the parseable records of a file.
       *
       * @param string $date
       * @param int    $appid
       * @param string $op
       * @param array  $condition array('field' => array(allowed values), ...); a
       *                          record is counted once when, for any entry, the
       *                          field is present, non-empty and listed
       * @return array array('count' => int, 'diffTime', 'memory')
       */
      public function countFile($date, $appid, $op, $condition = array())
      {
          $data = array('count' => 0);
          $filter = ($condition && is_array($condition)) ? $condition : array();

          $stream = $this->_openFile($date, $appid, $op);
          // The handle may be a cached one that was left somewhere mid-file.
          rewind($stream);

          while (($line = fgets($stream, $this->length)) !== false) {
              $record = $this->_parseData($line);
              if ($record === false) {
                  continue;
              }

              if (!$filter) {
                  $data['count']++;
                  continue;
              }

              if (!is_array($record['data'])) {
                  continue;
              }
              foreach ($filter as $field => $allowed) {
                  if (!empty($record['data'][$field]) && in_array($record['data'][$field], (array) $allowed)) {
                      $data['count']++;
                      break;
                  }
              }
          }

          return $this->_withUsage($data, $this->startTime);
      }

      /**
       * Counts the distinct users (mid values) of a file, reading it chunk by
       * chunk, and closes the file afterwards.
       *
       * @param string $date
       * @param int    $appid
       * @param string $op
       * @param bool   $midflag   true: include the list of mids under 'mid';
       *                          false (default): the 'mid' key is removed
       * @param bool   $jsonFlag  passed through to readFile()
       * @param array  $condition array('field' => array(allowed values), ...); a
       *                          record is kept when any entry matches. An empty
       *                          value list accepts every record that has the field.
       * @return array e.g. {"mid":[],"count":2181,"diffTime":0.0397667884827,"memory":"3.75 M"}
       */
      public function countFileMID($date, $appid, $op, $midflag = false, $jsonFlag = false, $condition = array())
      {
          // Timed locally: _openFile() restarts the shared timer on every chunk.
          $started = microtime(true);

          $result = array('mid' => array(), 'count' => 0, 'diffTime' => 0, 'memory' => 0);
          $filter = ($condition && is_array($condition)) ? $condition : array();
          $key = $this->_key($date, $appid, $op);

          $chunk = 0;
          do {
              $startNum = $this->limit * $chunk;
              $endNum = $this->limit * ($chunk + 1);
              $data = $this->readFile($date, $appid, $op, $startNum, $endNum, $jsonFlag);

              foreach ($data['data'] as $arr) {
                  if ($filter && !$this->_matchesAny($arr, $filter, true)) {
                      continue;
                  }
                  if (!isset($result['mid'][$arr['mid']])) {
                      $result['mid'][$arr['mid']] = 1;
                      $result['count']++;
                  }
              }

              unset($data);
              $chunk++;
          } while (empty($this->exhausted[$key]));

          $this->closeFile($date, $appid, $op);

          $result['mid'] = array_keys($result['mid']);
          if (empty($midflag)) {
              unset($result['mid']);
          }

          return $this->_withUsage($result, $started);
      }

      /**
       * Runs countFileMID() for $date and the $day - 1 calendar days before it.
       *
       * @param string $date    most recent date, e.g. 20140327
       * @param int    $appid
       * @param string $op
       * @param number $day     number of days to cover (default 1)
       * @param bool   $midflag see countFileMID()
       * @return array keyed by date (Ymd), e.g.
       *         ("20140326":{"count":4571,"diffTime":0.0806441307068,"memory":"3.75 M"},
       *          "20140325":{"count":2181,"diffTime":0.0397667884827,"memory":"3.75 M"})
       */
      public function getReturnUser($date, $appid, $op, $day = 1, $midflag = false)
      {
          $result = array();

          $base = strtotime($date);
          if ($base === false) {
              die($date.' INVALID DATE');
          }

          for ($i = 0; $i < $day; $i++) {
              // Calendar arithmetic; subtracting 86400 seconds drifts across DST changes.
              $date_i = date('Ymd', strtotime('-'.$i.' day', $base));
              $result[$date_i] = $this->countFileMID($date_i, $appid, $op, $midflag);
          }

          return $result;
      }
  }

  ?>