PHP Binary search
Nov 21, 2008
Author: City Hall
This class can be used to quickly search for sorted data in large files using the binary search algorithm. It can search for a given identifier in a file that has lines that contain only the identifier and a value separated by a tab character. The class uses the binary search algorithm to quickly narrow the search for a given identifier even when the files are very large.
- filename: binary_search.class.php
- date: 28.08.2008
- what for: implements binary search. Works for files with "id-value" pairs, where usually id and value are separated by a tab symbol (\t), and pairs are separeted by a new line symbol (\n). You can define border symbols so the parser could found a token (actually won't work with \r\n because it's 2 symbols)
*/
class BinarySearch
{
public static $start_symbol = "\n"; //symbol after which a token starts
public static $end_symbol = "\t"; //symbol before which a token ends
protected static $fp; //pointer to the opened file to avoid multiple fopen()s if frequent requests take place
// opens a file and starts the search. Returns found value or null
public static function find( $filename, $token )
{
if( !self::$fp ) //open the file with all honor
{
if( !is_readable( $filename ) )
{
throw new Exception( 'File cannot be read: ' . $filename );
}
self::$fp = fopen( $filename, 'r' );
if( !self::$fp )
{
throw new Exception( 'File cannot be opened for reading: ' . $filename );
}
}
return self::binary( 0, filesize( $filename ), $token );
}
//implements binary search itself
protected static function binary( $Lb, $Ub, $token )
{
while( true )
{
$M = $Lb + round( ($Ub-$Lb)/2 );
$candidate = self::parseAt( $M, self::$start_symbol, self::$end_symbol );
$cmp = strcmp( $token, $candidate );
if ( $cmp < 0 )
{
$Ub = $M - 1;
}
elseif ( $cmp > 0 )
{
$Lb = $M + 1;
}
else
{
return self::parseAt( $M + strlen($candidate), self::$end_symbol, self::$start_symbol );
}
if ( $Lb > $Ub )
{
return null;
}
}
}
//first looks back for a start symbol, then reads token up to an end symbol
protected static function parseAt( $pos, $start_symbol="\n", $end_symbol="\t" )
{
while( $pos )
{
self::seek( $pos );
if( $start_symbol == self::getSymbol() )
{
break; //found!
}
$pos--;
}
if( $pos > 0 )
{
$pos++; //skip a new line symbol if we are not in the beginning of the file
}
$token = '';
while( !feof( self::$fp ) )
{
self::seek( $pos );
$symbol = self::getSymbol();
if( $end_symbol == $symbol )
{
break; //token has finished!
}
$token .= $symbol;
$pos++;
}
return $token;
}
//my impelemntation of fseek() with exceptions support
protected static function seek( $pos )
{
if( fseek( self::$fp, $pos ) < 0 )
{
throw new Exception( 'Cannot fseek in the file. fseek() = ' . fseek( self::$fp, $pos ) . ', pos = ' . $pos );
}
return true;
}
//returns current symbol in an opened file
protected static function getSymbol()
{
return fgetc( self::$fp );
}
}
views 5496



