attributes() as $key => $value){
$this->$key = $value->__toString();
}
$this->engine = $engine;
# Wir registrieren die Benutzung dieser Suchmaschine
$this->uses = intval(Redis::hget($this->name, "uses")) + 1;
Redis::hset($this->name, "uses", $this->uses);
# Eine Suchmaschine kann automatisch temporär deaktiviert werden, wenn es Verbindungsprobleme gab:
if(isset($this->disabled) && strtotime($this->disabled) <= time() )
{
# In diesem Fall ist der Timeout der Suchmaschine abgelaufen.
$this->enable($metager->getSumaFile(), "Die Suchmaschine " . $this->name . " wurde wieder eingeschaltet.");
}elseif (isset($this->disabled) && strtotime($this->disabled) > time())
{
$this->enabled = false;
return;
}
# User-Agent definieren:
if( isset($_SERVER['HTTP_USER_AGENT']))
{
$this->useragent = $_SERVER['HTTP_USER_AGENT'];
}else
{
$this->useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1";
}
$this->ip = $metager->getIp();
$this->gefVon = "homepage . "\" target=\"_blank\">" . $this->displayName . "";
$this->startTime = microtime();
$this->getString = $this->generateGetString($metager->getQ(), $metager->getUrl(), $metager->getLanguage(), $metager->getCategory());
$counter = 0;
# Wir benötigen einen verfügbaren Socket, über den wir kommunizieren können:
$time = microtime(true);
$this->fp = $this->getFreeSocket();
$this->setStatistic("connection_time", ((microtime(true)-$time) / 1000000));
if(!$this->fp)
{
$this->disable($metager->getSumaFile(), "Die Suchmaschine " . $this->name . " wurde für 1h deaktiviert, weil keine Verbindung aufgebaut werden konnte");
}else
{
$time = microtime(true);
$this->writeRequest();
$this->setStatistic("write_time", ((microtime(true)-$time) / 1000000));
}
}
public abstract function loadResults($result);
private function writeRequest ()
{
$out = "GET " . $this->getString . " HTTP/1.1\r\n";
$out .= "Host: " . $this->host . "\r\n";
$out .= "User-Agent: " . $this->useragent . "\r\n";
$out .= "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n";
$out .= "Accept-Language: de,en-US;q=0.7,en;q=0.3\r\n";
$out .= "Accept-Encoding: gzip, deflate, br\r\n";
$out .= "Connection: keep-alive\r\n\r\n";
# Anfrage senden:
$sent = 0; $string = $out; $time = microtime(true);
while(true)
{
try{
$tmp = fwrite($this->fp, $string);
}catch(\ErrorException $e)
{
# Irgendwas ist mit unserem Socket passiert. Wir brauchen einen neuen:
fclose($this->fp);
Redis::del($this->name . "." . $this->socketNumber);
$this->fp = $this->getFreeSocket();
$sent = 0;
$string = $out;
continue;
}
if($tmp){
$sent += $tmp;
$string = substr($string, $tmp);
}else
abort(500, "Fehler beim schreiben.");
if(((microtime(true) - $time) / 1000000) >= 500)
{
abort(500, "Konnte die Request Daten nicht an: " . $this->name . " senden");
}
if($sent >= strlen($out))
break;
}
}
public function rank (\App\MetaGer $metager)
{
foreach($this->results as $result)
{
$result->rank($metager);
}
}
private function getFreeSocket()
{
# Je nach Auslastung des Servers ( gleichzeitige Abfragen ), kann es sein, dass wir mehrere Sockets benötigen um die Abfragen ohne Wartezeit beantworten zu können.
# pfsockopen öffnet dabei einen persistenten Socket, der also auch zwischen den verschiedenen php Prozessen geteilt werden kann.
# Wenn der Hostname mit einem bereits erstellten Socket übereinstimmt, wird die Verbindung also aufgegriffen und fortgeführt.
# Allerdings dürfen wir diesen nur verwenden, wenn er nicht bereits von einem anderen Prozess zur Kommunikation verwendet wird.
# Wenn dem so ist, probieren wir den nächsten Socket zu verwenden.
# Dies festzustellen ist komplizierter, als man sich das vorstellt. Folgendes System sollte funktionieren:
# 1. Stelle fest, ob dieser Socket neu erstellt wurde, oder ob ein existierender geöffnet wurde.
$counter = 0; $fp = null;
do
{
if( intval(Redis::exists($this->host . ".$counter")) === 0 )
{
Redis::set($this->host . ".$counter", 1);
Redis::expire($this->host . ".$counter", 5);
$this->socketNumber = $counter;
try
{
$fp = pfsockopen($this->getHost() . ":" . $this->port . "/$counter", $this->port, $errstr, $errno, 1);
}catch(\ErrorException $e)
{
break;
}
# Wir gucken, ob der Lesepuffer leer ist:
stream_set_blocking($fp, 0);
if(fgets($fp, BUFFER_LENGTH) !== false)
{
Log::error("Der Lesepuffer von: " . $this->name . " war nach dem Erstellen nicht leer. Musste den Socket neu starten.");
fclose($fp);
$fp = pfsockopen($this->getHost() . ":" . $this->port . "/$counter", $this->port, $errstr, $errno, 1);
}
header($this->name . ": " . $counter . "_" . $this->getHost());
break;
}
$counter++;
}while(true);
return $fp;
}
private function setStatistic($key, float $val)
{
$oldVal = floatval(Redis::hget($this->name, $key)) * $this->uses;
$newVal = ($oldVal + max($val, 0)) / $this->uses;
Redis::hset($this->name, $key, $newVal);
$this->$key = $newVal;
}
public function disable($sumaFile, $message)
{
Log::info($message);
$xml = simplexml_load_file($sumaFile);
$xml->xpath("//sumas/suma[@name='" . $this->name . "']")['0']['disabled'] = date(DATE_RFC822, mktime(date("H")+1,date("i"), date("s"), date("m"), date("d"), date("Y")));
$xml->saveXML($sumaFile);
}
public function enable($sumaFile, $message)
{
Log::info($message);
$xml = simplexml_load_file($sumaFile);
unset($xml->xpath("//sumas/suma[@name='" . $this->name . "']")['0']['disabled']);
$xml->saveXML($sumaFile);
}
public function closeFp()
{
fclose($this->fp);
}
public function retrieveResults()
{
$time = microtime(true);
$headers = '';
$body = '';
$length = 0;
if(!$this->fp)
{
return;
}
// get headers FIRST
$c = 0;
stream_set_blocking($this->fp, 0);
do
{
// use fgets() not fread(), fgets stops reading at first newline
// or buffer which ever one is reached first
$data = fgets($this->fp, BUFFER_LENGTH);
// a sincle CRLF indicates end of headers
if ($data === false || $data == CRLF || feof($this->fp) || ((microtime()-$time)/1000000) > 100 ) {
// break BEFORE OUTPUT
break;
}
if( sizeof(($tmp = explode(": ", $data))) === 2 )
$headers[trim($tmp[0])] = trim($tmp[1]);
$c++;
}
while (true);
// end of headers
if(sizeof($headers) > 1){
$bodySize = 0;
if( isset($headers["Transfer-Encoding"]) && $headers["Transfer-Encoding"] === "chunked" )
{
$body = $this->readChunked();
}elseif( isset($headers['Content-Length']) )
{
$length = trim($headers['Content-Length']);
if(is_numeric($length) && $length >= 1)
$body = $this->readBody($length);
$bodySize = strlen($body);
}else
{
die("Konnte nicht herausfinden, wie ich die Serverantwort von: " . $this->name . " auslesen soll. Header war: " . print_r($headers));
}
}
Redis::del($this->host . "." . $this->socketNumber);
$this->setStatistic("read_time", ((microtime(true)-$time) / 1000000));
if( isset($headers["Content-Encoding"]) && $headers['Content-Encoding'] === "gzip")
{
$body = $this->gunzip($body);
}
#print_r($headers);
#print($body);
#print("\r\n". $bodySize);
#exit;
#die(print_r($headers));
// $body and $headers should contain your stream data
$this->loadResults($body);
#print(print_r($headers, TRUE) . $body);
#exit;
}
private function readBody(int $length)
{
$theData = '';
$done = false;
stream_set_blocking($this->fp, 0);
$startTime = time();
$lastTime = $startTime;
while (!feof($this->fp) && !$done && (($startTime + 1) > time()) && $length !== 0)
{
usleep(100);
$theNewData = fgets($this->fp, BUFFER_LENGTH);
$theData .= $theNewData;
$length -= strlen($theNewData);
$done = (trim($theNewData) === '0');
}
return $theData;
}
private function readChunked()
{
$body = '';
// read from chunked stream
// loop though the stream
do
{
// NOTE: for chunked encoding to work properly make sure
// there is NOTHING (besides newlines) before the first hexlength
// get the line which has the length of this chunk (use fgets here)
$line = fgets($this->fp, BUFFER_LENGTH);
// if it's only a newline this normally means it's read
// the total amount of data requested minus the newline
// continue to next loop to make sure we're done
if ($line == CRLF) {
continue;
}
// the length of the block is sent in hex decode it then loop through
// that much data get the length
// NOTE: hexdec() ignores all non hexadecimal chars it finds
$length = hexdec($line);
if (!is_int($length)) {
trigger_error('Most likely not chunked encoding', E_USER_ERROR);
}
// zero is sent when at the end of the chunks
// or the end of the stream or error
if ($line === false || $length < 1 || feof($this->fp)) {
if($length <= 0)
fgets($this->fp, BUFFER_LENGTH);
// break out of the streams loop
break;
}
// loop though the chunk
do
{
// read $length amount of data
// (use fread here)
$data = fread($this->fp, $length);
// remove the amount received from the total length on the next loop
// it'll attempt to read that much less data
$length -= strlen($data);
// PRINT out directly
#print $data;
#flush();
// you could also save it directly to a file here
// store in string for later use
$body .= $data;
// zero or less or end of connection break
if ($length <= 0 || feof($this->fp))
{
// break out of the chunk loop
if($length <= 0)
fgets($this->fp, BUFFER_LENGTH);
break;
}
}
while (true);
// end of chunk loop
}
while (true);
// end of stream loop
return $body;
}
private function gunzip($zipped) {
$offset = 0;
if (substr($zipped,0,2) == "\x1f\x8b")
$offset = 2;
if (substr($zipped,$offset,1) == "\x08")
{
try
{
return gzinflate(substr($zipped, $offset + 8));
} catch (\Exception $e)
{
abort(500, "Fehler beim unzip des Ergebnisses von folgendem Anbieter: " . $this->name);
}
}
return "Unknown Format";
}
protected function getHost()
{
$return = "";
if( $this->port === "443" )
{
$return .= "tls://";
}else
{
$return .= "tcp://";
}
$return .= $this->host;
return $return;
}
public function getCurlInfo()
{
return curl_getinfo($this->ch);
}
public function getCurlErrors()
{
return curl_errno($this->ch);
}
public function addCurlHandle ($mh)
{
curl_multi_add_handle($mh, $this->ch);
}
public function removeCurlHandle ($mh)
{
curl_multi_remove_handle($mh, $this->ch);
}
private function generateGetString($query, $url, $language, $category)
{
$getString = "";
# Skript:
if(strlen($this->skript) > 0)
$getString .= $this->skript;
else
$getString .= "/";
# FormData:
if(strlen($this->formData) > 0)
$getString .= "?" . $this->formData;
# Wir müssen noch einige Platzhalter in dem GET-String ersetzen:
if( strpos($getString, "<>") ){
$getString = str_replace("<>", $this->urlEncode($this->useragent), $getString);
}
if( strpos($getString, "<>") )
{
$getString = str_replace("<>", $this->urlEncode($query), $getString);
}
if( strpos($getString, "<>") )
{
$getString = str_replace("<>", $this->urlEncode($this->ip), $getString);
}
if( strpos($getString, "<>") )
{
$getString = str_replace("<>", $this->urlEncode($language), $getString);
}
if( strpos($getString, "<>") )
{
$getString = str_replace("<>", $this->urlEncode($category), $getString);
}
if( strpos($getString, "<>") )
{
$getString = str_replace("<>", $this->getOvertureAffilData($url), $getString);
}
return $getString;
}
protected function urlEncode($string)
{
if(isset($this->inputEncoding))
{
return urlencode(mb_convert_encoding($string, $this->inputEncoding));
}else
{
return urlencode($string);
}
}
private function getOvertureAffilData($url)
{
$affil_data = 'ip=' . $this->ip;
$affil_data .= '&ua=' . $this->useragent;
if ( isset($_SERVER['HTTP_X_FORWARDED_FOR']) ) {
$affil_data .= '&xfip=' . $_SERVER['HTTP_X_FORWARDED_FOR'];
}
$affilDataValue = $this->urlEncode($affil_data);
# Wir benötigen die ServeUrl:
$serveUrl = $this->urlEncode($url);
return "&affilData=" . $affilDataValue . "&serveUrl=" . $serveUrl;
}
public function isEnabled ()
{
return $this->enabled;
}
}