From 4f27633e161f2257333673dde420f7aec9a8ac20 Mon Sep 17 00:00:00 2001 From: Dominik Pfennig <dominik@suma-ev.de> Date: Thu, 26 May 2016 12:23:32 +0200 Subject: [PATCH] =?UTF-8?q?Websuche=20funktioniert.=20Unter=20umst=C3=A4nd?= =?UTF-8?q?en=20aber=20noch=20nicht=20fehlerfrei?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/MetaGer.php | 3 +- app/Models/Searchengine.php | 64 +++++++++++++++---- app/Models/parserSkripte/Onenewspagevideo.php | 6 +- app/Models/parserSkripte/Yacy.php | 45 +++++++++++++ config/app.php | 2 +- 5 files changed, 105 insertions(+), 15 deletions(-) create mode 100644 app/Models/parserSkripte/Yacy.php diff --git a/app/MetaGer.php b/app/MetaGer.php index 7fc4a9d72..87fadc046 100644 --- a/app/MetaGer.php +++ b/app/MetaGer.php @@ -112,6 +112,7 @@ class MetaGer { $overtureEnabled = TRUE; } + $enabledSearchengines[] = $suma; } } @@ -152,7 +153,7 @@ class MetaGer $tmp = new $path($engine, $this); - if($tmp) + if($tmp->isEnabled()) { $engines[] = $tmp; } diff --git a/app/Models/Searchengine.php b/app/Models/Searchengine.php index 38a8dbb3a..376bf11fb 100644 --- a/app/Models/Searchengine.php +++ b/app/Models/Searchengine.php @@ -13,8 +13,8 @@ abstract class Searchengine protected $getString = ""; protected $engine; protected $counter = 0; - protected $enabled; protected $socketNumber = null; + protected $enabled = true; public $results = []; function __construct(\SimpleXMLElement $engine, MetaGer $metager) @@ -24,6 +24,17 @@ abstract class Searchengine $this->$key = $value->__toString(); } + # Eine Suchmaschine kann automatisch temporär deaktiviert werden, wenn es Verbindungsprobleme gab: + if(isset($this->disabled) && strtotime($this->disabled) <= time() ) + { + # In diesem Fall ist der Timeout der Suchmaschine abgelaufen. + $this->enable($metager->getSumaFile(), "Die Suchmaschine " . $this->name . " wurde wieder eingeschaltet."); + }elseif (isset($this->disabled) && strtotime($this->disabled) > time()) + { + $this->enabled = false; + return; + } + # User-Agent definieren: if( isset($_SERVER['HTTP_USER_AGENT'])) { @@ -56,7 +67,7 @@ abstract class Searchengine if(!$this->fp) { // Mache etwas - Log::error("Konnte keine Verbindung zur Suchmaschine: " . $this->name . " aufbauen."); + $this->disable($metager->getSumaFile(), "Die Suchmaschine " . $this->name . " wurde für 1h deaktiviert, weil keine Verbindung aufgebaut werden konnte"); break; }else { @@ -148,14 +159,22 @@ abstract class Searchengine } - abort(500, "Konnte keinen freien Socket bekommen für: " . $this->name); + return null; } public function disable(string $sumaFile, string $message) { Log::info($message); $xml = simplexml_load_file($sumaFile); - $xml->xpath("//sumas/suma[@name='" . $this->name . "']")['0']['disabled'] = "1"; + $xml->xpath("//sumas/suma[@name='" . $this->name . "']")['0']['disabled'] = date(DATE_RFC822, mktime(date("H")+1,date("i"), date("s"), date("m"), date("d"), date("Y"))); + $xml->saveXML($sumaFile); + } + + public function enable(string $sumaFile, string $message) + { + Log::info($message); + $xml = simplexml_load_file($sumaFile); + unset($xml->xpath("//sumas/suma[@name='" . $this->name . "']")['0']['disabled']); $xml->saveXML($sumaFile); } @@ -182,11 +201,6 @@ abstract class Searchengine // use fgets() not fread(), fgets stops reading at first newline // or buffer which ever one is reached first $data = fgets($this->fp, BUFFER_LENGTH); - if($data === false) - { - usleep(10000); - continue; - } // a sincle CRLF indicates end of headers if ($data === false || $data == CRLF || feof($this->fp)) { // break BEFORE OUTPUT @@ -197,15 +211,20 @@ abstract class Searchengine } while (true); // end of headers - + $bodySize = 0; if( isset($headers["Transfer-Encoding"]) && $headers["Transfer-Encoding"] === "chunked" ) { $body = $this->readChunked(); + }elseif( isset($headers['Content-Length']) ) { $length = trim($headers['Content-Length']); if(is_numeric($length) && $length >= 1) - $body = fread($this->fp, $headers['Content-Length']); + $body = $this->readBody($length); + $bodySize = strlen($body); + }else + { + abort(500, "Konnte nicht herausfinden, wie ich die Serverantwort von: " . $this->name . " auslesen soll. Header war: " . print_r($headers)); } Redis::setBit($this->name, $this->socketNumber, 0 ); @@ -213,6 +232,10 @@ abstract class Searchengine { $body = $this->gunzip($body); } + #print_r($headers); + #print($body); + #print("\r\n". $bodySize); + #exit; #die(print_r($headers)); // $body and $headers should contain your stream data $this->loadResults($body); @@ -220,6 +243,25 @@ abstract class Searchengine #exit; } + private function readBody(int $length) + { + $theData = ''; + $done = false; + stream_set_blocking($this->fp, 0); + $startTime = time(); + $lastTime = $startTime; + while (!feof($this->fp) && !$done && (($startTime + 1) > time()) && $length !== 0) + { + usleep(100); + $theNewData = fgets($this->fp, BUFFER_LENGTH); + $theData .= $theNewData; + $length -= strlen($theNewData); + $done = (trim($theNewData) === '0'); + + } + return $theData; + } + private function readChunked() { $body = ''; diff --git a/app/Models/parserSkripte/Onenewspagevideo.php b/app/Models/parserSkripte/Onenewspagevideo.php index 582a21f75..f8e8179e2 100644 --- a/app/Models/parserSkripte/Onenewspagevideo.php +++ b/app/Models/parserSkripte/Onenewspagevideo.php @@ -15,11 +15,13 @@ class Onenewspagevideo extends Searchengine public function loadResults (String $result) { $results = trim($result); - foreach( explode("\n", $results) as $result ) { $res = explode("|", $result); - + if(sizeof($res) < 3) + { + continue; + } $title = $res[0]; $link = $res[2]; $anzeigeLink = $link; diff --git a/app/Models/parserSkripte/Yacy.php b/app/Models/parserSkripte/Yacy.php new file mode 100644 index 000000000..5dfc72d51 --- /dev/null +++ b/app/Models/parserSkripte/Yacy.php @@ -0,0 +1,45 @@ +<?php + +namespace app\Models\parserSkripte; +use App\Models\Searchengine; + +class Yacy extends Searchengine +{ + public $results = []; + + function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager) + { + parent::__construct($engine, $metager); + } + + public function loadResults (String $result) + { + + # die($result); + try { + $content = simplexml_load_string($result); + } catch (\Exception $e) { + abort(500, "$result is not a valid xml string"); + } + + $results = $content->xpath("//rss/channel/item"); + + foreach($results as $res) + { + $title = $res->{"title"}; + $link = $res->{"link"}; + $anzeigeLink = $link; + $descr = $res->{"description"}; + + $this->counter++; + $this->results[] = new \App\Models\Result( + $title, + $link, + $anzeigeLink, + $descr, + $this->gefVon, + $this->counter + ); + } + } +} \ No newline at end of file diff --git a/config/app.php b/config/app.php index 22ac17845..088dc4ddc 100644 --- a/config/app.php +++ b/config/app.php @@ -52,7 +52,7 @@ return [ | */ - 'timezone' => 'UTC', + 'timezone' => 'Europe/Berlin', /* |-------------------------------------------------------------------------- -- GitLab