Commit 515366c7 authored by Dominik Hebeler
Browse files

Merge branch...

Merge branch '182-wir-fragen-immer-nur-die-ersten-ergebnisse-einer-suchmaschine-ab' into 'development'

Resolve "Wir fragen immer nur die ersten Ergebnisse einer Suchmaschine ab."

Closes #182

See merge request !304
parents 114cb6b1 a6387d93
...@@ -13,7 +13,7 @@ class Search extends Job implements ShouldQueue ...@@ -13,7 +13,7 @@ class Search extends Job implements ShouldQueue
{ {
use InteractsWithQueue, SerializesModels; use InteractsWithQueue, SerializesModels;
protected $hash, $host, $port, $name, $getString, $useragent, $fp, $sumaFile; protected $hash, $host, $port, $name, $getString, $useragent, $fp;
protected $buffer_length = 8192; protected $buffer_length = 8192;
/** /**
...@@ -21,7 +21,7 @@ class Search extends Job implements ShouldQueue ...@@ -21,7 +21,7 @@ class Search extends Job implements ShouldQueue
* *
* @return void * @return void
*/ */
public function __construct($hash, $host, $port, $name, $getString, $useragent, $sumaFile) public function __construct($hash, $host, $port, $name, $getString, $useragent)
{ {
$this->hash = $hash; $this->hash = $hash;
$this->host = $host; $this->host = $host;
...@@ -29,7 +29,6 @@ class Search extends Job implements ShouldQueue ...@@ -29,7 +29,6 @@ class Search extends Job implements ShouldQueue
$this->name = $name; $this->name = $name;
$this->getString = $getString; $this->getString = $getString;
$this->useragent = $useragent; $this->useragent = $useragent;
$this->sumaFile = $sumaFile;
} }
/** /**
...@@ -50,13 +49,6 @@ class Search extends Job implements ShouldQueue ...@@ -50,13 +49,6 @@ class Search extends Job implements ShouldQueue
} }
} }
/**
 * Marks this job's search engine as temporarily disabled in the suma XML file.
 *
 * Loads the XML file, looks up the <suma> element whose "name" attribute
 * equals $this->name, sets its "disabled" attribute to an RFC 822 timestamp
 * one hour ahead of the current time, and writes the document back to the
 * same path.
 *
 * If the file cannot be parsed or no matching <suma> element exists, the
 * method returns without touching the file instead of failing on an
 * undefined offset.
 *
 * @param string $sumaFile Path of the XML file to read and overwrite.
 * @param string $message  Not used by this method.
 *
 * @return void
 */
public function disable($sumaFile, $message)
{
    $xml = simplexml_load_file($sumaFile);
    if ($xml === false) {
        // Unreadable or malformed XML: there is nothing we can update.
        return;
    }

    $matches = $xml->xpath("//sumas/suma[@name='" . $this->name . "']");
    if (empty($matches)) {
        // xpath() yields an empty array (or false/null on error) when no engine matches.
        return;
    }

    // Current local time with the hour component incremented by one.
    $matches[0]['disabled'] = date(DATE_RFC822, mktime(date("H")+1,date("i"), date("s"), date("m"), date("d"), date("Y")));
    $xml->saveXML($sumaFile);
}
private function readAnswer () private function readAnswer ()
{ {
$time = microtime(true); $time = microtime(true);
......
...@@ -3,9 +3,8 @@ namespace App; ...@@ -3,9 +3,8 @@ namespace App;
use App; use App;
use App\lib\TextLanguageDetect\TextLanguageDetect; use App\lib\TextLanguageDetect\TextLanguageDetect;
use Cache;
use Illuminate\Http\Request; use Illuminate\Http\Request;
use Illuminate\Pagination\LengthAwarePaginator;
use Illuminate\Support\Collection;
use Jenssegers\Agent\Agent; use Jenssegers\Agent\Agent;
use LaravelLocalization; use LaravelLocalization;
use Log; use Log;
...@@ -33,6 +32,7 @@ class MetaGer ...@@ -33,6 +32,7 @@ class MetaGer
protected $warnings = []; protected $warnings = [];
protected $errors = []; protected $errors = [];
protected $addedHosts = []; protected $addedHosts = [];
protected $startCount = 0;
# Daten über die Abfrage # Daten über die Abfrage
protected $ip; protected $ip;
protected $language; protected $language;
...@@ -60,6 +60,14 @@ class MetaGer ...@@ -60,6 +60,14 @@ class MetaGer
Log::warning("Achtung: Eine, oder mehrere Blacklist Dateien, konnten nicht geöffnet werden"); Log::warning("Achtung: Eine, oder mehrere Blacklist Dateien, konnten nicht geöffnet werden");
} }
$dir = app_path() . "/Models/parserSkripte/";
foreach (scandir($dir) as $filename) {
$path = $dir . $filename;
if (is_file($path)) {
require $path;
}
}
$this->languageDetect = new TextLanguageDetect(); $this->languageDetect = new TextLanguageDetect();
$this->languageDetect->setNameMode("2"); $this->languageDetect->setNameMode("2");
} }
...@@ -182,16 +190,22 @@ class MetaGer ...@@ -182,16 +190,22 @@ class MetaGer
public function combineResults() public function combineResults()
{ {
foreach ($this->engines as $engine) { foreach ($this->engines as $engine) {
if (isset($engine->next)) {
$this->next[] = $engine->next;
}
if (isset($engine->last)) {
$this->last[] = $engine->last;
}
foreach ($engine->results as $result) { foreach ($engine->results as $result) {
if ($result->valid) { if ($result->valid) {
$this->results[] = $result; $this->results[] = $result;
} }
} }
foreach ($engine->ads as $ad) { foreach ($engine->ads as $ad) {
$this->ads[] = $ad; $this->ads[] = $ad;
} }
} }
uasort($this->results, function ($a, $b) { uasort($this->results, function ($a, $b) {
if ($a->getRank() == $b->getRank()) { if ($a->getRank() == $b->getRank()) {
return 0; return 0;
...@@ -211,13 +225,23 @@ class MetaGer ...@@ -211,13 +225,23 @@ class MetaGer
$counter = 0; $counter = 0;
$firstRank = 0; $firstRank = 0;
if(isset($this->startForwards))
{
$this->startCount = $this->startForwards;
}elseif (isset($this->startBackwards)) {
$this->startCount = $this->startBackwards - count($this->results) - 1;
}else{
$this->startCount = 0;
}
foreach ($this->results as $result) { foreach ($this->results as $result) {
if ($counter === 0) { if ($counter === 0) {
$firstRank = $result->rank; $firstRank = $result->rank;
} }
$counter++; $counter++;
$result->number = $counter; $result->number = $counter + $this->startCount;
$confidence = 0; $confidence = 0;
if ($firstRank > 0) { if ($firstRank > 0) {
$confidence = $result->rank / $firstRank; $confidence = $result->rank / $firstRank;
...@@ -237,38 +261,6 @@ class MetaGer ...@@ -237,38 +261,6 @@ class MetaGer
} }
//Get current page form url e.g. &page=6
$currentPage = LengthAwarePaginator::resolveCurrentPage();
$offset = $currentPage - 1;
//Create a new Laravel collection from the array data
$collection = new Collection($this->results);
//Define how many items we want to be visible in each page
$perPage = $this->resultCount;
//Slice the collection to get the items to display in current page
$currentPageSearchResults = $collection->slice($offset * $perPage, $perPage)->all();
# Für diese 20 Links folgt nun unsere Boost-Implementation.
$currentPageSearchResults = $this->parseBoost($currentPageSearchResults);
# Für diese 20 Links folgt nun unsere Adgoal- Implementation.
$currentPageSearchResults = $this->parseAdgoal($currentPageSearchResults);
//Create our paginator and pass it to the view
$paginatedSearchResults = new LengthAwarePaginator($currentPageSearchResults, count($collection), $perPage);
$paginatedSearchResults->setPath('/meta/meta.ger3');
foreach ($this->request->all() as $key => $value) {
if ($key === "out") {
continue;
}
$paginatedSearchResults->addQuery($key, $value);
}
$this->results = $paginatedSearchResults;
if (LaravelLocalization::getCurrentLocale() === "en") { if (LaravelLocalization::getCurrentLocale() === "en") {
$this->ads = []; $this->ads = [];
} }
...@@ -289,6 +281,29 @@ class MetaGer ...@@ -289,6 +281,29 @@ class MetaGer
if (count($this->results) <= 0) { if (count($this->results) <= 0) {
$this->errors[] = "Leider konnten wir zu Ihrer Sucheingabe keine passenden Ergebnisse finden."; $this->errors[] = "Leider konnten wir zu Ihrer Sucheingabe keine passenden Ergebnisse finden.";
} }
if( isset($this->last) && count($this->last) > 0 )
{
$page = $this->page - 1;
$this->last = [
'page' => $page,
'startBackwards' => $this->results[0]->number,
'engines' => $this->last,
];
Cache::put(md5(serialize($this->last)), serialize($this->last), 60);
}
if( isset($this->next) && count($this->next) > 0 && count($this->results) > 0)
{
$page = $this->page + 1;
$this->next = [
'page' => $page,
'startForwards' => $this->results[count($this->results)-1]->number,
'engines' => $this->next,
];
Cache::put(md5(serialize($this->next)), serialize($this->next), 60);
}
} }
public function parseBoost($results) public function parseBoost($results)
...@@ -480,39 +495,53 @@ class MetaGer ...@@ -480,39 +495,53 @@ class MetaGer
$typeslist = []; $typeslist = [];
$counter = 0; $counter = 0;
foreach ($enabledSearchengines as $engine) { if ($request->has('next') && Cache::has($request->input('next')) && unserialize(Cache::get($request->input('next')))['page'] > 1 ) {
$next = unserialize(Cache::get($request->input('next')));
$this->page = $next['page'];
$engines = $next['engines'];
if(isset($next['startForwards']))
$this->startForwards = $next['startForwards'];
if(isset($next['startBackwards']))
$this->startBackwards = $next['startBackwards'];
} else {
foreach ($enabledSearchengines as $engine) {
if (!$siteSearchFailed && strlen($this->site) > 0 && (!isset($engine['hasSiteSearch']) || $engine['hasSiteSearch']->__toString() === "0")) { if (!$siteSearchFailed && strlen($this->site) > 0 && (!isset($engine['hasSiteSearch']) || $engine['hasSiteSearch']->__toString() === "0")) {
continue; continue;
} }
# Wenn diese Suchmaschine gar nicht eingeschaltet sein soll # Wenn diese Suchmaschine gar nicht eingeschaltet sein soll
$path = "App\Models\parserSkripte\\" . ucfirst($engine["package"]->__toString()); $path = "App\Models\parserSkripte\\" . ucfirst($engine["package"]->__toString());
if (!file_exists(app_path() . "/Models/parserSkripte/" . ucfirst($engine["package"]->__toString()) . ".php")) { if (!file_exists(app_path() . "/Models/parserSkripte/" . ucfirst($engine["package"]->__toString()) . ".php")) {
Log::error("Konnte " . $engine["name"] . " nicht abfragen, da kein Parser existiert"); Log::error("Konnte " . $engine["name"] . " nicht abfragen, da kein Parser existiert");
continue; continue;
} }
$time = microtime(); $time = microtime();
try try
{ {
$tmp = new $path($engine, $this); $tmp = new $path($engine, $this);
} catch (\ErrorException $e) { } catch (\ErrorException $e) {
Log::error("Konnte " . $engine["name"] . " nicht abfragen." . var_dump($e)); Log::error("Konnte " . $engine["name"] . " nicht abfragen." . var_dump($e));
continue; continue;
} }
if ($tmp->enabled && isset($this->debug)) { if ($tmp->enabled && isset($this->debug)) {
$this->warnings[] = $tmp->service . " Connection_Time: " . $tmp->connection_time . " Write_Time: " . $tmp->write_time . " Insgesamt:" . ((microtime() - $time) / 1000); $this->warnings[] = $tmp->service . " Connection_Time: " . $tmp->connection_time . " Write_Time: " . $tmp->write_time . " Insgesamt:" . ((microtime() - $time) / 1000);
} }
if ($tmp->isEnabled()) {
$engines[] = $tmp;
}
if ($tmp->isEnabled()) {
$engines[] = $tmp;
$this->sockets[$tmp->name] = $tmp->fp;
} }
}
# Wir starten die Suche manuell:
foreach ($engines as $engine) {
$engine->startSearch($this);
} }
# Jetzt werden noch alle Kategorien der Settings durchgegangen und die jeweils enthaltenen namen der Suchmaschinen gespeichert. # Jetzt werden noch alle Kategorien der Settings durchgegangen und die jeweils enthaltenen namen der Suchmaschinen gespeichert.
...@@ -607,11 +636,10 @@ class MetaGer ...@@ -607,11 +636,10 @@ class MetaGer
usleep(50000); usleep(50000);
} }
#exit;
foreach ($engines as $engine) { foreach ($engines as $engine) {
if (!$engine->loaded) { if (!$engine->loaded) {
try { try {
$engine->retrieveResults(); $engine->retrieveResults($this);
} catch (\ErrorException $e) { } catch (\ErrorException $e) {
Log::error($e); Log::error($e);
...@@ -681,7 +709,7 @@ class MetaGer ...@@ -681,7 +709,7 @@ class MetaGer
$this->time = $request->input('time', 1000); $this->time = $request->input('time', 1000);
# Page # Page
$this->page = $request->input('page', 1); $this->page = 1;
# Lang # Lang
$this->lang = $request->input('lang', 'all'); $this->lang = $request->input('lang', 'all');
if ($this->lang !== "de" && $this->lang !== "en" && $this->lang !== "all") { if ($this->lang !== "de" && $this->lang !== "en" && $this->lang !== "all") {
...@@ -880,6 +908,10 @@ class MetaGer ...@@ -880,6 +908,10 @@ class MetaGer
{ {
return $this->phrases; return $this->phrases;
} }
/**
 * Returns the value of the $page property (the result page currently being served).
 *
 * @return mixed
 */
public function getPage()
{
    return $this->page;
}
public function getSumaFile() public function getSumaFile()
{ {
...@@ -921,6 +953,10 @@ class MetaGer ...@@ -921,6 +953,10 @@ class MetaGer
return 0; return 0;
} }
} }
/**
 * Returns the value of the $startCount property.
 *
 * combineResults() adds this offset to each result's running number.
 *
 * @return int
 */
public function getStartCount()
{
    return $this->startCount;
}
public function addHostCount($host) public function addHostCount($host)
{ {
$hash = md5($host); $hash = md5($host);
...@@ -961,7 +997,7 @@ class MetaGer ...@@ -961,7 +997,7 @@ class MetaGer
public function generateSearchLink($fokus, $results = true) public function generateSearchLink($fokus, $results = true)
{ {
$requestData = $this->request->except('page'); $requestData = $this->request->except(['page', 'next']);
$requestData['focus'] = $fokus; $requestData['focus'] = $fokus;
if ($results) { if ($results) {
$requestData['out'] = "results"; $requestData['out'] = "results";
...@@ -983,7 +1019,7 @@ class MetaGer ...@@ -983,7 +1019,7 @@ class MetaGer
public function generateSiteSearchLink($host) public function generateSiteSearchLink($host)
{ {
$host = urlencode($host); $host = urlencode($host);
$requestData = $this->request->except(['page', 'out']); $requestData = $this->request->except(['page', 'out', 'next']);
$requestData['eingabe'] .= " site:$host"; $requestData['eingabe'] .= " site:$host";
$requestData['focus'] = "web"; $requestData['focus'] = "web";
$link = action('MetaGerSearch@search', $requestData); $link = action('MetaGerSearch@search', $requestData);
...@@ -993,7 +1029,7 @@ class MetaGer ...@@ -993,7 +1029,7 @@ class MetaGer
public function generateRemovedHostLink($host) public function generateRemovedHostLink($host)
{ {
$host = urlencode($host); $host = urlencode($host);
$requestData = $this->request->except(['page', 'out']); $requestData = $this->request->except(['page', 'out', 'next']);
$requestData['eingabe'] .= " -host:$host"; $requestData['eingabe'] .= " -host:$host";
$link = action('MetaGerSearch@search', $requestData); $link = action('MetaGerSearch@search', $requestData);
return $link; return $link;
...@@ -1002,12 +1038,36 @@ class MetaGer ...@@ -1002,12 +1038,36 @@ class MetaGer
public function generateRemovedDomainLink($domain) public function generateRemovedDomainLink($domain)
{ {
$domain = urlencode($domain); $domain = urlencode($domain);
$requestData = $this->request->except(['page', 'out']); $requestData = $this->request->except(['page', 'out', 'next']);
$requestData['eingabe'] .= " -domain:$domain"; $requestData['eingabe'] .= " -domain:$domain";
$link = action('MetaGerSearch@search', $requestData); $link = action('MetaGerSearch@search', $requestData);
return $link; return $link;
} }
/**
 * Builds the link to the previous result page.
 *
 * The link repeats the current request (without "page" and "out") and adds a
 * "next" parameter holding the MD5 hash of the serialized $this->last state.
 *
 * Returns "#" when no previous-page state with at least one engine is
 * available. The 'engines' key is checked directly so that a $this->last that
 * is set but lacks that key cannot trigger an undefined-index error.
 *
 * @return string URL of the previous page, or "#" if there is none.
 */
public function lastSearchLink()
{
    if (!isset($this->last['engines']) || count($this->last['engines']) <= 0) {
        return "#";
    }

    $requestData = $this->request->except(['page', 'out']);
    $requestData['next'] = md5(serialize($this->last));

    return action('MetaGerSearch@search', $requestData);
}
/**
 * Builds the link to the next result page.
 *
 * The link repeats the current request (without "page" and "out") and adds a
 * "next" parameter holding the MD5 hash of the serialized $this->next state.
 *
 * Returns "#" when no next-page state with at least one engine is available.
 * $this->next can be set while still lacking the 'engines' key (for example
 * when engines reported a next page but no results were kept), so the key is
 * checked directly instead of only testing isset($this->next).
 *
 * @return string URL of the next page, or "#" if there is none.
 */
public function nextSearchLink()
{
    if (!isset($this->next['engines']) || count($this->next['engines']) <= 0) {
        return "#";
    }

    $requestData = $this->request->except(['page', 'out']);
    $requestData['next'] = md5(serialize($this->next));

    return action('MetaGerSearch@search', $requestData);
}
public function getTab() public function getTab()
{ {
return $this->tab; return $this->tab;
......
...@@ -5,8 +5,9 @@ namespace App\Models; ...@@ -5,8 +5,9 @@ namespace App\Models;
class Result class Result
{ {
public function __construct(\SimpleXMLElement $provider, $titel, $link, $anzeigeLink, $descr, $gefVon, $sourceRank, $partnershop = false, $image = "", $price = 0) public function __construct($provider, $titel, $link, $anzeigeLink, $descr, $gefVon, $sourceRank, $partnershop = false, $image = "", $price = 0)
{ {
$provider = simplexml_load_string($provider);
$this->titel = strip_tags(trim($titel)); $this->titel = strip_tags(trim($titel));
$this->link = trim($link); $this->link = trim($link);
$this->anzeigeLink = trim($anzeigeLink); $this->anzeigeLink = trim($anzeigeLink);
......
...@@ -36,16 +36,12 @@ abstract class Searchengine ...@@ -36,16 +36,12 @@ abstract class Searchengine
$this->homepage = "https://metager.de"; $this->homepage = "https://metager.de";
} }
$this->engine = $engine; $this->engine = $engine->asXML();
if (!isset($this->cacheDuration)) { if (!isset($this->cacheDuration)) {
$this->cacheDuration = 60; $this->cacheDuration = 60;
} }
# Wir registrieren die Benutzung dieser Suchmaschine
$this->uses = intval(Redis::hget($this->name, "uses")) + 1;
Redis::hset($this->name, "uses", $this->uses);
# Eine Suchmaschine kann automatisch temporär deaktiviert werden, wenn es Verbindungsprobleme gab: # Eine Suchmaschine kann automatisch temporär deaktiviert werden, wenn es Verbindungsprobleme gab:
if (isset($this->disabled) && strtotime($this->disabled) <= time()) { if (isset($this->disabled) && strtotime($this->disabled) <= time()) {
# In diesem Fall ist der Timeout der Suchmaschine abgelaufen. # In diesem Fall ist der Timeout der Suchmaschine abgelaufen.
...@@ -75,21 +71,32 @@ abstract class Searchengine ...@@ -75,21 +71,32 @@ abstract class Searchengine
$this->getString = $this->generateGetString($q, $metager->getUrl(), $metager->getLanguage(), $metager->getCategory()); $this->getString = $this->generateGetString($q, $metager->getUrl(), $metager->getLanguage(), $metager->getCategory());
$this->hash = md5($this->host . $this->getString . $this->port . $this->name); $this->hash = md5($this->host . $this->getString . $this->port . $this->name);
$this->resultHash = $metager->getHashCode(); $this->resultHash = $metager->getHashCode();
}
abstract public function loadResults($result);
/**
 * Hook called by retrieveResults() after loadResults() with the same response body.
 *
 * The base implementation does nothing.
 * NOTE(review): presumably concrete engines override this to provide the
 * previous-page state read as $engine->last elsewhere - confirm.
 *
 * @param MetaGer $metager The running MetaGer instance.
 * @param mixed   $result  The raw response body passed on by retrieveResults().
 *
 * @return void
 */
public function getLast(MetaGer $metager, $result)
{
    // Intentionally empty.
}
/**
 * Hook called by retrieveResults() after loadResults() with the same response body.
 *
 * The base implementation does nothing.
 * NOTE(review): presumably concrete engines override this to provide the
 * next-page state read as $engine->next elsewhere - confirm.
 *
 * @param MetaGer $metager The running MetaGer instance.
 * @param mixed   $result  The raw response body passed on by retrieveResults().
 *
 * @return void
 */
public function getNext(MetaGer $metager, $result)
{
    // Intentionally empty.
}
public function startSearch(\App\MetaGer $metager)
{
if (Cache::has($this->hash)) { if (Cache::has($this->hash)) {
$this->cached = true; $this->cached = true;
$this->retrieveResults(); $this->retrieveResults($metager);
} else { } else {
# Die Anfragen an die Suchmaschinen werden nun von der Laravel-Queue bearbeitet: # Die Anfragen an die Suchmaschinen werden nun von der Laravel-Queue bearbeitet:
# Hinweis: solange in der .env der QUEUE_DRIVER auf "sync" gestellt ist, werden die Abfragen # Hinweis: solange in der .env der QUEUE_DRIVER auf "sync" gestellt ist, werden die Abfragen
# nacheinander abgeschickt. # nacheinander abgeschickt.
# Sollen diese Parallel verarbeitet werden, muss ein anderer QUEUE_DRIVER verwendet werden. # Sollen diese Parallel verarbeitet werden, muss ein anderer QUEUE_DRIVER verwendet werden.
# siehe auch: https://laravel.com/docs/5.2/queues # siehe auch: https://laravel.com/docs/5.2/queues
$this->dispatch(new Search($this->resultHash, $this->host, $this->port, $this->name, $this->getString, $this->useragent, $metager->getSumaFile())); $this->dispatch(new Search($this->resultHash, $this->host, $this->port, $this->name, $this->getString, $this->useragent));
} }
} }
abstract public function loadResults($result);
public function rank(\App\MetaGer $metager) public function rank(\App\MetaGer $metager)
{ {
foreach ($this->results as $result) { foreach ($this->results as $result) {
...@@ -130,7 +137,7 @@ abstract class Searchengine ...@@ -130,7 +137,7 @@ abstract class Searchengine
} }
} }
public function retrieveResults() public function retrieveResults(MetaGer $metager)
{ {
if ($this->loaded) { if ($this->loaded) {
return true; return true;
...@@ -149,6 +156,8 @@ abstract class Searchengine ...@@ -149,6 +156,8 @@ abstract class Searchengine
if ($body !== "") { if ($body !== "") {
$this->loadResults($body); $this->loadResults($body);
$this->getNext($metager, $body);
$this->getLast($metager, $body);
$this->loaded = true; $this->loaded = true;
Redis::hdel('search.' . $this->hash, $this->name); Redis::hdel('search.' . $this->hash, $this->name);
return true; return true;
......
...@@ -62,4 +62,58 @@ class Overture extends Searchengine ...@@ -62,4 +62,58 @@ class Overture extends Searchengine
); );
} }
} }