Commit fdf8ddda authored by Dominik Hebeler
Browse files

Merge branch...

Merge branch '182-wir-fragen-immer-nur-die-ersten-ergebnisse-einer-suchmaschine-ab' into 'development'

Resolve "Wir fragen immer nur die ersten Ergebnisse einer Suchmaschine ab."

Closes #182

See merge request !304
parents 4652b8c2 ee8830ec
......@@ -13,7 +13,7 @@ class Search extends Job implements ShouldQueue
{
use InteractsWithQueue, SerializesModels;
protected $hash, $host, $port, $name, $getString, $useragent, $fp, $sumaFile;
protected $hash, $host, $port, $name, $getString, $useragent, $fp;
protected $buffer_length = 8192;
/**
......@@ -21,7 +21,7 @@ class Search extends Job implements ShouldQueue
*
* @return void
*/
public function __construct($hash, $host, $port, $name, $getString, $useragent, $sumaFile)
public function __construct($hash, $host, $port, $name, $getString, $useragent)
{
$this->hash = $hash;
$this->host = $host;
......@@ -29,7 +29,6 @@ class Search extends Job implements ShouldQueue
$this->name = $name;
$this->getString = $getString;
$this->useragent = $useragent;
$this->sumaFile = $sumaFile;
}
/**
......@@ -50,13 +49,6 @@ class Search extends Job implements ShouldQueue
}
}
/**
 * Marks this job's search engine as disabled in the suma XML file.
 *
 * Loads the XML file, sets the `disabled` attribute on the `suma` element whose
 * `name` attribute equals this job's engine name, and writes the document back
 * to the same file. The attribute value is an RFC 822 timestamp built from the
 * current date and time with the hour field advanced by one.
 *
 * @param string $sumaFile File name passed to simplexml_load_file() and saveXML().
 * @param mixed  $message  Not used by this method.
 */
public function disable($sumaFile, $message)
{
    $document = simplexml_load_file($sumaFile);

    $matches = $document->xpath("//sumas/suma[@name='" . $this->name . "']");

    // Current wall-clock time with the hour component incremented by one.
    $expiry = mktime(date("H") + 1, date("i"), date("s"), date("m"), date("d"), date("Y"));
    $matches['0']['disabled'] = date(DATE_RFC822, $expiry);

    $document->saveXML($sumaFile);
}
private function readAnswer ()
{
$time = microtime(true);
......
......@@ -3,9 +3,8 @@ namespace App;
use App;
use App\lib\TextLanguageDetect\TextLanguageDetect;
use Cache;
use Illuminate\Http\Request;
use Illuminate\Pagination\LengthAwarePaginator;
use Illuminate\Support\Collection;
use Jenssegers\Agent\Agent;
use LaravelLocalization;
use Log;
......@@ -33,6 +32,7 @@ class MetaGer
protected $warnings = [];
protected $errors = [];
protected $addedHosts = [];
protected $startCount = 0;
# Daten über die Abfrage
protected $ip;
protected $language;
......@@ -60,6 +60,14 @@ class MetaGer
Log::warning("Achtung: Eine, oder mehrere Blacklist Dateien, konnten nicht geöffnet werden");
}
$dir = app_path() . "/Models/parserSkripte/";
foreach (scandir($dir) as $filename) {
$path = $dir . $filename;
if (is_file($path)) {
require $path;
}
}
$this->languageDetect = new TextLanguageDetect();
$this->languageDetect->setNameMode("2");
}
......@@ -182,16 +190,22 @@ class MetaGer
public function combineResults()
{
foreach ($this->engines as $engine) {
if (isset($engine->next)) {
$this->next[] = $engine->next;
}
if (isset($engine->last)) {
$this->last[] = $engine->last;
}
foreach ($engine->results as $result) {
if ($result->valid) {
$this->results[] = $result;
}
}
foreach ($engine->ads as $ad) {
$this->ads[] = $ad;
}
}
uasort($this->results, function ($a, $b) {
if ($a->getRank() == $b->getRank()) {
return 0;
......@@ -211,13 +225,23 @@ class MetaGer
$counter = 0;
$firstRank = 0;
if(isset($this->startForwards))
{
$this->startCount = $this->startForwards;
}elseif (isset($this->startBackwards)) {
$this->startCount = $this->startBackwards - count($this->results) - 1;
}else{
$this->startCount = 0;
}
foreach ($this->results as $result) {
if ($counter === 0) {
$firstRank = $result->rank;
}
$counter++;
$result->number = $counter;
$result->number = $counter + $this->startCount;
$confidence = 0;
if ($firstRank > 0) {
$confidence = $result->rank / $firstRank;
......@@ -237,38 +261,6 @@ class MetaGer
}
//Get current page form url e.g. &page=6
$currentPage = LengthAwarePaginator::resolveCurrentPage();
$offset = $currentPage - 1;
//Create a new Laravel collection from the array data
$collection = new Collection($this->results);
//Define how many items we want to be visible in each page
$perPage = $this->resultCount;
//Slice the collection to get the items to display in current page
$currentPageSearchResults = $collection->slice($offset * $perPage, $perPage)->all();
# Für diese 20 Links folgt nun unsere Boost-Implementation.
$currentPageSearchResults = $this->parseBoost($currentPageSearchResults);
# Für diese 20 Links folgt nun unsere Adgoal- Implementation.
$currentPageSearchResults = $this->parseAdgoal($currentPageSearchResults);
//Create our paginator and pass it to the view
$paginatedSearchResults = new LengthAwarePaginator($currentPageSearchResults, count($collection), $perPage);
$paginatedSearchResults->setPath('/meta/meta.ger3');
foreach ($this->request->all() as $key => $value) {
if ($key === "out") {
continue;
}
$paginatedSearchResults->addQuery($key, $value);
}
$this->results = $paginatedSearchResults;
if (LaravelLocalization::getCurrentLocale() === "en") {
$this->ads = [];
}
......@@ -289,6 +281,29 @@ class MetaGer
if (count($this->results) <= 0) {
$this->errors[] = "Leider konnten wir zu Ihrer Sucheingabe keine passenden Ergebnisse finden.";
}
if( isset($this->last) && count($this->last) > 0 )
{
$page = $this->page - 1;
$this->last = [
'page' => $page,
'startBackwards' => $this->results[0]->number,
'engines' => $this->last,
];
Cache::put(md5(serialize($this->last)), serialize($this->last), 60);
}
if( isset($this->next) && count($this->next) > 0 && count($this->results) > 0)
{
$page = $this->page + 1;
$this->next = [
'page' => $page,
'startForwards' => $this->results[count($this->results)-1]->number,
'engines' => $this->next,
];
Cache::put(md5(serialize($this->next)), serialize($this->next), 60);
}
}
public function parseBoost($results)
......@@ -480,39 +495,53 @@ class MetaGer
$typeslist = [];
$counter = 0;
foreach ($enabledSearchengines as $engine) {
if ($request->has('next') && Cache::has($request->input('next')) && unserialize(Cache::get($request->input('next')))['page'] > 1 ) {
$next = unserialize(Cache::get($request->input('next')));
$this->page = $next['page'];
$engines = $next['engines'];
if(isset($next['startForwards']))
$this->startForwards = $next['startForwards'];
if(isset($next['startBackwards']))
$this->startBackwards = $next['startBackwards'];
} else {
foreach ($enabledSearchengines as $engine) {
if (!$siteSearchFailed && strlen($this->site) > 0 && (!isset($engine['hasSiteSearch']) || $engine['hasSiteSearch']->__toString() === "0")) {
if (!$siteSearchFailed && strlen($this->site) > 0 && (!isset($engine['hasSiteSearch']) || $engine['hasSiteSearch']->__toString() === "0")) {
continue;
}
# Wenn diese Suchmaschine gar nicht eingeschaltet sein soll
$path = "App\Models\parserSkripte\\" . ucfirst($engine["package"]->__toString());
continue;
}
# Wenn diese Suchmaschine gar nicht eingeschaltet sein soll
$path = "App\Models\parserSkripte\\" . ucfirst($engine["package"]->__toString());
if (!file_exists(app_path() . "/Models/parserSkripte/" . ucfirst($engine["package"]->__toString()) . ".php")) {
Log::error("Konnte " . $engine["name"] . " nicht abfragen, da kein Parser existiert");
continue;
}
if (!file_exists(app_path() . "/Models/parserSkripte/" . ucfirst($engine["package"]->__toString()) . ".php")) {
Log::error("Konnte " . $engine["name"] . " nicht abfragen, da kein Parser existiert");
continue;
}
$time = microtime();
$time = microtime();
try
{
$tmp = new $path($engine, $this);
} catch (\ErrorException $e) {
Log::error("Konnte " . $engine["name"] . " nicht abfragen." . var_dump($e));
continue;
}
try
{
$tmp = new $path($engine, $this);
} catch (\ErrorException $e) {
Log::error("Konnte " . $engine["name"] . " nicht abfragen." . var_dump($e));
continue;
}
if ($tmp->enabled && isset($this->debug)) {
$this->warnings[] = $tmp->service . " Connection_Time: " . $tmp->connection_time . " Write_Time: " . $tmp->write_time . " Insgesamt:" . ((microtime() - $time) / 1000);
}
if ($tmp->enabled && isset($this->debug)) {
$this->warnings[] = $tmp->service . " Connection_Time: " . $tmp->connection_time . " Write_Time: " . $tmp->write_time . " Insgesamt:" . ((microtime() - $time) / 1000);
}
if ($tmp->isEnabled()) {
$engines[] = $tmp;
}
if ($tmp->isEnabled()) {
$engines[] = $tmp;
$this->sockets[$tmp->name] = $tmp->fp;
}
}
# Wir starten die Suche manuell:
foreach ($engines as $engine) {
$engine->startSearch($this);
}
# Jetzt werden noch alle Kategorien der Settings durchgegangen und die jeweils enthaltenen namen der Suchmaschinen gespeichert.
......@@ -607,11 +636,10 @@ class MetaGer
usleep(50000);
}
#exit;
foreach ($engines as $engine) {
if (!$engine->loaded) {
try {
$engine->retrieveResults();
$engine->retrieveResults($this);
} catch (\ErrorException $e) {
Log::error($e);
......@@ -681,7 +709,7 @@ class MetaGer
$this->time = $request->input('time', 1000);
# Page
$this->page = $request->input('page', 1);
$this->page = 1;
# Lang
$this->lang = $request->input('lang', 'all');
if ($this->lang !== "de" && $this->lang !== "en" && $this->lang !== "all") {
......@@ -880,6 +908,10 @@ class MetaGer
{
return $this->phrases;
}
/**
 * Returns the current value of the `page` property.
 *
 * @return mixed
 */
public function getPage()
{
return $this->page;
}
public function getSumaFile()
{
......@@ -921,6 +953,10 @@ class MetaGer
return 0;
}
}
/**
 * Returns the current value of the `startCount` property.
 *
 * @return mixed
 */
public function getStartCount()
{
return $this->startCount;
}
public function addHostCount($host)
{
$hash = md5($host);
......@@ -961,7 +997,7 @@ class MetaGer
public function generateSearchLink($fokus, $results = true)
{
$requestData = $this->request->except('page');
$requestData = $this->request->except(['page', 'next']);
$requestData['focus'] = $fokus;
if ($results) {
$requestData['out'] = "results";
......@@ -983,7 +1019,7 @@ class MetaGer
public function generateSiteSearchLink($host)
{
$host = urlencode($host);
$requestData = $this->request->except(['page', 'out']);
$requestData = $this->request->except(['page', 'out', 'next']);
$requestData['eingabe'] .= " site:$host";
$requestData['focus'] = "web";
$link = action('MetaGerSearch@search', $requestData);
......@@ -993,7 +1029,7 @@ class MetaGer
public function generateRemovedHostLink($host)
{
$host = urlencode($host);
$requestData = $this->request->except(['page', 'out']);
$requestData = $this->request->except(['page', 'out', 'next']);
$requestData['eingabe'] .= " -host:$host";
$link = action('MetaGerSearch@search', $requestData);
return $link;
......@@ -1002,12 +1038,36 @@ class MetaGer
public function generateRemovedDomainLink($domain)
{
$domain = urlencode($domain);
$requestData = $this->request->except(['page', 'out']);
$requestData = $this->request->except(['page', 'out', 'next']);
$requestData['eingabe'] .= " -domain:$domain";
$link = action('MetaGerSearch@search', $requestData);
return $link;
}
/**
 * Builds the URL for the "previous results" link.
 *
 * When `$this->last` is set and its `engines` entry is non-empty, the link points
 * to the `MetaGerSearch@search` action with the current request parameters
 * (without `page` and `out`) plus a `next` parameter holding the MD5 hash of the
 * serialized `$this->last` array.
 *
 * @return string The generated link, or "#" when no previous page is available.
 */
public function lastSearchLink()
{
    if (!isset($this->last) || count($this->last['engines']) <= 0) {
        return "#";
    }

    $params = $this->request->except(['page', 'out']);
    $params['next'] = md5(serialize($this->last));

    return action('MetaGerSearch@search', $params);
}
/**
 * Builds the URL for the "next results" link.
 *
 * When `$this->next` carries a non-empty `engines` entry, the link points to the
 * `MetaGerSearch@search` action with the current request parameters (without
 * `page` and `out`) plus a `next` parameter holding the MD5 hash of the
 * serialized `$this->next` array.
 *
 * @return string The generated link, or "#" when no next page is available.
 */
public function nextSearchLink()
{
    // `$this->next` is only turned into the keyed ['page', 'startForwards', 'engines']
    // form when the current search produced results. Without results it is still the
    // plain list of engine objects, so the 'engines' key must be checked explicitly
    // instead of being read unconditionally (which raised an undefined index error).
    if (!isset($this->next['engines']) || count($this->next['engines']) <= 0) {
        return "#";
    }

    $requestData = $this->request->except(['page', 'out']);
    $requestData['next'] = md5(serialize($this->next));

    return action('MetaGerSearch@search', $requestData);
}
public function getTab()
{
return $this->tab;
......
......@@ -5,8 +5,9 @@ namespace App\Models;
class Result
{
public function __construct(\SimpleXMLElement $provider, $titel, $link, $anzeigeLink, $descr, $gefVon, $sourceRank, $partnershop = false, $image = "", $price = 0)
public function __construct($provider, $titel, $link, $anzeigeLink, $descr, $gefVon, $sourceRank, $partnershop = false, $image = "", $price = 0)
{
$provider = simplexml_load_string($provider);
$this->titel = strip_tags(trim($titel));
$this->link = trim($link);
$this->anzeigeLink = trim($anzeigeLink);
......
......@@ -36,16 +36,12 @@ abstract class Searchengine
$this->homepage = "https://metager.de";
}
$this->engine = $engine;
$this->engine = $engine->asXML();
if (!isset($this->cacheDuration)) {
$this->cacheDuration = 60;
}
# Wir registrieren die Benutzung dieser Suchmaschine
$this->uses = intval(Redis::hget($this->name, "uses")) + 1;
Redis::hset($this->name, "uses", $this->uses);
# Eine Suchmaschine kann automatisch temporär deaktiviert werden, wenn es Verbindungsprobleme gab:
if (isset($this->disabled) && strtotime($this->disabled) <= time()) {
# In diesem Fall ist der Timeout der Suchmaschine abgelaufen.
......@@ -75,21 +71,32 @@ abstract class Searchengine
$this->getString = $this->generateGetString($q, $metager->getUrl(), $metager->getLanguage(), $metager->getCategory());
$this->hash = md5($this->host . $this->getString . $this->port . $this->name);
$this->resultHash = $metager->getHashCode();
}
abstract public function loadResults($result);
/**
 * Hook for preparing the request that fetches the previous result page.
 *
 * The base implementation intentionally does nothing. Engines that support
 * paging override it and store a prepared engine object in `$this->last`.
 * The class name is fully qualified so the signature matches startSearch()
 * and the overriding methods in the parser classes.
 *
 * @param \App\MetaGer $metager The running search.
 * @param string       $result  Raw response body of this engine.
 */
public function getLast(\App\MetaGer $metager, $result)
{
}
/**
 * Hook for preparing the request that fetches the next result page.
 *
 * The base implementation intentionally does nothing. Engines that support
 * paging override it and store a prepared engine object in `$this->next`.
 * The class name is fully qualified so the signature matches startSearch()
 * and the overriding methods in the parser classes.
 *
 * @param \App\MetaGer $metager The running search.
 * @param string       $result  Raw response body of this engine.
 */
public function getNext(\App\MetaGer $metager, $result)
{
}
public function startSearch(\App\MetaGer $metager)
{
if (Cache::has($this->hash)) {
$this->cached = true;
$this->retrieveResults();
$this->retrieveResults($metager);
} else {
# Die Anfragen an die Suchmaschinen werden nun von der Laravel-Queue bearbeitet:
# Hinweis: solange in der .env der QUEUE_DRIVER auf "sync" gestellt ist, werden die Abfragen
# nacheinander abgeschickt.
# Sollen diese Parallel verarbeitet werden, muss ein anderer QUEUE_DRIVER verwendet werden.
# siehe auch: https://laravel.com/docs/5.2/queues
$this->dispatch(new Search($this->resultHash, $this->host, $this->port, $this->name, $this->getString, $this->useragent, $metager->getSumaFile()));
$this->dispatch(new Search($this->resultHash, $this->host, $this->port, $this->name, $this->getString, $this->useragent));
}
}
abstract public function loadResults($result);
public function rank(\App\MetaGer $metager)
{
foreach ($this->results as $result) {
......@@ -130,7 +137,7 @@ abstract class Searchengine
}
}
public function retrieveResults()
public function retrieveResults(MetaGer $metager)
{
if ($this->loaded) {
return true;
......@@ -149,6 +156,8 @@ abstract class Searchengine
if ($body !== "") {
$this->loadResults($body);
$this->getNext($metager, $body);
$this->getLast($metager, $body);
$this->loaded = true;
Redis::hdel('search.' . $this->hash, $this->name);
return true;
......
......@@ -62,4 +62,58 @@ class Overture extends Searchengine
);
}
}
/**
 * Prepares the engine object that requests the previous Overture result page.
 *
 * Reads the `PrevArgs` element from the XML response (first directly below
 * `Results`, then inside the `ResultSet` with id "inktomi"). If one is found, a
 * fresh Overture instance is created whose GET string has its `Keywords`
 * parameter removed and the previous-page arguments appended; it is stored in
 * `$this->last`. Without `PrevArgs` nothing is stored.
 *
 * @param \App\MetaGer $metager The running search.
 * @param string       $result  Raw XML response body of this engine.
 */
public function getLast(\App\MetaGer $metager, $result)
{
    # Read the arguments for the previous result page:
    $result = preg_replace("/\r\n/si", "", $result);
    try {
        $content = simplexml_load_string($result);
    } catch (\Exception $e) {
        $content = false;
    }
    // simplexml_load_string() reports failure by returning false when warnings are
    // not converted into exceptions; treat both failure modes the same way.
    if ($content === false) {
        abort(500, "$result is not a valid xml string");
    }

    $lastArgs = $content->xpath('//Results/PrevArgs');
    if (!isset($lastArgs[0])) {
        $lastArgs = $content->xpath('//Results/ResultSet[@id="inktomi"]/PrevArgs');
    }
    if (!isset($lastArgs[0])) {
        return;
    }
    $lastArgs = (string) $lastArgs[0];

    # Create the new search engine object and adjust its GET string:
    $last = new Overture(simplexml_load_string($this->engine), $metager);
    $last->getString = preg_replace("/&Keywords=.*?&/si", "&", $last->getString) . "&" . $lastArgs;
    $this->last = $last;
}
/**
 * Prepares the engine object that requests the next Overture result page.
 *
 * Reads the `NextArgs` element from the XML response (first directly below
 * `Results`, then inside the `ResultSet` with id "inktomi"). If one is found, a
 * fresh Overture instance is created whose GET string has its `Keywords`
 * parameter removed and the next-page arguments appended; it is stored in
 * `$this->next`. Without `NextArgs` nothing is stored.
 *
 * @param \App\MetaGer $metager The running search.
 * @param string       $result  Raw XML response body of this engine.
 */
public function getNext(\App\MetaGer $metager, $result)
{
    # Read the arguments for the next result page:
    $result = preg_replace("/\r\n/si", "", $result);
    try {
        $content = simplexml_load_string($result);
    } catch (\Exception $e) {
        $content = false;
    }
    // simplexml_load_string() reports failure by returning false when warnings are
    // not converted into exceptions; treat both failure modes the same way.
    if ($content === false) {
        abort(500, "$result is not a valid xml string");
    }

    $nextArgs = $content->xpath('//Results/NextArgs');
    if (!isset($nextArgs[0])) {
        $nextArgs = $content->xpath('//Results/ResultSet[@id="inktomi"]/NextArgs');
    }
    if (!isset($nextArgs[0])) {
        return;
    }
    $nextArgs = (string) $nextArgs[0];

    # Create the new search engine object and adjust its GET string:
    $next = new Overture(simplexml_load_string($this->engine), $metager);
    $next->getString = preg_replace("/&Keywords=.*?&/si", "&", $next->getString) . "&" . $nextArgs;
    $this->next = $next;
}
}
......@@ -51,4 +51,22 @@ class Yandex extends Searchengine
);
}
}
/**
 * Prepares the engine object that requests the previous Yandex result page.
 *
 * Does nothing on the first page. Otherwise a fresh Yandex instance is created,
 * a `page` argument is appended to its GET string and the instance is stored in
 * `$this->last`.
 *
 * @param \App\MetaGer $metager The running search; supplies the current page number.
 * @param string       $result  Raw response body of this engine (not used here).
 */
public function getLast(\App\MetaGer $metager, $result)
{
    if ($metager->getPage() <= 1) {
        return;
    }

    $last = new Yandex(simplexml_load_string($this->engine), $metager);
    // getNext() requests `page=<current page>` for the following page, so the page
    // currently shown was fetched with `page=<current page - 1>` and the one before
    // it is `page=<current page - 2>`.
    // NOTE(review): like getNext(), this appends without a leading "&" - presumably
    // the generated GET string already ends with one; confirm.
    $last->getString .= "page=" . ($metager->getPage() - 2);

    // Store under `last`; writing to `next` here would overwrite the engine that
    // getNext() prepared for the following page.
    $this->last = $last;
}
/**
 * Prepares the engine object that requests the next Yandex result page.
 *
 * Does nothing when this engine delivered no results. Otherwise a fresh Yandex
 * instance is created, `page=<current page>` is appended to its GET string and
 * the instance is stored in `$this->next`.
 *
 * @param \App\MetaGer $metager The running search; supplies the current page number.
 * @param string       $result  Raw response body of this engine (not used here).
 */
public function getNext(\App\MetaGer $metager, $result)
{
    if (count($this->results) < 1) {
        return;
    }

    $followUp = new Yandex(simplexml_load_string($this->engine), $metager);
    $followUp->getString = $followUp->getString . "page=" . $metager->getPage();

    $this->next = $followUp;
}
}
......@@ -21,7 +21,7 @@
</div>
@foreach( $metager->request->all() as $key => $value)
@if($key !== "eingabe" && $key !== "page")
@if($key !== "eingabe" && $key !== "page" && $key !== "next")
<input type='hidden' name='{{ $key }}' value='{{ $value }}' form='submitForm' />
@endif
@endforeach
......
......@@ -15,7 +15,6 @@
<meta content="{{ getmypid() }}" name="p" />
<meta content="{{ $eingabe }}" name="q" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="referrer" content="no-referrer" />
<link rel="search" type="application/opensearchdescription+xml" title="MetaGer: Sicher suchen &amp; finden, Privatsph&auml;re sch&uuml;tzen" href="{{ LaravelLocalization::getLocalizedURL(LaravelLocalization::getCurrentLocale(), action('StartpageController@loadPlugin', ['params' => base64_encode(serialize(Request::all()))])) }}">
</head>
......@@ -24,9 +23,15 @@
@include('layouts.researchandtabs')
@else
<div class="tab-content container-fluid">
@yield('results')
@yield('results')
</div>
@endif
<nav aria-label="...">
<ul class="pager">
<li @if($metager->lastSearchLink() === "#") class="disabled" @endif><a href="{{ $metager->lastSearchLink() }}">Zurück</a></li>
<li @if($metager->nextSearchLink() === "#") class="disabled" @endif><a href="{{ $metager->nextSearchLink() }}">Weiter Suchen</a></li>
</ul>
</nav>
<footer>
<div class="row">
<div class="col-xs-6">
......
......@@ -35,15 +35,12 @@
@for($i = 0; $i <= 2; $i++)
@include('layouts.ad', ['ad' => $metager->popAd()])
@endfor
@foreach($metager->getResults()->items() as $result)
@foreach($metager->getResults() as $result)
@if($result->number % 7 === 0)
@include('layouts.ad', ['ad' => $metager->popAd()])
@endif
@include('layouts.result', ['result' => $result])
@endforeach
<nav class="pager">
{!! $metager->getResults()->links() !!}
</nav>
</div>
@if( $metager->showQuicktips() )
<div class="hidden-xs col-md-4" id="quicktips">
......@@ -51,4 +48,3 @@
</div>
@endif
@endsection
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment