Newer
Older
<?php
namespace App;
use Illuminate\Http\Request;
use Jenssegers\Agent\Agent;
use App;

Dominik Hebeler
committed
use Storage;
use Log;
use App\lib\TextLanguageDetect\TextLanguageDetect;
use App\lib\TextLanguageDetect\LanguageDetect\TextLanguageDetectException;
use Illuminate\Pagination\LengthAwarePaginator;
use Illuminate\Support\Collection;
#use \Illuminate\Pagination\Paginator;
class MetaGer
{
# Search settings
protected $fokus;
protected $eingabe;
protected $q;
protected $category;
protected $time;
protected $page;
protected $lang;
protected $cache = "";
protected $site;
protected $hostBlacklist = [];
protected $domainBlacklist = [];
protected $stopWords = [];
protected $engines = [];
protected $results = [];
protected $warnings = [];

Dominik Hebeler
committed
protected $errors = [];
# Data about the request
protected $ip;
protected $language;
protected $agent;
# Configuration settings:
protected $sumaFile;
protected $mobile;
protected $resultCount;
protected $sprueche;

Dominik Hebeler
committed
protected $domainsBlacklisted = [];
protected $urlsBlacklisted = [];
protected $url;
/**
 * Defines the CRLF and BUFFER_LENGTH constants, loads the domain and URL
 * blacklists from the config directory and creates the language detector.
 *
 * When either blacklist file is missing, both lists keep their empty default
 * and a warning is written to the log.
 *
 * NOTE(review): the closing brace of the constructor was missing in the
 * reviewed text and is restored here; confirm that no further statements
 * belong after setNameMode().
 */
public function __construct()
{
    # define() raises a warning when the constant already exists, so only
    # define the constants if no earlier instance has done so.
    if (!defined('CRLF')) {
        define('CRLF', "\r\n");
    }
    if (!defined('BUFFER_LENGTH')) {
        define('BUFFER_LENGTH', 8192);
    }

    $domainFile = config_path() . "/blacklistDomains.txt";
    $urlFile = config_path() . "/blacklistUrl.txt";

    if (file_exists($domainFile) && file_exists($urlFile)) {
        # Read the blacklists, one entry per line
        $this->domainsBlacklisted = explode("\n", file_get_contents($domainFile));
        $this->urlsBlacklisted = explode("\n", file_get_contents($urlFile));
    } else {
        Log::warning("Achtung: Eine, oder mehrere Blacklist Dateien, konnten nicht geöffnet werden");
    }

    $this->languageDetect = new TextLanguageDetect();
    $this->languageDetect->setNameMode("2");
}
/**
 * Lets every search engine in $this->engines rank its results.
 *
 * Each engine's rank() method is called with this MetaGer instance.
 */
public function rankAll ()
{
    $engineList = $this->engines;
    foreach ($engineList as $searchEngine) {
        $searchEngine->rank($this);
    }
}
/**
 * Builds the view that renders the search results.
 *
 * Every result object is flattened to an array with get_object_vars(). For
 * the focus "bilder" the view is chosen by $this->out: 'results' renders
 * 'metager3bilderresults', anything else renders 'metager3bilder'.
 *
 * NOTE(review): the text after the image branch appears to be incomplete.
 * The ->with() chains below start without a view() call, the enclosing
 * switch header is not visible and the method has no closing brace.
 * Presumably this was a second switch on $this->out for the non-image
 * focuses; restore it from version control before relying on this method.
 */
public function createView()
{
$viewResults = [];
# Extract all required variables and hand them to the view:
foreach($this->results as $result)
{
$viewResults[] = get_object_vars($result);
}
# Image search uses its own pair of templates.
if( $this->fokus === "bilder" )
{
switch ($this->out)
{
case 'results':

Dominik Hebeler
committed
return view('metager3bilderresults')
->with('results', $viewResults)
->with('eingabe', $this->eingabe)
->with('mobile', $this->mobile)
->with('warnings', $this->warnings)
->with('errors', $this->errors)
->with('metager', $this);
default:
return view('metager3bilder')
->with('results', $viewResults)
->with('eingabe', $this->eingabe)
->with('mobile', $this->mobile)
->with('warnings', $this->warnings)
->with('errors', $this->errors)
->with('metager', $this);
}
}
# NOTE(review): the statement this chain belongs to is missing.
->with('results', $viewResults)
->with('eingabe', $this->eingabe)
->with('mobile', $this->mobile)
->with('warnings', $this->warnings)
->with('errors', $this->errors)
->with('metager', $this);
break;
# NOTE(review): the statement this chain belongs to is missing as well.
->with('eingabe', $this->eingabe)
->with('mobile', $this->mobile)
->with('warnings', $this->warnings)
->with('errors', $this->errors)
->with('metager', $this)
->with('suspendheader', "yes");
break;
default:
return view('metager3')
->with('eingabe', $this->eingabe)
->with('mobile', $this->mobile)
->with('warnings', $this->warnings)
->with('errors', $this->errors)
->with('metager', $this);
/**
 * Runs the validity check on every collected result.
 *
 * The results that pass isValid() are only gathered in a local list; the
 * write-back to $this->results is commented out, so the stored result list
 * itself is left untouched by this method.
 */
public function removeInvalids ()
{
    $validResults = [];
    foreach ($this->results as $candidate) {
        if (!$candidate->isValid($this)) {
            continue;
        }
        $validResults[] = $candidate;
    }
    #$this->results = $validResults;
}
/**
 * Merges the results and ads of all engines into this instance and prepares
 * them for display.
 *
 * Steps visible in this method:
 *  - collect every engine result whose "valid" flag is set, and all ads
 *  - sort the results by getRank(), highest rank first
 *  - keep only results for which isValid($this) is true
 *  - number the results and assign a colour based on rank relative to the
 *    first result's rank
 *  - wrap the results in a LengthAwarePaginator with $this->resultCount
 *    items per page
 *  - clear the ads when $this->password equals
 *    md5($this->eingabe . getenv('mainz'))
 *
 * NOTE(review): the closing braces of the outer engine loop are missing
 * after the ads loop and stray line-number text follows it; the source
 * appears to have lost lines there. Restore from version control.
 */
public function combineResults ()
{
# Collect the valid results and all ads of every engine.
foreach($this->engines as $engine)
{
foreach($engine->results as $result)
{
if($result->valid)
$this->results[] = $result;
}
foreach($engine->ads as $ad)
{
$this->ads[] = $ad;
}
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# Sort descending by rank: a lower-ranked $a is moved behind $b.
uasort($this->results, function($a, $b){
if($a->getRank() == $b->getRank())
return 0;
return ($a->getRank() < $b->getRank()) ? 1 : -1;
});
# Validate Results
$newResults = [];
foreach($this->results as $result)
{
if($result->isValid($this))
$newResults[] = $result;
}
$this->results = $newResults;
# Number the results and colour them by their rank relative to the best
# (first) result.
$counter = 0;
$firstRank = 0;
foreach($this->results as $result)
{
if($counter === 0)
$firstRank = $result->rank;
$counter++;
$result->number = $counter;
$confidence = 0;
# Guard against a division by zero when the first rank is not positive.
if($firstRank > 0)
$confidence = $result->rank/$firstRank;
else
$confidence = 0;
if($confidence > 0.65)
$result->color = "#FF4000";
elseif($confidence > 0.4)
$result->color = "#FF0080";
elseif($confidence > 0.2)
$result->color = "#C000C0";
else
$result->color = "#000000";
}
//Get current page from url e.g. &page=6
$currentPage = LengthAwarePaginator::resolveCurrentPage();
$offset= $currentPage-1;
//Create a new Laravel collection from the array data
$collection = new Collection($this->results);
//Define how many items we want to be visible in each page
$perPage = $this->resultCount;
//Slice the collection to get the items to display in current page
$currentPageSearchResults = $collection->slice($offset * $perPage, $perPage)->all();
//Create our paginator and pass it to the view
$paginatedSearchResults= new LengthAwarePaginator($currentPageSearchResults, count($collection), $perPage);
$paginatedSearchResults->setPath('/meta/meta.ger3');
# NOTE(review): the unconditional "continue" makes addQuery() unreachable,
# so no request parameter is appended to the pagination links. Presumably a
# condition in front of it was lost; verify against version control.
foreach($this->request->all() as $key => $value)
{
continue;
$paginatedSearchResults->addQuery($key, $value);
}
$this->results = $paginatedSearchResults;
if( isset($this->password) )
{
# We offer paid API access for which the ads are hidden accordingly:
# Currently this is only the University of Mainz, so only that one is checked.
$password = getenv('mainz');
$eingabe = $this->eingabe;
$password = md5($eingabe . $password);
if( $this->password === $password )
{
$this->ads = [];
}
}
}
/**
 * Selects the search engines for this request, instantiates their parsers
 * and polls them for results.
 *
 * The engine definitions are read from the XML file in $this->sumaFile. For
 * the focus "angepasst" the engines are picked by request parameter
 * ($suma["service"]); for every other focus they are picked by the "type"
 * attribute of the engine. Engines marked disabled="1" are skipped. When no
 * regular engine remains, an error message is added to $this->errors.
 *
 * Afterwards the engines are polled in 50 ms steps: during the first 500 ms
 * the loop ends only when all engines have loaded; after that, and until
 * $this->time is reached, it ends as soon as at least 80% have loaded.
 * Engines that did not load are shut down.
 *
 * NOTE(review): several lines of this method appear to be missing (the ends
 * of two "if(" conditions and a number of closing braces) and stray
 * line-number text is embedded; restore from version control before editing.
 *
 * @param Request $request Current request, used to look up the engines
 *                         selected for the focus "angepasst".
 */
public function createSearchEngines (Request $request)
{
#die(SocketRocket::get("tls", "dominik-pfennig.de", "", 443));
# Check which search engines are switched on
$xml = simplexml_load_file($this->sumaFile);
$enabledSearchengines = [];
$overtureEnabled = FALSE;

Dominik Hebeler
committed
# Counts the enabled engines other than the ad providers.
$countSumas = 0;
if($this->fokus === "angepasst")
{
$sumas = $xml->xpath("suma");
/**$maxSumas = 30;
$count = 0;
foreach($sumas as $suma)
{
if($maxSumas === $count)
break;
$enabledSearchengines[] = $suma;
$count++;
}**/
foreach($sumas as $suma)
{
# NOTE(review): this condition is not closed in the visible text.
if($request->has($suma["service"])
#|| ( $this->fokus !== "bilder"
# && ($suma["name"]->__toString() === "qualigo"
# || $suma["name"]->__toString() === "similar_product_ads"
# || ( !$overtureEnabled && $suma["name"]->__toString() === "overtureAds" )
# )
# )
#|| 1 === 1 #Todo: remove

Dominik Hebeler
committed
if(!(isset($suma['disabled']) && $suma['disabled']->__toString() === "1"))
{
if($suma["name"]->__toString() === "overture")
{
$overtureEnabled = TRUE;
}

Dominik Hebeler
committed
if( $suma["name"]->__toString() !== "qualigo" && $suma["name"]->__toString() !== "similar_product_ads" && $suma["name"]->__toString() !== "overtureAds" )
$countSumas += 1;

Dominik Hebeler
committed
$enabledSearchengines[] = $suma;
}
}
}
}else{
$sumas = $xml->xpath("suma");
foreach($sumas as $suma){
$types = explode(",",$suma["type"]);
# An engine is used when its type list contains the focus; outside the image
# focus the ad providers are considered as well.
# NOTE(review): this condition is not closed in the visible text.
if(in_array($this->fokus, $types)
|| ( $this->fokus !== "bilder"
&& ($suma["name"]->__toString() === "qualigo"
|| $suma["name"]->__toString() === "similar_product_ads"
|| ( !$overtureEnabled && $suma["name"]->__toString() === "overtureAds" )
)
)

Dominik Hebeler
committed
if(!(isset($suma['disabled']) && $suma['disabled']->__toString() === "1"))
{
if($suma["name"]->__toString() === "overture")
{
$overtureEnabled = TRUE;
}

Dominik Hebeler
committed
if( $suma["name"]->__toString() !== "qualigo" && $suma["name"]->__toString() !== "similar_product_ads" && $suma["name"]->__toString() !== "overtureAds" )
$countSumas += 1;

Dominik Hebeler
committed
$enabledSearchengines[] = $suma;
}
if( $countSumas <= 0 )

Dominik Hebeler
committed
{
$this->errors[] = "Achtung: Sie haben in ihren Einstellungen keine Suchmaschine ausgewählt.";
}
$engines = [];
foreach($enabledSearchengines as $engine){
# During a site search, skip engines that do not declare hasSiteSearch="1".
if(strlen($this->site) > 0 && (!isset($engine["hasSiteSearch"]) || $engine["hasSiteSearch"]->__toString() !== "1"))
{
continue;
}
# If this search engine is not supposed to be switched on at all
$path = "App\Models\parserSkripte\\" . ucfirst($engine["package"]->__toString());
$tmp = new $path($engine, $this);
# NOTE(review): $time is read here before it is assigned further down.
if($tmp->enabled && isset($this->debug))
{
$this->warnings[] = $tmp->service . " Connection_Time: " . $tmp->connection_time . " Write_Time: " . $tmp->write_time . " Insgesamt:" . ((microtime()-$time)/1000);
}
if($tmp->isEnabled())

Dominik Hebeler
committed
{
$engines[] = $tmp;
$this->sockets[$tmp->name] = $tmp->fp;

Dominik Hebeler
committed
}
# Now comes an essential step.
# We wait for the answers of the search engines, because we cannot continue before that,
# but of course not forever.
# The connection is established at this point and our request has already been sent.
# We now give the search engine up to 500ms to answer.
# Now we read everything that is available and discard the rest:
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
$enginesToLoad = count($engines);
$loadedEngines = 0;
# Elapsed waiting time, advanced by 50 per 50000 microsecond sleep below.
$time = 0;
while( true )
{
# Termination condition
if($time < 500)
{
if($loadedEngines >= $enginesToLoad)
break;
}elseif( $time >= 500 && $time < $this->time)
{
if( ($loadedEngines / ($enginesToLoad * 1.0)) >= 0.8 )
break;
}else
{
break;
}
foreach($engines as $engine)
{
if(!$engine->loaded)
{
$success = $engine->retrieveResults();
if($engine->loaded)
$loadedEngines += 1;
}
}
usleep(50000);
$time += 50;
}
# Shut down every engine that has not finished loading.
foreach( $engines as $engine )

Dominik Hebeler
committed
{
if( !$engine->loaded )
$engine->shutdown();

Dominik Hebeler
committed
}
$this->engines = $engines;
}
/**
 * Reads all search settings from the request and stores them on this
 * instance.
 *
 * Besides copying the plain parameters (focus, query, category, time, page,
 * lang, result count, tab, password, quicktips, output format) this method
 *  - converts legacy ISO-8859-1 requests to UTF-8,
 *  - picks the engine definition file by locale and aborts when it is missing,
 *  - adds warnings for an empty query and for slow engines (dart, ebay),
 *  - raises the maximum search time for dart, ebay and onenewspage,
 *  - drops the "fportal"/"harvest" parameters when "minism" is set.
 *
 * Changes compared with the previous text of this method: the leftover
 * debugging statement that terminated every request from a mobile device
 * (die("test")) is removed, and the unbalanced brace after the search time
 * bounds check is repaired.
 *
 * @param Request $request Incoming search request; it may be modified via
 *                         replace() and is stored in $this->request.
 */
public function parseFormData (Request $request)
{
    # In earlier versions, before the encoding parameter existed, the data
    # was transferred as ISO-8859-1, so anything not flagged utf8 is converted.
    if ($request->input('encoding', '') !== "utf8") {
        $input = $request->all();
        foreach ($input as $key => $value) {
            $input[$key] = mb_convert_encoding("$value", "UTF-8", "ISO-8859-1");
        }
        $request->replace($input);
    }

    $this->url = $request->url();

    # First the submitted settings are checked:
    # Focus. A result that still contains a "." is treated as an unknown
    # focus (presumably the untranslated key) and replaced by the web focus.
    $this->fokus = trans('fokiNames.' . $request->input('focus', 'web'));
    if (strpos($this->fokus, ".")) {
        $this->fokus = trans('fokiNames.web');
    }

    # Engine definition file, depending on the locale
    if (App::isLocale("en")) {
        $this->sumaFile = config_path() . "/sumasEn.xml";
    } else {
        $this->sumaFile = config_path() . "/sumas.xml";
    }
    if (!file_exists($this->sumaFile)) {
        die("Suma-File konnte nicht gefunden werden");
    }

    # Search input
    $this->eingabe = trim($request->input('eingabe', ''));
    if (strlen($this->eingabe) === 0) {
        $this->warnings[] = 'Achtung: Sie haben keinen Suchbegriff eingegeben. Sie können ihre Suchbegriffe oben eingeben und es erneut versuchen.';
    }
    $this->q = $this->eingabe;

    # IP and language are taken from the From / Language request headers
    $this->ip = isset($_SERVER['HTTP_FROM']) ? $_SERVER['HTTP_FROM'] : "127.0.0.1";
    $this->language = isset($_SERVER['HTTP_LANGUAGE']) ? $_SERVER['HTTP_LANGUAGE'] : "";

    # Category, maximum search time, page
    $this->category = $request->input('category', '');
    $this->time = $request->input('time', 1000);
    $this->page = $request->input('page', 1);

    # Result language: only "de", "en" and "all" are accepted
    $this->lang = $request->input('lang', 'all');
    if (!in_array($this->lang, ["de", "en", "all"], true)) {
        $this->lang = "all";
    }

    $this->agent = new Agent();
    $this->mobile = $this->agent->isMobile();

    # The parameter value "off" (also the default) is stored as true,
    # every other value as false.
    $this->sprueche = ($request->input('sprueche', 'off') === "off");

    # Results per page
    $this->resultCount = $request->input('resultCount', '20');

    # Sometimes parameters have to be adjusted to fit the search settings:
    if ($request->has('dart')) {
        $this->time = 10000;
        $this->warnings[] = "Hinweis: Sie haben Dart-Europe aktiviert. Die Suche kann deshalb länger dauern und die maximale Suchzeit wurde auf 10 Sekunden hochgesetzt.";
    }
    if ($this->time <= 500 || $this->time > 20000) {
        $this->time = 1000;
    }

    # With "minism" set, "fportal" and "harvest" are removed from the request.
    if ($request->has('minism') && ($request->has('fportal') || $request->has('harvest'))) {
        $request->replace(array_diff_key($request->all(), array_flip(['fportal', 'harvest'])));
    }

    if ($request->has('ebay')) {
        $this->time = 2000;
        $this->warnings[] = "Hinweis: Sie haben Ebay aktiviert. Die Suche kann deshalb länger dauern und die maximale Suchzeit wurde auf 2 Sekunden hochgesetzt.";
    }

    if (App::isLocale("en")) {
        $this->sprueche = "off";
    }

    if ($this->resultCount <= 0 || $this->resultCount > 200) {
        $this->resultCount = 1000;
    }

    if ($request->has('onenewspageAll') || $request->has('onenewspageGermanyAll')) {
        $this->time = 5000;
        $this->cache = "cache";
    }

    # Link target: "_self" only when "tab" is present with a value other than "1"
    if ($request->has('tab') && $request->input('tab') !== "1") {
        $this->tab = "_self";
    } else {
        $this->tab = "_blank";
    }

    if ($request->has('password')) {
        $this->password = $request->input('password');
    }

    # The mere presence of the "quicktips" parameter switches quicktips off.
    $this->quicktips = !$request->has('quicktips');

    # Output format, falling back to "html" for unknown values
    $this->out = $request->input('out', "html");
    if (!in_array($this->out, ["html", "json", "results", "results-with-style"], true)) {
        $this->out = "html";
    }

    $this->request = $request;
}
/**
 * Detects special search operators in the query and records them.
 *
 * Handled operators:
 *  - "site:<host>" in the query or a "site" request parameter: site search
 *  - "-host:<host>": hosts to exclude, stored in $this->hostBlacklist
 *  - "-domain:<domain>": domains to exclude, stored in $this->domainBlacklist
 *  - "-<word>": words to exclude, stored in $this->stopWords
 *  - a quoted phrase: only reported as a phrase search
 *
 * Every detected operator is removed from $this->q (except the phrase) and
 * a message is appended to $this->warnings. The lists in the messages are
 * joined with implode(), so characters belonging to the last term are no
 * longer stripped as they were by the former rtrim() on ", ".
 *
 * @param Request $request Current request, checked for the "site" parameter.
 */
public function checkSpecialSearches (Request $request)
{
    # Site search:
    if (preg_match("/(.*)\bsite:(\S+)(.*)/si", $this->q, $match)) {
        $this->site = $match[2];
        $this->q = $match[1] . $match[3];
        $this->warnings[] = "Sie führen eine Sitesearch durch. Es werden nur Ergebnisse von der Seite: \"" . $this->site . "\" angezeigt.";
    }
    if ($request->has('site')) {
        $this->site = $request->input('site');
        $this->warnings[] = "Sie führen eine Sitesearch durch. Es werden nur Ergebnisse von der Seite: \"" . $this->site . "\" angezeigt.";
    }

    # If the query contains the keyword "-host:*", those hosts are not to be shown.
    $this->hostBlacklist = array_merge(
        $this->hostBlacklist,
        $this->extractPrefixedTerms("/(.*)(^|\s)-host:(\S+)(.*)/si")
    );
    if (sizeof($this->hostBlacklist) > 0) {
        $this->warnings[] = "Ergebnisse von folgenden Hosts werden nicht angezeigt: \"" . implode(", ", $this->hostBlacklist) . "\"";
    }

    # If the query contains the keyword "-domain:*", those domains are not to be shown.
    $this->domainBlacklist = array_merge(
        $this->domainBlacklist,
        $this->extractPrefixedTerms("/(.*)(^|\s)-domain:(\S+)(.*)/si")
    );
    if (sizeof($this->domainBlacklist) > 0) {
        $this->warnings[] = "Ergebnisse von folgenden Domains werden nicht angezeigt: \"" . implode(", ", $this->domainBlacklist) . "\"";
    }

    # All words prefixed with "-" are to be excluded from the search.
    $this->stopWords = array_merge(
        $this->stopWords,
        $this->extractPrefixedTerms("/(.*)(^|\s)-(\S+)(.*)/si")
    );
    if (sizeof($this->stopWords) > 0) {
        $this->warnings[] = "Sie machen eine Ausschlusssuche. Ergebnisse mit folgenden Wörtern werden nicht angezeigt: \"" . implode(", ", $this->stopWords) . "\"";
    }

    # Message about a phrase search
    if (preg_match("/\"(.+)\"/si", $this->q, $match)) {
        $this->warnings[] = "Sie führen eine Phrasensuche durch: \"" . $match[1] . "\"";
    }
}

/**
 * Repeatedly removes the operator matched by $pattern from $this->q and
 * returns the captured terms in the order they were removed.
 *
 * @param string $pattern Regular expression with the text before the
 *                        operator in group 1, the term in group 3 and the
 *                        text after it in group 4.
 * @return array Captured terms; empty when the pattern never matched.
 */
private function extractPrefixedTerms($pattern)
{
    $terms = [];
    while (preg_match($pattern, $this->q, $match)) {
        $terms[] = $match[3];
        $this->q = $match[1] . $match[4];
    }
    return $terms;
}

Dominik Hebeler
committed
/**
 * Returns the search focus stored in $this->fokus.
 *
 * parseFormData() fills it with the translated 'fokiNames.*' entry for the
 * requested focus.
 */
public function getFokus ()
{
    return $this->fokus;
}
/**
 * Returns the client address stored in $this->ip.
 *
 * parseFormData() takes it from $_SERVER['HTTP_FROM'] and falls back to
 * "127.0.0.1" when that entry is not set.
 */
public function getIp ()
{
    return $this->ip;
}
/**
 * Returns the trimmed search input as entered by the user.
 *
 * Unlike getQ(), this is the query before special operators are removed.
 */
public function getEingabe ()
{
    return $this->eingabe;
}
/**
 * Returns the query to send to the search engines.
 *
 * When a site search is active, " site:<site>" is appended to the cleaned
 * query; otherwise the cleaned query is returned unchanged.
 *
 * @return string
 */
public function getQ ()
{
    # $site has no default value and is only assigned when a site search is
    # detected; cast it because passing null to strlen() is deprecated as of
    # PHP 8.1.
    if (strlen((string) $this->site) > 0) {
        return $this->q . " site:" . $this->site;
    }
    return $this->q;
}

Dominik Hebeler
committed
/**
 * Returns the request URL that parseFormData() stored from $request->url().
 */
public function getUrl ()
{
    return $this->url;
}
/**
 * Returns the maximum search time stored in $this->time.
 *
 * parseFormData() sets it from the "time" parameter (default 1000) and
 * overrides it for some engines.
 */
public function getTime ()
{
    return $this->time;
}
/**
 * Returns the language stored in $this->language.
 *
 * parseFormData() takes it from $_SERVER['HTTP_LANGUAGE'] and uses an empty
 * string when that entry is not set.
 */
public function getLanguage ()
{
    return $this->language;
}
/**
 * Returns the requested result language: "de", "en" or "all".
 */
public function getLang ()
{
    return $this->lang;
}
/**
 * Returns the "sprueche" setting stored in $this->sprueche.
 *
 * parseFormData() stores a boolean, but replaces it with the string "off"
 * for the English locale.
 */
public function getSprueche ()
{
    return $this->sprueche;
}

Dominik Hebeler
committed
/**
 * Returns the "category" request parameter (empty string when absent).
 */
public function getCategory ()
{
    return $this->category;
}
/**
 * Returns the path of the engine definition XML file chosen by
 * parseFormData() for the current locale.
 */
public function getSumaFile ()
{
    return $this->sumaFile;
}
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
/**
 * Returns the hosts the user excluded with "-host:" in the query.
 *
 * @return array
 */
public function getUserHostBlacklist ()
{
    return $this->hostBlacklist;
}
/**
 * Returns the domains the user excluded with "-domain:" in the query.
 *
 * @return array
 */
public function getUserDomainBlacklist ()
{
    return $this->domainBlacklist;
}
/**
 * Returns the domain blacklist loaded from blacklistDomains.txt by the
 * constructor (empty when the file could not be read).
 *
 * @return array
 */
public function getDomainBlacklist ()
{
    return $this->domainsBlacklisted;
}
/**
 * Returns the URL blacklist loaded from blacklistUrl.txt by the constructor
 * (empty when the file could not be read).
 *
 * @return array
 */
public function getUrlBlacklist ()
{
    return $this->urlsBlacklisted;
}
/**
 * Returns the TextLanguageDetect instance created in the constructor.
 */
public function getLanguageDetect ()
{
    return $this->languageDetect;
}
/**
 * Returns the words the user excluded by prefixing them with "-".
 *
 * @return array
 */
public function getStopWords ()
{
    return $this->stopWords;
}

Dominik Hebeler
committed
/**
 * Returns how often a host has been registered through addHostCount().
 *
 * addHostCount() keys its counters by md5($host), so the lookup hashes the
 * host the same way. Looking up the raw host name, as this method did
 * before, never finds a counter and always yields 0.
 *
 * @param string $host Host name as passed to addHostCount().
 * @return int Number of registrations, 0 when the host is unknown.
 */
public function getHostCount($host)
{
    $hash = md5($host);
    if (isset($this->addedHosts[$hash])) {
        return $this->addedHosts[$hash];
    }
    return 0;
}

Dominik Hebeler
committed
/**
 * Increments the counter kept for a host.
 *
 * Counters live in $this->addedHosts under the md5 hash of the host; a host
 * seen for the first time starts at 1.
 *
 * @param string $host Host name to count.
 */
public function addHostCount($host)
{
    $key = md5($host);
    if (!isset($this->addedHosts[$key])) {
        $this->addedHosts[$key] = 1;
        return;
    }
    $this->addedHosts[$key] += 1;
}
/**
 * Returns the site a site search is restricted to, as set by
 * checkSpecialSearches().
 */
public function getSite()
{
    return $this->site;
}

Dominik Hebeler
committed
/**
 * Registers a link and reports whether it was new.
 *
 * Links are remembered in $this->addedLinks under their md5 hash.
 *
 * @param string $link Link to register.
 * @return bool true when the link had not been registered before,
 *              false when it was already known.
 */
public function addLink($link)
{
    $key = md5($link);
    $isNew = !isset($this->addedLinks[$key]);
    if ($isNew) {
        $this->addedLinks[$key] = 1;
    }
    return $isNew;
}

Dominik Hebeler
committed
/**
 * Builds a link that repeats the current search with another focus.
 *
 * All current request parameters except "page" are kept; "focus" is set to
 * $fokus and "out" to "results".
 *
 * NOTE(review): the opening brace of this method was missing in the
 * reviewed text and is restored here.
 *
 * @param string $fokus Focus to use for the new search.
 * @return string URL of the MetaGerSearch@search action.
 */
public function generateSearchLink($fokus)
{
    $requestData = $this->request->except('page');
    $requestData['focus'] = $fokus;
    $requestData['out'] = "results";
    $link = action('MetaGerSearch@search', $requestData);
    return $link;
}
/**
 * Returns the URL of the MetaGerSearch@quicktips action.
 */
public function generateQuicktipLink()
{
    return action('MetaGerSearch@quicktips');
}

Dominik Hebeler
committed
/**
 * Builds a link that repeats the current search restricted to one host.
 *
 * All current request parameters except "page" and "out" are kept,
 * " site:<host>" is appended to the search input and the focus is set to
 * "web".
 *
 * NOTE(review): the braces and the return statement were missing in the
 * reviewed text and are restored here by analogy with
 * generateRemovedDomainLink(); confirm against version control that no
 * further statement (e.g. an encoding step for $host) was lost.
 *
 * @param string $host Host to restrict the search to.
 * @return string URL of the MetaGerSearch@search action.
 */
public function generateSiteSearchLink($host)
{
    $requestData = $this->request->except(['page','out']);
    $requestData['eingabe'] .= " site:$host";
    $requestData['focus'] = "web";
    $link = action('MetaGerSearch@search', $requestData);
    return $link;
}

Dominik Hebeler
committed
/**
 * Builds a link that repeats the current search without results from a host.
 *
 * All current request parameters except "page" and "out" are kept and
 * " -host:<host>" is appended to the search input.
 *
 * NOTE(review): the braces and the return statement were missing in the
 * reviewed text and are restored here by analogy with
 * generateRemovedDomainLink(); confirm against version control that no
 * further statement (e.g. an encoding step for $host) was lost.
 *
 * @param string $host Host to exclude from the results.
 * @return string URL of the MetaGerSearch@search action.
 */
public function generateRemovedHostLink ($host)
{
    $requestData = $this->request->except(['page','out']);
    $requestData['eingabe'] .= " -host:$host";
    $link = action('MetaGerSearch@search', $requestData);
    return $link;
}

Dominik Hebeler
committed
/**
 * Builds a link that repeats the current search without results from a
 * domain.
 *
 * All current request parameters except "page" and "out" are kept and
 * " -domain:<url-encoded domain>" is appended to the search input.
 *
 * @param string $domain Domain to exclude from the results.
 * @return string URL of the MetaGerSearch@search action.
 */
public function generateRemovedDomainLink ($domain)
{
    $params = $this->request->except(['page','out']);
    $params['eingabe'] .= " -domain:" . urlencode($domain);
    return action('MetaGerSearch@search', $params);
}
/**
 * Returns the link target chosen by parseFormData(): "_blank" or "_self".
 */
public function getTab ()
{
    return $this->tab;
}
/**
 * Returns the current results.
 *
 * This is a plain array by default; combineResults() replaces it with a
 * LengthAwarePaginator.
 */
public function getResults ()
{
    return $this->results;
}
/**
 * Removes the first ad from the ad list and returns its properties.
 *
 * $this->ads is only created once combineResults() adds an ad or clears the
 * list, so it may not exist yet. empty() covers that case without the
 * error count() raises for a non-countable value.
 *
 * @return array|null Properties of the ad (get_object_vars), or null when
 *                    no ad is available.
 */
public function popAd()
{
    if (empty($this->ads)) {
        return null;
    }
    return get_object_vars(array_shift($this->ads));
}
/**
 * Builds the picture proxy URL for an image link.
 *
 * @param string $link Image URL, passed to the Pictureproxy@get action as
 *                     its "url" parameter.
 * @return string URL of the Pictureproxy@get action.
 */
public function getImageProxyLink($link)
{
    return action('Pictureproxy@get', ["url" => $link]);
}
/**
 * Tells whether quicktips are to be shown.
 *
 * parseFormData() stores false when the request carries a "quicktips"
 * parameter and true otherwise.
 */
public function showQuicktips ()
{
    return $this->quicktips;
}