<?php
namespace App;
use Illuminate\Http\Request;
use Jenssegers\Agent\Agent;
use App;

use Storage;
use Log;
use App\lib\TextLanguageDetect\TextLanguageDetect;
use App\lib\TextLanguageDetect\LanguageDetect\TextLanguageDetectException;
use Illuminate\Pagination\LengthAwarePaginator;
use Illuminate\Support\Collection;
#use \Illuminate\Pagination\Paginator;
class MetaGer
{
# Search settings
protected $fokus;
protected $eingabe;
protected $q;
protected $category;
protected $time;
protected $page;
protected $lang;
protected $cache = "";
protected $site;
# Hosts / domains / words the user excluded via "-host:", "-domain:" and "-word" query tokens
protected $hostBlacklist = [];
protected $domainBlacklist = [];
protected $stopWords = [];
protected $engines = [];
protected $results = [];
# Messages collected while processing the request
protected $warnings = [];
protected $errors = [];
# Data about the request
protected $ip;
protected $language;
protected $agent;
# Configuration settings:
protected $sumaFile;
protected $mobile;
protected $resultCount;
protected $sprueche;
# Blacklists read from the config directory in the constructor
protected $domainsBlacklisted = [];
protected $urlsBlacklisted = [];
protected $url;
/**
 * Defines the socket helper constants, loads the domain and URL blacklists
 * from the config directory and creates the language detector.
 *
 * If either blacklist file is missing, neither is loaded and a warning is logged.
 */
function __construct()
{
    # define() emits a warning when the constant already exists, which would
    # happen as soon as a second instance is created in the same process.
    if (!defined('CRLF')) {
        define('CRLF', "\r\n");
    }
    if (!defined('BUFFER_LENGTH')) {
        define('BUFFER_LENGTH', 8192);
    }

    $domainListFile = config_path() . "/blacklistDomains.txt";
    $urlListFile = config_path() . "/blacklistUrl.txt";

    if (file_exists($domainListFile) && file_exists($urlListFile)) {
        # Read the blacklists, one entry per line:
        $this->domainsBlacklisted = explode("\n", file_get_contents($domainListFile));
        $this->urlsBlacklisted = explode("\n", file_get_contents($urlListFile));
    } else {
        Log::warning("Achtung: Eine, oder mehrere Blacklist Dateien, konnten nicht geöffnet werden");
    }

    $this->languageDetect = new TextLanguageDetect();
    $this->languageDetect->setNameMode("2");
}
/**
 * Asks every registered engine to rank itself, handing it this instance.
 */
public function rankAll ()
{
    $engineList = $this->engines;
    foreach ($engineList as $searchEngine) {
        $searchEngine->rank($this);
    }
}
public function createView()
{
$viewResults = [];
# Wir extrahieren alle notwendigen Variablen und geben Sie an unseren View:
foreach($this->results as $result)
{
$viewResults[] = get_object_vars($result);
}
if( $this->fokus === "bilder" )
{
switch ($this->out)
{
case 'results':

Dominik Hebeler
committed
return view('metager3bilderresults')
->with('results', $viewResults)
->with('eingabe', $this->eingabe)
->with('mobile', $this->mobile)
->with('warnings', $this->warnings)
->with('errors', $this->errors)
->with('metager', $this);
default:
return view('metager3bilder')
->with('results', $viewResults)
->with('eingabe', $this->eingabe)
->with('mobile', $this->mobile)
->with('warnings', $this->warnings)
->with('errors', $this->errors)
->with('metager', $this);
}
}
->with('results', $viewResults)
->with('eingabe', $this->eingabe)
->with('mobile', $this->mobile)
->with('warnings', $this->warnings)
->with('errors', $this->errors)
->with('metager', $this);
break;
default:
return view('metager3')
->with('results', $viewResults)
->with('eingabe', $this->eingabe)
->with('mobile', $this->mobile)
->with('warnings', $this->warnings)
->with('errors', $this->errors)
->with('metager', $this);
/**
 * Calls isValid() on every current result and collects the ones that pass.
 *
 * The collected list is currently NOT written back to $this->results
 * (the assignment at the end is disabled), so only the isValid() calls
 * themselves take effect.
 */
public function removeInvalids ()
{
    $kept = [];
    foreach ($this->results as $candidate) {
        if (!$candidate->isValid($this)) {
            continue;
        }
        $kept[] = $candidate;
    }
    #$this->results = $kept;
}
public function combineResults ()
{
foreach($this->engines as $engine)
{
foreach($engine->results as $result)
{
if($result->valid)
$this->results[] = $result;
}
foreach($engine->ads as $ad)
{
$this->ads[] = $ad;
}
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
uasort($this->results, function($a, $b){
if($a->getRank() == $b->getRank())
return 0;
return ($a->getRank() < $b->getRank()) ? 1 : -1;
});
# Validate Results
$newResults = [];
foreach($this->results as $result)
{
if($result->isValid($this))
$newResults[] = $result;
}
$this->results = $newResults;
$counter = 0;
$firstRank = 0;
foreach($this->results as $result)
{
if($counter === 0)
$firstRank = $result->rank;
$counter++;
$result->number = $counter;
$confidence = 0;
if($firstRank > 0)
$confidence = $result->rank/$firstRank;
else
$confidence = 0;
if($confidence > 0.65)
$result->color = "#FF4000";
elseif($confidence > 0.4)
$result->color = "#FF0080";
elseif($confidence > 0.2)
$result->color = "#C000C0";
else
$result->color = "#000000";
}
//Get current page form url e.g. &page=6
$currentPage = LengthAwarePaginator::resolveCurrentPage();
$offset= $currentPage-1;
//Create a new Laravel collection from the array data
$collection = new Collection($this->results);
//Define how many items we want to be visible in each page
$perPage = $this->resultCount;
//Slice the collection to get the items to display in current page
$currentPageSearchResults = $collection->slice($offset * $perPage, $perPage)->all();
//Create our paginator and pass it to the view
$paginatedSearchResults= new LengthAwarePaginator($currentPageSearchResults, count($collection), $perPage);
$paginatedSearchResults->setPath('/meta/meta.ger3');
foreach($this->request->all() as $key => $value)
{
continue;
$paginatedSearchResults->addQuery($key, $value);
}
$this->results = $paginatedSearchResults;
}
public function createSearchEngines (Request $request)
{
#die(SocketRocket::get("tls", "dominik-pfennig.de", "", 443));
# Überprüfe, welche Sumas eingeschaltet sind
$xml = simplexml_load_file($this->sumaFile);
$enabledSearchengines = [];
$overtureEnabled = FALSE;

Dominik Hebeler
committed
$countSumas = 0;
if($this->fokus === "angepasst")
{
$sumas = $xml->xpath("suma");
/**$maxSumas = 30;
$count = 0;
foreach($sumas as $suma)
{
if($maxSumas === $count)
break;
$enabledSearchengines[] = $suma;
$count++;
}**/
foreach($sumas as $suma)
{
if($request->has($suma["service"])
#|| ( $this->fokus !== "bilder"
# && ($suma["name"]->__toString() === "qualigo"
# || $suma["name"]->__toString() === "similar_product_ads"
# || ( !$overtureEnabled && $suma["name"]->__toString() === "overtureAds" )
# )
# )
#|| 1 === 1 #Todo: entfernen

Dominik Hebeler
committed
if(!(isset($suma['disabled']) && $suma['disabled']->__toString() === "1"))
{
if($suma["name"]->__toString() === "overture")
{
$overtureEnabled = TRUE;
}

Dominik Hebeler
committed
if( $suma["name"]->__toString() !== "qualigo" && $suma["name"]->__toString() !== "similar_product_ads" && $suma["name"]->__toString() !== "overtureAds" )
$countSumas += 1;

Dominik Hebeler
committed
$enabledSearchengines[] = $suma;
}
}
}
}else{
$sumas = $xml->xpath("suma");
foreach($sumas as $suma){
$types = explode(",",$suma["type"]);
if(in_array($this->fokus, $types)
|| ( $this->fokus !== "bilder"
&& ($suma["name"]->__toString() === "qualigo"
|| $suma["name"]->__toString() === "similar_product_ads"
|| ( !$overtureEnabled && $suma["name"]->__toString() === "overtureAds" )
)
)

Dominik Hebeler
committed
if(!(isset($suma['disabled']) && $suma['disabled']->__toString() === "1"))
{
if($suma["name"]->__toString() === "overture")
{
$overtureEnabled = TRUE;
}

Dominik Hebeler
committed
if( $suma["name"]->__toString() !== "qualigo" && $suma["name"]->__toString() !== "similar_product_ads" && $suma["name"]->__toString() !== "overtureAds" )
$countSumas += 1;

Dominik Hebeler
committed
$enabledSearchengines[] = $suma;
}
if( $countSumas <= 0 )

Dominik Hebeler
committed
{
$this->errors[] = "Achtung: Sie haben in ihren Einstellungen keine Suchmaschine ausgewählt.";
}
$engines = [];
foreach($enabledSearchengines as $engine){
if(strlen($this->site) > 0 && (!isset($engine["hasSiteSearch"]) || $engine["hasSiteSearch"]->__toString() !== "1"))
{
continue;
}
# Wenn diese Suchmaschine gar nicht eingeschaltet sein soll
$path = "App\Models\parserSkripte\\" . ucfirst($engine["package"]->__toString());
$tmp = new $path($engine, $this);
if($tmp->enabled && isset($this->debug))
{
$this->warnings[] = $tmp->service . " Connection_Time: " . $tmp->connection_time . " Write_Time: " . $tmp->write_time . " Insgesamt:" . ((microtime()-$time)/1000);
}
if($tmp->isEnabled())

Dominik Hebeler
committed
{
$engines[] = $tmp;
$this->sockets[$tmp->name] = $tmp->fp;

Dominik Hebeler
committed
}
# Nun passiert ein elementarer Schritt.
# Wir warten auf die Antwort der Suchmaschinen, da wir vorher nicht weiter machen können.
# aber natürlich nicht ewig.
# Die Verbindung steht zu diesem Zeitpunkt und auch unsere Request wurde schon gesendet.
# Wir geben der Suchmaschine nun bis zu 500ms Zeit zu antworten.
# Jetzt lesen wir alles aus, was da ist und verwerfen den Rest:
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
$enginesToLoad = count($engines);
$loadedEngines = 0;
$time = 0;
while( true )
{
# Abbruchbedingung
if($time < 500)
{
if($loadedEngines >= $enginesToLoad)
break;
}elseif( $time >= 500 && $time < $this->time)
{
if( ($loadedEngines / ($enginesToLoad * 1.0)) >= 0.8 )
break;
}else
{
break;
}
foreach($engines as $engine)
{
if(!$engine->loaded)
{
$success = $engine->retrieveResults();
if($engine->loaded)
$loadedEngines += 1;
}
}
usleep(50000);
$time += 50;
}
foreach( $engines as $engine )

Dominik Hebeler
committed
{
if( !$engine->loaded )
$engine->shutdown();

Dominik Hebeler
committed
}
$this->engines = $engines;
}
public function parseFormData (Request $request)
{
if($request->input('encoding', '') !== "utf8")
{
# In früheren Versionen, als es den Encoding Parameter noch nicht gab, wurden die Daten in ISO-8859-1 übertragen
$input = $request->all();
foreach($input as $key => $value)
{
$input[$key] = mb_convert_encoding("$value", "UTF-8", "ISO-8859-1");
}
$request->replace($input);
}

Dominik Hebeler
committed
$this->url = $request->url();
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
# Zunächst überprüfen wir die eingegebenen Einstellungen:
# FOKUS
$this->fokus = trans('fokiNames.'
. $request->input('focus', 'web'));
if(strpos($this->fokus,"."))
{
$this->fokus = trans('fokiNames.web');
}
# SUMA-FILE
if(App::isLocale("en")){
$this->sumaFile = config_path() . "/sumasEn.xml";
}else{
$this->sumaFile = config_path() . "/sumas.xml";
}
if(!file_exists($this->sumaFile))
{
die("Suma-File konnte nicht gefunden werden");
}
# Sucheingabe:
$this->eingabe = trim($request->input('eingabe', ''));
if(strlen($this->eingabe) === 0)
{
$this->warnings[] = 'Achtung: Sie haben keinen Suchbegriff eingegeben. Sie können ihre Suchbegriffe oben eingeben und es erneut versuchen.';
}
$this->q = $this->eingabe;
# IP:
if( isset($_SERVER['HTTP_FROM']) )
{
$this->ip = $_SERVER['HTTP_FROM'];
}else
{
$this->ip = "127.0.0.1";
}
# Language:
if( isset($_SERVER['HTTP_LANGUAGE']) )
{
$this->language = $_SERVER['HTTP_LANGUAGE'];
}else
{
$this->language = "";
}
# Category
$this->category = $request->input('category', '');
# Request Times:
$this->time = $request->input('time', 1000);
# Page
$this->page = $request->input('page', 1);
# Lang
$this->lang = $request->input('lang', 'all');
if ( $this->lang !== "de" && $this->lang !== "en" && $this->lang !== "all" )
{
$this->lang = "all";
}
$this->agent = new Agent();
$this->mobile = $this->agent->isMobile();
#Sprüche
$this->sprueche = $request->input('sprueche', 'off');
if($this->sprueche === "off" )
$this->sprueche = true;
else
$this->sprueche = false;
# Ergebnisse pro Seite:
$this->resultCount = $request->input('resultCount', '20');
# Manchmal müssen wir Parameter anpassen um den Sucheinstellungen gerecht zu werden:
if( $request->has('dart') )
{
$this->time = 10000;
$this->warnings[] = "Hinweis: Sie haben Dart-Europe aktiviert. Die Suche kann deshalb länger dauern und die maximale Suchzeit wurde auf 10 Sekunden hochgesetzt.";
}
if( $this->time <= 500 || $this->time > 20000 )
$this->time = 1000;
}
if( $request->has('minism') && ( $request->has('fportal') || $request->has('harvest') ) )
{
$input = $request->all();
$newInput = [];
foreach($input as $key => $value)
{
if( $key !== "fportal" && $key !== "harvest" )
{
$newInput[$key] = $value;
}
}
$request->replace($newInput);
}
if( $request->has('ebay') )
{
$this->time = 2000;
$this->warnings[] = "Hinweis: Sie haben Ebay aktiviert. Die Suche kann deshalb länger dauern und die maximale Suchzeit wurde auf 2 Sekunden hochgesetzt.";
}
if( App::isLocale("en") )
{
$this->sprueche = "off";
}
if($this->resultCount <= 0 || $this->resultCount > 200 )
{
$this->resultCount = 1000;
}
if( $request->has('onenewspageAll') || $request->has('onenewspageGermanyAll') )
{
$this->time = 5000;
$this->cache = "cache";
}
if( $request->has('tab'))
{
if($request->input('tab') === "1")
{
$this->tab = "_blank";
}else
{
$this->tab = "_self";
}
}else
{
$this->tab = "_blank";
}
$this->out = $request->input('out', "html");
if($this->out !== "html" && $this->out !== "json" && $this->out !== "results" && $this->out !== "results-with-style")
$this->out = "html";
$this->request = $request;
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
}
/**
 * Extracts special operators from the query string ($this->q) and records
 * a warning for each kind that was found.
 *
 * Handled, in this order:
 *  - "site:<value>"    -> stored in $this->site
 *  - "-host:<value>"   -> appended to $this->hostBlacklist
 *  - "-domain:<value>" -> appended to $this->domainBlacklist
 *  - "-<word>"         -> appended to $this->stopWords
 *  - "\"...\""         -> phrase search notice only, the query is not changed
 *
 * Every matched operator (except the phrase) is cut out of $this->q.
 *
 * @param Request $request Not read by this method.
 */
public function checkSpecialSearches (Request $request)
{
    # Site search:
    if (preg_match("/(.*)\bsite:(\S+)(.*)/si", $this->q, $siteParts) === 1) {
        $this->site = $siteParts[2];
        $this->q = $siteParts[1] . $siteParts[3];
        $this->warnings[] = "Sie führen eine Sitesearch durch. Es werden nur Ergebnisse von der Seite: \"" . $this->site . "\" angezeigt.";
    }

    # "-host:*" tokens: results from these hosts are to be hidden.
    while (preg_match("/(.*)(^|\s)-host:(\S+)(.*)/si", $this->q, $hostParts) === 1) {
        $this->hostBlacklist[] = $hostParts[3];
        $this->q = $hostParts[1] . $hostParts[4];
    }
    if (count($this->hostBlacklist) > 0) {
        $hostList = rtrim(implode(", ", $this->hostBlacklist), ", ");
        $this->warnings[] = "Ergebnisse von folgenden Hosts werden nicht angezeigt: \"" . $hostList . "\"";
    }

    # "-domain:*" tokens: results from these domains are to be hidden.
    while (preg_match("/(.*)(^|\s)-domain:(\S+)(.*)/si", $this->q, $domainParts) === 1) {
        $this->domainBlacklist[] = $domainParts[3];
        $this->q = $domainParts[1] . $domainParts[4];
    }
    if (count($this->domainBlacklist) > 0) {
        $domainList = rtrim(implode(", ", $this->domainBlacklist), ", ");
        $this->warnings[] = "Ergebnisse von folgenden Domains werden nicht angezeigt: \"" . $domainList . "\"";
    }

    # Every remaining word prefixed with "-" is excluded from the search.
    while (preg_match("/(.*)(^|\s)-(\S+)(.*)/si", $this->q, $wordParts) === 1) {
        $this->stopWords[] = $wordParts[3];
        $this->q = $wordParts[1] . $wordParts[4];
    }
    if (count($this->stopWords) > 0) {
        $wordList = rtrim(implode(", ", $this->stopWords), ", ");
        $this->warnings[] = "Sie machen eine Ausschlusssuche. Ergebnisse mit folgenden Wörtern werden nicht angezeigt: \"" . $wordList . "\"";
    }

    # Notice about a phrase search
    if (preg_match("/\"(.+)\"/si", $this->q, $phraseParts) === 1) {
        $this->warnings[] = "Sie führen eine Phrasensuche durch: \"" . $phraseParts[1] . "\"";
    }
}

/**
 * @return mixed The search focus stored by parseFormData().
 */
public function getFokus ()
{
    return $this->fokus;
}

/**
 * @return mixed The client IP stored by parseFormData()
 *               (HTTP_FROM server value, or "127.0.0.1" when absent).
 */
public function getIp ()
{
    return $this->ip;
}

/**
 * @return mixed The trimmed search input as entered by the user.
 */
public function getEingabe ()
{
    return $this->eingabe;
}
/**
 * Returns the processed query string.
 *
 * When a site search is active, " site:<site>" is appended again, because
 * checkSpecialSearches() removed the operator from $this->q.
 *
 * @return string
 */
public function getQ ()
{
    # $site has no default value, so it is null until a "site:" operator was
    # found; cast before strlen() because passing null is deprecated on PHP 8.1+.
    if (strlen((string) $this->site) > 0) {
        return $this->q . " site:" . $this->site;
    }

    return $this->q;
}

/**
 * @return mixed The request URL stored by parseFormData().
 */
public function getUrl ()
{
    return $this->url;
}

/**
 * @return mixed The "time" setting stored by parseFormData().
 */
public function getTime ()
{
    return $this->time;
}

/**
 * @return mixed The HTTP_LANGUAGE server value, or "" when it was not set.
 */
public function getLanguage ()
{
    return $this->language;
}

/**
 * @return mixed The "lang" setting; parseFormData() restricts it to "de", "en" or "all".
 */
public function getLang ()
{
    return $this->lang;
}

/**
 * @return mixed The "sprueche" setting as stored by parseFormData().
 */
public function getSprueche ()
{
    return $this->sprueche;
}

/**
 * @return mixed The "category" request value ("" when not supplied).
 */
public function getCategory ()
{
    return $this->category;
}

/**
 * @return mixed Path of the suma XML file selected by parseFormData().
 */
public function getSumaFile ()
{
    return $this->sumaFile;
}
/**
 * @return array Hosts the user excluded with "-host:" in the query.
 */
public function getUserHostBlacklist ()
{
    return $this->hostBlacklist;
}

/**
 * @return array Domains the user excluded with "-domain:" in the query.
 */
public function getUserDomainBlacklist ()
{
    return $this->domainBlacklist;
}

/**
 * @return array Lines of the blacklistDomains.txt config file (empty when not loaded).
 */
public function getDomainBlacklist ()
{
    return $this->domainsBlacklisted;
}

/**
 * @return array Lines of the blacklistUrl.txt config file (empty when not loaded).
 */
public function getUrlBlacklist ()
{
    return $this->urlsBlacklisted;
}

/**
 * @return mixed The TextLanguageDetect instance created in the constructor.
 */
public function getLanguageDetect ()
{
    return $this->languageDetect;
}

/**
 * @return array Words the user excluded with a leading "-" in the query.
 */
public function getStopWords ()
{
    return $this->stopWords;
}

/**
 * Returns how often addHostCount() has been called for the given host.
 *
 * addHostCount() stores its counters under md5($host), so the lookup has to
 * use the same key; looking up the raw host name never finds a counter.
 * The raw value is still tried afterwards so that a caller which already
 * passes the hash keeps working.
 *
 * @param string $host Host name (or an already hashed key).
 * @return int Counter value, 0 when the host has not been counted yet.
 */
public function getHostCount($host)
{
    $hash = md5($host);
    if (isset($this->addedHosts[$hash])) {
        return $this->addedHosts[$hash];
    }

    if (isset($this->addedHosts[$host])) {
        return $this->addedHosts[$host];
    }

    return 0;
}

/**
 * Increments the counter kept for the given host.
 *
 * Counters are stored in $this->addedHosts under the md5 hash of the host.
 *
 * @param string $host
 */
public function addHostCount($host)
{
    $key = md5($host);
    $this->addedHosts[$key] = isset($this->addedHosts[$key])
        ? $this->addedHosts[$key] + 1
        : 1;
}
/**
 * @return mixed The value of the "site:" operator found by checkSpecialSearches(),
 *               or the unset default when none was given.
 */
public function getSite()
{
    return $this->site;
}

/**
 * Registers a link and reports whether it was new.
 *
 * Links are remembered in $this->addedLinks under their md5 hash.
 *
 * @param string $link
 * @return bool true when the link was registered now, false when it was already known.
 */
public function addLink($link)
{
    $key = md5($link);
    $isNew = !isset($this->addedLinks[$key]);
    if ($isNew) {
        $this->addedLinks[$key] = 1;
    }

    return $isNew;
}

/**
 * Builds a link that repeats the current search with another focus.
 *
 * All current request parameters except "page" are carried over,
 * "focus" is replaced and "out" is forced to "results".
 *
 * @param string $fokus Focus to switch to.
 * @return string URL of the MetaGerSearch@search action.
 */
public function generateSearchLink($fokus)
{
    $requestData = $this->request->except('page');
    $requestData['focus'] = $fokus;
    $requestData['out'] = "results";
    $link = action('MetaGerSearch@search', $requestData);
    return $link;
}
/**
 * @return string URL of the MetaGerSearch@quicktips action.
 */
public function generateQuicktipLink()
{
    return action('MetaGerSearch@quicktips');
}

/**
 * Builds a link that repeats the current search restricted to one host.
 *
 * The current request parameters except "page" and "out" are carried over,
 * " site:<host>" is appended to the search input and the focus is set to "web".
 *
 * @param string $host Host to restrict the search to.
 * @return string URL of the MetaGerSearch@search action.
 */
public function generateSiteSearchLink($host)
{
    $requestData = $this->request->except(['page','out']);
    $requestData['eingabe'] .= " site:$host";
    $requestData['focus'] = "web";
    $link = action('MetaGerSearch@search', $requestData);
    return $link;
}

/**
 * Builds a link that repeats the current search with one host excluded.
 *
 * The current request parameters except "page" and "out" are carried over
 * and " -host:<host>" is appended to the search input.
 *
 * @param string $host Host whose results should be hidden.
 * @return string URL of the MetaGerSearch@search action.
 */
public function generateRemovedHostLink ($host)
{
    $requestData = $this->request->except(['page','out']);
    $requestData['eingabe'] .= " -host:$host";
    $link = action('MetaGerSearch@search', $requestData);
    return $link;
}

/**
 * Builds a link that repeats the current search with one domain excluded.
 *
 * The current request parameters except "page" and "out" are carried over
 * and " -domain:<url-encoded domain>" is appended to the search input.
 *
 * @param string $domain Domain whose results should be hidden.
 * @return string URL of the MetaGerSearch@search action.
 */
public function generateRemovedDomainLink ($domain)
{
    $params = $this->request->except(['page','out']);
    $params['eingabe'] .= " -domain:" . urlencode($domain);

    return action('MetaGerSearch@search', $params);
}
/**
 * @return mixed Link target chosen by parseFormData(): "_blank" or "_self".
 */
public function getTab ()
{
    return $this->tab;
}

/**
 * @return mixed The current results (a paginator once combineResults() has run).
 */
public function getResults ()
{
    return $this->results;
}
/**
 * Removes the first collected ad and returns its properties.
 *
 * @return array|null Result of get_object_vars() on the ad, or null when no ad is left.
 */
public function popAd()
{
    # $this->ads only comes into existence when combineResults() appends an
    # ad; empty() also covers the "never assigned" case, where count() would
    # raise a TypeError on PHP 8.
    if (empty($this->ads)) {
        return null;
    }

    return get_object_vars(array_shift($this->ads));
}
/**
 * Wraps an image URL into a link to the picture proxy.
 *
 * @param string $link Original image URL, passed on as the "url" parameter.
 * @return string URL of the Pictureproxy@get action.
 */
public function getImageProxyLink($link)
{
    return action('Pictureproxy@get', ["url" => $link]);
}