-
Dominik Hebeler authored
MetaGer.php 26.06 KiB
<?php
namespace App;
use Illuminate\Http\Request;
use Jenssegers\Agent\Agent;
use App;
use Storage;
use Log;
use Config;
use Redis;
use App\lib\TextLanguageDetect\TextLanguageDetect;
use App\lib\TextLanguageDetect\LanguageDetect\TextLanguageDetectException;
use Illuminate\Pagination\LengthAwarePaginator;
use Illuminate\Support\Collection;
#use \Illuminate\Pagination\Paginator;
class MetaGer
{
# Search settings
# NOTE(review): several properties used by this class (starttime, request, out, tab,
# password, quicktips, sockets, addedLinks) are not declared here; they are created
# dynamically on first assignment.
protected $fokus;
protected $eingabe;
protected $q;
protected $category;
protected $time;
protected $page;
protected $lang;
protected $cache = "";
protected $site;
protected $hostBlacklist = [];
protected $domainBlacklist = [];
protected $stopWords = [];
protected $engines = [];
protected $results = [];
protected $ads = [];
protected $warnings = [];
protected $errors = [];
protected $addedHosts = [];
# Data about the request
protected $ip;
protected $language;
protected $agent;
# Configuration settings:
protected $sumaFile;
protected $mobile;
protected $resultCount;
protected $sprueche;
protected $domainsBlacklisted = [];
protected $urlsBlacklisted = [];
protected $url;
protected $languageDetect;
/**
 * Prepares a new search: remembers the start time, defines the CRLF and
 * BUFFER_LENGTH constants, reads the two blacklist files from the config
 * directory and creates the language detector.
 */
public function __construct()
{
    $this->starttime = microtime(true);

    # define() raises an error-level message when the constant already exists,
    # which happens as soon as a second instance is created in the same process.
    if (!defined('CRLF')) {
        define('CRLF', "\r\n");
    }
    if (!defined('BUFFER_LENGTH')) {
        define('BUFFER_LENGTH', 8192);
    }

    $domainFile = config_path() . "/blacklistDomains.txt";
    $urlFile = config_path() . "/blacklistUrl.txt";

    if (file_exists($domainFile) && file_exists($urlFile)) {
        # Read the blacklists, one entry per line.
        $this->domainsBlacklisted = explode("\n", file_get_contents($domainFile));
        $this->urlsBlacklisted = explode("\n", file_get_contents($urlFile));
    } else {
        Log::warning("Achtung: Eine, oder mehrere Blacklist Dateien, konnten nicht geöffnet werden");
    }

    $this->languageDetect = new TextLanguageDetect();
    $this->languageDetect->setNameMode("2");
}
/**
 * Asks every engine in $this->engines to rank itself, passing this
 * MetaGer instance to each rank() call.
 */
public function rankAll()
{
    foreach ($this->engines as $searchEngine) {
        $searchEngine->rank($this);
    }
}
/**
 * Writes the search log entry and builds the view for the current request.
 *
 * The template depends on the focus and on $this->out:
 *  - focus "bilder": 'metager3bilderresults' for out "results", otherwise 'metager3bilder'
 *  - other foci:     'metager3results' for out "results", otherwise 'metager3'
 * Every view receives eingabe, mobile, warnings, errors and metager. The results
 * are passed to all views except the plain default 'metager3' view, and
 * 'suspendheader' is only set for out "results-with-style".
 *
 * @return mixed whatever the view() helper chain returns
 */
public function createView()
{
    # Hand each result to the view as the array produced by get_object_vars().
    $viewResults = [];
    foreach ($this->results as $entry) {
        $viewResults[] = get_object_vars($entry);
    }

    # The log entry for this search is written before the view is built.
    $this->createLogs();

    $outputMode = $this->out;
    $passResults = true;
    $suspendHeader = false;

    # Loose comparisons on purpose: they mirror the semantics of a switch statement.
    if ($this->fokus === "bilder") {
        $template = ($outputMode == 'results') ? 'metager3bilderresults' : 'metager3bilder';
    } elseif ($outputMode == 'results') {
        $template = 'metager3results';
    } elseif ($outputMode == 'results-with-style') {
        $template = 'metager3';
        $suspendHeader = true;
    } else {
        $template = 'metager3';
        $passResults = false;
    }

    $view = view($template);
    if ($passResults) {
        $view = $view->with('results', $viewResults);
    }
    $view = $view->with('eingabe', $this->eingabe)
        ->with('mobile', $this->mobile)
        ->with('warnings', $this->warnings)
        ->with('errors', $this->errors)
        ->with('metager', $this);
    if ($suspendHeader) {
        $view = $view->with('suspendheader', "yes");
    }

    return $view;
}
/**
 * Appends one line describing the current search to the Redis list
 * 'logs.search' on the 'redisLogs' connection.
 *
 * The line contains a timestamp, the client IP, the process id, an md5
 * fingerprint built from several request headers, the referer, the user agent
 * with parentheses and spaces removed, the elapsed time since construction,
 * the focus and the raw search input.
 */
private function createLogs()
{
    $redis = Redis::connection('redisLogs');
    if (!$redis) {
        return;
    }

    $request = $this->request;

    # A single date() call formats "now". Assembling the timestamp from six
    # separate date() calls via mktime() could mix values from before and after
    # a second/minute/hour rollover.
    $logEntry = "[" . date(DATE_RFC822) . "]";
    $logEntry .= " From=" . $this->ip;
    $logEntry .= " pid=" . getmypid();

    $anonId = md5("MySeCrEtSeEdFoRmd5"
        . $request->header('Accept')
        . $request->header('Accept-Charset')
        . $request->header('Accept-Encoding')
        . $request->header('HTTP_LANGUAGE')
        . $request->header('User-Agent')
        . $request->header('Keep-Alive')
        . $request->header('X-Forwarded-For'));
    $logEntry .= " anonId=$anonId";
    $logEntry .= " ref=" . $request->header('Referer');

    # Parentheses and spaces are stripped so the user agent stays one token.
    $useragent = str_replace(["(", ")", " "], "", $request->header('User-Agent'));
    $logEntry .= " ua=" . $useragent;

    $logEntry .= " iter= mm= time=" . round((microtime(true) - $this->starttime), 2) . " serv=" . $this->fokus . " which= hits= stringSearch= QuickTips= SSS= check=";
    $logEntry .= " search=" . $this->eingabe;

    $redis->rpush('logs.search', $logEntry);
}
/**
 * Calls isValid($this) on every entry of $this->results and collects the
 * entries that pass.
 *
 * NOTE(review): the collected list is not written back to $this->results (the
 * assignment is disabled), so the only observable effect is whatever isValid()
 * itself does.
 */
public function removeInvalids()
{
    $stillValid = [];
    foreach ($this->results as $candidate) {
        if ($candidate->isValid($this)) {
            $stillValid[] = $candidate;
        }
    }
    # Disabled write-back: $this->results = $stillValid;
}
/**
 * Merges the results and ads of all engines into this instance, then sorts,
 * validates, numbers, colours and paginates the results.
 *
 * Steps:
 *  1. collect every engine result whose `valid` flag is set, plus all ads
 *  2. sort the results by getRank(), highest first
 *  3. keep only results for which isValid($this) returns true
 *  4. assign a running number and a colour based on rank relative to the first result
 *  5. replace $this->results with a LengthAwarePaginator for the current page
 *  6. drop all ads when a valid API password was supplied
 */
public function combineResults()
{
    foreach ($this->engines as $engine) {
        foreach ($engine->results as $result) {
            if ($result->valid) {
                $this->results[] = $result;
            }
        }
        foreach ($engine->ads as $ad) {
            $this->ads[] = $ad;
        }
    }

    # Sort descending by rank.
    uasort($this->results, function ($a, $b) {
        if ($a->getRank() == $b->getRank()) {
            return 0;
        }
        return ($a->getRank() < $b->getRank()) ? 1 : -1;
    });

    # Validate results
    $newResults = [];
    foreach ($this->results as $result) {
        if ($result->isValid($this)) {
            $newResults[] = $result;
        }
    }
    $this->results = $newResults;

    # Number the results and colour them by their rank relative to the best one.
    $counter = 0;
    $firstRank = 0;
    foreach ($this->results as $result) {
        if ($counter === 0) {
            $firstRank = $result->rank;
        }
        $counter++;
        $result->number = $counter;

        $confidence = ($firstRank > 0) ? $result->rank / $firstRank : 0;
        if ($confidence > 0.65) {
            $result->color = "#FF4000";
        } elseif ($confidence > 0.4) {
            $result->color = "#FF0080";
        } elseif ($confidence > 0.2) {
            $result->color = "#C000C0";
        } else {
            $result->color = "#000000";
        }
    }

    # Get current page from url e.g. &page=6
    $currentPage = LengthAwarePaginator::resolveCurrentPage();
    $offset = $currentPage - 1;
    # Create a new Laravel collection from the array data
    $collection = new Collection($this->results);
    # Define how many items we want to be visible in each page
    $perPage = $this->resultCount;
    # Slice the collection to get the items to display in current page
    $currentPageSearchResults = $collection->slice($offset * $perPage, $perPage)->all();
    # Create our paginator and pass it to the view
    $paginatedSearchResults = new LengthAwarePaginator($currentPageSearchResults, count($collection), $perPage);
    $paginatedSearchResults->setPath('/meta/meta.ger3');
    # Carry all request parameters except "out" over into the page links.
    foreach ($this->request->all() as $key => $value) {
        if ($key === "out") {
            continue;
        }
        $paginatedSearchResults->addQuery($key, $value);
    }
    $this->results = $paginatedSearchResults;

    if (isset($this->password)) {
        # We offer paid API access for which the ads are hidden.
        # Currently this is only the University of Mainz, so only that key is checked.
        $secret = getenv('mainz');

        # Without a configured secret the expected value would be md5($eingabe),
        # which any client can compute, so the check is skipped in that case.
        if (is_string($secret) && $secret !== '' && is_string($this->password)) {
            $expected = md5($this->eingabe . $secret);
            # hash_equals() compares in constant time.
            if (hash_equals($expected, $this->password)) {
                $this->ads = [];
            }
        }
    }
}
/**
 * Selects the search engines for this request from the suma file, instantiates
 * them and waits for their answers.
 *
 * Selection:
 *  - focus "angepasst": every <suma> whose "service" name is present in the request
 *  - other foci: every <suma> whose "type" list contains the focus; for any focus
 *    except "bilder" additionally "qualigo", "similar_product_ads" and (as long
 *    as no "overture" engine was enabled before it) "overtureAds"
 *  - entries with disabled="1" are always skipped
 *
 * Afterwards the engines are polled in 50 ms steps until all have loaded, or,
 * after 500 ms, at least 80% have loaded, or $this->time is reached. Engines
 * that have not loaded by then are shut down.
 *
 * @param Request $request current request, used to look up the selected services
 */
public function createSearchEngines(Request $request)
{
    # Check which search engines are switched on
    $xml = simplexml_load_file($this->sumaFile);
    if ($xml === false) {
        # An unreadable or malformed suma file would otherwise end in a fatal
        # error on the xpath() call below.
        Log::error("Suma-File konnte nicht geladen werden: " . $this->sumaFile);
        $this->errors[] = "Achtung: Die Konfiguration der Suchmaschinen konnte nicht geladen werden.";
        return;
    }

    $enabledSearchengines = [];
    $overtureEnabled = false;
    $countSumas = 0;
    # These entries are enabled alongside the engines but do not count as one.
    $adEngines = ["qualigo", "similar_product_ads", "overtureAds"];
    $customFocus = ($this->fokus === "angepasst");

    foreach ($xml->xpath("suma") as $suma) {
        $name = (string) $suma["name"];

        if ($customFocus) {
            $selected = $request->has($suma["service"]);
        } else {
            $types = explode(",", (string) $suma["type"]);
            $selected = in_array($this->fokus, $types)
                || ($this->fokus !== "bilder"
                    && ($name === "qualigo"
                        || $name === "similar_product_ads"
                        || (!$overtureEnabled && $name === "overtureAds")));
        }

        if (!$selected) {
            continue;
        }
        if (isset($suma['disabled']) && $suma['disabled']->__toString() === "1") {
            continue;
        }

        if ($name === "overture") {
            $overtureEnabled = true;
        }
        if (!in_array($name, $adEngines, true)) {
            $countSumas += 1;
        }
        $enabledSearchengines[] = $suma;
    }

    if ($countSumas <= 0) {
        $this->errors[] = "Achtung: Sie haben in ihren Einstellungen keine Suchmaschine ausgewählt.";
    }

    $engines = [];
    foreach ($enabledSearchengines as $engine) {
        # During a site search only engines flagged with hasSiteSearch="1" are used.
        if (strlen($this->site) > 0 && (!isset($engine["hasSiteSearch"]) || $engine["hasSiteSearch"]->__toString() !== "1")) {
            continue;
        }

        $path = "App\Models\parserSkripte\\" . ucfirst($engine["package"]->__toString());
        # microtime(true) returns a float; without the argument it returns a
        # "msec sec" string that cannot be subtracted meaningfully.
        $start = microtime(true);
        $tmp = new $path($engine, $this);

        if ($tmp->enabled && isset($this->debug)) {
            # Elapsed construction time in milliseconds (presumably what the
            # original division by 1000 was aiming for).
            $this->warnings[] = $tmp->service . " Connection_Time: " . $tmp->connection_time . " Write_Time: " . $tmp->write_time . " Insgesamt:" . ((microtime(true) - $start) * 1000);
        }

        if ($tmp->isEnabled()) {
            $engines[] = $tmp;
            $this->sockets[$tmp->name] = $tmp->fp;
        }
    }

    # Now comes an essential step: we have to wait for the engines' answers
    # because we cannot continue without them - but not forever.
    # $time only counts the accumulated sleep time (50 ms per round), not the
    # time spent inside retrieveResults().
    $enginesToLoad = count($engines);
    $loadedEngines = 0;
    $time = 0;
    while (true) {
        # Exit conditions
        if ($time < 500) {
            # First 500 ms: only stop once every engine has answered.
            if ($loadedEngines >= $enginesToLoad) {
                break;
            }
        } elseif ($time >= 500 && $time < $this->time) {
            # Afterwards 80% of the engines are enough.
            if (($loadedEngines / ($enginesToLoad * 1.0)) >= 0.8) {
                break;
            }
        } else {
            # Time limit reached.
            break;
        }

        foreach ($engines as $engine) {
            if (!$engine->loaded) {
                $engine->retrieveResults();
                if ($engine->loaded) {
                    $loadedEngines += 1;
                }
            }
        }

        usleep(50000);
        $time += 50;
    }

    # Whatever has not answered by now is discarded.
    foreach ($engines as $engine) {
        if (!$engine->loaded) {
            $engine->shutdown();
        }
    }

    $this->engines = $engines;
}
/**
 * Reads all search settings from the request into this instance and
 * normalises them.
 *
 * Side effects on the request: when the "encoding" parameter is not "utf8" all
 * inputs are converted from ISO-8859-1 to UTF-8, and when "minism" is combined
 * with "fportal" or "harvest" the latter two parameters are removed.
 *
 * @param Request $request the incoming search request; stored in $this->request
 */
public function parseFormData(Request $request)
{
    if ($request->input('encoding', '') !== "utf8") {
        # In earlier versions, before the encoding parameter existed, the data
        # was transmitted as ISO-8859-1.
        $input = $request->all();
        foreach ($input as $key => $value) {
            $input[$key] = mb_convert_encoding("$value", "UTF-8", "ISO-8859-1");
        }
        $request->replace($input);
    }

    $this->url = $request->url();

    # FOCUS
    # The translated name still containing a "." is treated as "no such focus"
    # and replaced by the web focus.
    $this->fokus = trans('fokiNames.' . $request->input('focus', 'web'));
    if (strpos($this->fokus, ".") !== false) {
        $this->fokus = trans('fokiNames.web');
    }

    # SUMA FILE
    if (App::isLocale("en")) {
        $this->sumaFile = config_path() . "/sumasEn.xml";
    } else {
        $this->sumaFile = config_path() . "/sumas.xml";
    }
    if (!file_exists($this->sumaFile)) {
        die("Suma-File konnte nicht gefunden werden");
    }

    # Search input
    $this->eingabe = trim($request->input('eingabe', ''));
    if (strlen($this->eingabe) === 0) {
        $this->warnings[] = 'Achtung: Sie haben keinen Suchbegriff eingegeben. Sie können ihre Suchbegriffe oben eingeben und es erneut versuchen.';
    }
    $this->q = $this->eingabe;

    # IP
    $this->ip = $request->ip();

    # Language
    $this->language = isset($_SERVER['HTTP_LANGUAGE']) ? $_SERVER['HTTP_LANGUAGE'] : "";

    # Category
    $this->category = $request->input('category', '');

    # Request time limit in milliseconds. Cast to int so that non-numeric input
    # cannot leak into the comparisons and arithmetic that use it.
    $this->time = (int) $request->input('time', 1000);

    # Page
    $this->page = $request->input('page', 1);

    # Lang
    $this->lang = $request->input('lang', 'all');
    if (!in_array($this->lang, ["de", "en", "all"], true)) {
        $this->lang = "all";
    }

    $this->agent = new Agent();
    $this->mobile = $this->agent->isMobile();

    # Sprueche: true when the parameter is "off" (which is also the default).
    $this->sprueche = ($request->input('sprueche', 'off') === "off");

    # Results per page, cast to int for the same reason as the time limit.
    $this->resultCount = (int) $request->input('resultCount', '20');

    # Sometimes parameters have to be adjusted to suit the search settings:
    if ($request->has('dart')) {
        $this->time = 10000;
        $this->warnings[] = "Hinweis: Sie haben Dart-Europe aktiviert. Die Suche kann deshalb länger dauern und die maximale Suchzeit wurde auf 10 Sekunden hochgesetzt.";
    }
    if ($this->time <= 500 || $this->time > 20000) {
        $this->time = 1000;
    }

    if ($request->has('minism') && ($request->has('fportal') || $request->has('harvest'))) {
        # Drop "fportal" and "harvest" from the request when "minism" is active.
        $request->replace(array_diff_key($request->all(), ['fportal' => true, 'harvest' => true]));
    }

    if ($request->has('ebay')) {
        $this->time = 2000;
        $this->warnings[] = "Hinweis: Sie haben Ebay aktiviert. Die Suche kann deshalb länger dauern und die maximale Suchzeit wurde auf 2 Sekunden hochgesetzt.";
    }

    if (App::isLocale("en")) {
        # NOTE(review): everywhere else this property is a boolean; the string
        # "off" is truthy. Confirm which value is intended for the English locale.
        $this->sprueche = "off";
    }

    if ($this->resultCount <= 0 || $this->resultCount > 200) {
        # NOTE(review): the fallback of 1000 is itself outside the accepted
        # 1..200 range - confirm that this is intended.
        $this->resultCount = 1000;
    }

    if ($request->has('onenewspageAll') || $request->has('onenewspageGermanyAll')) {
        $this->time = 5000;
        $this->cache = "cache";
    }

    # Link target: only an explicit tab value other than "1" selects the same window.
    $this->tab = ($request->has('tab') && $request->input('tab') !== "1") ? "_self" : "_blank";

    if ($request->has('password')) {
        $this->password = $request->input('password');
    }

    # The mere presence of the "quicktips" parameter switches the quicktips off.
    $this->quicktips = !$request->has('quicktips');

    $this->out = $request->input('out', "html");
    if (!in_array($this->out, ["html", "json", "results", "results-with-style"], true)) {
        $this->out = "html";
    }

    $this->request = $request;
}
/**
 * Extracts the special operators from the query ($this->q) and records a
 * warning for each kind that was used.
 *
 * Handled operators, in this order:
 *  - "site:<host>" in the query and/or a "site" request parameter (site search)
 *  - "-host:<host>"     collected in $this->hostBlacklist
 *  - "-domain:<domain>" collected in $this->domainBlacklist
 *  - "-<word>"          collected in $this->stopWords
 *  - a quoted phrase, which is only reported and left in the query
 *
 * @param Request $request current request, checked for a "site" parameter
 */
public function checkSpecialSearches(Request $request)
{
    $siteNotice = function ($site) {
        return "Sie führen eine Sitesearch durch. Es werden nur Ergebnisse von der Seite: \"" . $site . "\" angezeigt.";
    };

    # Site search
    if (preg_match("/(.*)\bsite:(\S+)(.*)/si", $this->q, $hit)) {
        $this->site = $hit[2];
        $this->q = $hit[1] . $hit[3];
        $this->warnings[] = $siteNotice($this->site);
    }
    if ($request->has('site')) {
        $this->site = $request->input('site');
        $this->warnings[] = $siteNotice($this->site);
    }

    # Each entry: pattern to strip from the query, property collecting the
    # stripped values, and the opening part of the warning shown to the user.
    $exclusions = [
        ["/(.*)(^|\s)-host:(\S+)(.*)/si", 'hostBlacklist', "Ergebnisse von folgenden Hosts werden nicht angezeigt: \""],
        ["/(.*)(^|\s)-domain:(\S+)(.*)/si", 'domainBlacklist', "Ergebnisse von folgenden Domains werden nicht angezeigt: \""],
        ["/(.*)(^|\s)-(\S+)(.*)/si", 'stopWords', "Sie machen eine Ausschlusssuche. Ergebnisse mit folgenden Wörtern werden nicht angezeigt: \""],
    ];

    foreach ($exclusions as $spec) {
        list($pattern, $property, $messagePrefix) = $spec;

        # Remove one occurrence per round until the query no longer matches.
        while (preg_match($pattern, $this->q, $hit)) {
            $this->{$property}[] = $hit[3];
            $this->q = $hit[1] . $hit[4];
        }

        if (count($this->{$property}) > 0) {
            $listing = rtrim(implode(", ", $this->{$property}), ", ");
            $this->warnings[] = $messagePrefix . $listing . "\"";
        }
    }

    # Notice about a phrase search
    if (preg_match("/\"(.+)\"/si", $this->q, $hit)) {
        $this->warnings[] = "Sie führen eine Phrasensuche durch: \"" . $hit[1] . "\"";
    }
}
/**
 * Returns the search focus as resolved by parseFormData().
 */
public function getFokus ()
{
return $this->fokus;
}
/**
 * Returns the client IP that parseFormData() read from the request.
 */
public function getIp ()
{
return $this->ip;
}
/**
 * Returns the trimmed search input exactly as the user entered it,
 * i.e. before any special operators are removed.
 */
public function getEingabe ()
{
return $this->eingabe;
}
/**
 * Returns the processed query. When a site search is active, " site:<site>"
 * is appended to it.
 */
public function getQ()
{
    $hasSite = strlen($this->site) > 0;

    return $hasSite ? $this->q . " site:" . $this->site : $this->q;
}
/**
 * Returns the request URL stored by parseFormData().
 */
public function getUrl ()
{
return $this->url;
}
/**
 * Returns the maximum search time in milliseconds as determined by
 * parseFormData().
 */
public function getTime ()
{
return $this->time;
}
/**
 * Returns the value of $_SERVER['HTTP_LANGUAGE'] captured by parseFormData(),
 * or an empty string when that entry was not set.
 */
public function getLanguage ()
{
return $this->language;
}
/**
 * Returns the language filter; parseFormData() restricts it to
 * "de", "en" or "all".
 */
public function getLang ()
{
return $this->lang;
}
/**
 * Returns the "sprueche" setting from parseFormData(): true when the request
 * parameter was "off" (the default), false otherwise, or the string "off"
 * when the application locale is "en".
 */
public function getSprueche ()
{
return $this->sprueche;
}
/**
 * Returns the "category" request parameter (empty string when absent).
 */
public function getCategory ()
{
return $this->category;
}
/**
 * Returns the path of the suma XML file chosen by parseFormData()
 * (sumasEn.xml for the "en" locale, sumas.xml otherwise).
 */
public function getSumaFile ()
{
return $this->sumaFile;
}
/**
 * Returns the hosts the user excluded with "-host:" in the query.
 */
public function getUserHostBlacklist ()
{
return $this->hostBlacklist;
}
/**
 * Returns the domains the user excluded with "-domain:" in the query.
 */
public function getUserDomainBlacklist ()
{
return $this->domainBlacklist;
}
/**
 * Returns the lines of blacklistDomains.txt loaded by the constructor
 * (an empty array when the blacklist files were not found).
 */
public function getDomainBlacklist ()
{
return $this->domainsBlacklisted;
}
/**
 * Returns the lines of blacklistUrl.txt loaded by the constructor
 * (an empty array when the blacklist files were not found).
 */
public function getUrlBlacklist ()
{
return $this->urlsBlacklisted;
}
/**
 * Returns the TextLanguageDetect instance created by the constructor.
 */
public function getLanguageDetect ()
{
return $this->languageDetect;
}
/**
 * Returns the words the user excluded by prefixing them with "-" in the query.
 */
public function getStopWords ()
{
return $this->stopWords;
}
/**
 * Returns how often addHostCount() has been called for the given host.
 *
 * addHostCount() stores its counters under md5($host), so the lookup has to
 * use the same key; looking up the raw host name never finds an entry.
 *
 * @param string $host host name as passed to addHostCount()
 * @return int number of recorded occurrences, 0 when the host is unknown
 */
public function getHostCount($host)
{
    $hash = md5($host);
    if (isset($this->addedHosts[$hash])) {
        return $this->addedHosts[$hash];
    }
    return 0;
}
/**
 * Increments the occurrence counter of the given host. Counters are kept in
 * $this->addedHosts under the md5 hash of the host name.
 *
 * @param string $host host name to count
 */
public function addHostCount($host)
{
    $key = md5($host);
    $seenSoFar = isset($this->addedHosts[$key]) ? $this->addedHosts[$key] : 0;
    $this->addedHosts[$key] = $seenSoFar + 1;
}
/**
 * Returns the site the search is restricted to, as set by
 * checkSpecialSearches(); stays unset when no site search is active.
 */
public function getSite()
{
return $this->site;
}
/**
 * Registers a link and reports whether it was seen for the first time.
 * Links are remembered in $this->addedLinks under their md5 hash.
 *
 * @param string $link link to register
 * @return bool true when the link was new, false when it was already registered
 */
public function addLink($link)
{
    $fingerprint = md5($link);
    $isNew = !isset($this->addedLinks[$fingerprint]);
    if ($isNew) {
        $this->addedLinks[$fingerprint] = 1;
    }
    return $isNew;
}
/**
 * Builds a link that repeats the current search with another focus.
 * The "page" parameter is dropped and "out" is forced to "results".
 *
 * @param string $fokus focus to use in the link
 * @return string URL of the MetaGerSearch@search action
 */
public function generateSearchLink($fokus)
{
    $params = $this->request->except('page');
    $params['focus'] = $fokus;
    $params['out'] = "results";

    return action('MetaGerSearch@search', $params);
}
/**
 * Returns the URL of the MetaGerSearch@quicktips action.
 */
public function generateQuicktipLink()
{
    return action('MetaGerSearch@quicktips');
}
/**
 * Builds a link that repeats the current search restricted to one host:
 * " site:<host>" is appended to the search input and the focus is set to "web".
 * The "page" and "out" parameters are dropped.
 *
 * @param string $host host to restrict the search to (URL-encoded before use)
 * @return string URL of the MetaGerSearch@search action
 */
public function generateSiteSearchLink($host)
{
    $encodedHost = urlencode($host);
    $params = $this->request->except(['page', 'out']);
    $params['eingabe'] = $params['eingabe'] . " site:" . $encodedHost;
    $params['focus'] = "web";

    return action('MetaGerSearch@search', $params);
}
/**
 * Builds a link that repeats the current search without results from one host:
 * " -host:<host>" is appended to the search input.
 * The "page" and "out" parameters are dropped.
 *
 * @param string $host host to exclude (URL-encoded before use)
 * @return string URL of the MetaGerSearch@search action
 */
public function generateRemovedHostLink($host)
{
    $encodedHost = urlencode($host);
    $params = $this->request->except(['page', 'out']);
    $params['eingabe'] = $params['eingabe'] . " -host:" . $encodedHost;

    return action('MetaGerSearch@search', $params);
}
/**
 * Builds a link that repeats the current search without results from one domain:
 * " -domain:<domain>" is appended to the search input.
 * The "page" and "out" parameters are dropped.
 *
 * @param string $domain domain to exclude (URL-encoded before use)
 * @return string URL of the MetaGerSearch@search action
 */
public function generateRemovedDomainLink($domain)
{
    $encodedDomain = urlencode($domain);
    $params = $this->request->except(['page', 'out']);
    $params['eingabe'] = $params['eingabe'] . " -domain:" . $encodedDomain;

    return action('MetaGerSearch@search', $params);
}
/**
 * Returns the link target chosen by parseFormData(): "_blank" or "_self".
 */
public function getTab ()
{
return $this->tab;
}
/**
 * Returns the results: an array initially, and the LengthAwarePaginator for
 * the current page once combineResults() has run.
 */
public function getResults ()
{
return $this->results;
}
/**
 * Removes the first ad from $this->ads and returns it as the array produced
 * by get_object_vars().
 *
 * @return array|null the ad's properties, or null when no ads are left
 */
public function popAd()
{
    if (count($this->ads) <= 0) {
        return null;
    }

    $nextAd = array_shift($this->ads);
    return get_object_vars($nextAd);
}
/**
 * Builds the URL of the Pictureproxy@get action for the given link, which is
 * passed on as the "url" parameter.
 *
 * @param string $link address to route through the picture proxy
 * @return string URL of the Pictureproxy@get action
 */
public function getImageProxyLink($link)
{
    return action('Pictureproxy@get', ["url" => $link]);
}
/**
 * Returns whether quicktips are enabled: parseFormData() sets this to false
 * when the request contains a "quicktips" parameter and to true otherwise.
 */
public function showQuicktips ()
{
return $this->quicktips;
}
}