Commit 29a6022f authored by Dominik Hebeler
Browse files

Datenschutzerklärung erweitert und einige Formatierungen durchgeführt

parent 764b5d00
This diff is collapsed.
...@@ -2,256 +2,244 @@ ...@@ -2,256 +2,244 @@
namespace App\Models; namespace App\Models;
/**
 * A single search result delivered by one of the queried search engines.
 *
 * The constructor normalises the raw values (strips tags, shortens the
 * description, derives host/domain/link keys used for filtering), rank()
 * computes a score for sorting and isValid() applies the blacklist, language,
 * stop-word, phrase, per-host and duplicate filters.
 */
class Result
{
    # Declared in the order the constructor assigns them.
    public $titel;
    public $link;
    public $anzeigeLink;
    public $descr;
    public $gefVon;
    public $proxyLink;
    public $sourceRank;
    public $engineBoost;
    public $valid;
    public $host;
    public $strippedHost;
    public $strippedDomain;
    public $strippedLink;
    public $rank;
    public $partnershop;
    public $image;

    /**
     * @param \SimpleXMLElement $provider    Engine configuration node; its optional "engineBoost" attribute scales the rank.
     * @param string            $titel       Result title (tags are stripped).
     * @param string            $link        Target URL of the result.
     * @param string            $anzeigeLink URL shown to the user; also the basis for the host/domain/link filter keys.
     * @param string            $descr       Description (all tags except <p> are stripped, shortened to about 250 bytes).
     * @param string            $gefVon      Markup naming the engine that found the result.
     * @param int               $sourceRank  Position within the engine's own result list.
     * @param bool              $partnershop Stored as-is on the result.
     * @param string            $image       Stored as-is on the result.
     * @param int               $price       NOTE(review): accepted but never stored or used in this class.
     */
    public function __construct(\SimpleXMLElement $provider, $titel, $link, $anzeigeLink, $descr, $gefVon, $sourceRank, $partnershop = false, $image = "", $price = 0)
    {
        $this->titel       = strip_tags(trim($titel));
        $this->link        = trim($link);
        $this->anzeigeLink = trim($anzeigeLink);

        $this->descr = strip_tags(trim($descr), '<p>');
        $this->descr = preg_replace("/\n+/si", " ", $this->descr);
        if (strlen($this->descr) > 250) {
            $this->descr = $this->shortenDescription($this->descr, 250);
        }

        $this->gefVon    = trim($gefVon);
        $this->proxyLink = $this->generateProxyLink($this->link);

        # Positions outside 1..20 count as the worst position; afterwards the
        # value is inverted so that a better position yields a higher number.
        $this->sourceRank = $sourceRank;
        if ($this->sourceRank <= 0 || $this->sourceRank > 20) {
            $this->sourceRank = 20;
        }
        $this->sourceRank = 20 - $this->sourceRank;

        if (isset($provider["engineBoost"])) {
            $this->engineBoost = floatval($provider["engineBoost"]->__toString());
        } else {
            $this->engineBoost = 1;
        }

        $this->valid          = true;
        $this->host           = parse_url($link, PHP_URL_HOST);
        $this->strippedHost   = $this->getStrippedHost($this->anzeigeLink);
        $this->strippedDomain = $this->getStrippedDomain($this->strippedHost);
        $this->strippedLink   = $this->getStrippedLink($this->anzeigeLink);
        $this->rank           = 0;
        $this->partnershop    = $partnershop;
        $this->image          = $image;
    }

    /**
     * Computes the rank of this result and stores it in $this->rank.
     *
     * The rank is the inverted source position (weighted with 0.02) plus a
     * boost for search words found in title and description; the sum is
     * multiplied with the engine boost when that boost is positive.
     *
     * The former "URL boost" is not computed here: its only contribution to
     * the rank was commented out, so the code preparing it had no effect.
     */
    public function rank(\App\MetaGer $metager)
    {
        $rank = $this->sourceRank * 0.02;
        $rank += $this->searchWordBoost($metager->getQ());

        if ($this->engineBoost > 0) {
            $rank *= floatval($this->engineBoost);
        }

        $this->rank = $rank;
    }

    /**
     * Returns the rank computed by the last call to rank() (0 before that).
     */
    public function getRank()
    {
        return $this->rank;
    }

    /**
     * Decides whether this result may be shown.
     *
     * Side effect: a result that passes every filter is registered with
     * $metager (addLink / addHostCount), so calling this twice for the same
     * result makes the second call fail the duplicate filter.
     *
     * @return bool true when the result passed all filters
     */
    public function isValid(\App\MetaGer $metager)
    {
        # First the personal blacklists (defined via URL parameters):
        if (in_array($this->strippedHost, $metager->getUserHostBlacklist())
            || in_array($this->strippedDomain, $metager->getUserDomainBlacklist())) {
            return false;
        }

        # Now our own URL and domain blacklists:
        if ($this->strippedHost !== ""
            && (in_array($this->strippedHost, $metager->getDomainBlacklist())
                || in_array($this->strippedLink, $metager->getUrlBlacklist()))) {
            return false;
        }

        $text = $this->titel . " " . $this->descr;

        # The optional language filter:
        if ($metager->getLang() !== "all") {
            $lang = $this->detectLanguage($text);
            if ($metager->getLang() !== $lang) {
                return false;
            }
        }

        # Stop words: drop every result containing one of them.
        foreach ($metager->getStopWords() as $stopWord) {
            if (stripos($text, $stopWord) !== false) {
                return false;
            }
        }

        # Phrase search: every phrase has to occur in the lower-cased text.
        $lowerText = strtolower($this->titel) . " " . strtolower($this->descr);
        foreach ($metager->getPhrases() as $phrase) {
            if (strpos($lowerText, $phrase) === false) {
                return false;
            }
        }

        # Host filter: at most 3 links per host. It is skipped for site
        # searches and for a few exempt hosts.
        if ($metager->getSite() === "" && !$this->isExemptFromHostLimit()) {
            if ($metager->getHostCount($this->strippedHost) >= 3) {
                return false;
            }
        }

        # Independent of that the duplicate filter: addLink() reports whether
        # the link was new.
        if ($metager->addLink($this->strippedLink)) {
            $metager->addHostCount($this->strippedHost);
            return true;
        }

        return false;
    }

    /**
     * Shortens $text to at most $limit bytes, preferring a word boundary.
     *
     * When no usable boundary exists within the limit (a text without spaces,
     * or a first word longer than the limit) the text is cut hard at $limit
     * bytes instead of being emptied.
     */
    private function shortenDescription($text, $limit)
    {
        $wrapped  = wordwrap($text, $limit);
        $breakPos = strpos($wrapped, "\n");

        if ($breakPos !== false && $breakPos > 0 && $breakPos <= $limit) {
            return substr($wrapped, 0, $breakPos);
        }

        # mb_strcut never splits a multi-byte character; plain substr is only
        # the fallback when the mbstring extension is missing.
        if (function_exists('mb_strcut')) {
            return mb_strcut($text, 0, $limit, 'UTF-8');
        }

        return substr($text, 0, $limit);
    }

    /**
     * Boost for occurrences of the search words in title and description.
     *
     * Words of up to three characters are removed from the query first. A
     * whole-word hit counts 0.7, a mere substring hit 0.3; the title is
     * weighted with 0.6 and the description with 0.4. The sum is divided by
     * ten times the number of remaining query tokens.
     */
    private function searchWordBoost($query)
    {
        $maxRank = 0.1;

        $query = preg_replace("/\b\w{1,3}\b/si", "", $query);
        $query = preg_replace("/\s+/si", " ", $query);
        $words = explode(" ", trim($query));

        $boost = 0;
        # Without both a title and a description no word is scored at all.
        if (strlen($this->titel) > 0 && strlen($this->descr) > 0) {
            foreach ($words as $word) {
                if (strlen($word) === 0) {
                    continue;
                }
                $boost += $this->wordScore($word, $this->titel) * .6 * $maxRank;
                $boost += $this->wordScore($word, $this->descr) * .4 * $maxRank;
            }
        }

        return $boost / (count($words) * 10);
    }

    /**
     * Scores one search word against one text: 0.7 for a whole-word match,
     * 0.3 for a substring match, 0 otherwise. Both checks ignore case.
     */
    private function wordScore($word, $text)
    {
        # Only the regular expression needs the quoted form; the substring
        # check has to use the raw word.
        $pattern = "/\b" . preg_quote($word, "/") . "\b/si";
        if (preg_match($pattern, $text)) {
            return .7;
        }
        if (stripos($text, $word) !== false) {
            return .3;
        }
        return 0;
    }

    /**
     * Runs the external language detector (Models/lang.pl) on $text.
     *
     * The text is written to the detector's standard input and never becomes
     * part of a shell command line, so quotes or shell metacharacters in a
     * result cannot be executed.
     *
     * @return string last non-empty output line of the detector, "" when the
     *                detector could not be started or printed nothing
     */
    private function detectLanguage($text)
    {
        $path        = app_path() . "/Models/lang.pl";
        $descriptors = [
            0 => ["pipe", "r"],
            1 => ["pipe", "w"],
        ];

        $process = proc_open($path, $descriptors, $pipes);
        if (!is_resource($process)) {
            return "";
        }

        fwrite($pipes[0], $text . "\n");
        fclose($pipes[0]);
        $output = stream_get_contents($pipes[1]);
        fclose($pipes[1]);
        proc_close($process);

        $lines = explode("\n", rtrim((string) $output));
        return rtrim(end($lines));
    }

    /**
     * Tells whether the host of this result is excluded from the
     * "at most 3 links per host" rule.
     */
    private function isExemptFromHostLimit()
    {
        $exemptHosts = [
            "ncbi.nlm.nih.gov",
            "twitter.com",
            "www.ladenpreis.net",
            "www.onenewspage.com",
        ];

        foreach ($exemptHosts as $exemptHost) {
            if (strpos($this->strippedHost, $exemptHost) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Prefixes $link with "http://" unless it already starts with "http".
     */
    private function withScheme($link)
    {
        if (strpos($link, "http") !== 0) {
            return "http://" . $link;
        }
        return $link;
    }

    /**
     * Returns the host of $link without a leading "www."; "" when no host
     * can be determined.
     */
    private function getStrippedHost($link)
    {
        $host = (string) parse_url($this->withScheme($link), PHP_URL_HOST);
        return preg_replace("/^www\./si", "", $host);
    }

    /**
     * Returns the stripped host followed by the path of $link (no scheme, no
     * query string). Relies on $this->strippedHost being set already.
     */
    private function getStrippedLink($link)
    {
        $path = (string) parse_url($this->withScheme($link), PHP_URL_PATH);
        return $this->strippedHost . $path;
    }

    /**
     * Returns the last two dot-separated labels of $link (e.g. "example.com"
     * for "sub.example.com"); $link itself when it contains no dot.
     */
    private function getStrippedDomain($link)
    {
        if (preg_match("/([^\.]*\.[^\.]*)$/si", $link, $match)) {
            return $match[1];
        }
        return $link;
    }

    /**
     * Builds the link that opens $link through the proxy; "" for an empty
     * link. The "scheme://" prefix is rewritten to "scheme/".
     */
    private function generateProxyLink($link)
    {
        if (!$link) {
            return "";
        }

        $tmp = preg_replace("/\r?\n$/s", "", $link);
        $tmp = preg_replace("#^([\w+.-]+)://#s", "$1/", $tmp);
        return "https://proxy.suma-ev.de/cgi-bin/nph-proxy.cgi/en/I0/" . $tmp;
    }
}
<?php <?php
namespace App\Models; namespace App\Models;
use App\Jobs\Search;
use App\MetaGer; use App\MetaGer;
use Cache;
use Illuminate\Foundation\Bus\DispatchesJobs;
use Log; use Log;
use Redis; use Redis;
use App\Jobs\Search;
use Illuminate\Foundation\Bus\DispatchesJobs;
use Cache;
abstract class Searchengine abstract class Searchengine
{ {
use DispatchesJobs; use DispatchesJobs;
protected $ch; # Curl Handle zum erhalten der Ergebnisse protected $ch; # Curl Handle zum erhalten der Ergebnisse
public $fp; public $fp;
protected $getString = ""; protected $getString = "";
protected $engine; protected $engine;
protected $counter = 0; protected $counter = 0;
protected $socketNumber = null; protected $socketNumber = null;
public $enabled = true; public $enabled = true;
public $results = []; public $results = [];
public $ads = []; public $ads = [];
public $write_time = 0; public $write_time = 0;
public $connection_time = 0; public $connection_time = 0;
public $loaded = false; public $loaded = false;
public $cached = false; public $cached = false;
function __construct(\SimpleXMLElement $engine, MetaGer $metager) public function __construct(\SimpleXMLElement $engine, MetaGer $metager)
{ {
foreach($engine->attributes() as $key => $value){ foreach ($engine->attributes() as $key => $value) {
$this->$key = $value->__toString(); $this->$key = $value->__toString();
} }
if( !isset($this->homepage) ) if (!isset($this->homepage)) {
$this->homepage = "https://metager.de"; $this->homepage = "https://metager.de";
$this->engine = $engine; }
if( !isset($this->cacheDuration) ) $this->engine = $engine;
$this->cacheDuration = 60;
if (!isset($this->cacheDuration)) {
# Wir registrieren die Benutzung dieser Suchmaschine $this->cacheDuration = 60;
$this->uses = intval(Redis::hget($this->name, "uses")) + 1; }
Redis::hset($this->name, "uses", $this->uses);
# Wir registrieren die Benutzung dieser Suchmaschine
# Eine Suchmaschine kann automatisch temporär deaktiviert werden, wenn es Verbindungsprobleme gab: $this->uses = intval(Redis::hget($this->name, "uses")) + 1;
if(isset($this->disabled) && strtotime($this->disabled) <= time() ) Redis::hset($this->name, "uses", $this->uses);
{
# In diesem Fall ist der Timeout der Suchmaschine abgelaufen. # Eine Suchmaschine kann automatisch temporär deaktiviert werden, wenn es Verbindungsprobleme gab:
$this->enable($metager->getSumaFile(), "Die Suchmaschine " . $this->name . " wurde wieder eingeschaltet."); if (isset($this->disabled) && strtotime($this->disabled) <= time()) {
}elseif (isset($this->disabled) && strtotime($this->disabled) > time()) # In diesem Fall ist der Timeout der Suchmaschine abgelaufen.
{ $this->enable($metager->getSumaFile(), "Die Suchmaschine " . $this->name . " wurde wieder eingeschaltet.");
$this->enabled = false; } elseif (isset($this->disabled) && strtotime($this->disabled) > time()) {
return; $this->enabled = false;
return;
}
# User-Agent definieren:
$this->useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1";
$this->ip = $metager->getIp();
$this->gefVon = "<a href=\"" . $this->homepage . "\" target=\"_blank\">" . $this->displayName . "</a>";
$this->startTime = microtime();
$q = "";
if (isset($this->hasSiteSearch) && $this->hasSiteSearch === "1") {
if (strlen($metager->getSite()) === 0) {
$q = $metager->getQ();
} else {
$q = $metager->getQ() . " site:" . $metager->getSite();
}
} else {
$q = $metager->getQ();
}
$this->getString = $this->generateGetString($q, $metager->getUrl(), $metager->getLanguage(), $metager->getCategory());
$this->hash = md5($this->host . $this->getString . $this->port . $this->name);
$this->resultHash = $metager->getHashCode();
if (Cache::has($this->hash)) {
$this->cached = true;
$this->retrieveResults();
} else {
# Die Anfragen an die Suchmaschinen werden nun von der Laravel-Queue bearbeitet:
# Hinweis: solange in der .env der QUEUE_DRIVER auf "sync" gestellt ist, werden die Abfragen
# nacheinander abgeschickt.
# Sollen diese Parallel verarbeitet werden, muss ein anderer QUEUE_DRIVER verwendet werden.
# siehe auch: https://laravel.com/docs/5.2/queues
$this->dispatch(new Search($this->resultHash, $this->host, $this->port, $this->name, $this->getString, $this->useragent, $metager->getSumaFile()));
}
}
abstract public function loadResults($result);
public function rank(\App\MetaGer $metager)
{
foreach ($this->results as $result) {
$result->rank($metager);
}
}
private function setStatistic($key, $val)
{
$oldVal = floatval(Redis::hget($this->name, $key)) * $this->uses;
$newVal = ($oldVal + max