Commit 29a6022f authored by Dominik Hebeler
Browse files

Datenschutzerklärung erweitert und einige Formatierungen durchgeführt

parent 764b5d00
This diff is collapsed.
......@@ -2,20 +2,17 @@
namespace App\Models;
class Result
{
function __construct ( \SimpleXMLElement $provider, $titel, $link, $anzeigeLink , $descr, $gefVon, $sourceRank, $partnershop = false, $image = "", $price = 0 )
public function __construct(\SimpleXMLElement $provider, $titel, $link, $anzeigeLink, $descr, $gefVon, $sourceRank, $partnershop = false, $image = "", $price = 0)
{
$this->titel = strip_tags(trim($titel));
$this->link = trim($link);
$this->anzeigeLink = trim($anzeigeLink);
$this->descr = strip_tags(trim($descr), '<p>');
$this->descr = preg_replace("/\n+/si", " ", $this->descr);
if( strlen($this->descr) > 250 )
{
if (strlen($this->descr) > 250) {
$this->descr = wordwrap($this->descr, 250);
$this->descr = substr($this->descr, 0, strpos($this->descr, "\n"));
......@@ -23,14 +20,14 @@ class Result
$this->gefVon = trim($gefVon);
$this->proxyLink = $this->generateProxyLink($this->link);
$this->sourceRank = $sourceRank;
if($this->sourceRank <= 0 || $this->sourceRank > 20)
if ($this->sourceRank <= 0 || $this->sourceRank > 20) {
$this->sourceRank = 20;
}
$this->sourceRank = 20 - $this->sourceRank;
if(isset($provider["engineBoost"]))
{
if (isset($provider["engineBoost"])) {
$this->engineBoost = floatval($provider["engineBoost"]->__toString());
}else
{
} else {
$this->engineBoost = 1;
}
......@@ -46,7 +43,7 @@ class Result
#die($this->anzeigeLink . "\r\n" . $this->strippedHost);
}
public function rank (\App\MetaGer $metager)
public function rank(\App\MetaGer $metager)
{
$rank = 0;
......@@ -54,8 +51,7 @@ class Result
#URL-Boost
$link = $this->anzeigeLink;
if(strpos($link, "http") !== 0)
{
if (strpos($link, "http") !== 0) {
$link = "http://" . $link;
}
$link = @parse_url($link, PHP_URL_HOST) . @parse_url($link, PHP_URL_PATH);
......@@ -71,25 +67,23 @@ class Result
"/www\./si",
"/\//si",
"/\./si",
"/-/si"
"/-/si",
];
foreach($regex as $reg)
{
foreach ($regex as $reg) {
$link = preg_replace($regex, "", $link);
$tmpEingabe = preg_replace($regex, "", $tmpEingabe);
}
#die($tmpLi . "<br>" . $link . "<br>" . $tmpEingabe . "<br><br>");
foreach(str_split($tmpEingabe) as $char)
{
if( !$char || !$tmpEingabe || strlen($tmpEingabe) === 0 || strlen($char) === 0 )
foreach (str_split($tmpEingabe) as $char) {
if (!$char || !$tmpEingabe || strlen($tmpEingabe) === 0 || strlen($char) === 0) {
continue;
if(strpos(strtolower($tmpLink), strtolower($char)) >= 0)
{
}
if (strpos(strtolower($tmpLink), strtolower($char)) >= 0) {
$count++;
$tmpLink = str_replace(urlencode($char), "", $tmpLink);
}
if(strlen($this->descr) > 80 && strlen($link) > 0)
{
if (strlen($this->descr) > 80 && strlen($link) > 0) {
#$rank += $count /((strlen($link)) * 60);
}
}
......@@ -104,26 +98,23 @@ class Result
$tmpEingabe = preg_replace("/\b\w{1,3}\b/si", "", $tmpEingabe);
$tmpEingabe = preg_replace("/\s+/si", " ", $tmpEingabe);
#die($tmpEingabe);
foreach(explode(" ", trim($tmpEingabe)) as $el)
{
if( strlen($tmpTitle) === 0 || strlen($el) === 0 || strlen($tmpDescription) === 0 )
foreach (explode(" ", trim($tmpEingabe)) as $el) {
if (strlen($tmpTitle) === 0 || strlen($el) === 0 || strlen($tmpDescription) === 0) {
continue;
}
$el = preg_quote($el, "/");
if(strlen($tmpTitle) > 0)
{
if(preg_match("/\b$el\b/si", $tmpTitle))
{
if (strlen($tmpTitle) > 0) {
if (preg_match("/\b$el\b/si", $tmpTitle)) {
$tmpRank += .7 * .6 * $maxRank;
}elseif (strpos($tmpTitle, $el) !== false) {
} elseif (strpos($tmpTitle, $el) !== false) {
$tmpRank += .3 * .6 * $maxRank;
}
}
if( strlen($tmpDescription) > 0 )
{
if(preg_match("/\b$el\b/si", $tmpDescription))
{
if (strlen($tmpDescription) > 0) {
if (preg_match("/\b$el\b/si", $tmpDescription)) {
$tmpRank += .7 * .4 * $maxRank;
}elseif (strpos($tmpDescription, $el) !== false) {
} elseif (strpos($tmpDescription, $el) !== false) {
$tmpRank += .3 * .4 * $maxRank;
}
}
......@@ -131,123 +122,120 @@ class Result
$tmpRank /= sizeof(explode(" ", trim($tmpEingabe))) * 10;
$rank += $tmpRank;
if($this->engineBoost > 0)
{
if ($this->engineBoost > 0) {
$rank *= floatval($this->engineBoost);
}
$this->rank = $rank;
}
/**
 * Returns the ranking score stored on this result.
 *
 * The value is whatever rank() last assigned to $this->rank.
 *
 * @return mixed the stored rank
 */
public function getRank()
{
    return $this->rank;
}
/**
 * Decides whether this result may appear in the result list.
 *
 * The checks run in this order, and the first failing one rejects the result:
 * user blacklists, our own blacklists, the optional language filter, stop
 * words, required phrases, the per-host limit and finally the duplicate filter.
 * A result that passes is registered with $metager (link and host counter).
 *
 * @param \App\MetaGer $metager search context providing the filter settings
 * @return bool true if the result should be shown, false otherwise
 */
public function isValid(\App\MetaGer $metager)
{
    # First the blacklists the user defined personally (via URL parameters):
    if (in_array($this->strippedHost, $metager->getUserHostBlacklist())
        || in_array($this->strippedDomain, $metager->getUserDomainBlacklist())) {
        return false;
    }

    # Now our own URL and domain blacklists
    if ($this->strippedHost !== ""
        && (in_array($this->strippedHost, $metager->getDomainBlacklist())
            || in_array($this->strippedLink, $metager->getUrlBlacklist()))) {
        return false;
    }

    # Title and description are used by the next three filters.
    $text = $this->titel . " " . $this->descr;

    # Now the optional language filter
    if ($metager->getLang() !== "all") {
        if ($metager->getLang() !== $this->detectResultLanguage($text)) {
            return false;
        }
    }

    # Apply the stop word search and drop matching results:
    foreach ($metager->getStopWords() as $stopWord) {
        if (stripos($text, $stopWord) !== false) {
            return false;
        }
    }

    # The phrase search: every phrase has to occur in the lower-cased text.
    # NOTE(review): $phrase is compared as-is; presumably the phrases are
    # already lower case - confirm against MetaGer::getPhrases().
    $lowerText = strtolower($this->titel) . " " . strtolower($this->descr);
    foreach ($metager->getPhrases() as $phrase) {
        if (strpos($lowerText, $phrase) === false) {
            return false;
        }
    }

    # Finally two more checks: the host filter, which makes sure that at most
    # 3 links per host are shown, and the duplicate filter, which keeps the
    # same link from appearing twice in the result list.
    # The host filter is skipped for site searches and for a few hosts:
    if ($metager->getSite() === "" &&
        strpos($this->strippedHost, "ncbi.nlm.nih.gov") === false &&
        strpos($this->strippedHost, "twitter.com") === false &&
        strpos($this->strippedHost, "www.ladenpreis.net") === false &&
        strpos($this->strippedHost, "www.onenewspage.com") === false) {
        $count = $metager->getHostCount($this->strippedHost);
        if ($count >= 3) {
            return false;
        }
    }

    # Independent of that, our duplicate filter:
    if ($metager->addLink($this->strippedLink)) {
        $metager->addHostCount($this->strippedHost);
        return true;
    }

    return false;
}

/**
 * Pipes $text into the Models/lang.pl script and returns the last line it prints.
 *
 * The text comes from remote search engines, so it is written to the
 * script's stdin instead of being interpolated into a shell command line.
 *
 * @param string $text text whose language should be detected
 * @return string last output line of the script, "" if it could not be run
 */
private function detectResultLanguage($text)
{
    $path = app_path() . "/Models/lang.pl";
    $descriptors = [
        0 => ["pipe", "r"],
        1 => ["pipe", "w"],
    ];

    $process = proc_open(escapeshellarg($path), $descriptors, $pipes);
    if (!is_resource($process)) {
        return "";
    }

    # The trailing newline mirrors what `echo` used to append.
    fwrite($pipes[0], $text . "\n");
    fclose($pipes[0]);
    $output = stream_get_contents($pipes[1]);
    fclose($pipes[1]);
    proc_close($process);

    # Same contract as exec(): last line, trailing whitespace removed.
    $lines = explode("\n", rtrim((string) $output));
    return end($lines);
}
/**
 * Extracts the host of a link without a leading "www.".
 *
 * @param string $link link with or without scheme
 * @return string the host, or "" if parse_url() finds none
 */
private function getStrippedHost($link)
{
    # parse_url() only recognises a host if the link carries a scheme.
    # NOTE(review): a scheme-less host that itself starts with "http"
    # (e.g. "httpbin.org") is not prefixed here and yields "".
    if (strpos($link, "http") !== 0) {
        $link = "http://" . $link;
    }

    # parse_url() returns null/false when there is no usable host; the cast
    # turns both into "" before the value reaches preg_replace().
    $host = (string) @parse_url($link, PHP_URL_HOST);

    return preg_replace("/^www\./si", "", $host);
}
/**
 * Builds the stripped form of a link: $this->strippedHost followed by the link's path.
 *
 * @param string $link link with or without scheme
 * @return string stripped host plus path; just the host if the link has no path
 */
private function getStrippedLink($link)
{
    # parse_url() only splits host and path correctly if a scheme is present.
    if (strpos($link, "http") !== 0) {
        $link = "http://" . $link;
    }

    $host = $this->strippedHost;
    $path = @parse_url($link, PHP_URL_PATH);

    return $host . $path;
}
/**
 * Reduces a host name to its last two dot-separated labels.
 *
 * @param string $link host name, e.g. "www.example.com"
 * @return string the last two labels ("example.com"), or $link unchanged if it contains no dot
 */
private function getStrippedDomain($link)
{
    if (preg_match("/([^\.]*\.[^\.]*)$/si", $link, $match)) {
        return $match[1];
    }

    return $link;
}
private function generateProxyLink ($link)
private function generateProxyLink($link)
{
if(!$link)
if (!$link) {
return "";
}
$tmp = $link;
$tmp = preg_replace("/\r?\n$/s", "", $tmp);
$tmp = preg_replace("#^([\w+.-]+)://#s", "$1/", $tmp);
......
<?php
namespace App\Models;
use App\Jobs\Search;
use App\MetaGer;
use Cache;
use Illuminate\Foundation\Bus\DispatchesJobs;
use Log;
use Redis;
use App\Jobs\Search;
use Illuminate\Foundation\Bus\DispatchesJobs;
use Cache;
abstract class Searchengine
{
......@@ -27,29 +27,30 @@ abstract class Searchengine
public $loaded = false;
public $cached = false;
function __construct(\SimpleXMLElement $engine, MetaGer $metager)
public function __construct(\SimpleXMLElement $engine, MetaGer $metager)
{
foreach($engine->attributes() as $key => $value){
foreach ($engine->attributes() as $key => $value) {
$this->$key = $value->__toString();
}
if( !isset($this->homepage) )
if (!isset($this->homepage)) {
$this->homepage = "https://metager.de";
}
$this->engine = $engine;
if( !isset($this->cacheDuration) )
if (!isset($this->cacheDuration)) {
$this->cacheDuration = 60;
}
# Wir registrieren die Benutzung dieser Suchmaschine
$this->uses = intval(Redis::hget($this->name, "uses")) + 1;
Redis::hset($this->name, "uses", $this->uses);
# Eine Suchmaschine kann automatisch temporär deaktiviert werden, wenn es Verbindungsprobleme gab:
if(isset($this->disabled) && strtotime($this->disabled) <= time() )
{
if (isset($this->disabled) && strtotime($this->disabled) <= time()) {
# In diesem Fall ist der Timeout der Suchmaschine abgelaufen.
$this->enable($metager->getSumaFile(), "Die Suchmaschine " . $this->name . " wurde wieder eingeschaltet.");
}elseif (isset($this->disabled) && strtotime($this->disabled) > time())
{
} elseif (isset($this->disabled) && strtotime($this->disabled) > time()) {
$this->enabled = false;
return;
}
......@@ -61,25 +62,23 @@ abstract class Searchengine
$this->startTime = microtime();
$q = "";
if( isset($this->hasSiteSearch) && $this->hasSiteSearch === "1")
{
if(strlen($metager->getSite()) === 0)
if (isset($this->hasSiteSearch) && $this->hasSiteSearch === "1") {
if (strlen($metager->getSite()) === 0) {
$q = $metager->getQ();
else
} else {
$q = $metager->getQ() . " site:" . $metager->getSite();
}else
{
}
} else {
$q = $metager->getQ();
}
$this->getString = $this->generateGetString($q, $metager->getUrl(), $metager->getLanguage(), $metager->getCategory());
$this->hash = md5($this->host . $this->getString . $this->port . $this->name);
$this->resultHash = $metager->getHashCode();
if( Cache::has($this->hash) )
{
if (Cache::has($this->hash)) {
$this->cached = true;
$this->retrieveResults();
}else
{
} else {
# Die Anfragen an die Suchmaschinen werden nun von der Laravel-Queue bearbeitet:
# Hinweis: solange in der .env der QUEUE_DRIVER auf "sync" gestellt ist, werden die Abfragen
# nacheinander abgeschickt.
......@@ -89,20 +88,15 @@ abstract class Searchengine
}
}
/**
 * Parses the response body of the search engine; implemented by each concrete engine.
 *
 * @param string $result the response body to parse
 */
abstract public function loadResults($result);
/**
 * Calls rank() on every entry of $this->results.
 *
 * @param \App\MetaGer $metager search context handed on to each result
 */
public function rank(\App\MetaGer $metager)
{
    foreach ($this->results as $result) {
        $result->rank($metager);
    }
}
private function setStatistic($key, $val)
{
......@@ -112,8 +106,6 @@ abstract class Searchengine
$this->$key = $newVal;
}
public function enable($sumaFile, $message)
{
Log::info($message);
......@@ -130,38 +122,37 @@ abstract class Searchengine
/**
 * Opens a persistent socket to the search engine.
 *
 * The number stored in Redis under 'search.<hash>' for this engine's name is
 * appended to the host string passed to pfsockopen().
 *
 * @return resource|false|null the socket, false if the connection failed,
 *                             null if Redis holds no number for this engine
 */
public function getSocket()
{
    $number = Redis::hget('search.' . $this->hash, $this->name);
    if ($number === null) {
        # A leftover die("test") used to abort the whole request here.
        return null;
    }

    # pfsockopen() fills the error code first, then the error message.
    return pfsockopen($this->getHost() . ":" . $this->port . "/$number", $this->port, $errno, $errstr, 1);
}
/**
 * Loads the results of this search engine if they are available.
 *
 * The response body is taken from the cache when caching is enabled and an
 * entry exists, otherwise from the Redis hash 'search.<resultHash>'. A body
 * read from Redis is written to the cache when caching is enabled. A
 * non-empty body is handed to loadResults().
 *
 * @return bool true if results are loaded (now or earlier), false if no body was found
 */
public function retrieveResults()
{
    if ($this->loaded) {
        return true;
    }

    $body = "";
    if ($this->cacheDuration > 0 && Cache::has($this->hash)) {
        $body = Cache::get($this->hash);
    } elseif (Redis::hexists('search.' . $this->resultHash, $this->name)) {
        $body = Redis::hget('search.' . $this->resultHash, $this->name);
        if ($this->cacheDuration > 0) {
            Cache::put($this->hash, $body, $this->cacheDuration);
        }
    }

    if ($body !== "") {
        $this->loadResults($body);
        $this->loaded = true;
        # NOTE(review): the body is read from 'search.<resultHash>' but the
        # field is deleted from 'search.<hash>' - confirm this is intended.
        Redis::hdel('search.' . $this->hash, $this->name);
        return true;
    }

    return false;
}
......@@ -174,11 +165,9 @@ abstract class Searchengine
protected function getHost()
{
$return = "";
if( $this->port === "443" )
{
if ($this->port === "443") {
$return .= "tls://";
}else
{
} else {
$return .= "tcp://";
}
$return .= $this->host;
......@@ -195,12 +184,12 @@ abstract class Searchengine
return curl_errno($this->ch);
}
/**
 * Adds this engine's curl handle ($this->ch) to a curl multi handle.
 *
 * @param resource $mh curl multi handle
 */
public function addCurlHandle($mh)
{
    curl_multi_add_handle($mh, $this->ch);
}
/**
 * Removes this engine's curl handle ($this->ch) from a curl multi handle.
 *
 * @param resource $mh curl multi handle
 */
public function removeCurlHandle($mh)
{
    curl_multi_remove_handle($mh, $this->ch);
}
......@@ -210,41 +199,39 @@ abstract class Searchengine
$getString = "";
# Skript:
if(strlen($this->skript) > 0)
if (strlen($this->skript) > 0) {
$getString .= $this->skript;
else
} else {
$getString .= "/";
}
# FormData:
if(strlen($this->formData) > 0)
if (strlen($this->formData) > 0) {
$getString .= "?" . $this->formData;
}
# Wir müssen noch einige Platzhalter in dem GET-String ersetzen:
if( strpos($getString, "<<USERAGENT>>") ){
if (strpos($getString, "<<USERAGENT>>")) {
$getString = str_replace("<<USERAGENT>>", $this->urlEncode($this->useragent), $getString);
}
if( strpos($getString, "<<QUERY>>") )
{
if (strpos($getString, "<<QUERY>>")) {
$getString = str_replace("<<QUERY>>", $this->urlEncode($query), $getString);
}
if( strpos($getString, "<<IP>>") )
{
if (strpos($getString, "<<IP>>")) {
$getString = str_replace("<<IP>>", $this->urlEncode($this->ip), $getString);
}
if( strpos($getString, "<<LANGUAGE>>") )
{
if (strpos($getString, "<<LANGUAGE>>")) {
$getString = str_replace("<<LANGUAGE>>", $this->urlEncode($language), $getString);
}
if( strpos($getString, "<<CATEGORY>>") )
{
if (strpos($getString, "<<CATEGORY>>")) {
$getString = str_replace("<<CATEGORY>>", $this->urlEncode($category), $getString);
}
if( strpos($getString, "<<AFFILDATA>>") )
{
if (strpos($getString, "<<AFFILDATA>>")) {
$getString = str_replace("<<AFFILDATA>>", $this->getOvertureAffilData($url), $getString);
}
return $getString;
......@@ -252,11 +239,9 @@ abstract class Searchengine
/**
 * URL-encodes a string for this search engine.
 *
 * If the engine defines $this->inputEncoding, the string is converted to
 * that encoding before it is URL-encoded.
 *
 * @param string $string value to encode
 * @return string the URL-encoded value
 */
protected function urlEncode($string)
{
    if (isset($this->inputEncoding)) {
        return urlencode(mb_convert_encoding($string, $this->inputEncoding));
    }

    return urlencode($string);
}
......@@ -265,7 +250,7 @@ abstract class Searchengine
{
$affil_data = 'ip=' . $this->ip;
$affil_data .= '&ua=' . $this->useragent;
if ( isset($_SERVER['HTTP_X_FORWARDED_FOR']) ) {
if (isset($_SERVER['HTTP_X_FORWARDED_FOR'])) {
$affil_data .= '&xfip=' . $_SERVER['HTTP_X_FORWARDED_FOR'];
}
$affilDataValue = $this->urlEncode($affil_data);
......@@ -275,7 +260,7 @@ abstract class Searchengine
return "&affilData=" . $affilDataValue . "&serveUrl=" . $serveUrl;
}
/**
 * Returns the value of $this->enabled.
 *
 * @return mixed the stored enabled flag
 */
public function isEnabled()
{
    return $this->enabled;
}
......
......@@ -8,36 +8,32 @@ use Symfony\Component\DomCrawler\Crawler;
class Allesklar extends Searchengine
{
protected $tds = "";
/**
 * Creates the engine; all setup is delegated to the Searchengine constructor.
 *
 * @param \SimpleXMLElement $engine  configuration element of this engine
 * @param \App\MetaGer      $metager search context
 */
public function __construct(\SimpleXMLElement $engine, \App\MetaGer $metager)
{
    parent::__construct($engine, $metager);
}
public function loadResults ($result)
public function loadResults($result)
{
$crawler = new Crawler(utf8_decode($result));
$crawler = $crawler
->filter('table[width=585]')
->reduce(function(Crawler $node, $i) {
if($i < 5)
{
->reduce(function (Crawler $node, $i) {
if ($i < 5) {
return false;
}
});
$this->counter = 0;
$crawler->filter('table')->each(function (Crawler $node, $i)
{
$crawler->filter('table')->each(function (Crawler $node, $i) {
try {