Commit 2c9388a6 authored by Dominik Hebeler

Merge branch '42-bugs' into 'master'

Ein paar weitere Bugs behoben

Closes #42

See merge request !43
parents e07a39d3 65e534de
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
<?php
namespace App\Models;
class Result
{
/**
 * Builds a result object from the raw values delivered by a search engine parser.
 *
 * @param \SimpleXMLElement $provider    Engine configuration node; its optional "engineBoost" attribute is read.
 * @param string            $titel       Result title; surrounding whitespace and tags are removed.
 * @param string            $link        Target URL of the result.
 * @param string            $anzeigeLink URL displayed to the user; the stripped host, domain and link are derived from it.
 * @param string            $descr       Description; tags are removed and runs of newlines become a single space.
 * @param string            $gefVon      Label stored as the result's origin.
 * @param int               $sourceRank  Position reported by the engine; values outside 1..20 are treated as 20.
 * @param bool              $partnershop Stored unchanged on the result.
 */
function __construct ( \SimpleXMLElement $provider, $titel, $link, $anzeigeLink , $descr, $gefVon, $sourceRank, $partnershop = false )
{
    $this->titel = strip_tags(trim($titel));
    $this->link = trim($link);
    $this->anzeigeLink = trim($anzeigeLink);
    $this->descr = strip_tags(trim($descr));
    $this->descr = preg_replace("/\n+/si", " ", $this->descr);
    $this->gefVon = trim($gefVon);
    $this->proxyLink = $this->generateProxyLink($this->link);

    # Invert the engine position so that earlier positions yield larger values (position 1 => 19);
    # positions outside 1..20 end up as 0.
    $this->sourceRank = $sourceRank;
    if ($this->sourceRank <= 0 || $this->sourceRank > 20) {
        $this->sourceRank = 20;
    }
    $this->sourceRank = 20 - $this->sourceRank;

    # Per-engine multiplier, defaulting to 1 when the attribute is not configured.
    if (isset($provider["engineBoost"])) {
        $this->engineBoost = $provider["engineBoost"];
    } else {
        $this->engineBoost = 1;
    }

    $this->valid = true;
    # Parse the host from the trimmed link; leading whitespace in the raw value
    # would otherwise prevent parse_url() from recognising the host.
    $this->host = @parse_url($this->link, PHP_URL_HOST);
    # The stripped link relies on strippedHost, so the host has to be computed first.
    $this->strippedHost = $this->getStrippedHost($this->anzeigeLink);
    $this->strippedDomain = $this->getStrippedDomain($this->strippedHost);
    $this->strippedLink = $this->getStrippedLink($this->anzeigeLink);
    $this->rank = 0;
    $this->partnershop = $partnershop;
}
/**
 * Computes the ranking score of this result and stores it in $this->rank.
 *
 * The score is the sum of
 *  - the inverted engine position (sourceRank * 0.02),
 *  - a URL boost based on how many characters of the query occur in the displayed link,
 *  - a keyword boost for query words (longer than three characters) found in title and description,
 * and is finally multiplied by the engine boost when that is greater than zero.
 *
 * @param \App\MetaGer $metager Provides the search query via getQ().
 */
public function rank (\App\MetaGer $metager)
{
    $rank = $this->sourceRank * 0.02;

    # URL boost: reduce the displayed link to host + path.
    $link = $this->anzeigeLink;
    if (strpos($link, "http") !== 0) {
        $link = "http://" . $link;
    }
    $link = @parse_url($link, PHP_URL_HOST) . @parse_url($link, PHP_URL_PATH);
    $query = $metager->getQ();

    # Strip whitespace, scheme, "www." and separators from both strings.
    # preg_replace() applies the whole pattern list in one call, so no loop is needed.
    $noise = [
        "/\s+/si",
        "/http:/si",
        "/https:/si",
        "/www\./si",
        "/\//si",
        "/\./si",
        "/-/si"
    ];
    $link = preg_replace($noise, "", $link);
    $query = preg_replace($noise, "", $query);

    # Count the query characters that can be found in the link. Every link character
    # may satisfy only one query character, so the count never exceeds the link length.
    # (The previous check "strpos(...) >= 0" was always true, because false >= 0 holds in PHP,
    # and it searched an empty string instead of the link.)
    $count = 0;
    $remaining = strtolower((string) $link);
    if ((string) $query !== "" && $remaining !== "") {
        foreach (str_split(strtolower($query)) as $char) {
            $pos = strpos($remaining, $char);
            if ($pos !== false) {
                $count++;
                $remaining = substr_replace($remaining, "", $pos, 1);
            }
        }
    }
    # The boost is applied once, after counting, and only for results with a longer description.
    if (strlen($this->descr) > 80 && strlen($link) > 0) {
        $rank += $count / (strlen($link) * 60);
    }

    # Boost for occurrences of the search words:
    $maxRank = 0.1;
    $tmpRank = 0;
    $tmpEingabe = $metager->getQ();
    # Ignore words of up to three characters and collapse the remaining whitespace.
    $tmpEingabe = preg_replace("/\b\w{1,3}\b/si", "", $tmpEingabe);
    $tmpEingabe = preg_replace("/\s+/si", " ", $tmpEingabe);
    $terms = explode(" ", trim($tmpEingabe));
    foreach ($terms as $term) {
        # An empty term (query consisted of short words only) must not match anything.
        if ($term === "") {
            continue;
        }
        $pattern = "/\b" . preg_quote($term, "/") . "\b/si";
        # Whole-word hits weigh 70 %, plain substring hits 30 %; the title contributes 60 %,
        # the description 40 %. The substring test uses the unquoted term and ignores case,
        # matching the case-insensitive regex.
        if (preg_match($pattern, $this->titel)) {
            $tmpRank += .7 * .6 * $maxRank;
        } elseif (stripos($this->titel, $term) !== false) {
            $tmpRank += .3 * .6 * $maxRank;
        }
        if (preg_match($pattern, $this->descr)) {
            $tmpRank += .7 * .4 * $maxRank;
        } elseif (stripos($this->descr, $term) !== false) {
            $tmpRank += .3 * .4 * $maxRank;
        }
    }
    # explode() always returns at least one element, so the divisor is never zero.
    $tmpRank /= sizeof($terms) * 10;
    $rank += $tmpRank;

    if ($this->engineBoost > 0) {
        $rank *= floatval($this->engineBoost);
    }
    $this->rank = $rank;
}
/**
 * Returns the score currently stored in $this->rank
 * (0 after construction, updated by rank()).
 */
public function getRank ()
{
    $score = $this->rank;

    return $score;
}
/**
 * Decides whether this result may be shown and registers it with the MetaGer instance if so.
 *
 * Checks, in order: user blacklists, global blacklists, detected language, stop words,
 * the per-host limit and finally the duplicate filter.
 *
 * @param \App\MetaGer $metager Supplies blacklists, language settings, stop words and the host/link bookkeeping.
 * @return bool true when the result passed all filters and was registered, false otherwise.
 */
public function isValid (\App\MetaGer $metager)
{
    # First the personal blacklists (defined through URL parameters).
    $blockedByUser = in_array($this->strippedHost, $metager->getUserHostBlacklist())
        || in_array($this->strippedDomain, $metager->getUserDomainBlacklist());
    if ($blockedByUser) {
        return false;
    }

    # Then our own URL and domain blacklists; skipped when no host is known.
    if ($this->strippedHost !== "") {
        if (in_array($this->strippedHost, $metager->getDomainBlacklist())
            || in_array($this->strippedLink, $metager->getUrlBlacklist())) {
            return false;
        }
    }

    $text = $this->titel . " " . $this->descr;

    # Language filter: the last key returned by the detector is compared with the requested language.
    if ($metager->getLang() !== "all") {
        $detected = "";
        foreach ($metager->getLanguageDetect()->detect($text, 1) as $code => $score) {
            $detected = $code;
        }
        if ($detected !== "" && $detected !== $metager->getLang()) {
            return false;
        }
    }

    # Apply the stop word search and drop matching results.
    foreach ($metager->getStopWords() as $stopWord) {
        if (stripos($text, $stopWord) !== false) {
            return false;
        }
    }

    # Host filter: at most 3 links per host. It is not applied when a site restriction
    # is set or when the host contains one of the names below.
    $unlimitedHosts = [
        "ncbi.nlm.nih.gov",
        "twitter.com",
        "www.ladenpreis.net",
        "www.onenewspage.com"
    ];
    $limitApplies = ($metager->getSite() === "");
    if ($limitApplies) {
        foreach ($unlimitedHosts as $unlimitedHost) {
            if (strpos($this->strippedHost, $unlimitedHost) !== false) {
                $limitApplies = false;
                break;
            }
        }
    }
    if ($limitApplies && $metager->getHostCount($this->strippedHost) >= 3) {
        return false;
    }

    # Independently of that, the duplicate filter: addLink() reports whether the link was new.
    if (!$metager->addLink($this->strippedLink)) {
        return false;
    }
    $metager->addHostCount($this->strippedHost);

    return true;
}
/**
 * Returns the host part of a link without a leading "www.".
 *
 * Links that do not start with "http" get "http://" prepended so that parse_url()
 * can recognise the host. An empty string is returned when no host can be determined.
 *
 * @param string $link Link with or without scheme.
 * @return string Host without leading "www.".
 */
private function getStrippedHost ($link)
{
    if (strpos($link, "http") !== 0) {
        $link = "http://" . $link;
    }
    # parse_url() yields null/false when there is no host; normalise that to "".
    $host = (string) @parse_url($link, PHP_URL_HOST);

    return preg_replace("/^www\./si", "", $host);
}
/**
 * Returns the already computed stripped host followed by the path of the given link.
 *
 * Relies on $this->strippedHost having been set beforehand.
 *
 * @param string $link Link with or without scheme.
 * @return string Stripped host concatenated with the link's path (query and fragment are not included).
 */
private function getStrippedLink ($link)
{
    if (strpos($link, "http") !== 0) {
        $link = "http://" . $link;
    }
    $path = @parse_url($link, PHP_URL_PATH);

    return $this->strippedHost . $path;
}
/**
 * Reduces a host name to its last two dot-separated labels (e.g. "a.b.example.com" => "example.com").
 *
 * Input without a dot is returned unchanged.
 *
 * @param string $link Host name, typically the stripped host.
 * @return string The last two labels of the host, or the input itself.
 */
private function getStrippedDomain ($link)
{
    if (preg_match("/([^\.]*\.[^\.]*)$/si", $link, $match)) {
        return $match[1];
    }

    return $link;
}
/**
 * Builds the proxy URL under which the given link can be opened.
 *
 * A trailing line break is removed and the "scheme://" prefix is rewritten to "scheme/"
 * before the link is appended to the proxy base URL.
 *
 * @param string $link Target URL.
 * @return string Proxy URL, or "" when $link is empty (or otherwise falsy).
 */
private function generateProxyLink ($link)
{
    if (!$link) {
        return "";
    }
    $target = preg_replace("/\r?\n$/s", "", $link);
    $target = preg_replace("#^([\w+.-]+)://#s", "$1/", $target);

    return "https://proxy.suma-ev.de/cgi-bin/nph-proxy.cgi/en/I0/" . $target;
}
}
\ No newline at end of file
......@@ -168,7 +168,7 @@ abstract class Searchengine
return $fp;
}
private function setStatistic($key, float $val)
private function setStatistic($key, $val)
{
$oldVal = floatval(Redis::hget($this->name, $key)) * $this->uses;
......@@ -263,7 +263,7 @@ abstract class Searchengine
#exit;
}
private function readBody(int $length)
private function readBody($length)
{
$theData = '';
$done = false;
......
This diff is collapsed.
<?php
namespace App\Models\parserSkripte;
use App\Models\Searchengine;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Parser for the HTML result pages of the "Allesklar" engine.
 */
class Allesklar extends Searchengine
{
    protected $tds = "";

    function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager)
    {
        parent::__construct($engine, $metager);
    }

    /**
     * Extracts the results from the engine's HTML response and appends them to $this->results.
     *
     * @param string $result Raw HTML response of the engine.
     */
    public function loadResults ($result)
    {
        $crawler = new Crawler(utf8_decode($result));
        # The callback returns false for the first five matching tables, which makes reduce() drop them.
        $crawler = $crawler
            ->filter('table[width=585]')
            ->reduce(function (Crawler $node, $i) {
                if ($i < 5) {
                    return false;
                }
            });

        $this->counter = 0;
        $crawler->filter('table')->each(function (Crawler $node, $i) {
            try {
                $this->string = "";
                $titleTag = $node->filter('tr > td > a')->first();
                $title = trim($titleTag->filter('a')->text());
                $link = $titleTag->filter('a')->attr('href');
                # In the first table the description is the fourth "bodytext" cell, otherwise the third.
                if ($i === 0) {
                    $descr = trim($node->filter('tr > td.bodytext')->eq(3)->text());
                } else {
                    $descr = trim($node->filter('tr > td.bodytext')->eq(2)->text());
                }

                $this->counter++;
                $this->results[] = new \App\Models\Result(
                    $this->engine,
                    $title,
                    $link,
                    $link,
                    $descr,
                    $this->gefVon,
                    $this->counter
                );
            } catch (\InvalidArgumentException $e) {
                # Tables without the expected link or description cells are skipped.
            }
        });
    }
}
\ No newline at end of file
<?php
namespace app\Models\parserSkripte;
use App\Models\Searchengine;
/**
 * Placeholder parser for the "BASE" engine.
 */
class BASE extends Searchengine
{
    public $results = [];

    function __construct (\SimpleXMLElement $engine,\App\MetaGer $metager)
    {
        parent::__construct($engine, $metager);
    }

    /**
     * Not implemented yet: the response is ignored and no results are added.
     *
     * @param string $result Raw response of the engine.
     */
    public function loadResults ($result)
    {
        # TODO: parse $result and append \App\Models\Result objects to $this->results.
    }
}
\ No newline at end of file
<?php
namespace app\Models\parserSkripte;
use App\Models\Searchengine;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Parser for the HTML result pages of the "Bing" engine.
 */
class Bing extends Searchengine
{
    public $results = [];

    function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager)
    {
        parent::__construct($engine, $metager);
    }

    /**
     * Extracts the organic results from the engine's HTML response and appends them to $this->results.
     *
     * @param string $result Raw HTML response of the engine.
     */
    public function loadResults ($result)
    {
        $crawler = new Crawler($result);
        $crawler->filter('ol#b_results > li.b_algo')->each(function (Crawler $node, $i) {
            try {
                $anchor = $node->filter('li h2 > a');
                $title = $anchor->text();
                $link = $anchor->attr('href');
                $anzeigeLink = $link;
                $descr = $node->filter('li div > p')->text();
            } catch (\InvalidArgumentException $e) {
                # text()/attr() throw on an empty node list; skip entries that lack a
                # title link or description instead of aborting the whole parse.
                return;
            }

            $this->counter++;
            $this->results[] = new \App\Models\Result(
                $this->engine,
                $title,
                $link,
                $anzeigeLink,
                $descr,
                $this->gefVon,
                $this->counter
            );
        });
    }
}
\ No newline at end of file
<?php
namespace app\Models\parserSkripte;
use App\Models\Searchengine;
/**
 * Placeholder parser for the "Dmoznebel" engine.
 */
class Dmoznebel extends Searchengine
{
    public $results = [];

    function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager)
    {
        parent::__construct($engine, $metager);
    }

    /**
     * Not implemented yet: the response is ignored and no results are added.
     *
     * The former body called die($result), which ended the whole request and printed the
     * raw engine response; the code after it (adding a result with empty title and link)
     * was never reached.
     *
     * @param string $result Raw response of the engine.
     */
    public function loadResults ($result)
    {
        # TODO: parse $result and append \App\Models\Result objects to $this->results.
    }
}
\ No newline at end of file
<?php
namespace app\Models\parserSkripte;
use App\Models\Searchengine;
/**
 * Placeholder parser for the "Dmoznebel_int" engine.
 */
class Dmoznebel_int extends Searchengine
{
    public $results = [];

    function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager)
    {
        parent::__construct($engine, $metager);
    }

    /**
     * Not implemented yet: the response is ignored and no results are added.
     *
     * The former body called die($result), which ended the whole request and printed the
     * raw engine response; the code after it (adding a result with empty title and link)
     * was never reached.
     *
     * @param string $result Raw response of the engine.
     */
    public function loadResults ($result)
    {
        # TODO: parse $result and append \App\Models\Result objects to $this->results.
    }
}
\ No newline at end of file
<?php
namespace app\Models\parserSkripte;
use App\Models\Searchengine;
/**
 * Parser for the RSS responses of the "Ebay" engine.
 */
class Ebay extends Searchengine
{
    public $results = [];

    function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager)
    {
        parent::__construct($engine, $metager);
    }

    /**
     * Reads the RSS items from the engine's XML response and appends them to $this->results.
     *
     * @param string $result Raw XML response of the engine.
     */
    public function loadResults ($result)
    {
        try {
            $content = simplexml_load_string($result);
        } catch (\Exception $e) {
            abort(500, "$result is not a valid xml string");
        }
        if (!$content) {
            return;
        }

        $items = $content->xpath('//rss/channel/item');
        # xpath() does not return an array on failure; there is nothing to iterate then.
        if (!is_array($items)) {
            return;
        }

        $count = 0;
        foreach ($items as $item) {
            # Stop once $count exceeds 10.
            if ($count > 10) {
                break;
            }
            $title = $item->{"title"}->__toString();
            $link = $item->{"link"}->__toString();
            $anzeigeLink = $link;
            $descr = strip_tags($item->{"description"}->__toString());

            $this->counter++;
            $this->results[] = new \App\Models\Result(
                $this->engine,
                $title,
                $link,
                $anzeigeLink,
                $descr,
                $this->gefVon,
                $this->counter
            );
            $count++;
        }
    }
}
\ No newline at end of file
<?php
namespace app\Models\parserSkripte;
use App\Models\Searchengine;
/**
 * Placeholder parser for the "Exalead" engine.
 */
class Exalead extends Searchengine
{
    public $results = [];

    function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager)
    {
        parent::__construct($engine, $metager);
    }

    /**
     * Not implemented yet: the response is ignored and no results are added.
     *
     * @param string $result Raw response of the engine.
     */
    public function loadResults ($result)
    {
        # TODO: parse $result and append \App\Models\Result objects to $this->results.
    }
}
\ No newline at end of file
<?php
namespace app\Models\parserSkripte;
use App\Models\Searchengine;
/**
 * Parser for the line-based responses of the "Fastbot" engine.
 */
class Fastbot extends Searchengine
{
    public $results = [];

    /**
     * Disables the engine when the URL-encoded query contains a "%",
     * i.e. when the query needs percent-encoding.
     */
    function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager)
    {
        parent::__construct($engine, $metager);
        if (strpos($this->urlEncode($metager->getEingabe()), "%") !== false) {
            $this->enabled = false;
        }
    }

    /**
     * Parses the response line by line and appends one result per well-formed line to $this->results.
     *
     * Each line is split at "|:|"; field 1 holds the title markup including the link,
     * field 2 the description and field 3 the displayed link.
     *
     * @param string $result Raw response of the engine.
     */
    public function loadResults ($result)
    {
        $result = utf8_encode($result);
        $counter = 0;
        foreach (explode("\n", $result) as $line) {
            $line = trim($line);
            if (strlen($line) === 0) {
                continue;
            }

            # Every remaining line is a single result.
            $fields = explode("|:|", $line);
            # Skip malformed lines instead of reading undefined offsets.
            if (count($fields) < 4) {
                continue;
            }

            # The target URL is the value of the first href attribute in the title markup.
            $hrefStart = strpos($fields[1], "href=\"");
            if ($hrefStart === false) {
                continue;
            }
            $link = substr($fields[1], $hrefStart + 6);
            $hrefEnd = strpos($link, "\"");
            if ($hrefEnd === false) {
                continue;
            }
            $link = substr($link, 0, $hrefEnd);

            $counter++;
            $this->results[] = new \App\Models\Result(
                $this->engine,
                trim(strip_tags($fields[1])),
                $link,
                $fields[3],
                $fields[2],
                $this->gefVon,
                $counter
            );
        }
    }
}
\ No newline at end of file
<?php
namespace app\Models\parserSkripte;
use App\Models\Searchengine;
class Goyax extends Searchengine
{
public $results = [];
/**
 * Hands the engine configuration and the MetaGer instance on to the Searchengine base constructor.
 */
function __construct (\SimpleXMLElement $engine, \App\MetaGer $metager)
{
    parent::__construct(
        $engine,
        $metager
    );
}
<<<<<<< HEAD
public function loadResults ($result)
=======
public function loadResults (String $result)
>>>>>>> e060ccae7fd0ede5daca4f3bfa267bd1418fde7a
{
die($result);
$results = trim($result);
foreach( explode("\n", $results) as $result )
{
$res = explode("|", $result);
if(sizeof($res) < 3)
{
continue;
}
$title = $res[0];