Commit ac9abf35 authored by Dominik Hebeler

Integrated Scopia

parent 6b6a1566
@@ -3,18 +3,16 @@
namespace App\Jobs;
use Illuminate\Bus\Queueable;
use Illuminate\Queue\SerializesModels;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\DispatchesJobs;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Redis;
use Log;
class Searcher implements ShouldQueue
{
use InteractsWithQueue, Queueable, SerializesModels;
protected $name, $ch, $pid, $counter, $lastTime, $connectionInfo;
protected $name, $ch, $pid, $counter, $lastTime, $connectionInfo, $user, $password;
# Each Searcher will shutdown after a specified time(s) or number of requests
protected $MAX_REQUESTS = 100;
# This value should always be below the retry_after value in config/queue.php
@@ -34,12 +32,14 @@ class Searcher implements ShouldQueue
* keep-alive requests.
* @return void
*/
public function __construct($name)
public function __construct($name, $user = null, $password = null)
{
$this->name = $name;
$this->pid = getmypid();
$this->recheck = false;
$this->startTime = microtime(true);
$this->user = $user;
$this->password = $password;
// Submit this worker to the Redis System
Redis::expire($this->name, 5);
}
@@ -53,9 +53,10 @@ class Searcher implements ShouldQueue
{
// This Searcher is freshly called, so we need to initialize the curl handle $ch
$this->ch = $this->initCurlHandle();
try {
$this->counter = 0; // Counts the number of answered jobs
$time = microtime(true);
while(true){
while (true) {
// Update the expire
Redis::expire($this->name, 5);
Redis::expire($this->name . ".stats", 5);
@@ -69,7 +70,7 @@ class Searcher implements ShouldQueue
$this->updateStats(microtime(true) - $time);
$this->switchToRunning();
// The mission can be empty when blpop hit the timeout
if(!empty($mission)){
if (!empty($mission)) {
$mission = $mission[1];
$poptime = microtime(true) - $time;
@@ -97,17 +98,20 @@ class Searcher implements ShouldQueue
// In sync mode every Searcher may only retrieve one result because it would block
// the execution of the remaining code otherwise:
if(getenv("QUEUE_DRIVER") === "sync"
if (getenv("QUEUE_DRIVER") === "sync"
|| $this->counter > $this->MAX_REQUESTS
|| (microtime(true)-$this->startTime) > $this->MAX_TIME){
|| (microtime(true) - $this->startTime) > $this->MAX_TIME) {
break;
}
}
} finally {
// When we reach this point, time has come for this Searcher to retire
$this->shutdown();
}
}
private function switchToRunning(){
private function switchToRunning()
{
/**
* When a Searcher is initially started the redis value for $this->name is set to "locked"
* which effectively prevents new Searchers of this type from being started. (The value is checked by the MetaGer process which starts the Searchers.)
@@ -119,25 +123,27 @@ class Searcher implements ShouldQueue
* When a search engine needs more time to produce search results than the timeout of the MetaGer process, we won't even bother spawning
* more and more Searchers because they would just block free worker processes from serving the important engines which will give results in time.
**/
if($this->counter === 3 || getenv("QUEUE_DRIVER") === "sync"){
if ($this->counter === 3 || getenv("QUEUE_DRIVER") === "sync") {
# If the MetaGer process waits longer for the results than this Fetcher will probably need to fetch
# Or if this engine is in the array of important engines which we will always try to serve
Redis::set($this->name, "running");
$this->recheck = false;
}
}
private function updateStats($poptime){
if($this->connectionInfo !== NULL){
private function updateStats($poptime)
{
if ($this->connectionInfo !== null) {
$connectionInfo = base64_encode(json_encode($this->connectionInfo));
Redis::hset($this->name . ".stats", $this->pid, $connectionInfo . ";" . $poptime);
}
}
private function getFetchTime(){
private function getFetchTime()
{
$vals = Redis::hgetall($this->name . ".stats");
if(sizeof($vals) === 0){
if (sizeof($vals) === 0) {
return 0;
}else{
} else {
$totalTime = 0;
foreach ($vals as $pid => $value) {
$time = floatval(json_decode(base64_decode(explode(";", $value)[0]), true)["total_time"]);
@@ -149,7 +155,8 @@ class Searcher implements ShouldQueue
}
}
private function retrieveUrl($url){
private function retrieveUrl($url)
{
// Set this URL to the Curl handle
curl_setopt($this->ch, CURLOPT_URL, $url);
$result = curl_exec($this->ch);
@@ -157,36 +164,43 @@ class Searcher implements ShouldQueue
return $result;
}
private function storeResult($result, $poptime, $hashValue){
private function storeResult($result, $poptime, $hashValue)
{
Redis::hset('search.' . $hashValue, $this->name, $result);
// After 60 seconds the results should be read by the MetaGer Process and stored in the Cache instead
Redis::expire('search.' . $hashValue, 60);
$this->lastTime = microtime(true);
}
private function shutdown(){
private function shutdown()
{
Redis::hdel($this->name . ".stats", $this->pid);
if(sizeof(Redis::hgetall($this->name . ".stats")) === 0){
if (sizeof(Redis::hgetall($this->name . ".stats")) === 0) {
Redis::del($this->name);
}
// We should close our curl handle before we do so
curl_close($this->ch);
}
private function initCurlHandle(){
private function initCurlHandle()
{
$ch = curl_init();
curl_setopt_array($ch, array(
CURLOPT_RETURNTRANSFER => 1,
CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
CURLOPT_FOLLOWLOCATION => TRUE,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_CONNECTTIMEOUT => 10,
CURLOPT_MAXCONNECTS => 500,
CURLOPT_LOW_SPEED_LIMIT => 500,
CURLOPT_LOW_SPEED_TIME => 5,
CURLOPT_TIMEOUT => 10
CURLOPT_TIMEOUT => 10,
));
if ($this->user !== null && $this->password !== null) {
curl_setopt($ch, CURLOPT_USERPWD, $this->user . ":" . $this->password);
}
return $ch;
}
}
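For context, the practical effect of the new optional $user/$password parameters: when both are set, the Searcher's curl handle authenticates every request with HTTP Basic auth. Below is a minimal standalone sketch of the resulting curl setup, not part of the commit; the URL and credentials are hypothetical placeholders.

<?php
// Sketch only: mirrors what initCurlHandle() builds, with placeholder values.
$user = "example-user";        // would come from the Searcher's $this->user
$password = "example-secret";  // would come from the Searcher's $this->password

$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT => 10,
));
if ($user !== null && $password !== null) {
    // CURLOPT_USERPWD makes curl send an "Authorization: Basic ..." header
    curl_setopt($ch, CURLOPT_USERPWD, $user . ":" . $password);
}
curl_setopt($ch, CURLOPT_URL, "https://example.com/search?q=test");
$result = curl_exec($ch);
curl_close($ch);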
@@ -114,13 +114,13 @@ abstract class Searchengine
// With <ResultHash> being the Hash Value where the fetcher will store the result.
// and <URL to fetch> being the full URL to the searchengine
$url = "";
if($this->port === "443"){
if ($this->port === "443") {
$url = "https://";
}else{
} else {
$url = "http://";
}
$url .= $this->host;
if($this->port !== 80 && $this->port !== 443){
if ($this->port !== 80 && $this->port !== 443) {
$url .= ":" . $this->port;
}
$url .= $this->getString;
@@ -150,9 +150,9 @@ abstract class Searchengine
// Each searcher has one entry in it.
// So if it's empty, then we have currently no searcher running and
// of course need to spawn a new one.
if(sizeof($searcherData) === 0){
if (sizeof($searcherData) === 0) {
$needSearcher = true;
}else{
} else {
// There we go:
// There's at least one Fetcher running for this search engine.
// Now we have to check if the current count is enough to fetch all the
@@ -160,18 +160,18 @@ abstract class Searchengine
// Let's hardcode a minimum of 100ms between every search job.
// First calculate the median of all Times
$median = 0;
foreach($searcherData as $pid => $data){
foreach ($searcherData as $pid => $data) {
$data = explode(";", $data);
$median += floatval($data[1]);
}
$median /= sizeof($searcherData);
if($median < .1){
if ($median < .1) {
$needSearcher = true;
}
}
if($needSearcher && Redis::get($this->name) !== "locked"){
if ($needSearcher && Redis::get($this->name) !== "locked") {
Redis::set($this->name, "locked");
$this->dispatch(new Searcher($this->name));
$this->dispatch(new Searcher($this->name, $this->user, $this->password));
}
}
}
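The spawn decision above hinges on a small amount of Redis bookkeeping that each Searcher maintains. The following is a rough sketch of that key layout, assuming the Laravel Redis facade; the engine name and all values are illustrative, not taken from the commit.

<?php
use Illuminate\Support\Facades\Redis;

$name = "scopia"; // illustrative engine name

// State flag checked before dispatching: "locked" while a new Searcher is
// being started, "running" once it has answered a few jobs (switchToRunning).
Redis::set($name, "locked");

// Per-worker stats hash: pid => base64(connectionInfo) . ";" . poptime.
// An empty hash means no Searcher is currently alive for this engine.
Redis::hset($name . ".stats", (string) getmypid(), base64_encode("{}") . ";" . "0.05");

// Results are written into a per-search hash and expire after 60 seconds,
// by which time the MetaGer process should have moved them into its cache.
$hashValue = md5("illustrative search");
Redis::hset("search." . $hashValue, $name, "<results/>");
Redis::expire("search." . $hashValue, 60);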
@@ -203,7 +203,8 @@ abstract class Searchengine
$this->enabled = true;
}
public function setResultHash($hash){
public function setResultHash($hash)
{
$this->resultHash = $hash;
}
<?php
namespace app\Models\parserSkripte;
use App\Models\Searchengine;
class Fastbot extends Searchengine
{
public $results = [];
public function __construct(\SimpleXMLElement $engine, \App\MetaGer $metager)
{
parent::__construct($engine, $metager);
if (strpos($this->urlEncode($metager->getEingabe()), "%") !== false) {
$this->enabled = false;
return null;
}
}
public function loadResults($result)
{
$result = utf8_encode($result);
$counter = 0;
foreach (explode("\n", $result) as $line) {
$line = trim($line);
if (strlen($line) > 0) {
# Here we get each individual result
$result = explode("|:|", $line);
if (count($result) < 4) continue;
$link = $result[1];
$link = substr($link, strpos($link, "href=\"") + 6);
$link = substr($link, 0, strpos($link, "\""));
$counter++;
$this->gefVon = "<a href=\"" . $this->homepage . "\" target=\"_blank\" rel=\"noopener\">" . $this->displayName . " " . trans('results.redirect') . "</a>";
$this->results[] = new \App\Models\Result(
$this->engine,
trim(strip_tags($result[1])),
$link,
$result[3],
$result[2],
$this->displayName, $this->homepage,
$counter
);
}
}
}
}
<?php
namespace app\Models\parserSkripte;
use App\Models\Searchengine;
use Log;
class Scopia extends Searchengine
{
public $results = [];
public function __construct(\SimpleXMLElement $engine, \App\MetaGer $metager)
{
parent::__construct($engine, $metager);
}
public function loadResults($result)
{
$result = html_entity_decode($result);
$result = str_replace("&", "&amp;", $result);
try {
$content = simplexml_load_string($result);
if (!$content) {
return;
}
$results = $content->xpath('//results/result');
foreach ($results as $result) {
$title = $result->title->__toString();
$link = $result->url->__toString();
$anzeigeLink = $link;
$descr = $result->description->__toString();
$this->counter++;
$this->results[] = new \App\Models\Result(
$this->engine,
$title,
$link,
$anzeigeLink,
$descr,
$this->gefVon,
$this->counter
);
}
} catch (\Exception $e) {
Log::error("A problem occurred parsing results from $this->name:");
Log::error($e->getMessage());
return;
}
}
public function getNext(\App\MetaGer $metager, $result)
{
$result = html_entity_decode($result);
$result = str_replace("&", "&amp;", $result);
try {
$content = simplexml_load_string($result);
} catch (\Exception $e) {
Log::error("A problem occurred parsing results from $this->name:");
Log::error($e->getMessage());
return;
}
if (!$content) {
return;
}
$more = $content->xpath('//results/more')[0]->__toString() === "1" ? true : false;
if ($more) {
$results = $content->xpath('//results/result');
$number = $results[sizeof($results) - 1]->number->__toString();
# Create the new search engine object and adjust the GetString:
$next = new Scopia(simplexml_load_string($this->engine), $metager);
$next->getString = preg_replace("/\\?s=.*?&/si", "?s=" . $number . "&", $next->getString);
$next->hash = md5($next->host . $next->getString . $next->port . $next->name);
$this->next = $next;
}
}
}
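To make the parser above easier to follow, here is a guess at the response shape it expects, inferred solely from the xpath queries and element accesses in loadResults() and getNext(); the element values are invented for illustration.

<?php
$xml = <<<XML
<results>
    <more>1</more>
    <result>
        <number>1</number>
        <title>Example title</title>
        <url>https://example.com/</url>
        <description>Example description</description>
    </result>
</results>
XML;

$content = simplexml_load_string($xml);
foreach ($content->xpath('//results/result') as $result) {
    // loadResults() turns each of these into an \App\Models\Result
    echo $result->title->__toString(), " -> ", $result->url->__toString(), "\n";
}
// getNext() additionally checks <more> ("1" means another page exists) and
// uses the <number> of the last result to rewrite the "s=" URL parameter.
$more = $content->xpath('//results/more')[0]->__toString() === "1";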