Commit 59da7150 authored by Dominik Hebeler's avatar Dominik Hebeler

Merge branch 'development' of https://gitlab.metager.de/open-source/MetaGer into development

parents 3770307c 43f2b4c1
......@@ -33,12 +33,11 @@ EXPOSE 80
COPY config/nginx.conf /etc/nginx/nginx.conf
COPY config/nginx-default.conf /etc/nginx/conf.d/default.conf
COPY . /html
COPY --chown=root:nginx . /html
RUN chmod -R g+w storage bootstrap/cache
CMD /etc/init.d/cron start && \
/etc/init.d/php7.3-fpm start && \
/etc/init.d/nginx start && \
/etc/init.d/redis-server start && \
chmod -R 0777 /html/storage && \
chmod -R 0777 /html/bootstrap/cache && \
php artisan worker:spawner
su -s /bin/bash -c 'php artisan requests:fetcher' nginx
<?php
namespace App;
use Illuminate\Support\Facades\Redis;
class CacheHelper
{
    /**
     * Hand a cache entry over to the background cache writers.
     *
     * Instead of writing to the cache store directly, the entry is
     * JSON-encoded and appended to the Redis list named by
     * RequestCacher::CACHER_QUEUE. The rationale given by the original
     * authors: the configured cache lives on a fairly slow hard drive, so
     * separate processes write entries to disk in parallel.
     *
     * @param mixed $key         Cache key of the entry.
     * @param mixed $value       Value to store; it must survive json_encode().
     * @param mixed $timeSeconds Lifetime of the entry in seconds.
     */
    public static function put($key, $value, $timeSeconds)
    {
        $queueName = \App\Console\Commands\RequestCacher::CACHER_QUEUE;
        $payload = json_encode([
            'timeSeconds' => $timeSeconds,
            'key' => $key,
            'value' => $value,
        ]);

        Redis::rpush($queueName, $payload);
    }
}
<?php
namespace App\Console\Commands;
use Illuminate\Console\Command;
class CacheGC extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'cache:gc';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Cleans up every expired cache File';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * Walks the top level of storage/framework/cache, deletes every file
     * whose leading timestamp lies in the past and tries to remove every
     * directory (which only succeeds for empty ones). A lock file named
     * "cache.gc" inside the cache directory prevents two runs from
     * overlapping; it is removed again when the run ends.
     *
     * @return mixed
     */
    public function handle()
    {
        $cachedir = storage_path('framework/cache');
        $lockfile = $cachedir . "/cache.gc";

        // Another garbage collection run is still in progress.
        if (file_exists($lockfile)) {
            return;
        }
        touch($lockfile);

        try {
            foreach (new \DirectoryIterator($cachedir) as $fileInfo) {
                if ($fileInfo->isDot()) {
                    continue;
                }

                $file = $fileInfo->getPathname();
                $basename = basename($file);

                if (is_dir($file)) {
                    // Delete the directory if it is empty. rmdir() fails on a
                    // non-empty directory, which is the expected case here.
                    try {
                        rmdir($file);
                    } catch (\ErrorException $e) {
                    }
                    continue;
                }

                // Never touch our own lock file or the repository placeholder.
                if ($basename === "cache.gc" || $basename === ".gitignore") {
                    continue;
                }

                try {
                    if ($this->isExpired($file)) {
                        unlink($file);
                    }
                } catch (\ErrorException $e) {
                    // Other processes modify the cache concurrently, so a file
                    // may vanish or be unreadable by the time we get to it.
                    // One such file must not abort the whole run.
                    $this->warn("Skipping cache file " . $file . ": " . $e->getMessage());
                }
            }
        } finally {
            unlink($lockfile);
        }
    }

    /**
     * Tell whether a cache file has expired.
     *
     * The first 10 bytes of the file are read and interpreted as a unix
     * timestamp; the file counts as expired when that timestamp is smaller
     * than the current time.
     *
     * @param string $file Path of the cache file.
     * @return bool False when the file is not expired or could not be opened.
     */
    private function isExpired($file)
    {
        $fp = fopen($file, 'r');
        if ($fp === false) {
            // Unreadable files are kept rather than treated as expired.
            return false;
        }

        try {
            return intval(fread($fp, 10)) < time();
        } finally {
            fclose($fp);
        }
    }
}
<?php
namespace App\Console\Commands;
use Cache;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Redis;
class RequestCacher extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'requests:cacher';

    /**
     * Name of the Redis list that producers push cache entries onto.
     */
    const CACHER_QUEUE = 'cacher.queue';

    /**
     * Loop flag; set to false by the signal handler to stop the command.
     *
     * @var bool
     */
    protected $shouldRun = true;

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Listens to a buffer of fetched search results and writes them to the filesystem cache.';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * Pops JSON-encoded entries from CACHER_QUEUE (blocking for at most one
     * second per attempt) and writes each one to the cache until a
     * SIGINT/SIGTERM/SIGHUP is received.
     *
     * Entries are expected in the shape the producers push:
     * {"key": ..., "value": ..., "timeSeconds": <lifetime in seconds>}.
     * The older shape {"hash": ..., "body": ..., "cacheDuration": <minutes>}
     * is still accepted.
     *
     * @return mixed
     */
    public function handle()
    {
        pcntl_async_signals(true);
        pcntl_signal(SIGINT, [$this, "sig_handler"]);
        pcntl_signal(SIGTERM, [$this, "sig_handler"]);
        pcntl_signal(SIGHUP, [$this, "sig_handler"]);

        while ($this->shouldRun) {
            $cacheItem = Redis::blpop(self::CACHER_QUEUE, 1);
            if (empty($cacheItem)) {
                // Timeout without a new entry; re-check the loop flag.
                continue;
            }

            $cacheItem = json_decode($cacheItem[1], true);
            if (!is_array($cacheItem)) {
                \Log::warning("Discarding cacher queue entry that is not valid JSON");
                continue;
            }

            if (isset($cacheItem["key"], $cacheItem["timeSeconds"])) {
                // Current format: lifetime is given in seconds.
                $key = $cacheItem["key"];
                $value = $cacheItem["value"] ?? null;
                $expiresAt = now()->addSeconds((int) $cacheItem["timeSeconds"]);
            } elseif (isset($cacheItem["hash"], $cacheItem["cacheDuration"])) {
                // Legacy format: lifetime is given in minutes.
                $key = $cacheItem["hash"];
                $value = $cacheItem["body"] ?? null;
                $expiresAt = now()->addMinutes((int) $cacheItem["cacheDuration"]);
            } else {
                \Log::warning("Discarding cacher queue entry without key or lifetime");
                continue;
            }

            // An empty value is stored as a marker string instead.
            if (empty($value)) {
                $value = "no-result";
            }

            Cache::put($key, $value, $expiresAt);
        }
    }

    /**
     * Signal handler: ask the main loop to stop after the current iteration.
     *
     * @param int $sig Number of the received signal.
     * @return void
     */
    public function sig_handler($sig)
    {
        $this->shouldRun = false;
        echo ("Terminating Cacher Process\n");
    }
}
<?php
namespace App\Console\Commands;
use Artisan;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Redis;
use Log;
class RequestFetcher extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'requests:fetcher';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'This commands fetches requests to the installed search engines';

    /**
     * Loop flag; set to false by the signal handler to stop the command.
     *
     * @var bool
     */
    protected $shouldRun = true;

    /**
     * The curl multi handle all transfers are attached to.
     */
    protected $multicurl = null;

    /**
     * Proxy settings read from the environment (PROXY_HOST, PROXY_PORT,
     * PROXY_USER, PROXY_PASSWORD).
     */
    protected $proxyhost, $proxyport, $proxyuser, $proxypassword;

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
        $this->multicurl = curl_multi_init();
        $this->proxyhost = env("PROXY_HOST", "");
        $this->proxyport = env("PROXY_PORT", "");
        $this->proxyuser = env("PROXY_USER", "");
        $this->proxypassword = env("PROXY_PASSWORD", "");
    }

    /**
     * Execute the console command.
     *
     * Forks up to five child processes that each run the "requests:cacher"
     * command and exit afterwards. The parent process then pops fetch jobs
     * from the Redis list MetaGer::FETCHQUEUE_KEY, runs them concurrently on
     * one curl multi handle and stores every finished transfer (see
     * storeFinishedTransfer()) until SIGINT/SIGTERM/SIGHUP is received.
     *
     * @return mixed
     */
    public function handle()
    {
        $pids = [];
        $pid = null;
        for ($i = 0; $i < 5; $i++) {
            $pid = \pcntl_fork();
            if ($pid === 0) {
                // We are a child: stop forking.
                break;
            }
            if ($pid > 0) {
                // Only remember real children; -1 signals a failed fork.
                $pids[] = $pid;
            }
        }

        if ($pid === 0) {
            Artisan::call('requests:cacher');
            exit;
        } else {
            pcntl_async_signals(true);
            pcntl_signal(SIGINT, [$this, "sig_handler"]);
            pcntl_signal(SIGTERM, [$this, "sig_handler"]);
            pcntl_signal(SIGHUP, [$this, "sig_handler"]);
        }

        try {
            // While transfers are running the queue is polled without
            // blocking; once everything is idle we block on Redis instead.
            $blocking = false;
            while ($this->shouldRun) {
                $status = curl_multi_exec($this->multicurl, $active);

                $currentJob = null;
                if (!$blocking) {
                    $currentJob = Redis::lpop(\App\MetaGer::FETCHQUEUE_KEY);
                } else {
                    $currentJob = Redis::blpop(\App\MetaGer::FETCHQUEUE_KEY, 1);
                    if (!empty($currentJob)) {
                        // blpop answers with [listname, value]
                        $currentJob = $currentJob[1];
                    }
                }

                if (!empty($currentJob)) {
                    $currentJob = json_decode($currentJob, true);
                    if ($this->isValidJob($currentJob)) {
                        $ch = $this->getCurlHandle($currentJob);
                        curl_multi_add_handle($this->multicurl, $ch);
                        $blocking = false;
                        $active = true;
                    } else {
                        Log::error("Discarding malformed fetch job");
                    }
                }

                $answerRead = false;
                while (($info = curl_multi_info_read($this->multicurl)) !== false) {
                    $answerRead = true;
                    $this->storeFinishedTransfer($info["handle"]);
                    \curl_multi_remove_handle($this->multicurl, $info["handle"]);
                    // Release the finished transfer right away; this loop
                    // runs for the whole lifetime of the process.
                    \curl_close($info["handle"]);
                }

                if (!$active && !$answerRead) {
                    $blocking = true;
                }
            }
        } finally {
            curl_multi_close($this->multicurl);
        }

        // Collect children that already exited. WNOHANG: do not wait for the
        // ones that are still running.
        // NOTE(review): the children are not signalled from here - confirm
        // that whatever stops this process also stops them.
        foreach ($pids as $tmppid) {
            \pcntl_waitpid($tmppid, $status, WNOHANG);
        }
    }

    /**
     * Check that a decoded job carries everything getCurlHandle() needs.
     *
     * @param mixed $job Result of json_decode() on a queue entry.
     * @return bool
     */
    private function isValidJob($job)
    {
        return is_array($job)
            && !empty($job["url"])
            && isset($job["resulthash"], $job["cacheDuration"]);
    }

    /**
     * Store the outcome of a finished transfer in Redis.
     *
     * The result hash and cache duration are recovered from the handle's
     * private data ("<resulthash>;<cacheDuration>"). The response body (or an
     * empty string when the response code is not 200) is written under the
     * result hash with a 60 second expiry and additionally queued on
     * RequestCacher::CACHER_QUEUE with the cache duration converted from
     * minutes to seconds.
     *
     * @param resource $handle The finished curl easy handle.
     * @return void
     */
    private function storeFinishedTransfer($handle)
    {
        $infos = explode(";", curl_getinfo($handle, CURLINFO_PRIVATE));
        $resulthash = $infos[0];
        $cacheDurationMinutes = intval($infos[1]);
        $responseCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
        $body = "";

        $error = curl_error($handle);
        if (!empty($error)) {
            Log::error($error);
        }

        if ($responseCode !== 200) {
            Log::debug("Got responsecode " . $responseCode . " fetching \"" . curl_getinfo($handle, CURLINFO_EFFECTIVE_URL) . "\n");
        } else {
            $body = \curl_multi_getcontent($handle);
        }

        Redis::pipeline(function ($pipe) use ($resulthash, $body, $cacheDurationMinutes) {
            $pipe->set($resulthash, $body);
            $pipe->expire($resulthash, 60);
            $cacherItem = [
                'timeSeconds' => $cacheDurationMinutes * 60,
                'key' => $resulthash,
                'value' => $body,
            ];
            $pipe->rpush(\App\Console\Commands\RequestCacher::CACHER_QUEUE, json_encode($cacherItem));
        });
    }

    /**
     * Build the curl easy handle for one fetch job.
     *
     * @param array $job Decoded job with "url", "resulthash" and
     *                   "cacheDuration"; optionally "username"/"password"
     *                   and "headers" (header name => header value).
     * @return resource The configured curl handle.
     */
    private function getCurlHandle($job)
    {
        $ch = curl_init();

        curl_setopt_array($ch, array(
            CURLOPT_URL => $job["url"],
            // Carried along so the result can be attributed once finished.
            CURLOPT_PRIVATE => $job["resulthash"] . ";" . $job["cacheDuration"],
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_CONNECTTIMEOUT => 10,
            CURLOPT_MAXCONNECTS => 500,
            CURLOPT_LOW_SPEED_LIMIT => 500,
            CURLOPT_LOW_SPEED_TIME => 5,
            CURLOPT_TIMEOUT => 10,
        ));

        // The SOCKS5 proxy is only used when it is configured completely.
        if (!empty($this->proxyhost) && !empty($this->proxyport) && !empty($this->proxyuser) && !empty($this->proxypassword)) {
            curl_setopt($ch, CURLOPT_PROXY, $this->proxyhost);
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxyuser . ":" . $this->proxypassword);
            curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxyport);
            curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
        }

        if (!empty($job["username"]) && !empty($job["password"])) {
            curl_setopt($ch, CURLOPT_USERPWD, $job["username"] . ":" . $job["password"]);
        }

        if (!empty($job["headers"])) {
            // Each header is handed to curl as one "<key>:<value>" string.
            $headers = [];
            foreach ($job["headers"] as $key => $value) {
                $headers[] = $key . ":" . $value;
            }
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        }

        return $ch;
    }

    /**
     * Signal handler: ask the main loop to stop after the current iteration.
     *
     * @param int $sig Number of the received signal.
     * @return void
     */
    public function sig_handler($sig)
    {
        $this->shouldRun = false;
        echo ("Terminating Process\n");
    }
}
<?php
namespace App\Console\Commands;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Redis;
class WorkerSpawner extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'worker:spawner';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'This command makes sure that enough worker processes are spawned';

    /**
     * Loop flag; set to false by the signal handler to stop the command.
     *
     * @var bool
     */
    protected $shouldRun = true;

    /**
     * Spawned workers, each in the shape returned by spawnWorker().
     *
     * @var array
     */
    protected $processes = [];

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * Polls the Redis list "queues:default". Whenever it holds jobs, the
     * command waits until the list length stops changing, spawns worker
     * processes for the queued jobs and then waits until all of those job
     * ids have left the list. Every tenth iteration, workers that are no
     * longer running are closed. All remaining workers are closed when the
     * command ends.
     *
     * @return mixed
     */
    public function handle()
    {
        pcntl_async_signals(true);
        pcntl_signal(SIGINT, [$this, "sig_handler"]);
        pcntl_signal(SIGTERM, [$this, "sig_handler"]);
        pcntl_signal(SIGHUP, [$this, "sig_handler"]);

        try {
            $counter = 0;
            while ($this->shouldRun) {
                $counter = ($counter + 1) % 10;

                $length = Redis::llen("queues:default");
                if ($length > 0) {
                    // Wait until the queue length is stable for 50ms.
                    while ($this->shouldRun) {
                        usleep(50 * 1000);
                        $currentLength = Redis::llen("queues:default");
                        if ($currentLength === $length) {
                            break;
                        }
                        $length = $currentLength;
                    }

                    $jobs = Redis::lrange("queues:default", 0, -1);
                    $length = sizeof($jobs) + 5;
                    $ids = $this->getJobIds($jobs);

                    for ($i = 0; $i <= $length; $i++) {
                        $worker = $this->spawnWorker();
                        // A worker that failed to start must not end up in
                        // the list; the cleanup below expects real processes.
                        if ($worker !== null) {
                            $this->processes[] = $worker;
                        }
                    }

                    // Wait until every job seen above has left the queue.
                    // Honouring shouldRun lets a termination signal end this
                    // wait, too.
                    while ($this->shouldRun && sizeof($ids) > 0) {
                        $jobs = Redis::lrange("queues:default", 0, -1);
                        $newIds = $this->getJobIds($jobs);
                        foreach ($ids as $index => $id) {
                            if (!in_array($id, $newIds, true)) {
                                unset($ids[$index]);
                                break;
                            }
                        }
                    }
                } else {
                    usleep(100 * 1000); // Sleep for 100ms
                }

                if ($counter === 0) {
                    // Close the workers that have finished in the meantime.
                    $newProcs = [];
                    foreach ($this->processes as $process) {
                        $infos = proc_get_status($process["process"]);
                        if (!$infos["running"]) {
                            $this->closeWorker($process);
                        } else {
                            $newProcs[] = $process;
                        }
                    }
                    $this->processes = $newProcs;
                }
            }
        } finally {
            foreach ($this->processes as $process) {
                $this->closeWorker($process);
            }
        }
    }

    /**
     * Close a worker's stdout pipe and its process handle.
     *
     * @param array $process Worker entry as returned by spawnWorker().
     * @return void
     */
    private function closeWorker($process)
    {
        fclose($process["pipes"][1]);
        proc_close($process["process"]);
    }

    /**
     * Extract the "id" field from a list of JSON-encoded jobs.
     *
     * @param array $jobs Raw entries of the queue list.
     * @return array The ids in the order of $jobs; null for an entry that
     *               has no id.
     */
    private function getJobIds($jobs)
    {
        $result = [];
        foreach ($jobs as $job) {
            $decoded = json_decode($job, true);
            $result[] = $decoded["id"] ?? null;
        }
        return $result;
    }

    /**
     * Signal handler: ask the main loop to stop.
     *
     * Public like the handlers of the sibling commands, so that the callback
     * registered with pcntl_signal() stays callable no matter which code is
     * executing when the signal is dispatched.
     *
     * @param int $sig Number of the received signal.
     * @return void
     */
    public function sig_handler($sig)
    {
        $this->shouldRun = false;
        echo ("Terminating Process\n");
    }

    /**
     * Start one "queue:work" process.
     *
     * @return array|null Array with the keys "process", "pipes" and
     *                    "working", or null when the process could not be
     *                    started.
     */
    private function spawnWorker()
    {
        $descriptorspec = array(
            0 => array("pipe", "r"), // STDIN is a pipe the child reads from
            1 => array("pipe", "w"), // STDOUT is a pipe the child writes to
            2 => array("file", "/tmp/worker-error.txt", "a"), // STDERR is appended to a file
        );
        $cwd = getcwd();
        $env = array();

        $process = proc_open('php artisan queue:work --stop-when-empty --sleep=1', $descriptorspec, $pipes, $cwd, $env);
        if (!is_resource($process)) {
            return null;
        }

        // Nothing is sent to the worker, so its STDIN is closed right away.
        fclose($pipes[0]);
        \stream_set_blocking($pipes[1], 0);

        return [
            "process" => $process,
            "pipes" => $pipes,
            "working" => false,
        ];
    }
}
......@@ -27,6 +27,7 @@ class Kernel extends ConsoleKernel
{
$schedule->command('requests:gather')->everyFifteenMinutes();
$schedule->command('requests:useragents')->everyFiveMinutes();
$schedule->command('cache:gc')->hourly();
$schedule->call(function () {
DB::table('monthlyrequests')->truncate();
......
......@@ -14,6 +14,7 @@ class MetaGerSearch extends Controller
{
public function search(Request $request, MetaGer $metager)
{
$time = microtime(true);
$spamEntries = [];
if (file_exists(config_path('spam.txt'))) {
$spamEntries = file(config_path('spam.txt'));
......
......@@ -50,7 +50,7 @@ class HumanVerification
# Get all Users of this IP
$users = Cache::get($prefix . "." . $id, []);
$users = $this->removeOldUsers($users);
$users = $this->removeOldUsers($prefix, $users);
$user = [];
if (empty($users[$uid])) {
......@@ -148,10 +148,10 @@ class HumanVerification
// Lock must be acquired within 2 seconds
$userList = Cache::get($prefix . "." . $user["id"], []);
$userList[$user["uid"]] = $user;
Cache::put($prefix . "." . $user["id"], $userList, now()->addWeeks(2));
\App\CacheHelper::put($prefix . "." . $user["id"], $userList, 2 * 7 * 24 * 60 * 60);
}
public function removeOldUsers($userList)
public function removeOldUsers($prefix, $userList)
{
$newUserlist = [];
$now = now();
......@@ -168,10 +168,7 @@ class HumanVerification
}
if ($changed) {
// Lock must be acquired within 2 seconds
Cache::lock($prefix . "." . $user["id"])->block(2, function () {
Cache::put($prefix . "." . $user["id"], $newUserlist, now()->addWeeks(2));
});
\App\CacheHelper::put($prefix . "." . $user["id"], $newUserlist, 2 * 7 * 24 * 60 * 60);
}
return $newUserlist;
......
This diff is collapsed.
......@@ -14,6 +14,8 @@ use Predis\Connection\ConnectionException;
class MetaGer
{
const FETCHQUEUE_KEY = "fetcher.queue";
# Einstellungen für die Suche
public $alteredQuery = "";
public $alterationOverrideQuery = "";
......@@ -321,7 +323,7 @@ class MetaGer
'page' => $page,
'engines' => $this->next,
];
Cache::put($this->getSearchUid(), serialize($this->next), 60);
\App\CacheHelper::put($this->getSearchUid(), serialize($this->next), 60 * 60);
} else {
$this->next = [];
}
......@@ -780,13 +782,12 @@ class MetaGer
public function waitForMainResults()
{
$redis = Redis::connection(env('REDIS_RESULT_CONNECTION'));
$engines = $this->engines;
$enginesToWaitFor = [];
$mainEngines = $this->sumaFile->foki->{$this->fokus}->main;
foreach ($mainEngines as $mainEngine) {
foreach ($engines as $engine) {
if (!$engine->cached && $engine->name === $mainEngine) {
if ($engine->name === $mainEngine) {
$enginesToWaitFor[] = $engine;
}
}
......@@ -803,41 +804,38 @@ class MetaGer
}
while (sizeof($enginesToWaitFor) > 0 || ($forceTimeout !== null && (microtime(true) - $timeStart) < $forceTimeout)) {
$newEngine = $redis->blpop($this->redisResultWaitingKey, 1);
if ($newEngine === null || sizeof($newEngine) !== 2) {
continue;
} else {
$newEngine = $newEngine[1];
foreach ($enginesToWaitFor as $index => $engine) {
if ($engine->name === $newEngine) {
unset($enginesToWaitFor[$index]);
break;
}
Log::info(sizeof($enginesToWaitFor) . " " . sizeof($answered) . " " . $enginesToWaitFor[0]->hash);
foreach ($enginesToWaitFor as $index => $engine) {
if (Redis::get($engine->hash) !== null) {
$answered[] = $engine;
unset($enginesToWaitFor[$index]);
break;
}
$answered[] = $newEngine;
}
if ((microtime(true) - $timeStart) >= 2) {
break;
} else {
usleep(50 * 1000);
}
}
# Now we can add an entry to Redis which defines the starting time and how many engines should answer this request
$pipeline = $redis->pipeline();
$pipeline->hset($this->getRedisEngineResult() . "status", "startTime", $timeStart);
$pipeline->hset($this->getRedisEngineResult() . "status", "engineCount", sizeof($engines));
$pipeline->hset($this->getRedisEngineResult() . "status", "engineDelivered", sizeof($answered));
# Add the cached engines as answered
foreach ($engines as $engine) {
if ($engine->cached) {
$pipeline->hincrby($this->getRedisEngineResult() . "status", "engineDelivered", 1);
$pipeline->hincrby($this->getRedisEngineResult() . "status", "engineAnswered", 1);
}
}
foreach ($answered as $engine) {
$pipeline->hset($this->getRedisEngineResult() . $engine, "delivered", "1");
}
$pipeline->execute();
/*
$pipeline = $redis->pipeline();
$pipeline->hset($this->getRedisEngineResult() . "status", "startTime", $timeStart);
$pipeline->hset($this->getRedisEngineResult() . "status", "engineCount", sizeof($engines));
$pipeline->hset($this->getRedisEngineResult() . "status", "engineDelivered", sizeof($answered));
# Add the cached engines as answered
foreach ($engines as $engine) {
if ($engine->cached) {
$pipeline->hincrby($this->getRedisEngineResult() . "status", "engineDelivered", 1);
$pipeline->hincrby($this->getRedisEngineResult() . "status", "engineAnswered", 1);
}
}
foreach ($answered as $engine) {
$pipeline->hset($this->getRedisEngineResult() . $engine, "delivered", "1");
}
$pipeline->execute();*/
}
public function retrieveResults()
......
......@@ -2,7 +2,7 @@
namespace App\Models\Quicktips;
use App\Jobs\Searcher;
use Cache;
use Illuminate\Foundation\Bus\DispatchesJobs;
use Illuminate\Support\Facades\Redis;
use Log;
......@@ -30,41 +30,23 @@ class Quicktips
public function startSearch($search, $locale, $max_time)
{
$url = $this->quicktipUrl . "?search=" . $this->normalize_search($search) . "&locale=" . $locale;
# TODO anders weitergeben
$this->hash = md5($url);
# TODO cache wieder einbauen (eventuell)
if ( /*!Cache::has($hash)*/true) {
$redis = Redis::connection(env('REDIS_RESULT_CONNECTION'));
$redis->hset("search." . $this->hash . ".results." . self::QUICKTIP_NAME, "status", "waiting");
if (!Cache::has($this->hash)) {
// Queue this search
$mission = $this->hash . ";" . base64_encode($url) . ";" . $max_time;
Redis::rpush(self::QUICKTIP_NAME . ".queue", $mission);
// Check the current status of Searchers for QUICKTIP_NAME
$needSearcher = false;
$searcherData = Redis::hgetall(self::QUICKTIP_NAME . ".stats");
// Create additional Searchers for QUICKTIP_NAME if necessary