Commit 59da7150 authored by Dominik Hebeler's avatar Dominik Hebeler

Merge branch 'development' of https://gitlab.metager.de/open-source/MetaGer into development

parents 3770307c 43f2b4c1
......@@ -33,12 +33,11 @@ EXPOSE 80
COPY config/nginx.conf /etc/nginx/nginx.conf
COPY config/nginx-default.conf /etc/nginx/conf.d/default.conf
COPY . /html
COPY --chown=root:nginx . /html
RUN chmod -R g+w storage bootstrap/cache
CMD /etc/init.d/cron start && \
/etc/init.d/php7.3-fpm start && \
/etc/init.d/nginx start && \
/etc/init.d/redis-server start && \
chmod -R 0777 /html/storage && \
chmod -R 0777 /html/bootstrap/cache && \
php artisan worker:spawner
su -s /bin/bash -c 'php artisan requests:fetcher' nginx
<?php
namespace App;
use Illuminate\Support\Facades\Redis;
class CacheHelper
{
    /**
     * Queue a cache write instead of performing it inline.
     *
     * MetaGer uses a pretty slow hard drive for the configured cache, so the
     * entry is serialized to JSON and appended to the Redis list named by
     * RequestCacher::CACHER_QUEUE; separate processes write it to disk.
     *
     * @param mixed $key         Cache key of the entry.
     * @param mixed $value       Payload to cache; it is passed through json_encode.
     * @param mixed $timeSeconds Lifetime of the entry in seconds.
     * @return void
     */
    public static function put($key, $value, $timeSeconds)
    {
        $payload = json_encode([
            'timeSeconds' => $timeSeconds,
            'key' => $key,
            'value' => $value,
        ]);

        Redis::rpush(\App\Console\Commands\RequestCacher::CACHER_QUEUE, $payload);
    }
}
<?php
namespace App\Console\Commands;
use Illuminate\Console\Command;
class CacheGC extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'cache:gc';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Cleans up every expired cache File';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * Walks the cache directory (including sub-directories, children first),
     * deletes every file whose leading 10-byte timestamp lies in the past and
     * removes directories that are empty afterwards. A lock file named
     * "cache.gc" inside the cache directory prevents two runs from overlapping;
     * it is removed again when the run ends, even on error.
     *
     * NOTE(review): the traversal is recursive because Laravel's file cache
     * store is understood to shard entries into nested sub-directories, which a
     * flat listing never reaches — confirm against the configured cache path.
     *
     * @return mixed
     */
    public function handle()
    {
        $cachedir = storage_path('framework/cache');
        $lockfile = $cachedir . "/cache.gc";

        // Another garbage collection is still running (or left its lock behind).
        if (file_exists($lockfile)) {
            return;
        }
        touch($lockfile);

        try {
            // CHILD_FIRST yields a directory only after its contents, so a
            // directory emptied by this run can be removed in the same pass.
            // CATCH_GET_CHILD skips sub-directories that cannot be opened.
            $entries = new \RecursiveIteratorIterator(
                new \RecursiveDirectoryIterator($cachedir, \FilesystemIterator::SKIP_DOTS),
                \RecursiveIteratorIterator::CHILD_FIRST,
                \RecursiveIteratorIterator::CATCH_GET_CHILD
            );

            foreach ($entries as $fileInfo) {
                $path = $fileInfo->getPathname();

                if (is_dir($path)) {
                    $this->removeDirectoryIfEmpty($path);
                    continue;
                }

                $basename = basename($path);
                if ($basename === "cache.gc" || $basename === ".gitignore") {
                    continue;
                }

                if ($this->isExpired($path)) {
                    $this->deleteFile($path);
                }
            }
        } finally {
            unlink($lockfile);
        }
    }

    /**
     * Tell whether a cache file's expiry timestamp has passed.
     *
     * The first 10 bytes of the file are interpreted as a Unix timestamp.
     * Files that cannot be opened (e.g. removed concurrently or unreadable)
     * are reported as not expired so they are left alone.
     *
     * @param string $file Absolute path of the cache file.
     * @return bool
     */
    private function isExpired(string $file): bool
    {
        try {
            $fp = fopen($file, 'r');
        } catch (\ErrorException $e) {
            // Warnings may surface as ErrorException when an error handler
            // converts them (the rmdir handling relies on the same behaviour).
            return false;
        }
        if ($fp === false) {
            return false;
        }

        try {
            $time = intval(fread($fp, 10));
        } finally {
            fclose($fp);
        }

        return $time < time();
    }

    /**
     * Delete a file, tolerating that it may already be gone.
     *
     * @param string $file Absolute path of the file.
     * @return void
     */
    private function deleteFile(string $file): void
    {
        try {
            unlink($file);
        } catch (\ErrorException $e) {
            // The entry vanished or is not removable; keep collecting the rest.
        }
    }

    /**
     * Remove a directory when it holds no entries; non-empty ones are kept.
     *
     * @param string $directory Absolute path of the directory.
     * @return void
     */
    private function removeDirectoryIfEmpty(string $directory): void
    {
        try {
            // Checking first avoids raising a warning for every populated directory.
            if ((new \FilesystemIterator($directory))->valid()) {
                return;
            }
            rmdir($directory);
        } catch (\ErrorException $e) {
            // Directory was filled or removed in the meantime.
        } catch (\UnexpectedValueException $e) {
            // Directory could not be opened for listing.
        }
    }
}
<?php
namespace App\Console\Commands;
use Cache;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Redis;
class RequestCacher extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'requests:cacher';

    /** Name of the Redis list that buffers pending cache writes. */
    const CACHER_QUEUE = 'cacher.queue';

    /** @var bool Cleared by the signal handler to end the main loop. */
    protected $shouldRun = true;

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Listens to a buffer of fetched search results and writes them to the filesystem cache.';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * Pops JSON items from CACHER_QUEUE (blocking up to one second per attempt)
     * and writes each of them to the cache until a SIGINT, SIGTERM or SIGHUP
     * arrives.
     *
     * Items are expected in the shape the producers push:
     * {"key": ..., "value": ..., "timeSeconds": ...}. The older field names
     * "hash", "body" and "cacheDuration" (minutes) are still accepted as a
     * fallback. Items that are not valid JSON objects or carry no key are
     * dropped.
     *
     * @return mixed
     */
    public function handle()
    {
        pcntl_async_signals(true);
        pcntl_signal(SIGINT, [$this, "sig_handler"]);
        pcntl_signal(SIGTERM, [$this, "sig_handler"]);
        pcntl_signal(SIGHUP, [$this, "sig_handler"]);

        while ($this->shouldRun) {
            $cacheItem = Redis::blpop(self::CACHER_QUEUE, 1);
            if (empty($cacheItem)) {
                continue;
            }

            // blpop answers with [listName, element]
            $cacheItem = json_decode($cacheItem[1], true);
            if (!is_array($cacheItem)) {
                continue;
            }

            $key = $cacheItem["key"] ?? $cacheItem["hash"] ?? null;
            if ($key === null || $key === "") {
                continue;
            }

            $value = $cacheItem["value"] ?? $cacheItem["body"] ?? null;
            // Only a missing or empty-string payload is replaced by the marker;
            // structured values such as an empty array are stored unchanged.
            if ($value === null || $value === "") {
                $value = "no-result";
            }

            if (isset($cacheItem["timeSeconds"])) {
                $seconds = intval($cacheItem["timeSeconds"]);
            } else {
                $seconds = intval($cacheItem["cacheDuration"] ?? 0) * 60;
            }

            Cache::put($key, $value, now()->addSeconds($seconds));
        }
    }

    /**
     * Signal handler: asks the main loop to stop after the current iteration.
     *
     * @param int $sig Number of the received signal.
     * @return void
     */
    public function sig_handler($sig)
    {
        $this->shouldRun = false;
        echo ("Terminating Cacher Process\n");
    }
}
<?php
namespace App\Console\Commands;
use Artisan;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Redis;
use Log;
class RequestFetcher extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'requests:fetcher';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'This commands fetches requests to the installed search engines';

    /** @var bool Cleared by the signal handler to end the main loop. */
    protected $shouldRun = true;

    /** @var resource|null curl multi handle that drives all transfers. */
    protected $multicurl = null;

    /** Proxy settings read from the environment; empty strings when unset. */
    protected $proxyhost, $proxyport, $proxyuser, $proxypassword;

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
        $this->multicurl = curl_multi_init();
        $this->proxyhost = env("PROXY_HOST", "");
        $this->proxyport = env("PROXY_PORT", "");
        $this->proxyuser = env("PROXY_USER", "");
        $this->proxypassword = env("PROXY_PASSWORD", "");
    }

    /**
     * Execute the console command.
     *
     * Forks up to five child processes that each run the 'requests:cacher'
     * command, then (in the parent) keeps popping fetch jobs from Redis, runs
     * them through the curl multi handle and stores every finished response
     * until a SIGINT, SIGTERM or SIGHUP arrives.
     *
     * @return mixed
     */
    public function handle()
    {
        $pids = [];
        $pid = null;
        for ($i = 0; $i < 5; $i++) {
            $pid = \pcntl_fork();
            if ($pid === -1) {
                // Fork failed: there is no child to track for this slot.
                Log::error("Could not fork a cacher process");
                continue;
            }
            if ($pid === 0) {
                break;
            }
            $pids[] = $pid;
        }

        if ($pid === 0) {
            // Child process: act as a cacher and never enter the fetch loop.
            Artisan::call('requests:cacher');
            exit;
        }

        pcntl_async_signals(true);
        pcntl_signal(SIGINT, [$this, "sig_handler"]);
        pcntl_signal(SIGTERM, [$this, "sig_handler"]);
        pcntl_signal(SIGHUP, [$this, "sig_handler"]);

        try {
            $blocking = false;
            while ($this->shouldRun) {
                curl_multi_exec($this->multicurl, $active);

                // While transfers are in flight the queue is polled without
                // blocking; once idle, wait up to one second for a new job.
                if (!$blocking) {
                    $currentJob = Redis::lpop(\App\MetaGer::FETCHQUEUE_KEY);
                } else {
                    $currentJob = Redis::blpop(\App\MetaGer::FETCHQUEUE_KEY, 1);
                    if (!empty($currentJob)) {
                        $currentJob = $currentJob[1];
                    }
                }

                if (!empty($currentJob)) {
                    $currentJob = json_decode($currentJob, true);
                    if (is_array($currentJob)) {
                        $ch = $this->getCurlHandle($currentJob);
                        curl_multi_add_handle($this->multicurl, $ch);
                        $blocking = false;
                        $active = true;
                    } else {
                        Log::error("Discarding fetch job that is not valid JSON");
                    }
                }

                $answerRead = false;
                while (($info = curl_multi_info_read($this->multicurl)) !== false) {
                    $answerRead = true;
                    $this->storeResponse($info["handle"]);
                    \curl_multi_remove_handle($this->multicurl, $info["handle"]);
                    // Release the easy handle; otherwise every finished request
                    // keeps its resources alive for the lifetime of the process.
                    \curl_close($info["handle"]);
                }

                if (!$active && !$answerRead) {
                    $blocking = true;
                }
            }
        } finally {
            curl_multi_close($this->multicurl);
        }

        // Collect children that have already exited; WNOHANG keeps this from blocking.
        foreach ($pids as $tmppid) {
            \pcntl_waitpid($tmppid, $status, WNOHANG);
        }
    }

    /**
     * Publish the outcome of one finished transfer.
     *
     * The body (empty unless the response code was 200) is written to Redis
     * under the result hash with a 60 second expiry, and a cache item is
     * pushed onto the cacher queue.
     *
     * @param resource $handle Finished curl easy handle.
     * @return void
     */
    private function storeResponse($handle)
    {
        // CURLOPT_PRIVATE carries "<resulthash>;<cacheDuration>" (see getCurlHandle).
        $infos = explode(";", curl_getinfo($handle, CURLINFO_PRIVATE));
        $resulthash = $infos[0];
        $cacheDurationMinutes = intval($infos[1]);
        $responseCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
        $body = "";

        $error = curl_error($handle);
        if (!empty($error)) {
            Log::error($error);
        }

        if ($responseCode !== 200) {
            Log::debug("Got responsecode " . $responseCode . " fetching \"" . curl_getinfo($handle, CURLINFO_EFFECTIVE_URL) . "\"\n");
        } else {
            $body = \curl_multi_getcontent($handle);
        }

        Redis::pipeline(function ($pipe) use ($resulthash, $body, $cacheDurationMinutes) {
            $pipe->set($resulthash, $body);
            $pipe->expire($resulthash, 60);
            $cacherItem = [
                'timeSeconds' => $cacheDurationMinutes * 60,
                'key' => $resulthash,
                'value' => $body,
            ];
            $pipe->rpush(\App\Console\Commands\RequestCacher::CACHER_QUEUE, json_encode($cacherItem));
        });
    }

    /**
     * Build the curl easy handle for one fetch job.
     *
     * @param array $job Decoded job; "url", "resulthash" and "cacheDuration" are
     *                   read unconditionally, "username"/"password" and
     *                   "headers" only when present and non-empty.
     * @return resource
     */
    private function getCurlHandle($job)
    {
        $ch = curl_init();
        curl_setopt_array($ch, array(
            CURLOPT_URL => $job["url"],
            CURLOPT_PRIVATE => $job["resulthash"] . ";" . $job["cacheDuration"],
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_CONNECTTIMEOUT => 10,
            CURLOPT_MAXCONNECTS => 500,
            CURLOPT_LOW_SPEED_LIMIT => 500,
            CURLOPT_LOW_SPEED_TIME => 5,
            CURLOPT_TIMEOUT => 10,
        ));

        // The SOCKS5 proxy is only used when all four settings are configured.
        if (!empty($this->proxyhost) && !empty($this->proxyport) && !empty($this->proxyuser) && !empty($this->proxypassword)) {
            curl_setopt($ch, CURLOPT_PROXY, $this->proxyhost);
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxyuser . ":" . $this->proxypassword);
            curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxyport);
            curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
        }

        if (!empty($job["username"]) && !empty($job["password"])) {
            curl_setopt($ch, CURLOPT_USERPWD, $job["username"] . ":" . $job["password"]);
        }

        if (!empty($job["headers"])) {
            // The job maps header names to values; curl wants "<name>:<value>" strings.
            $headers = [];
            foreach ($job["headers"] as $key => $value) {
                $headers[] = $key . ":" . $value;
            }
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        }

        return $ch;
    }

    /**
     * Signal handler: asks the main loop to stop after the current iteration.
     *
     * @param int $sig Number of the received signal.
     * @return void
     */
    public function sig_handler($sig)
    {
        $this->shouldRun = false;
        echo ("Terminating Process\n");
    }
}
<?php
namespace App\Console\Commands;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Redis;
class WorkerSpawner extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'worker:spawner';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'This command makes sure that enough worker processes are spawned';

    /** @var bool Cleared by the signal handler to end the main loop. */
    protected $shouldRun = true;

    /** @var array Spawned workers, each as returned by spawnWorker(). */
    protected $processes = [];

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * Watches the "queues:default" Redis list. Whenever jobs are queued it
     * waits until the list length stops changing, spawns one worker per queued
     * job plus spare ones, and then waits until every job id it saw has left
     * the list. Finished workers are reaped on every tenth loop iteration; on
     * shutdown all remaining workers are closed.
     *
     * @return mixed
     */
    public function handle()
    {
        pcntl_async_signals(true);
        pcntl_signal(SIGINT, [$this, "sig_handler"]);
        pcntl_signal(SIGTERM, [$this, "sig_handler"]);
        pcntl_signal(SIGHUP, [$this, "sig_handler"]);

        try {
            $counter = 0;
            while ($this->shouldRun) {
                $counter = ($counter + 1) % 10;

                $length = Redis::llen("queues:default");
                if ($length > 0) {
                    // Let the queue settle: re-check every 50ms until the
                    // length is unchanged between two consecutive reads.
                    while ($this->shouldRun) {
                        usleep(50 * 1000);
                        $currentLength = Redis::llen("queues:default");
                        if ($currentLength === $length) {
                            break;
                        }
                        $length = $currentLength;
                    }

                    $jobs = Redis::lrange("queues:default", 0, -1);
                    $length = sizeof($jobs) + 5;
                    $ids = $this->getJobIds($jobs);

                    for ($i = 0; $i <= $length; $i++) {
                        $worker = $this->spawnWorker();
                        // A failed spawn yields null and must not be tracked,
                        // otherwise the reaping code would dereference it.
                        if ($worker !== null) {
                            $this->processes[] = $worker;
                        }
                    }

                    // Wait until none of the observed jobs is queued any more.
                    // The loop honours shutdown requests and pauses briefly
                    // between polls instead of hammering Redis.
                    while ($this->shouldRun && sizeof($ids) > 0) {
                        $queuedIds = $this->getJobIds(Redis::lrange("queues:default", 0, -1));
                        $ids = array_intersect($ids, $queuedIds);
                        if (sizeof($ids) > 0) {
                            usleep(10 * 1000);
                        }
                    }
                } else {
                    usleep(100 * 1000); // Sleep for 100ms
                }

                if ($counter === 0) {
                    // Reap workers that have exited and keep the running ones.
                    $newProcs = [];
                    foreach ($this->processes as $process) {
                        $infos = proc_get_status($process["process"]);
                        if (!$infos["running"]) {
                            fclose($process["pipes"][1]);
                            proc_close($process["process"]);
                        } else {
                            $newProcs[] = $process;
                        }
                    }
                    $this->processes = $newProcs;
                }
            }
        } finally {
            foreach ($this->processes as $process) {
                fclose($process["pipes"][1]);
                proc_close($process["process"]);
            }
        }
    }

    /**
     * Extract the "id" field of every queued job payload.
     *
     * Payloads that are not valid JSON or carry no id are skipped.
     *
     * @param array $jobs Raw JSON strings as stored in the queue list.
     * @return array List of job ids.
     */
    private function getJobIds($jobs)
    {
        $result = [];
        foreach ($jobs as $job) {
            $decoded = json_decode($job, true);
            if (is_array($decoded) && isset($decoded["id"])) {
                $result[] = $decoded["id"];
            }
        }
        return $result;
    }

    /**
     * Signal handler: asks the main loop to stop.
     *
     * Public like the handlers of the other commands, so the callback stays
     * callable from wherever the signal happens to be dispatched.
     *
     * @param int $sig Number of the received signal.
     * @return void
     */
    public function sig_handler($sig)
    {
        $this->shouldRun = false;
        echo ("Terminating Process\n");
    }

    /**
     * Start one queue worker as a child process.
     *
     * @return array|null Array with the keys "process", "pipes" and "working",
     *                    or null when the process could not be started.
     */
    private function spawnWorker()
    {
        $descriptorspec = array(
            0 => array("pipe", "r"), // STDIN is a pipe the child reads from
            1 => array("pipe", "w"), // STDOUT is a pipe the child writes to
            2 => array("file", "/tmp/worker-error.txt", "a"), // STDERR is appended to a file
        );
        $cwd = getcwd();
        // An empty array is passed as the child's environment.
        $env = array();
        $process = proc_open('php artisan queue:work --stop-when-empty --sleep=1', $descriptorspec, $pipes, $cwd, $env);

        if (!is_resource($process)) {
            return null;
        }

        // Nothing is ever sent to the worker, so its STDIN is closed right away.
        fclose($pipes[0]);
        \stream_set_blocking($pipes[1], 0);

        return [
            "process" => $process,
            "pipes" => $pipes,
            "working" => false,
        ];
    }
}
......@@ -27,6 +27,7 @@ class Kernel extends ConsoleKernel
{
$schedule->command('requests:gather')->everyFifteenMinutes();
$schedule->command('requests:useragents')->everyFiveMinutes();
$schedule->command('cache:gc')->hourly();
$schedule->call(function () {
DB::table('monthlyrequests')->truncate();
......
......@@ -14,6 +14,7 @@ class MetaGerSearch extends Controller
{
public function search(Request $request, MetaGer $metager)
{
$time = microtime(true);
$spamEntries = [];
if (file_exists(config_path('spam.txt'))) {
$spamEntries = file(config_path('spam.txt'));
......
......@@ -50,7 +50,7 @@ class HumanVerification
# Get all Users of this IP
$users = Cache::get($prefix . "." . $id, []);
$users = $this->removeOldUsers($users);
$users = $this->removeOldUsers($prefix, $users);
$user = [];
if (empty($users[$uid])) {
......@@ -148,10 +148,10 @@ class HumanVerification
// Lock must be acquired within 2 seconds
$userList = Cache::get($prefix . "." . $user["id"], []);
$userList[$user["uid"]] = $user;
Cache::put($prefix . "." . $user["id"], $userList, now()->addWeeks(2));
\App\CacheHelper::put($prefix . "." . $user["id"], $userList, 2 * 7 * 24 * 60 * 60);
}
public function removeOldUsers($userList)
public function removeOldUsers($prefix, $userList)
{
$newUserlist = [];
$now = now();
......@@ -168,10 +168,7 @@ class HumanVerification
}
if ($changed) {
// Lock must be acquired within 2 seconds
Cache::lock($prefix . "." . $user["id"])->block(2, function () {
Cache::put($prefix . "." . $user["id"], $newUserlist, now()->addWeeks(2));
});
\App\CacheHelper::put($prefix . "." . $user["id"], $newUserlist, 2 * 7 * 24 * 60 * 60);
}
return $newUserlist;
......
<?php
namespace App\Jobs;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Redis;
class Searcher implements ShouldQueue
{
use InteractsWithQueue, Queueable, SerializesModels;
public $tries = 1;
/**
* The number of seconds the job can run before timing out.