RequestFetcher.php 7.72 KB
Newer Older
1 2 3 4 5 6
<?php

namespace App\Console\Commands;

use Illuminate\Console\Command;
use Illuminate\Support\Facades\Redis;
Dominik Hebeler's avatar
Dominik Hebeler committed
7
use Log;
8

Dominik Hebeler's avatar
Dominik Hebeler committed
9
class RequestFetcher extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'requests:fetcher';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'This commands fetches requests to the installed search engines';

    /**
     * Main-loop flag; sig_handler() clears it so that handle() leaves its loop.
     *
     * @var bool
     */
    protected $shouldRun = true;

    /**
     * cURL multi handle that newly queued requests are added to.
     *
     * @var mixed
     */
    protected $multicurl = null;

    /**
     * Previously used multi handle; kept until its remaining transfers are done.
     *
     * @var mixed
     */
    protected $oldMultiCurl = null;

    /**
     * Number of added requests after which the multi handle is replaced by a fresh one.
     *
     * @var int
     */
    protected $maxFetchedDocuments = 10000;

    /**
     * Requests added since the current multi handle was created.
     *
     * @var int
     */
    protected $fetchedDocuments = 0;

    /**
     * Proxy settings, filled from the PROXY_* environment values in the constructor.
     *
     * @var string
     */
    protected $proxyhost, $proxyport, $proxyuser, $proxypassword;

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
        $this->multicurl = curl_multi_init();
        $this->proxyhost = env("PROXY_HOST", "");
        $this->proxyport = env("PROXY_PORT", "");
        $this->proxyuser = env("PROXY_USER", "");
        $this->proxypassword = env("PROXY_PASSWORD", "");
    }

    /**
     * Execute the console command.
     *
     * Registers the signal handlers, waits for Redis, creates the marker file
     * /tmp/fetcher and then loops until $shouldRun is cleared: queued jobs are
     * read from Redis, turned into cURL handles, and finished transfers are
     * written back to Redis by readMultiCurl().
     *
     * @return mixed
     */
    public function handle()
    {
        $pidFile = "/tmp/fetcher";
        pcntl_signal(SIGINT, [$this, "sig_handler"]);
        pcntl_signal(SIGTERM, [$this, "sig_handler"]);
        pcntl_signal(SIGHUP, [$this, "sig_handler"]);

        // Redis might not be available now
        if (!$this->waitForRedis()) {
            return;
        }

        touch($pidFile);

        if (!file_exists($pidFile)) {
            return;
        }

        try {
            $blocking = false;
            while ($this->shouldRun) {
                // Run handlers for signals received since the last iteration.
                // Nothing visible in this file enables ticks or async signals,
                // so without this call sig_handler() might never be invoked.
                pcntl_signal_dispatch();

                curl_multi_exec($this->multicurl, $active);

                $added = $this->enqueueJobs($this->fetchJobs($blocking));
                if ($added > 0) {
                    $blocking = false;
                    $active = true;
                }

                $answerRead = $this->readMultiCurl($this->multicurl);
                $oldPending = $this->driveOldMultiCurl();

                if (!$active && !$answerRead && !$oldPending) {
                    // Nothing in flight: wait on the queue instead of polling it.
                    $blocking = true;
                } else {
                    usleep(50 * 1000);
                }
            }
        } finally {
            // Guarded so that a failing unlink cannot skip closing the handles.
            if (file_exists($pidFile)) {
                unlink($pidFile);
            }
            curl_multi_close($this->multicurl);
            if ($this->oldMultiCurl !== null) {
                curl_multi_close($this->oldMultiCurl);
                $this->oldMultiCurl = null;
            }
        }
    }

    /**
     * Tries to obtain the Redis connection, retrying once per second.
     *
     * @return bool false when the tenth attempt also raised a ConnectionException
     */
    private function waitForRedis()
    {
        for ($attempt = 0; $attempt < 10; $attempt++) {
            try {
                Redis::connection();
                return true;
            } catch (\Predis\Connection\ConnectionException $e) {
                if ($attempt >= 9) {
                    // If its not available after 10 seconds we will exit
                    return false;
                }
                sleep(1);
            }
        }
        return false;
    }

    /**
     * Reads raw (JSON encoded) jobs from the fetch queue.
     *
     * @param bool $blocking true: wait up to one second for a single job (BLPOP);
     *                       false: take everything currently in the list
     * @return array list of raw job strings, possibly empty
     */
    private function fetchJobs($blocking)
    {
        if ($blocking) {
            $popped = Redis::blpop(\App\MetaGer::FETCHQUEUE_KEY, 1);
            return empty($popped) ? [] : [$popped[1]];
        }

        // NOTE(review): LRANGE and DEL are sent as a pipeline, not a transaction;
        // a job pushed between the two commands looks like it would be deleted
        // unread - confirm whether that matters for the producers.
        $elements = Redis::pipeline(function ($redis) {
            $redis->lrange(\App\MetaGer::FETCHQUEUE_KEY, 0, -1);
            $redis->del(\App\MetaGer::FETCHQUEUE_KEY);
        });

        return (isset($elements[0]) && is_array($elements[0])) ? $elements[0] : [];
    }

    /**
     * Decodes the given jobs and adds a cURL handle for each to the current multi handle.
     *
     * Stops the main loop (clears $shouldRun) when a handle cannot be added.
     *
     * @param array $rawJobs JSON encoded jobs as returned by fetchJobs()
     * @return int number of handles that were added
     */
    private function enqueueJobs($rawJobs)
    {
        $added = 0;
        foreach ($rawJobs as $rawJob) {
            $job = json_decode($rawJob, true);
            if (!is_array($job) || empty($job["url"]) || empty($job["resulthash"])) {
                // getCurlHandle() needs both keys; a broken payload must not stop the fetcher.
                Log::error("Skipping malformed fetch job");
                continue;
            }

            $ch = $this->getCurlHandle($job);
            if (curl_multi_add_handle($this->multicurl, $ch) !== 0) {
                $this->shouldRun = false;
                Log::error("Couldn't add Handle to multicurl");
                break;
            }
            $added++;
            $this->fetchedDocuments++;

            // Swap in a fresh multi handle once the limit is exceeded. The swap is
            // deferred while a previous handle is still draining, so that its
            // reference is never overwritten before it has been closed.
            if ($this->fetchedDocuments > $this->maxFetchedDocuments && $this->oldMultiCurl === null) {
                Log::info("Reinitializing Multicurl after " . $this->fetchedDocuments . " requests.");
                $this->oldMultiCurl = $this->multicurl;
                $this->multicurl = curl_multi_init();
                $this->fetchedDocuments = 0;
            }
        }
        return $added;
    }

    /**
     * Keeps the retired multi handle running and closes it once it is finished.
     *
     * @return bool true while the retired handle still has running transfers
     */
    private function driveOldMultiCurl()
    {
        if ($this->oldMultiCurl === null) {
            return false;
        }

        // The retired handle needs its own curl_multi_exec calls; otherwise the
        // transfers that were still attached when it was swapped out never finish.
        curl_multi_exec($this->oldMultiCurl, $stillRunning);
        $this->readMultiCurl($this->oldMultiCurl);

        if ($stillRunning > 0) {
            return true;
        }

        Log::debug("Removing finished multicurl handle");
        curl_multi_close($this->oldMultiCurl);
        $this->oldMultiCurl = null;
        return false;
    }

    /**
     * Processes every finished transfer of the given multi handle.
     *
     * For each finished transfer the response body (an empty string unless the
     * response code is 200) is stored in Redis under the job's resulthash with
     * an expiry of 60 seconds; the easy handle is then removed from $mc.
     *
     * @param mixed $mc cURL multi handle to read from
     * @return bool true when at least one finished transfer was processed
     */
    private function readMultiCurl($mc)
    {
        $answerRead = false;
        while (($info = curl_multi_info_read($mc)) !== false) {
            $handle = $info["handle"];
            try {
                $answerRead = true;
                // getCurlHandle() stores "<resulthash>;<cacheDuration>" as private data;
                // only the resulthash is needed here.
                $private = explode(";", (string) curl_getinfo($handle, CURLINFO_PRIVATE));
                $resulthash = $private[0];
                $responseCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
                $body = "";

                $error = curl_error($handle);
                if (!empty($error)) {
                    Log::error($error);
                }

                if ($responseCode !== 200) {
                    Log::debug("Got responsecode " . $responseCode . " fetching \"" . curl_getinfo($handle, CURLINFO_EFFECTIVE_URL) . "\n");
                } else {
                    $body = \curl_multi_getcontent($handle);
                }

                Redis::pipeline(function ($pipe) use ($resulthash, $body) {
                    $pipe->set($resulthash, $body);
                    $pipe->expire($resulthash, 60);
                });
            } finally {
                // Always detach the handle, even when storing the result failed.
                \curl_multi_remove_handle($mc, $handle);
            }
        }
        return $answerRead;
    }

    /**
     * Builds a configured cURL easy handle for one fetch job.
     *
     * @param array $job decoded job; "url" and "resulthash" are read, and
     *                   "cacheDuration", "username", "password" and "headers"
     *                   (header name => value) are used when present
     * @return mixed the cURL handle
     */
    private function getCurlHandle($job)
    {
        $ch = curl_init();
        $cacheDuration = isset($job["cacheDuration"]) ? $job["cacheDuration"] : 0;

        curl_setopt_array($ch, [
            CURLOPT_URL => $job["url"],
            // Read back by readMultiCurl() to find the Redis key for the result.
            CURLOPT_PRIVATE => $job["resulthash"] . ";" . $cacheDuration,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_CONNECTTIMEOUT => 2,
            CURLOPT_MAXCONNECTS => 500,
            CURLOPT_LOW_SPEED_LIMIT => 50000,
            CURLOPT_LOW_SPEED_TIME => 5,
            CURLOPT_TIMEOUT => 7,
        ]);

        // The proxy is only used when all four settings are configured.
        if (!empty($this->proxyhost) && !empty($this->proxyport) && !empty($this->proxyuser) && !empty($this->proxypassword)) {
            curl_setopt($ch, CURLOPT_PROXY, $this->proxyhost);
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxyuser . ":" . $this->proxypassword);
            curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxyport);
            curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
        }

        if (!empty($job["username"]) && !empty($job["password"])) {
            curl_setopt($ch, CURLOPT_USERPWD, $job["username"] . ":" . $job["password"]);
        }

        if (!empty($job["headers"]) && is_array($job["headers"])) {
            // CURLOPT_HTTPHEADER takes a list with one "<key>:<value>" entry per header.
            $headers = [];
            foreach ($job["headers"] as $key => $value) {
                $headers[] = $key . ":" . $value;
            }
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        }

        return $ch;
    }

    /**
     * Signal handler for SIGINT, SIGTERM and SIGHUP: asks the main loop to stop.
     *
     * @param int $sig number of the received signal (not used)
     * @return void
     */
    public function sig_handler($sig)
    {
        $this->shouldRun = false;
        echo ("Terminating Process\n");
    }

}