RequestFetcher.php 6.77 KB
Newer Older
1
2
3
4
5
6
<?php

namespace App\Console\Commands;

use Illuminate\Console\Command;
use Illuminate\Support\Facades\Redis;
Dominik Hebeler's avatar
Dominik Hebeler committed
7
use Log;
8

Dominik Hebeler's avatar
Dominik Hebeler committed
9
class RequestFetcher extends Command
10
11
12
13
14
15
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'requests:fetcher';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'This commands fetches requests to the installed search engines';

    /**
     * Loop flag of handle(); set to false by sig_handler().
     *
     * @var bool
     */
    protected $shouldRun = true;

    /**
     * Multi handle new transfers are attached to (created in the constructor).
     */
    protected $multicurl = null;

    /**
     * Previous multi handle, kept after handle() re-initialises $multicurl.
     */
    protected $oldMultiCurl = null;

    /**
     * Number of requests after which handle() re-initialises the multi handle.
     *
     * @var int
     */
    protected $maxFetchedDocuments = 1000;

    /**
     * Requests attached since the last re-initialisation.
     *
     * @var int
     */
    protected $fetchedDocuments = 0;

    /**
     * Proxy settings read from the environment in the constructor.
     * $proxyport is declared here as well: it is assigned in the constructor and
     * read in getCurlHandle(), and would otherwise be created as a dynamic
     * property (deprecated since PHP 8.2).
     */
    protected $proxyhost, $proxyport, $proxyuser, $proxypassword;
31
32
33
34
35
36
37
38
39

    /**
     * Create a new command instance.
     *
     * Reads the proxy settings from the environment (each defaults to an empty
     * string) and creates the curl multi handle used by handle().
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();

        $this->multicurl = curl_multi_init();

        // Property name => environment variable it is filled from.
        $proxySettings = [
            "proxyhost" => "PROXY_HOST",
            "proxyport" => "PROXY_PORT",
            "proxyuser" => "PROXY_USER",
            "proxypassword" => "PROXY_PASSWORD",
        ];
        foreach ($proxySettings as $property => $variable) {
            $this->{$property} = env($variable, "");
        }
    }

    /**
     * Execute the console command.
     *
     * Runs the fetch loop until a signal handler clears $shouldRun. Each iteration
     * drives the curl multi handle, pops at most one job from the Redis list
     * \App\MetaGer::FETCHQUEUE_KEY, attaches it as a curl handle and collects
     * finished transfers through readMultiCurl(). While nothing is in flight the
     * queue is polled with a blocking pop (1 second timeout); otherwise the loop
     * sleeps 50ms between iterations.
     *
     * After more than $maxFetchedDocuments requests a fresh multi handle is
     * created. The previous one keeps being driven until its transfers are
     * finished and is then closed.
     *
     * @return mixed
     */
    public function handle()
    {
        $pidFile = "/tmp/fetcher";

        pcntl_signal(SIGINT, [$this, "sig_handler"]);
        pcntl_signal(SIGTERM, [$this, "sig_handler"]);
        pcntl_signal(SIGHUP, [$this, "sig_handler"]);

        // Redis might not be available now
        for ($count = 0; $count < 10; $count++) {
            try {
                Redis::connection();
                break;
            } catch (\Predis\Connection\ConnectionException $e) {
                if ($count >= 9) {
                    // Still not available after 10 attempts: give up
                    return;
                }
                sleep(1);
            }
        }

        touch($pidFile);

        if (!file_exists($pidFile)) {
            return;
        }

        try {
            $blocking = false;
            $active = 0;
            while ($this->shouldRun) {
                // Handlers installed with pcntl_signal() only run when signals are
                // dispatched; do so once per iteration so sig_handler() can stop the loop.
                pcntl_signal_dispatch();
                if (!$this->shouldRun) {
                    break;
                }

                curl_multi_exec($this->multicurl, $active);

                $currentJob = null;
                if (!$blocking) {
                    $currentJob = Redis::lpop(\App\MetaGer::FETCHQUEUE_KEY);
                } else {
                    $currentJob = Redis::blpop(\App\MetaGer::FETCHQUEUE_KEY, 1);
                    if (!empty($currentJob)) {
                        // blpop answers with [key, value]
                        $currentJob = $currentJob[1];
                    }
                }

                if (!empty($currentJob)) {
                    $job = json_decode($currentJob, true);
                    // getCurlHandle() reads these three keys unconditionally.
                    if (is_array($job) && isset($job["url"], $job["resulthash"], $job["cacheDuration"])) {
                        $ch = $this->getCurlHandle($job);
                        curl_multi_add_handle($this->multicurl, $ch);

                        $this->fetchedDocuments++;
                        // Only rotate once the previously rotated handle is gone,
                        // otherwise its pending transfers would be dropped.
                        if ($this->fetchedDocuments > $this->maxFetchedDocuments && $this->oldMultiCurl === null) {
                            Log::info("Reinitializing Multicurl after " . $this->fetchedDocuments . " requests.");
                            $this->oldMultiCurl = $this->multicurl;
                            $this->multicurl = curl_multi_init();
                            $this->fetchedDocuments = 0;
                        }
                        $blocking = false;
                        $active = 1;
                    } else {
                        // The payload is not logged: a job may carry credentials.
                        Log::error("Skipping malformed fetch job (JSON: " . json_last_error_msg() . ")");
                    }
                }

                $answerRead = $this->readMultiCurl($this->multicurl);

                if ($this->oldMultiCurl !== null) {
                    // The rotated handle still owns transfers (at least the job that
                    // triggered the rotation); they only progress when it is driven.
                    $oldActive = 0;
                    curl_multi_exec($this->oldMultiCurl, $oldActive);
                    $oldAnswerRead = $this->readMultiCurl($this->oldMultiCurl);
                    if ($oldActive === 0 && !$oldAnswerRead) {
                        // Nothing running and nothing left to collect
                        curl_multi_close($this->oldMultiCurl);
                        $this->oldMultiCurl = null;
                    }
                }

                // Do not enter the 1s blocking pop while the rotated handle still has work.
                if (!$active && !$answerRead && $this->oldMultiCurl === null) {
                    $blocking = true;
                } else {
                    usleep(50 * 1000);
                }
            }
        } finally {
            // A missing pid file must not keep the handles below from being closed.
            if (file_exists($pidFile)) {
                unlink($pidFile);
            }
            curl_multi_close($this->multicurl);
            if ($this->oldMultiCurl !== null) {
                curl_multi_close($this->oldMultiCurl);
                $this->oldMultiCurl = null;
            }
        }
    }

127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
    /**
     * Collect all finished transfers of a multi handle and store their bodies in Redis.
     *
     * For every finished transfer the response body (an empty string unless the
     * response code is 200) is written to the key taken from the handle's private
     * data, with an expiry of 60 seconds. The handle is removed from the multi
     * handle afterwards, even if storing the result throws.
     *
     * @param mixed $mc curl multi handle to read from
     * @return bool true if at least one finished transfer was read
     */
    private function readMultiCurl($mc)
    {
        $answerRead = false;
        while (($info = curl_multi_info_read($mc)) !== false) {
            $handle = $info["handle"];
            try {
                $answerRead = true;

                // The private data is "<resulthash>;<cacheDuration>" (set in getCurlHandle()).
                // Only the hash is needed here, so a missing second part is not an error.
                $private = explode(";", (string) curl_getinfo($handle, CURLINFO_PRIVATE), 2);
                $resulthash = $private[0];

                $responseCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
                $body = "";

                $error = curl_error($handle);
                if (!empty($error)) {
                    Log::error($error);
                }

                if ($responseCode !== 200) {
                    Log::debug("Got responsecode " . $responseCode . " fetching \"" . curl_getinfo($handle, CURLINFO_EFFECTIVE_URL) . "\"");
                } else {
                    $body = \curl_multi_getcontent($handle);
                }

                Redis::pipeline(function ($pipe) use ($resulthash, $body) {
                    $pipe->set($resulthash, $body);
                    $pipe->expire($resulthash, 60);
                });
            } finally {
                \curl_multi_remove_handle($mc, $handle);
            }
        }
        return $answerRead;
    }

Dominik Hebeler's avatar
Dominik Hebeler committed
162
    /**
     * Create a configured curl handle for one fetch job. The handle is not executed here.
     *
     * @param array $job Decoded job. Reads "url", "resulthash" and "cacheDuration";
     *                   optionally "username"/"password" and "headers" (name => value).
     * @return mixed the configured curl handle
     */
    private function getCurlHandle($job)
    {
        $handle = curl_init();

        $options = [
            CURLOPT_URL => $job["url"],
            // Travels with the handle so the result can be matched to its job later.
            CURLOPT_PRIVATE => $job["resulthash"] . ";" . $job["cacheDuration"],
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_CONNECTTIMEOUT => 2,
            CURLOPT_MAXCONNECTS => 500,
            CURLOPT_LOW_SPEED_LIMIT => 50000,
            CURLOPT_LOW_SPEED_TIME => 2,
            CURLOPT_TIMEOUT => 3,
        ];
        curl_setopt_array($handle, $options);

        // The proxy is only used when host, port, user and password are all configured.
        $proxyIncomplete = empty($this->proxyhost)
            || empty($this->proxyport)
            || empty($this->proxyuser)
            || empty($this->proxypassword);
        if (!$proxyIncomplete) {
            curl_setopt($handle, CURLOPT_PROXY, $this->proxyhost);
            curl_setopt($handle, CURLOPT_PROXYUSERPWD, $this->proxyuser . ":" . $this->proxypassword);
            curl_setopt($handle, CURLOPT_PROXYPORT, $this->proxyport);
            curl_setopt($handle, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
        }

        // Credentials of the job itself, if it brings both parts.
        $hasCredentials = !empty($job["username"]) && !empty($job["password"]);
        if ($hasCredentials) {
            curl_setopt($handle, CURLOPT_USERPWD, $job["username"] . ":" . $job["password"]);
        }

        if (!empty($job["headers"]) && count($job["headers"]) > 0) {
            // Every entry of the job's header map becomes one "<name>:<value>" line.
            $headerLines = array_map(
                function ($name, $value) {
                    return $name . ":" . $value;
                },
                array_keys($job["headers"]),
                $job["headers"]
            );
            curl_setopt($handle, CURLOPT_HTTPHEADER, $headerLines);
        }

        return $handle;
    }

Dominik Hebeler's avatar
Dominik Hebeler committed
203
    /**
     * Signal callback registered in handle() for SIGINT, SIGTERM and SIGHUP.
     *
     * Announces the shutdown and clears the flag the fetch loop runs on.
     *
     * @param mixed $sig value passed by the signal dispatcher; not used
     * @return void
     */
    public function sig_handler($sig)
    {
        echo "Terminating Process\n";
        $this->shouldRun = false;
    }

}