使用 cURL multi 接口通过代理建立多个连接 [英] cURL with multi interface for many connections with proxy

查看:1786
本文介绍了使用 cURL multi 接口通过代理建立多个连接的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我需要用列表中的许多代理去检测同一个网站,于是决定使用 libcurl 来完成这件事。
我使用了这个示例,并根据自己的需要做了修改。
下面是我的代码:

I need to check many proxies from list against one website. I decided to use libcurl to do this.
I used this example and modified it according to my needs.
Here is my code:

#include <cerrno>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <curl/curl.h>

/* somewhat unix-specific */ 
#include <sys/select.h>
#include <sys/time.h>
#include <unistd.h>

using namespace std;

/* Fixed-size table of easy handles. Nothing in the visible code reads or
   writes it. NOTE(review): looks like a leftover -- confirm that no other
   translation unit refers to it before removing. */
CURL * handles [100];

CURL * createProxyHandle (string proxyData){
    CURL * handle = curl_easy_init ();

    curl_slist * chunk = NULL;
    chunk = curl_slist_append(chunk, "Accept: text/html, application/xml;q=0.9, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1");
    chunk = curl_slist_append(chunk, "Accept-Language: ru-RU,ru;q=0.9,en;q=0.8");
    chunk = curl_slist_append(chunk, "Accept-Encoding: gzip, deflate, sdch");

    curl_easy_setopt (handle, CURLOPT_URL, "<site>");
    curl_easy_setopt (handle, CURLOPT_CONNECTTIMEOUT, 40);
    curl_easy_setopt (handle, CURLOPT_TIMEOUT, 50);
    curl_easy_setopt (handle, CURLOPT_FRESH_CONNECT, true);
    curl_easy_setopt (handle, CURLOPT_VERBOSE, true);
    curl_easy_setopt (handle, CURLOPT_FOLLOWLOCATION, true);

    curl_easy_setopt (handle, CURLOPT_ENCODING , "gzip");
    curl_easy_setopt (handle, CURLOPT_PROXY, proxyData.c_str());
    curl_easy_setopt (handle, CURLOPT_HTTPHEADER, chunk);
    curl_easy_setopt (handle, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36");

    return handle;
}

int main(){
    ifstream fin ("data.txt", ifstream::in);
    string proxy;

    CURLM *multi_handle;
    CURLMsg *msg;

    int msgs_left;
    int still_running;



    multi_handle = curl_multi_init();

    while (fin >> proxy){
        cout << "Proxy: " << proxy << endl;
        CURL * handle = createProxyHandle (proxy);
        curl_multi_add_handle(multi_handle, handle);
    }

    curl_multi_perform(multi_handle, &still_running);

do {
    struct timeval timeout;
    int rc; /* select() return code */ 
    CURLMcode mc; /* curl_multi_fdset() return code */ 

    fd_set fdread;
    fd_set fdwrite;
    fd_set fdexcep;
    int maxfd = -1;

    long curl_timeo = -1;

    FD_ZERO(&fdread);
    FD_ZERO(&fdwrite);
    FD_ZERO(&fdexcep);

    /* set a suitable timeout to play around with */ 
    timeout.tv_sec = 1;
    timeout.tv_usec = 0;

    curl_multi_timeout(multi_handle, &curl_timeo);
    if(curl_timeo >= 0) {
      timeout.tv_sec = curl_timeo / 1000;
      if(timeout.tv_sec > 1)
        timeout.tv_sec = 1;
      else
        timeout.tv_usec = (curl_timeo % 1000) * 1000;
    }

    /* get file descriptors from the transfers */ 
    mc = curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcep, &maxfd);

    if(mc != CURLM_OK)
    {
      fprintf(stderr, "curl_multi_fdset() failed, code %d.\n", mc);
      break;
    }

    /* On success the value of maxfd is guaranteed to be >= -1. We call
       select(maxfd + 1, ...); specially in case of (maxfd == -1) there are
       no fds ready yet so we call select(0, ...) --or Sleep() on Windows--
       to sleep 100ms, which is the minimum suggested value in the
       curl_multi_fdset() doc. */ 

    if(maxfd == -1) {
#ifdef _WIN32
      Sleep(100);
      rc = 0;
#else
      /* Portable sleep for platforms other than Windows. */ 
      struct timeval wait = { 0, 100 * 1000 }; /* 100ms */ 
      rc = select(0, NULL, NULL, NULL, &wait);
#endif
    }
    else {
      /* Note that on some platforms 'timeout' may be modified by select().
         If you need access to the original value save a copy beforehand. */ 
      rc = select(maxfd+1, &fdread, &fdwrite, &fdexcep, &timeout);
    }

    switch(rc) {
    case -1:
      /* select error */ 
      break;
    case 0:
    default:
      /* timeout or readable/writable sockets */ 
      curl_multi_perform(multi_handle, &still_running);
      break;
    }
  } while(still_running);

   while ((msg = curl_multi_info_read(multi_handle, &msgs_left))) {
        if (msg->msg == CURLMSG_DONE) {
          printf("Finished with %d\n", msg->data.result);
        }
      }
    cout << "Completed" << endl;
    curl_multi_cleanup(multi_handle);

    return 0;
}

代理服务器是不可靠的,但我在输出中看到:

The proxies are not reliable, but this is what I see in the output:

Proxy: 69.12.64.105:8089
Proxy: 69.12.64.105:7808
Proxy: 210.245.20.170:80
Proxy: 190.74.165.109:8080
Proxy: 39.184.2.111:8123
Proxy: 190.201.166.37:8080
Proxy: 190.36.85.199:8080
Proxy: 92.255.231.54:8080
Proxy: 124.126.126.101:80
Proxy: 43.250.255.65:8080
Proxy: 69.12.64.106:7808
Proxy: 201.217.213.166:8080
Proxy: 178.169.90.188:8888
Proxy: 124.248.205.25:8080
Proxy: 39.190.82.133:8123
Proxy: 190.77.230.36:8080
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 69.12.64.105...
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 69.12.64.105...
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 210.245.20.170...
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 190.74.165.109...
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 39.184.2.111...
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 190.201.166.37...
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 190.36.85.199...
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 92.255.231.54...
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 124.126.126.101...
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 43.250.255.65...
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 69.12.64.106...
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 201.217.213.166...
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 178.169.90.188...
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 124.248.205.25...
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 39.190.82.133...
* Rebuilt URL to: <site>
* Hostname was NOT found in DNS cache
*   Trying 190.77.230.36...
* Connected to 69.12.64.105 (69.12.64.105) port 8089 (#0)
* Establish HTTP proxy tunnel to <site>:443
> CONNECT <site>:443 HTTP/1.1
Host: <site>:443
User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36
Proxy-Connection: Keep-Alive
Accept: text/html, application/xml;q=0.9, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1
Accept-Language: ru-RU,ru;q=0.9,en;q=0.8
Accept-Encoding: gzip, deflate, sdch

< HTTP/1.1 503 Service Unavailable
< Server: squid/3.2.13
< Mime-Version: 1.0
< Date: Mon, 20 Apr 2015 23:00:24 GMT
< Content-Type: text/html
< Content-Length: 3694
< X-Squid-Error: ERR_DNS_FAIL 0
< 
* Received HTTP code 503 from proxy after CONNECT
* Connected to 69.12.64.105 (69.12.64.105) port 7808 (#1)
* Establish HTTP proxy tunnel to <site>:443
> CONNECT <site>:443 HTTP/1.1
Host: <site>:443
User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36
Proxy-Connection: Keep-Alive
Accept: text/html, application/xml;q=0.9, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1
Accept-Language: ru-RU,ru;q=0.9,en;q=0.8
Accept-Encoding: gzip, deflate, sdch

< HTTP/1.1 503 Service Unavailable
< Server: squid/3.2.13
< Mime-Version: 1.0
< Date: Mon, 20 Apr 2015 23:00:25 GMT
< Content-Type: text/html
< Content-Length: 3694
< X-Squid-Error: ERR_DNS_FAIL 0
< 
* Received HTTP code 503 from proxy after CONNECT
* Connected to 43.250.255.65 (43.250.255.65) port 8080 (#9)
* Establish HTTP proxy tunnel to <site>:443
> CONNECT <site>:443 HTTP/1.1
Host: <site>:443
User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36
Proxy-Connection: Keep-Alive
Accept: text/html, application/xml;q=0.9, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1
Accept-Language: ru-RU,ru;q=0.9,en;q=0.8
Accept-Encoding: gzip, deflate, sdch

< HTTP/1.1 200 OK
< 
* Proxy replied OK to CONNECT request
* found 173 certificates in /etc/ssl/certs/ca-certificates.crt
* Connected to 69.12.64.106 (69.12.64.106) port 7808 (#10)
* Establish HTTP proxy tunnel to <site>:443
> CONNECT <site>:443 HTTP/1.1
Host: <site>:443
User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36
Proxy-Connection: Keep-Alive
Accept: text/html, application/xml;q=0.9, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1
Accept-Language: ru-RU,ru;q=0.9,en;q=0.8
Accept-Encoding: gzip, deflate, sdch

< HTTP/1.1 200 Connection established
< 
* Proxy replied OK to CONNECT request
* found 173 certificates in /etc/ssl/certs/ca-certificates.crt
* Connected to 190.77.230.36 (190.77.230.36) port 8080 (#15)
* Establish HTTP proxy tunnel to <site>:443
> CONNECT <site>:443 HTTP/1.1
Host: <site>:443
User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36
Proxy-Connection: Keep-Alive
Accept: text/html, application/xml;q=0.9, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1
Accept-Language: ru-RU,ru;q=0.9,en;q=0.8
Accept-Encoding: gzip, deflate, sdch

< HTTP/1.0 200 Connection established
< Proxy-agent: tinyproxy/1.8.2
< 
* Proxy replied OK to CONNECT request
* found 173 certificates in /etc/ssl/certs/ca-certificates.crt
* Connected to 39.184.2.111 (39.184.2.111) port 8123 (#4)
* Establish HTTP proxy tunnel to <site>:443
> CONNECT <site>:443 HTTP/1.1
Host: <site>:443
User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36
Proxy-Connection: Keep-Alive
Accept: text/html, application/xml;q=0.9, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1
Accept-Language: ru-RU,ru;q=0.9,en;q=0.8
Accept-Encoding: gzip, deflate, sdch

* Proxy CONNECT aborted due to timeout
* Connection time-out
* Closing connection 5
* Connection time-out
* Closing connection 6
* Connection time-out
* Closing connection 7
* Connection time-out
* Closing connection 8
* SSL connection timeout
* Closing connection 9
* SSL connection timeout
* Closing connection 10
* Connection time-out
* Closing connection 11
* Connection time-out
* Closing connection 12
* Connection time-out
* Closing connection 13
* Connection time-out
* Closing connection 14
* SSL connection timeout
* Closing connection 15
* Connection timed out after 50056 milliseconds
* Connection timed out after 50055 milliseconds
Finished with 56
Finished with 56
Finished with 56
Finished with 28
Finished with 28
Finished with 28
Finished with 28
Finished with 28
Finished with 28
Finished with 28
Finished with 28
Finished with 28
Finished with 28
Finished with 28
Finished with 28
Finished with 28
Completed

在某些情况下(列表中确实有很多代理是坏的,但并非全部),curl 收到了代理的应答并发送了请求头,但之后就没有任何进展。我单独测试过这些代理,它们都是正常的。
我想不通 curl multi 到底出了什么问题。

In some cases (a lot of the proxies really are bad, but not all of the ones in this list) curl receives answers from the proxies and sends the headers, but nothing more happens. I tested these proxies separately and they are OK.
I cannot figure out what is happening with curl multi.

推荐答案

cURL 的这份文档指出 multi 接口存在一些限制。我意识到自己用到了下面这些受限制的功能:

This cURL documentation lists some restrictions for the multi interface. I realized that I was using these restricted features:


  • NSS SSL连接

  • HTTP 代理 CONNECT 操作

作为这个问题的解决方案,我改用 cURL easy 接口配合 POSIX 线程,效果很好。下面是我的方案,其实就是这个示例加上一些用于保证多线程下安全使用 TLS 的代码:

As a solution to this problem I used the curl easy interface with POSIX threads, and it works well. This is my solution, which is simply this example plus some code for safe multithreaded TLS usage:

/* Enables the section guarded by #ifdef USE_GNUTLS further down, which
   provides init_locks() and kill_locks(). */
#define USE_GNUTLS

#include <cstdio>
#include <pthread.h>

#include <curl/curl.h>

/* we have this global to let the callback get easy access to it.
   NOTE(review): nothing in the visible code reads or writes this pointer;
   it looks like a leftover from a variant that used explicit mutexes --
   confirm before removing. */
static pthread_mutex_t *lockarray;

#ifdef USE_GNUTLS
#include <gcrypt.h>
#include <errno.h>

/* Expands to the definition of gcry_threads_pthread, the pthread-based
   callback table that init_locks() registers with libgcrypt. */
GCRY_THREAD_OPTION_PTHREAD_IMPL;

/*
 * Register the pthread locking callbacks with libgcrypt.
 *
 * Must be called before any thread starts using TLS. gcry_control() is
 * variadic and GCRYCTL_SET_THREAD_CBS takes the address of the callback
 * table as its argument; leaving it out makes libgcrypt versions that still
 * read the argument fetch an indeterminate value.
 */
void init_locks(void)
{
  gcry_control(GCRYCTL_SET_THREAD_CBS, &gcry_threads_pthread);
}

/* Nothing to tear down for this backend; expands to nothing. */
#define kill_locks()
#endif

static void *pull_one_url(void *url)
{
    FILE * file = fopen ("/dev/null", "w");

    CURL * handle = curl_easy_init ();

    curl_slist * chunk = NULL;
    chunk = curl_slist_append(chunk, "Accept: text/html, application/xml;q=0.9, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1");
    chunk = curl_slist_append(chunk, "Accept-Language: ru-RU,ru;q=0.9,en;q=0.8");
    chunk = curl_slist_append(chunk, "Accept-Encoding: gzip, deflate, sdch");

    curl_easy_setopt (handle, CURLOPT_URL, "https://www.avito.ru");
    curl_easy_setopt (handle, CURLOPT_CONNECTTIMEOUT, 30);
    curl_easy_setopt (handle, CURLOPT_TIMEOUT, 30);
    curl_easy_setopt (handle, CURLOPT_FRESH_CONNECT, true);
    //curl_easy_setopt (handle, CURLOPT_VERBOSE, true);
    curl_easy_setopt (handle, CURLOPT_FOLLOWLOCATION, true);
    curl_easy_setopt (handle, CURLOPT_WRITEDATA, file);
    curl_easy_setopt (handle, CURLOPT_TCP_KEEPALIVE, 0L);

    curl_easy_setopt (handle, CURLOPT_ENCODING , "gzip");
    curl_easy_setopt (handle, CURLOPT_PROXY, (const char*) url);
    curl_easy_setopt (handle, CURLOPT_HTTPHEADER, chunk);
    curl_easy_setopt (handle, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36");

    CURLcode res = curl_easy_perform (handle);
    if (res != CURLE_OK){
        printf ("Proxy %s failed with: %d (%s)\n", (const char*) url, res, curl_easy_strerror (res));
    } else {
        long http_code = 0;
        curl_easy_getinfo (handle, CURLINFO_RESPONSE_CODE, &http_code);

        printf("Proxy %s finished with code: %d\n", (const char*) url, http_code);
    }


    curl_easy_cleanup (handle);

    return NULL;
}

/* Proxies to check, one "host:port" string per worker thread. */
const char * urls[] = {
        "69.12.64.105:8089",
        "69.12.64.105:7808",
        "210.245.20.170:80",
        "190.74.165.109:8080",
        "39.184.2.111:8123",
        "190.201.166.37:8080",
        "190.36.85.199:8080",
        "92.255.231.54:8080",
        "124.126.126.101:80",
        "43.250.255.65:8080",
        "69.12.64.106:7808",
        "201.217.213.166:8080",
        "178.169.90.188:8888",
        "124.248.205.25:8080",
        "39.190.82.133:8123",
        "190.77.230.36:8080",
        "201.243.204.230:8080",
        "190.201.58.26:8080",
        "178.166.155.36:8080",
        "183.221.188.66:8123",
        "207.66.105.37:24040",
};

/* Number of worker threads: one per entry in urls. Derived from the table so
   that adding or removing a proxy cannot leave the count out of date. */
const int NUMT = (int) (sizeof (urls) / sizeof (urls[0]));


int main(int argc, char **argv)
{
    pthread_t tid[NUMT];
    int i;
    int error;
    (void)argc; /* we don't use any arguments in this example */
    (void)argv;

    /* Must initialize libcurl before any threads are started */
    curl_global_init(CURL_GLOBAL_ALL);

    init_locks();

    for(i=0; i< NUMT; i++) {
        error = pthread_create(&tid[i],
                               NULL, /* default attributes please */
                               pull_one_url,
                               (void *)urls[i]);
        if(0 != error)
            fprintf(stderr, "Couldn't run thread number %d, errno %d\n", i, error);
        else
            fprintf(stderr, "Thread %d, gets %s\n", i, urls[i]);
    }

    /* now wait for all threads to terminate */
    for(i=0; i< NUMT; i++) {
        error = pthread_join(tid[i], NULL);
        fprintf(stderr, "Thread %d terminated\n", i);
    }

    kill_locks();

    return 0;
}

这篇关于使用 cURL multi 接口通过代理建立多个连接的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆