C++ libcurl:在下载正文之前先检查响应头 [英] C++ libcurl check header before downloading body

查看:424
本文介绍了在 C++ 中使用 libcurl 于下载正文之前先检查响应头的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

现在我使用 PHP/cURL,但我想迁移到 C++,因为我想做的事情用 PHP 无法实现,而且还存在一些性能问题。



这在 C++ 中可以实现吗:



Curl 向某个 URL 发出请求,并检查响应头是否为 text/html;如果是,则下载正文,否则停止并处理下一个 URL。



我想一步完成此操作,而不是先发送 HEAD 请求,在 HEAD 正常之后再次请求该页面进行下载。

如果 libcurl 做不到这一点,有没有其他 C++ 库能够支持这种做法?

解决方案

要发送 HTTP HEAD 请求,您需要设置此选项:

  curl_easy_setopt (ctx,CURLOPT_NOBODY,1); 

你也可以看看这个问题:在发送 HTTP HEAD 请求时需要 libcurl 编程方面的帮助



要将响应头与正文分离(不确定是否真的需要),你可以看看:



http://curl.haxx.se/libcurl/c/sepheaders.html



要使用 libcurl-multi 下载大量不同的 URL,你应该看看:



http://curl.haxx.se/libcurl/c/libcurl-multi.html



对 http://curl.haxx.se/libcurl/c/sendrecv.html 稍作修改:

  #include< stdio.h> 
#include< string.h>
#include< curl / curl.h>

/ *等待套接字的辅助函数。 * /
static int wait_on_socket(curl_socket_t sockfd,int for_recv,long timeout_ms)
{
struct timeval tv;
fd_set infd,outfd,errfd;
int res;

tv.tv_sec = timeout_ms / 1000;
tv.tv_usec =(timeout_ms%1000)* 1000;

FD_ZERO(& infd);
FD_ZERO(& outfd);
FD_ZERO(& errfd);

FD_SET(sockfd,& errfd); / *总是检查错误* /

if(for_recv)
{
FD_SET(sockfd,& infd);
}
else
{
FD_SET(sockfd,& outfd);
}

/ * select()返回信号套接字的数量或-1 * /
res = select(sockfd + 1,& infd,& outfd,& ; errfd,& tv);
return res;
}

int main(void)
{
CURL * curl;
CURLcode res;
/ *简约http请求* /
const char * request =GET / HTTP / 1.0\r\\\
Host:m0g.net\r\\\
\r\\\
;
curl_socket_t sockfd; / * socket * /
long sockextr;
size_t iolen;
curl_off_t nread;

curl = curl_easy_init();
if(curl){
curl_easy_setopt(curl,CURLOPT_URL,http://m0g.net);
/ *不要做传输 - 只连接到主机* /
curl_easy_setopt(curl,CURLOPT_CONNECT_ONLY,1L);
res = curl_easy_perform(curl);

if(CURLE_OK!= res)
{
printf(Error:%s\\\
,strerror(res));
return 1;
}

/ *从curl句柄中提取套接字 - 我们需要它来等待。
*请注意,这个API接受一个指向'long'的指针,否则我们使用
* curl_socket_t作为套接字。
* /
res = curl_easy_getinfo(curl,CURLINFO_LASTSOCKET,& sockextr);

if(CURLE_OK!= res)
{
printf(Error:%s\\\
,curl_easy_strerror(res));
return 1;
}

sockfd = sockextr;

/ *等待套接字准备发送* /
if(!wait_on_socket(sockfd,0,60000L))
{
printf :timeout.\\\
);
return 1;
}

puts(Sending request。);
/ *发送请求。实际应用程序应该检查iolen
*以查看是否所有请求已发送* /
res = curl_easy_send(curl,request,strlen(request),& iolen);

if(CURLE_OK!= res)
{
printf(Error:%s\\\
,curl_easy_strerror(res));
return 1;
}
puts(Reading response。);
char data [2048]
int idxread = 0;

/ *读取响应* /
for(;;)
{
char buf [1024]

wait_on_socket(sockfd,1,60000L);
res = curl_easy_recv(curl,buf,32,& iolen);

if(CURLE_OK!= res)
break;

if(nread + idxread> 2048)
break;

strncpy(data + idxread,buf,nread);
idxread + = nread;

if(strstr(data,\r\\\
\r\\\
)!= NULL){
if(strstr(data,Content-Type:text / html)== NULL){
printf(not an html document。);
return 2;
}
}

nread =(curl_off_t)iolen;

printf(Received%CURL_FORMAT_CURL_OFF_Tbytes.\\\
,nread);
}
printf('''%s'''\\\
,data);

/ * always cleanup * /
curl_easy_cleanup(curl);
}
return 0;
}


Right now I use PHP/cURL, but I want to move to C++ because what I want is not possible with PHP, and there are also some performance issues.

Is this possible in C++:

Curl makes a request to a URL and checks the header to see whether it is text/html; if yes, it downloads the body, otherwise it stops and takes the next URL.

I want to do this in one step, not by first sending HEAD and then, if HEAD is OK, requesting the page again to download it.

If this is not possible with libcurl, are there other libraries for C++ that would support this?

解决方案

To send a HTTP HEAD request, you need to set this option up:

 curl_easy_setopt(ctx,CURLOPT_NOBODY ,1 );

and you can also have a look at this question: help needed on libcurl programming in sending HTTP HEAD Request

to separate headers from body (don't know if it is really needed), you can have a look at :

http://curl.haxx.se/libcurl/c/sepheaders.html

to download a lot of different URLs with libcurl-multi, you should have a look at:

http://curl.haxx.se/libcurl/c/libcurl-multi.html

slight modification of http://curl.haxx.se/libcurl/c/sendrecv.html:

#include <stdio.h>
#include <string.h>
#include <curl/curl.h>

/*
 * Block in select() until sockfd is ready or timeout_ms milliseconds pass.
 *
 * sockfd      socket to watch; it is always placed in the error set
 * for_recv    non-zero: wait for readability, zero: wait for writability
 * timeout_ms  upper bound on the wait, in milliseconds
 *
 * Returns whatever select() returns: the number of signalled sockets,
 * 0 on timeout, or -1 on failure.
 */
static int wait_on_socket(curl_socket_t sockfd, int for_recv, long timeout_ms)
{
  fd_set readable, writable, failed;
  struct timeval limit;

  FD_ZERO(&readable);
  FD_ZERO(&writable);
  FD_ZERO(&failed);

  /* pick the direction the caller asked for */
  if(!for_recv)
  {
    FD_SET(sockfd, &writable);
  }
  else
  {
    FD_SET(sockfd, &readable);
  }

  /* error conditions are watched regardless of direction */
  FD_SET(sockfd, &failed);

  /* split the millisecond timeout into seconds + microseconds */
  limit.tv_usec = (timeout_ms % 1000) * 1000;
  limit.tv_sec = timeout_ms / 1000;

  return select(sockfd + 1, &readable, &writable, &failed, &limit);
}

int main(void)
{
  CURL *curl;
  CURLcode res;
  /* Minimalistic http request */
  const char *request = "GET / HTTP/1.0\r\nHost: m0g.net\r\n\r\n";
  curl_socket_t sockfd; /* socket */
  long sockextr;
  size_t iolen;
  curl_off_t nread;

  curl = curl_easy_init();
  if(curl) {
    curl_easy_setopt(curl, CURLOPT_URL, "http://m0g.net");
    /* Do not do the transfer - only connect to host */
    curl_easy_setopt(curl, CURLOPT_CONNECT_ONLY, 1L);
    res = curl_easy_perform(curl);

    if(CURLE_OK != res)
    {
      printf("Error: %s\n", strerror(res));
      return 1;
    }

    /* Extract the socket from the curl handle - we'll need it for waiting.
     * Note that this API takes a pointer to a 'long' while we use
     * curl_socket_t for sockets otherwise.
     */
    res = curl_easy_getinfo(curl, CURLINFO_LASTSOCKET, &sockextr);

    if(CURLE_OK != res)
    {
      printf("Error: %s\n", curl_easy_strerror(res));
      return 1;
    }

    sockfd = sockextr;

    /* wait for the socket to become ready for sending */
    if(!wait_on_socket(sockfd, 0, 60000L))
    {
      printf("Error: timeout.\n");
      return 1;
    }

    puts("Sending request.");
    /* Send the request. Real applications should check the iolen
     * to see if all the request has been sent */
    res = curl_easy_send(curl, request, strlen(request), &iolen);

    if(CURLE_OK != res)
    {
      printf("Error: %s\n", curl_easy_strerror(res));
      return 1;
    }
    puts("Reading response.");
    char data[2048];
    int idxread=0;

    /* read the response */
    for(;;)
    {
      char buf[1024];

      wait_on_socket(sockfd, 1, 60000L);
      res = curl_easy_recv(curl, buf, 32, &iolen);

      if(CURLE_OK != res)
        break;

      if (nread+idxread > 2048)
          break;

      strncpy(data+idxread,buf,nread);
      idxread+=nread;

      if (strstr(data,"\r\n\r\n") != NULL) {
        if (strstr(data,"Content-Type: text/html") == NULL) {
            printf("not an html document.");
            return 2;
        }
      }

      nread = (curl_off_t)iolen;

      printf("Received %" CURL_FORMAT_CURL_OFF_T " bytes.\n", nread);
    }
    printf("'''%s'''\n", data);

    /* always cleanup */
    curl_easy_cleanup(curl);
  }
  return 0;
}

这篇关于C ++ libcurl检查标题下载之前的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆