下载HTTP直通插座(C) [英] Download HTTP thru sockets (C)

查看:136
本文介绍了下载HTTP直通插座(C)的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

最近,我开始服用本指南让自己开始从互联网上下载文件。我读了它,并提出了以下code下载网站的HTTP主体。唯一的问题是,它不工作。调用的recv()调用时,code停止。它不会崩溃,它只是继续运行。这是我的错吗?我使用了错误的计算策略?我打算用code不只是下载的.html,文件的内容,也下载其他文件(ZIP,PNG,JPG,伤害...)。我希望有别人可以帮助我。这是我的code:

 的#include<&stdio.h中GT;
#包括LT&; SYS / socket.h中> / * * SOCKET /
#包括LT&;&netdb.h中GT; / *结构addrinfo中的* /
#包括LT&;&stdlib.h中GT; /* 出口() */
#包括LT&;&string.h中GT; / * memset的()* /
#包括LT&;&errno.h中GT; / * *错误号/
#包括LT&;&unistd.h中GT; /* 关() */
#包括LT&; ARPA / inet.h> / * IP转换* /#包括LT&;&STDARG.H GT; / * * va_list的/#定义SERVERNAMEdeveloperief2.site11.com
#定义协议80后
#定义MAXDATASIZE 1024 * 1024无效errorOut(INT的地位,为const char *格式,...);
无效* get_in_addr(结构sockaddr * SA);INT主(INT ARGC,为const char * argv的[]){
    INT状态;    //获取地址信息
    结构addrinfo中的*的相关信息;
    结构addrinfo中的提示;    //填写提示
    memset的(安培;提示,0,sizeof的(提示));
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_flags = AI_PASSIVE;
    hints.ai_family = AF_UNSPEC;    //获取地址信息
    状态=的getaddrinfo(服务器,
                         协议,
                         &安培;提示,
                         &安培;相关信息);
    如果(状态!= 0)
        errorOut(-1无法获取ADDRES信息:%S \\ n,gai_strerror(状态));    //建立套接字
    INT的sockfd;    //循环,使用的第一个有效
    结构addrinfo中* P;
    为(P =的相关信息; P!= NULL; P = P-> ai_next){
        //创建SOCKET
        的sockfd =插座(对GT; ai_family,
                        P-> ai_socktype,
                        P-> ai_protocol);
        如果(的sockfd == -1)
            继续;        //尝试连接
        状态=连接(的sockfd,
                         P-> ai_addr,
                         P-> ai_addrlen);
        如果(状态== -1){
            接近(的sockfd);
            继续;
        }        打破;
    }    如果(P == NULL){
        fprintf中(标准错误,无法连接\\ n);
        返回1;
    }    //提醒用户
    焦炭printableIP [INET6_ADDRSTRLEN]
    inet_ntop(对GT; ai_family,
              get_in_addr((结构sockaddr *)&P- GT; ai_addr)
              printableIP,
              的sizeof(printableIP));
    的printf(连接到%s \\ n,printableIP);    //获取相关信息RID
    freeaddrinfo(相关信息);    //接收数据
    ssiz​​e_t供receivedBytes;
    烧焦的buf [MAXDATASIZE]
    的printf(开始接收\\ n);
    receivedBytes =的recv(的sockfd,
                         BUF,
                         MAXDATASIZE-1,
                         0);
    的printf(收到%d字节\\ n,(INT)receivedBytes);
    如果(receivedBytes == -1)
        errorOut(1,错误而接收\\ n);    //空终止
    BUF [receivedBytes] ='\\ 0';    //打印
    的printf(接收数据:\\ n \\ n%S \\ n,BUF);    // 关
    接近(的sockfd);    返回0;
}无效* get_in_addr(结构sockaddr * SA){
    // IP4
    如果(SA->上sa_family == AF_INET)
        返回及(((结构SOCKADDR_IN *)SA) - GT; sin_addr);    返回及(((结构* sockaddr_in6的)SA) - GT; sin6_addr);
}无效errorOut(INT的地位,为const char *格式,...){
    va_list的ARGS;
    的va_start(参数,格式);
    vfprintf(标准错误,格式,参数);
    va_end用来(参数);
    退出(状态);
}


解决方案

如果你想使用HTTP抢文件,然后 libcurl的可能是C.你最好的选择但是,如果您使用的是这样的一种方式来学习网络编程,那么你将不得不学习一些有关HTTP,然后才能检索文件。

你们看到在目前的方案是,你需要发送一个明确要求的文件,然后才能恢复。我会通过 RFC2616 的阅读开始。不要试图去了解它 - 它是很多关于这个例子来阅读。阅读首节获得如何HTTP的作品,然后阅读部分的 4,5,6 了解基本信息格式

下面是一个什么样的计算器问题网页的HTTP请求看起来像一个例子:

  GET http://stackoverflow.com/questions HTTP / 1.1 \\ r \\ n
主持人:stackoverflow.com:80\\r\连接:关闭\\ r \\ n
接受编码:身份,*; Q = 0 \\ r \\ n
\\ r \\ n

我相信这是一个最低限度的要求。我加入了CRLFs明确表明,一个空行用于终止请求头块在RFC2616 。如果你离开了 的Accept-Encoding 头,那么结果的文件可能会被转移作为一个gzip-COM pressed流,因为HTTP允许这种明确指出你不想要它,除非你告诉服务器。

服务器响应还包含元数据描述响应H​​TTP头。下面是从previous请求的响应的示例:

  HTTP / 1.1 200 OK \\ r \\ n
服务器:nginx \\ r \\ n
日期:孙老师,2010年8月1日13时54分56秒GMT \\ r \\ n
内容类型:text / html的;字符集= UTF-8 \\ r \\ n
连接:关闭\\ r \\ n
缓存控制:私人\\ r \\ n
内容长度:49731 \\ r \\ n
\\ r \\ n
\\ r \\ n
\\ r \\ n
< D​​OCTYPE HTML PUBLIC - // W3C // DTD HTML 4.01 // EN... 49667字节跟随!

这个简单的例子应该给你一个想法是什么你做了,如果你想使用HTTP抓取的文件执行。这是最好的情况下,最简单的例子。这不是我会轻易进行,但它可能是学习和AP preciate HTTP的最佳方式。

如果你正在寻找一个简单的方法来学习网络编程,这是开始一个体面的方式。我建议拿起 TCP副本/ IP详解卷1 并的UNIX网络编程,第1卷。这些可能是真正了解如何编写基于网络的应用的最佳途径。我可能会通过自的​​ FTP客户端开始=htt​​p://tool​​s.ietf.org/html / RFC959> FTP 是一个简单得多的协议开始。

如果你正在努力学习与HTTP相关的详细信息,然后:


  1. :权威指南并阅读

  2. ,直到你明白它

    • 使用尝试的例子 telnet服务器80 和手工打字的要求

    • 下载卷曲客户端,并使用 - 详细 - 包括命令行选项,这样就可以看看发生了什么


  3. Fielding的论文直到HTTP真的很有道理。

只是不编写自己的HTTP客户端计划的企业使用的。你不想这样做,相信我作为一个谁一直保持这样的错误了一小会儿,现在...

Recently I started taking this guide to get myself started on downloading files from the internet. I read it and came up with the following code to download the HTTP body of a website. The only problem is, it's not working. The code stops when calling the recv() call. It does not crash, it just keeps on running. Is this my fault? Am I using the wrong approch? I intent to use the code to not just download the contents of .html-files, but also to download other files (zip, png, jpg, dmg ...). I hope there's somebody that can help me. This is my code:

#include <stdio.h>
#include <sys/socket.h> /* SOCKET */
#include <netdb.h> /* struct addrinfo */
#include <stdlib.h> /* exit() */
#include <string.h> /* memset() */
#include <errno.h> /* errno */
#include <unistd.h> /* close() */
#include <arpa/inet.h> /* IP Conversion */

#include <stdarg.h> /* va_list */

#define SERVERNAME "developerief2.site11.com"
#define PROTOCOL "80"
#define MAXDATASIZE 1024*1024

void errorOut(int status, const char *format, ...);
void *get_in_addr(struct sockaddr *sa);

int main (int argc, const char * argv[]) {
    int status;

    // GET ADDRESS INFO
    struct addrinfo *infos; 
    struct addrinfo hints;

    // fill hints
    memset(&hints, 0, sizeof(hints));
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_flags = AI_PASSIVE;
    hints.ai_family = AF_UNSPEC;

    // get address info
    status = getaddrinfo(SERVERNAME, 
                         PROTOCOL, 
                         &hints, 
                         &infos);
    if(status != 0)
        errorOut(-1, "Couldn't get addres information: %s\n", gai_strerror(status));

    // MAKE SOCKET
    int sockfd;

    // loop, use first valid
    struct addrinfo *p;
    for(p = infos; p != NULL; p = p->ai_next) {
        // CREATE SOCKET
        sockfd = socket(p->ai_family, 
                        p->ai_socktype, 
                        p->ai_protocol);
        if(sockfd == -1)
            continue;

        // TRY TO CONNECT
        status = connect(sockfd, 
                         p->ai_addr, 
                         p->ai_addrlen);
        if(status == -1) {
            close(sockfd);
            continue;
        }

        break;
    }

    if(p == NULL) {
        fprintf(stderr, "Failed to connect\n");
        return 1;
    }

    // LET USER KNOW
    char printableIP[INET6_ADDRSTRLEN];
    inet_ntop(p->ai_family,
              get_in_addr((struct sockaddr *)p->ai_addr),
              printableIP,
              sizeof(printableIP));
    printf("Connection to %s\n", printableIP);

    // GET RID OF INFOS
    freeaddrinfo(infos);

    // RECEIVE DATA
    ssize_t receivedBytes;
    char buf[MAXDATASIZE];
    printf("Start receiving\n");
    receivedBytes = recv(sockfd, 
                         buf, 
                         MAXDATASIZE-1, 
                         0);
    printf("Received %d bytes\n", (int)receivedBytes);
    if(receivedBytes == -1)
        errorOut(1, "Error while receiving\n");

    // null terminate
    buf[receivedBytes] = '\0';

    // PRINT
    printf("Received Data:\n\n%s\n", buf);

    // CLOSE
    close(sockfd);

    return 0;
}

void *get_in_addr(struct sockaddr *sa) {
    // IP4
    if(sa->sa_family == AF_INET)
        return &(((struct sockaddr_in *) sa)->sin_addr);

    return &(((struct sockaddr_in6 *) sa)->sin6_addr);
}

void errorOut(int status, const char *format, ...) {
    va_list args;
    va_start(args, format);
    vfprintf(stderr, format, args);
    va_end(args);
    exit(status);
}

解决方案

If you want to grab files using HTTP, then libcURL is probably your best bet in C. However, if you are using this as a way to learn network programming, then you are going to have to learn a bit more about HTTP before you can retrieve a file.

What you are seeing in your current program is that you need to send an explicit request for the file before you can retrieve it. I would start by reading through RFC2616. Don't try to understand it all - it is a lot to read for this example. Read the first section to get an understanding of how HTTP works, then read sections 4, 5, and 6 to understand the basic message format.

Here is an example of what an HTTP request for the stackoverflow Questions page looks like:

GET http://stackoverflow.com/questions HTTP/1.1\r\n
Host: stackoverflow.com:80\r\n
Connection: close\r\n
Accept-Encoding: identity, *;q=0\r\n
\r\n

I believe that is a minimal request. I added the CRLFs explicitly to show that a blank line is used to terminate the request header block as described in RFC2616. If you leave out the Accept-Encoding header, then the result document will probably be transfered as a gzip-compressed stream since HTTP allows for this explicitly unless you tell the server that you do not want it.

The server response also contains HTTP headers for the meta-data describing the response. Here is an example of a response from the previous request:

HTTP/1.1 200 OK\r\n
Server: nginx\r\n
Date: Sun, 01 Aug 2010 13:54:56 GMT\r\n
Content-Type: text/html; charset=utf-8\r\n
Connection: close\r\n
Cache-Control: private\r\n
Content-Length: 49731\r\n
\r\n
\r\n
\r\n
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ... 49,667 bytes follow

This simple example should give you an idea what you are getting into implementing if you want to grab files using HTTP. This is the best case, most simple example. This isn't something that I would undertake lightly, but it is probably the best way to learn and appreciate HTTP.

If you are looking for a simple way to learn network programming, this is a decent way to start. I would recommend picking up a copy of TCP/IP Illustrated, Volume 1 and UNIX Network Programming, Volume 1. These are probably the best way to really learn how to write network-based applications. I would probably start by writing an FTP client since FTP is a much simpler protocol to start with.

If you are trying to learn the details associated with HTTP, then:

  1. Buy HTTP: the Definitive Guide and read it
  2. Read RFC2616 until you understand it
    • Try examples using telnet server 80 and typing in requests by hand
    • Download the cURL client and use the --verbose and --include command line options so that you can see what is happening
  3. Read Fielding's dissertation until HTTP really makes sense.

Just don't plan on writing your own HTTP client for enterprise use. You do not want to do that, trust me as one who has been maintaining such a mistake for a little while now...

这篇关于下载HTTP直通插座(C)的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆