HTTP标头错误报告内容长度时该怎么办 [英] What to do when http header wrongly reports content-length

查看：108 发布时间：2020/11/25 19:14:47 c sockets https

本文介绍了HTTP标头错误报告内容长度时该怎么办的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

我正在尝试通过以下方式通过https下载网页:首先下载带有HEAD请求的标头，然后解析以获得Content-Length，然后使用Content-Length加上标头的一些空间来分配用于存储缓冲区的内存GET请求的结果.似乎stackoverflow.com提供的Content-Length太小，因此我的代码存在段错误.

I am trying to download web pages over https by first downloading the headers with a HEAD request, then parsing to obtain the Content-Length and then using the Content-Length plus some space for headers to allocate memory for a buffer to store results from a GET request. It seems that stackoverflow.com gives a Content-Length that is too small and thus my code segfaults.

我尝试遍历堆栈溢出的问题，以了解如何动态分配内存来处理那些错误报告其Content-Length但无法找到任何合适答案的页面.

I've tried looking through stack overflow past questions to see how to go about dynamically allocating memory to handle pages which misreport their Content-Length but haven't been able to find any suitable answers.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <openssl/bio.h>
#include <openssl/ssl.h>
#include <openssl/err.h>

#define MAX_HEADER_SIZE 8192

/**
 * Main SSL demonstration code entry point
 */
int main() {
    char* host_and_port = "stackoverflow.com:443"; 
    char* head_request = "HEAD / HTTP/1.1\r\nHost: stackoverflow.com\r\n\r\n"; 
    char* get_request = "GET / HTTP/1.1\r\nHost: stackoverflow.com\r\n\r\n"; 
    char* store_path = "mycert.pem"; 
    char *header_token, *line_token, content_length_line[1024];
    char *cmp = "\r\n";
    char *html;
    char *get;
    int content_length;
    size_t i = 0;
    char buffer[MAX_HEADER_SIZE];
    buffer[0] = 0;

    BIO* bio;
    SSL_CTX* ctx = NULL;
    SSL* ssl = NULL;

    /* initilise the OpenSSL library */
    SSL_load_error_strings();
    SSL_library_init();
    ERR_load_BIO_strings();
    OpenSSL_add_all_algorithms();

    bio = NULL;
    int r = 0;

    /* Set up the SSL pointers */
    ctx = SSL_CTX_new(TLS_client_method());
    ssl = NULL;
    r = SSL_CTX_load_verify_locations(ctx, store_path, NULL);

    if (r == 0) {
        fprintf(stdout,"Unable to load the trust store from %s.\n", store_path);
        fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
        fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
        ERR_print_errors_fp(stdout);
    }

    /* Setting up the BIO SSL object */
    bio = BIO_new_ssl_connect(ctx);
    BIO_get_ssl(bio, &ssl);
    if (!(ssl)) {
        printf("Unable to allocate SSL pointer.\n");
        fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
        fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
        ERR_print_errors_fp(stdout); 
        bio = NULL;           
    }
    SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);

    /* Attempt to connect */
    BIO_set_conn_hostname(bio, host_and_port);

    /* Verify the connection opened and perform the handshake */
    if (BIO_do_connect(bio) < 1) {
        fprintf(stdout, "Unable to connect BIO.%s\n", host_and_port);
        fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
        fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
        ERR_print_errors_fp(stdout);
        bio = NULL;
    }

    if (SSL_get_verify_result(ssl) != X509_V_OK) {
        printf("Unable to verify connection result.\n");
        fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
        fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
        ERR_print_errors_fp(stdout);            
    }

    if (bio == NULL)
        return (EXIT_FAILURE);

    r = -1;

    while (r < 0) {

        r = BIO_write(bio, head_request, strlen(head_request));
        if (r <= 0) {
            if (!BIO_should_retry(bio)) {
                printf("BIO_read should retry test failed.\n");
                fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
                fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
                ERR_print_errors_fp(stdout);            
                continue;
            }
            /* It would be prudent to check the reason for the retry and handle
            * it appropriately here */
        }
    }

    r = -1;

    while (r < 0) {
        r = BIO_read(bio, buffer, MAX_HEADER_SIZE);
        if (r == 0) {
            printf("Reached the end of the data stream.\n");
            fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
            fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
            ERR_print_errors_fp(stdout);
            continue;
        } else if (r < 0) {
            if (!BIO_should_retry(bio)) {
                printf("BIO_read should retry test failed.\n");
                fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
                fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
                ERR_print_errors_fp(stdout);
                continue;
            }

            /* It would be prudent to check the reason for the retry and handle
            * it appropriately here */
        }
    };
    printf("%s\r\n", buffer);

    header_token = strtok(buffer, cmp);

    while (header_token != NULL)
    {
        //printf ("header_token: %s\n\n", header_token);
        if (strncmp(header_token, "Content-Length:", strlen("Content-Length:")) == 0 
        || strncmp(header_token, "content-length:", strlen("content-length:")) == 0)
        {
            //printf ("header_token %s is equal to Content-Length:\n", header_token);
            strcpy(content_length_line, header_token);
        }
        header_token = strtok(NULL, cmp);
    }

    if (strlen(content_length_line) > 0) 
    {
        line_token = strtok(content_length_line, " ");
        line_token = strtok(NULL, " ");
        content_length = atoi(line_token);
        printf ("Content-Length = %d\n", content_length);
    }

    //char get[content_length + MAX_HEADER_SIZE];
    get = malloc((content_length + MAX_HEADER_SIZE)*sizeof(char));
    if (get == NULL) {
        fprintf(stdout, "Out of memory\n");
        return (EXIT_FAILURE);
    }

    r = -1;

    while (r < 0) {

        r = BIO_write(bio, get_request, strlen(get_request));
        if (r <= 0) {
            if (!BIO_should_retry(bio)) {
                printf("BIO_read should retry test failed.\n");
                fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
                fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
                ERR_print_errors_fp(stdout);            
                continue;
            }
            /* It would be prudent to check the reason for the retry and handle
            * it appropriately here */
        }
    }

    r = -1;

    while (r) {
        while (r < 0) {
            r = BIO_read(bio, buffer, 4096);
            if (r == 0) {
                printf("Reached the end of the data stream.\n");
                fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
                fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
                ERR_print_errors_fp(stdout);
                continue;
            } else if (r < 0) {
                if (!BIO_should_retry(bio)) {
                    printf("BIO_read should retry test failed.\n");
                    fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
                    fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
                    ERR_print_errors_fp(stdout);
                    continue;
                }

                /* It would be prudent to check the reason for the retry and handle
                * it appropriately here */
            }
        };
        printf("Received %d bytes\n",r);
        printf("Received total of %ld bytes of %d\n", i+r, content_length);
        memcpy(get+i, buffer, r);
        i += r;
    }
    printf("%s\r\n", buffer);

    /* clean up the SSL context resources for the encrypted link */
    SSL_CTX_free(ctx);

    free(get);  

    return (EXIT_SUCCESS);
}

我通常希望能够打印出完整的网页，但是由于错误的Content-Length，我得到了以下输出和段错误.

I would usually expect to be able to print out the full web page but because of the erroneous Content-Length I get the following output and segfault.

Received 1752 bytes
Received total of 248784 bytes of 105585

Program received signal SIGSEGV, Segmentation fault.
__memmove_sse2_unaligned_erms () at ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:404
404     ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S: No such file or directory.

我应该如何处理内容长度不正确的页面?

How should I handle pages that give incorrect Content-Length?

HTTP标头错误报告内容长度时该怎么办 [英] What to do when http header wrongly reports content-length

问题描述

推荐答案

相关文章

其他开发最新文章

热门教程

热门工具

登录关闭

HTTP标头错误报告内容长度时该怎么办 [英] What to do when http header wrongly reports content-length

问题描述

推荐答案

相关文章

其他开发最新文章

热门教程

热门工具

登录 关闭

登录关闭