使用CURL中的UTF-8数据流，数据流何时从UTF-8转换为ASCII？ [英] Using UTF-8 Datastreams in cURL, when is the datastream converted from UTF-8 to ASCII?

查看：34 发布时间：2022/5/17 16:59:16 c utf-8 libcurl

本文介绍了使用CURL中的UTF-8数据流，数据流何时从UTF-8转换为ASCII？的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

使用cURL从Internet获取字符流时，数据流何时从多字节数据类型转换为单字节字符数组？

我编写了一个程序here，它似乎在回调函数中使用ASCII运行。

但是，我编写了另一个使用带有wchar_t数据类型的UTF-8的程序，它似乎也可以工作。数据流似乎没有区分这两种数据类型，即使在我的机器上wchar_t类型是4字节，而字符是1字节。

我猜有某种类型转换对这个程序是透明的，但我不知道(我认为在UTF-8 ASCII字符中仍然占用1字节的内存，但当程序使用wchar_t数据类型时，系统会用零填充常规的ASCII字符，将它们转换为4个字节，但这不是程序员实现的...)。

#include "multicurl.h"

#define MAX_WAIT_MSECS 5*1000 /* Wait max. 5 seconds */

/*  The largest difference between the ASCII and UTF-8 variations of this program is that this callback function is now dealing with an array of wchar_t blocks rather than chars which are always 1 byte long, but it still works the same basic way. */
static size_t write_callback(wchar_t *ptr, size_t size, size_t nmemb, void *userdata){// cURL callback function [read in datastream to memory]
    // This prototype is provided by cURL, with an argument at the end for our data structure.
    // This function is repeatedly called by cURL until there is no more data in the data stream; *ptr [it is assumed cURL handles memory management for this pointer].
    
    size_t realsize = nmemb * size;// The number of bytes in the datastream [there is no NULL char]
    MemType *mem = (MemType *)userdata;
    wchar_t *tmp = realloc(mem->memory, mem->size  + realsize + sizeof(wchar_t) );// We add 1 wchar_t unit for the NULL character.

    if (tmp == NULL){
        printf("Not Enough Memory, realloc returned NULL.
");
        exit(EXIT_FAILURE);
    }

    mem->memory = tmp;
    memcpy(&(mem->memory[ mem->size /  sizeof(wchar_t) ]), ptr, realsize );// Starting at the last element copy in datastream [it overwrites the last element]
    mem->size += realsize;// The actual size, in bytes, is realsize + ( 1 * sizeof(wchar_t) ), however realsize gives us the location of the last element.
    mem->memory[ mem->size / sizeof(wchar_t) ] = 0;// The datastream doesn't include a NULL character, so we zeroize the last element.
    // We overwrite the NULL character {the zeroized element} on the next callback iteration, if any.

    return (size * nmemb);// cURL crosschecks the datastream with this return value.
}

void *SetUpCurlHandle(CURLM * mh, wchar_t *utf8_url, MemType *output){
// Take in a multi handle pointer address, a URL and a struct pointer address, set up the curl easy handle and add it to the multi handle.

    /* Convert our UTF-8 URL string to a regular ASCII URL string. */
    char* url = (char*) malloc ( wcslen( utf8_url ) + 1 );
    wcstombs(url, utf8_url, wcslen( utf8_url ) * sizeof( wchar_t ) );

    CURL *hnd = NULL;
    output->memory = malloc( sizeof( wchar_t ) );              // Initialize the memory component of the structure.
    output->size = 0;                                           // Initialize the size component of the structure.

    // Initialize the cURL handle.
    hnd = curl_easy_init();

    if(hnd){

        // Setup the cURL options.
        curl_easy_setopt(hnd, CURLOPT_BUFFERSIZE, 102400L);
        curl_easy_setopt(hnd, CURLOPT_URL, url);// Set the request URL
        curl_easy_setopt(hnd, CURLOPT_NOPROGRESS, 1L);
        curl_easy_setopt(hnd, CURLOPT_USERAGENT, "curl/7.80.0");
        curl_easy_setopt(hnd, CURLOPT_MAXREDIRS, 50L);
        curl_easy_setopt(hnd, CURLOPT_HTTP_VERSION, (long)CURL_HTTP_VERSION_2TLS);
        curl_easy_setopt(hnd, CURLOPT_FTP_SKIP_PASV_IP, 1L);
        curl_easy_setopt(hnd, CURLOPT_TCP_KEEPALIVE, 1L);
        curl_easy_setopt(hnd, CURLOPT_WRITEFUNCTION, write_callback);// The callback function to write data to.
        curl_easy_setopt(hnd, CURLOPT_WRITEDATA, (void *)output);// Send the address of the data struct to callback func.
        //curl_easy_setopt(hnd, CURLOPT_VERBOSE, 1);

        curl_multi_add_handle(mh, hnd);
    }else{
        output->memory[0] = '';
    }    
    return NULL;// The output struct was passed by reference no need to return anything.
}

CURLM *SetUpMultiCurlHandle(){
    curl_global_init(CURL_GLOBAL_ALL);

    CURLM * mh = curl_multi_init();
    return mh;
}

void *PerformMultiCurl(CURLM * mh) 
/*Take in a preset multi handle, request data from the remote server asynchronously {it's assumed cURL is using threads transparent to the calling program}.
   Remove the handles from memory.*/
{
    CURLMsg *msg=NULL;
    CURL *hnd = NULL;
    CURLcode return_code = 0;
    int still_running = 0;
    int msgs_left = 0;

    curl_multi_perform(mh, &still_running);// Perform the requests.
    do {
        int numfds=0;
        int res = curl_multi_wait(mh, NULL, 0, MAX_WAIT_MSECS, &numfds);
        if(res != CURLM_OK) {
            fprintf(stderr, "error: curl_multi_wait() returned %d
", res);
            return NULL;
        }
        curl_multi_perform(mh, &still_running);
        
       /* Without this loop the program will proceed to the next statement, most likely before the messages are retrieved from the server.
           The easy handle requests are conducted asynchronously, but one multi handle request is obviously conducted sequentially (can use pthreads to make asynchronous multi requests).*/
    } while(still_running); 
    
    
    /* This portion of the code will clean up and remove the handles from memory, you could change this to make them more persistent */
    while ((msg = curl_multi_info_read(mh, &msgs_left))) {
        if (msg->msg == CURLMSG_DONE) {
            hnd = msg->easy_handle;

            return_code = msg->data.result;
            if(return_code!=CURLE_OK) {
                fprintf(stderr, "CURL error code: %d
", msg->data.result);
                continue;
            }

            curl_multi_remove_handle(mh, hnd);
            curl_easy_cleanup(hnd);
            hnd = NULL;
        }
        else {
            fprintf(stderr, "error: after curl_multi_info_read(), CURLMsg=%d
", msg->msg);
        }
    }

    curl_multi_cleanup(mh);
    curl_global_cleanup();
    return NULL;
}

该程序的完整UTF-8变体here

使用CURL中的UTF-8数据流，数据流何时从UTF-8转换为ASCII？ [英] Using UTF-8 Datastreams in cURL, when is the datastream converted from UTF-8 to ASCII?

问题描述

推荐答案

相关文章

其他开发最新文章

热门教程

热门工具

登录关闭

使用CURL中的UTF-8数据流，数据流何时从UTF-8转换为ASCII？ [英] Using UTF-8 Datastreams in cURL, when is the datastream converted from UTF-8 to ASCII?

问题描述

推荐答案

相关文章

其他开发最新文章

热门教程

热门工具

登录 关闭

登录关闭