scanf的格式说明从一组字符阅读零个或多个字符 [英] scanf format specifier to read zero or more characters from a set of characters

查看:156
本文介绍了scanf的格式说明从一组字符阅读零个或多个字符的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我需要非常严格的关于,可以在读取字符串中的字符。

我有一系列空白后跟一个字符后面是一系列空白的。

例如:CC

我需要找到一个格式说明,让我忽略的字符,但只有当它是这个特定的字符,而不是任何其他字符。该序列E应中止。

我试过%* [C],但我的单元测试失败某些情况下 - 导致我相信%* [C] 正在寻找一个或多个'C'而不是零个或多个'C'

我写了一个小例子来帮助说明我的问题更好。请记住,这仅仅是一个最小的例子。中心问题是我如何解析的零个或单个字符之一的ammount的。

 的#include<&stdio.h中GT;
#包括LT&;&string.h中GT;无符号匹配(为const char *公式){
    无符号的E = 0,实测值= 0,S;
    焦德尔;
    INT解析,POS,LEN =(INT)strlen的(公式);
    常量为size_t SOC = sizeof的(字符);
    德尔='';
    经过分析的= sscanf_s(公式中,\\%* [(] X%* [^>>>%U%* [)]%C%N,&安培; S,放大器;德尔,SOC和放大器正); //(X GT;&GT氏)
    如果((2- ==解析)及及(POS == LEN)及及(''==德尔)){
        的printf(%6S:%S \\ n,OK,公式);
    }其他{
        的printf(%6S:%S \\ n,不及格,公式);
        E + = 1;
    }
    返回e的;
}无符号主要(无效)
{
    无符号E = 0;    输出(应该OK \\ n);
    E + =匹配(\\X>> 3 \\); //这其中不具有可选的字符
    E + =匹配(\\(X>> 3)\\);
    E + =匹配(\\(X>> 3)\\\\ r);    的printf(应该失败\\ n);
    如果(0 ==匹配(\\(Y>> 3)\\))E + = 1;
    如果(0 ==匹配(\\G X - GT;> 3)\\))E + = 1;
    如果(0 ==匹配(\\(X>> 3.3-4.2,5.0-6.1)\\))E + = 1;    如果(0 = E!){printf的(错误:%2U \\ n,E); }
    其他{printf的(全通\\ n,E); }
    返回e的;
}


解决方案

正如其他人指出你的,使用的sscanf 为不建议这个目的。它不能赶上的情况下是可选的,可能会或可能不会在之间出现 X 。有了 scanf函数,如果有一个没有任何分隔符来表明它缺少一个可选字段,然后才能确定它丢失的唯一方法是尝试分析它,发现它不存在,并尝试以不同的扫描格式字符串再次解析它。

 解析= sscanf的(公式中,\\%* [(] X%* [^>>>%U%* [)]%C%N ,&安培; S,放大器;德尔,和放大器; POS)
如果(解析!= 2){
    解析后的sscanf =(公式中,\\X%* [^>>>%U%C%N,&安培; S,放大器;德尔,和放大器; POS)
}

在这个解决方案的其余部分描述了如何使用POSIX < regex.h方式> 基本的正前pressions解析它​​

首先,你需要确定你的正常前pression并进行编译。

 为const char *重=
    [\\ t] * \\/ *匹配到* /
    [\\ t] *(\\\\ {0,1 \\\\} [\\ t] */ *匹配(如果present * /
    X [\\ t] *>> [\\ t] */ *匹配'X>>' * /
    \\\\([0-9] [0-9] * \\\\)/ *匹配号码SUBEX pression * /
    [\\ t] *)\\\\ {0,1 \\\\} [\\ t] */ *匹配')',如果present * /
    \\\\(。\\\\)/ *匹配最后一个分隔符为SUBEX pression * /
    [\\ t \\ r \\ n] *; / *匹配结尾的空白* /
regex_t章;
INT R = regcomp(安培;章,重,0);
如果(R!= 0){
    焦炭BUF [256];
    regerror(R,&安培;章,BUF,sizeof的(BUF));
    fprintf中(标准错误,regcomp:%S \\ n,BUF);
    /*...*/
}

现在,你需要对你要匹配字符串执行前pression。编译器会跟踪SUBEX pressions的数量在常规的前pression,并把在 reg.re_nsub 该号码。然而,存在不包括在该计数的隐式SUBEX pression。这是匹配所提供的前pression完整的字符串。这总是在第一场比赛出现了。所以,当您创建匹配阵列,占了点。这就是为什么匹配阵列比什么是在 reg.re_nsub 一个。

 无符号匹配(常量regex_t * $ P $皮克,为const char *公式){
    /*...*/
    INT R;
    const int的NSUB = preG-> re_nsub + 1;
    regmatch_t匹配[NSUB]    R = regexec($ P $皮克,配方,NSUB,火柴,0);
    如果(R == 0){
        / * *成功/
        解析= preG-> re_nsub;
        S =的atoi(公式+匹配[1] .rm_so);
        德尔=公式[匹配[2] .rm_so];
        POS =比赛[0] .rm_eo;
    }其他{
        解析= 0;
    }
    /*...*/

当您使用常规的前pression这样做,你应该释放它(如果它成功地编译)。

  regfree(安培;章);

I need to be very strict in regard to the characters that can be in a read string.

I have a series of whitespace followed by a character followed by a series of whitespace.
Examples: " c ", "c" , "", " "

I need to find a format specifier that allows me to ignore the character but only if it is this particular character and not any other character. This sequence " e " should be aborted.

I tried " %*[c] " but my unittests fail for some scenarios - leading me to believe that " %*[c] " is looking for one or more 'c' instead of zero or more 'c'.

I wrote a mini example to help illustrate my problem better. Keep in mind that this is only a minimum example. The central issue is how do i parse an ammount of zero or one of a single character.

#include <stdio.h>
#include <string.h>

unsigned match(const char * formula){
    unsigned e = 0, found = 0, s;
    char del;
    int parsed, pos, len = (int) strlen(formula); 
    const size_t soc = sizeof( char );
    del = ' ';
    parsed = sscanf_s( formula, " \" %*[(] X%*[^>]>> %u %*[)] %c %n", &s, &del, soc, &pos );// (X >> s )
    if( ( 2 == parsed ) && ( pos == len) && ( '"' == del ) ){
        printf("%6s:%s\n", "OK", formula);
    }else{
        printf("%6s:%s\n", "FAIL", formula);
        e += 1;
    }
    return e;
}

unsigned main( void )
{
    unsigned e = 0;

    printf("SHOULD BE OK\n");
    e += match("     \"X >> 3\""); //This one does not feature the optional characters
    e += match("     \"( X >> 3 ) \"");
    e += match("     \"( X >> 3 ) \"\r");

    printf("SHOULD FAIL\n");
    if ( 0 == match("     \"( Y >> 3 ) \"") ) e += 1;
    if ( 0 == match("     \"g X >> 3 ) \"") ) e += 1;
    if ( 0 == match("     \"( X >> 3.3-4.2 ) \"") ) e += 1;

    if( 0 != e ){ printf( "ERRORS: %2u\n", e ); }
    else{ printf( "all pass\n", e ); }
    return e;
}

解决方案

As others have indicated to you, using sscanf for this purpose is not advised. The case that it cannot catch is the "optional" ( that may or may not appear between the " and the X. With scanf, if there is an optional field that does not have any kind of delimiter to indicate it is missing, then the only way to determine it is missing is to try to parse it, notice it is not there, and try to parse it again with a different scan format string.

parsed = sscanf( formula, " \" %*[(] X%*[^>]>> %u %*[)] %c %n", &s, &del, &pos );
if (parsed != 2) {
    parsed = sscanf( formula, " \" X%*[^>]>> %u %c %n", &s, &del, &pos );
}

The remainder of this solution describes how to use the POSIX <regex.h> basic regular expressions to parse it.

First, you need to define your regular expression and compile it.

const char *re =
    "[ \t]*\""                 /* match up to '"' */
    "[ \t]*(\\{0,1\\}[ \t]*"   /* match '(' if present */
    "X[ \t]*>>[ \t]*"          /* match 'X >>' */
    "\\([0-9][0-9]*\\)"        /* match number as subexpression */
    "[ \t]*)\\{0,1\\}[ \t]*"   /* match ')' if present */
    "\\(.\\)"                  /* match final delimiter as subexpression */
    "[ \t\r\n]*";              /* match trailing whitespace */
regex_t reg;
int r = regcomp(&reg, re, 0);
if (r != 0) {
    char buf[256];
    regerror(r, &reg, buf, sizeof(buf));
    fprintf(stderr, "regcomp: %s\n", buf);
    /*...*/
}

Now, you will need to execute the expression against the string you want to match against. The compiler will track the number of subexpressions in your regular expression, and put that number in reg.re_nsub. However, there is an implicit subexpression that is not included in that count. That is the complete string that matches the supplied expression. This always shows up in the first match. So, when you create your matching array, account for that. That is why the matches array has one more than what is in reg.re_nsub.

unsigned match(const regex_t *preg, const char * formula){
    /*...*/
    int r;
    const int NSUB = preg->re_nsub + 1;
    regmatch_t matches[NSUB];

    r = regexec(preg, formula, NSUB, matches, 0);
    if (r == 0) {
        /* success */
        parsed = preg->re_nsub;
        s = atoi(formula + matches[1].rm_so);
        del = formula[matches[2].rm_so];
        pos = matches[0].rm_eo;
    } else {
        parsed = 0;
    }
    /*...*/

When you are done with the regular expression, you should free it (if it was successfully compiled).

regfree(&reg);

这篇关于scanf的格式说明从一组字符阅读零个或多个字符的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆