程序删除C注释(长签名) [英] Program to remove C comments (long signature)

查看:58
本文介绍了程序删除C注释(长签名)的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

这个程序很长。我不想用细节让大家感到厌烦,

但是它能处理一些奇怪的情况,例如:


/\

* this is a comment *\

/


#define FOO ??/* this is not a comment */


char *a = /* this is a comment "\*/"this is a string"/*" another comment */;


我打算把这个程序作为如何编写一种状态机的示例,而不是紧凑编码的示例,但欢迎任何意见。


谢谢,


- 詹姆斯

-

/ *

* cstripc:从C文件中删除评论的AC程序。

*用法:

* cstripc [file [...]]

* cstripc [-t]

*

*'' - t''选项用于测试。它打印一些指向字符串的指针

*与注释字符交错。

* /


#include< stdio。 h>

#include< stdlib.h>

#include< string.h>


/ *** ************** /

/ **** GLOBALS **** /

/ ******* ********** /


static const char * progname;

static int debug_flag;


/ ********************** /

/ ****主程序**** /

/ ********************** /


static void print_usage(void);

static void print_test(void);


static FILE * open_input_file(const char * filename);

static void close_input_file(FILE * Infile);

static void parse_input_file(FILE * infile);


int

main(int argc,char * argv [ ])

{

progname = argv [0];

if(progname == 0){

progname =" cstripc";

}


while(argc> 1){


if((* argv [1]!='' - '')||(strcmp(argv [1]," - ")== 0 )){

休息;

}


if(strcmp(argv [1]," -t")= = 0){

print_test();

exit(0);

}否则if(strcmp(argv [1]," -d")== 0){

debug_flag = 1;

} else {

fprintf(stderr,"%s:无法识别)选项''%s''\ n",

progname,argv [1]);

print_usage();

退出( EXIT_FAILURE);

}


--argc;

++ argv;

}


if(argc< = 1){

parse_input_file(stdin);

退出(0);

}


while(argc> 1){

FILE * infile;


parse_input_file (infile = open_input_file(argv [1]));

close_input_file(infile);


--argc;

+ + argv;

}

}


/ **************** ********** /

/ ****打印使用/测试**** /

/ ************************** /


static const char * usage_string =

"%s:用于从C文件中删除注释的AC程序。\ n"

"用法:\ n"

" %s [file [...]] \ n"

" %s [-t] \ n"

" \ n"

"''-t''选项用于测试。它会打印一些指向字符串的指针\ n"

"它们与注释字符交错。\ n"

;


static void

print_usage(无效)

{

fprintf(stderr,usage_string,progname,progname,progname);

}


static const char * a;

static const char * b;

static const char * c ;


静态无效

print_test(无效)

{

if(a)puts( a);

如果(b)放(b);

如果(c)放(c);

}


/ ******************************* /

/ ****打开/关闭输入文件**** /

/ *************************** **** /


static const char * input_file_name;


静态文件*

open_input_file(const char * filename)

{

FILE * infile;


input_file_name = filename;


if(filename == 0){

返回0;

}


if(strcmp(filename," - ")== 0){

return stdin;

}


infile = fopen(filename," r");

if(infile == 0){

fprintf(stderr," ;%s:无法打开''%s''进行阅读。\ n",

progname,filename);

}

返回infile;

}


static void

close_input_file(FILE * infile)

{

if(infile){

if(infile!= stdin){

if(fclose(infile)== EOF )

fprintf(stderr,"%s,无法关闭''%s''。\ n",

progname,input_file_name);

}其他{

clearerr(stdin);

}

}

}


/ ************************** /

/ **** PARSE INPUT FILE **** /

/ ************************** /


typedef struct scan_state scan_state;

typedef struct scan_context scan_context;

struct scan_context {

scan_state * ss;

char * sbuf;

unsigned sbufsz;

unsigned sbufcnt;

};


struct scan_state {

scan_state *(* scan)( scan_context * ctx,int input);

const char * name;

};


static scan_context initial_scan_context;


静态无效

parse_input_file(文件* infile)

{

int c;

scan_context ctx;


if(infile == 0){

return;

}


ctx = initial_scan_context;


while((c = fgetc(infile))!= EOF){

if(debug_flag) {

fprintf(stderr,"%s \ n",ctx.ss-> name);

}

ctx。 ss = ctx.ss-> scan(& ctx,c);

}

}


/ ** ********************* /

/ ****国家机器**** /

/ *************************** /

/ *

*

***************************************** ********* *************************

*假设输入是语法正确的C.程序。

*

*基本算法是:

*逐个字符扫描:

*处理三字符作为单个字符。

*如果序列没有开始注释,则发出序列。

*否则,

*扫描字符字符:

*将三字符视为单个字符。

*将序列''\\''''\ n''视为无字符。

*如果序列没有结束评论,继续消费。

*否则,发出一个空格,然后循环回到顶部。

* ************************************************* ************************

*

* /

#define SCAN_STATE_DEFINE(name)\

static scan_state * name ## _ func(scan_context * ctx,int input); \

静态scan_state名称## _ state = {name ## _ func,#name}


SCAN_STATE_DEFINE(正常);

SCAN_STATE_DEFINE(normal_maybe_tri_1);

SCAN_STATE_DEFINE(normal_maybe_tri_2);

SCAN_STATE_DEFINE(字符串);

SCAN_STATE_DEFINE(string_maybe_tri_1);

SCAN_STATE_DEFINE(string_maybe_tri_2);

SCAN_STATE_DEFINE(string_maybe_splice);

SCAN_STATE_DEFINE(char);

SCAN_STATE_DEFINE(char_maybe_tri_1);

SCAN_STATE_DEFINE(char_maybe_tri_2);

SCAN_STATE_DEFINE(char_maybe_splice);

SCAN_STATE_DEFINE(斜杠);

SCAN_STATE_DEFINE(slash_maybe_tri_1);

SCAN_STATE_DEFINE(slash_maybe_tri_2);

SCAN_STATE_DEFINE(slash_maybe_splice);

SCAN_STATE_DEFINE(斜杠);

SCAN_STATE_DEFINE(slashslash_maybe_tri_1) ;

SCAN_STATE_DEFINE(slashslash_maybe_tri_2);

SCAN_STATE_DEFINE(slashslash_maybe_splice);

SCAN_STATE _DEFINE(slashsplat);

SCAN_STATE_DEFINE(slashsplat_splat);

SCAN_STATE_DEFINE(slashsplat_splat_maybe_tri_1);

SCAN_STATE_DEFINE(slashsplat_splat_maybe_tri_2);

SCAN_STATE_DEFINE(slashsplat_splat_maybe_splice);


#define SCAN_STATE(名称)(& name ## _ state)


static scan_context initial_scan_context = { SCAN_STATE(正常),0,0,0};


static void sbuf_append_char(scan_context * ctx,int c);

static void sbuf_append_string(scan_context * ctx,char * s);

static void sbuf_clear(scan_context * ctx);

static void sbuf_emit(scan_context * ctx);


static scan_state *

normal_func(scan_context * ctx,int input)

{

开关(输入){

case''?'':sbuf_emit(ctx);

sbuf_append_char(ctx,input);

返回SCAN_STATE(normal_maybe_tri_1);

case''"'':sbuf_emit(ctx);

putc har(输入);

返回SCAN_STATE(字符串);

case''\'''':sbuf_emit(ctx);

putchar(输入);

返回SCAN_STATE(char);

case''/'':sbuf_emit(ctx);

sbuf_append_char(ctx ,输入);

返回SCAN_STATE(斜杠);

默认值:sbuf_emit(ctx);

putchar(输入);

返回SCAN_STATE(正常);

}

}


static scan_state *

normal_maybe_tri_1_func(scan_context * ctx,int input)

{

switch(输入){

case''?'':sbuf_append_char(ctx ,输入);

返回SCAN_STATE(normal_maybe_tri_2);

默认值:sbuf_emit(ctx);

返回SCAN_STATE(正常) - >扫描(ctx,输入);

}

}


static scan_state *

normal_maybe_tri_2_func(scan_context * ctx,int input)

{

开关(输入){

case''?'':putchar(输入);

返回SCAN_STATE(normal_maybe_tri_2);

case''='':

case''('':

case'')'':

case''<'':

case''>'':

case''!'':

case''\'''':

case'' - '':

case ''/'':sbuf_emit(ctx);

putchar(输入);

返回SCAN_STATE(正常);

默认值:sbuf_emit( ctx);

返回SCAN_STATE(正常) - >扫描(ctx,输入);

}

}


static scan_state *

string_func(scan_context * ctx,int input)

{

switch(input){

case''?'':sbuf_emit(ctx);

sbuf_append_char(ctx,input);

返回SCAN_STATE(string_maybe_tri_1);

case''"'':sbuf_emit(ctx);

putchar(输入);

返回SCAN_STATE(正常);

case''\\'':sbuf_emit( ctx);

sbuf_append_char(ctx,input);

返回SCAN_STATE(string_maybe_splice);

默认值:sbuf_emit(ctx);

putchar(输入);

返回SCAN_STATE(字符串);

}

}


static scan_state *

string_maybe_tri_1_func(scan_context * ctx,int input)

{

开关(输入){

case''?'':sbuf_append_char(ctx,input);

返回SCAN_STATE(string_maybe_tri_2);

默认值:sbuf_emit(ctx);

返回SCAN_STATE(字符串) - >扫描(ctx,输入);

}

}


static scan_state *

string_maybe_tri_2_func(scan_context * ctx,int input)

{

开关(输入){

case''?'':putchar(输入);

返回SCAN_STATE(string_maybe_tri_2);

case''/'':sbuf_append_car(ctx,input);

返回SCAN_STATE(string_maybe_splice);

case''='':

case''('':

case'')'':

case''<'':

case''>'':

case''!'':

case''\'''':

case'' - '':sbuf_emit(ctx);

putchar(输入);

返回SCAN_STATE(字符串);

默认值:sbuf_emit (ctx);

返回SCAN_STATE(字符串) - >扫描(ctx,输入);

}

}


static scan_state *

string_maybe_splice_func(scan_context * ctx,int input)

{

switch(input){

case''\ n'':

默认值:sbuf_emit(ctx);

putchar(输入);

返回SCAN_STATE(字符串);

}

}


static scan_state *

char_func (scan_context * ctx,int input)

{

switch(输入){

case''?'':sbuf_emit(ctx);

sbuf_append_char(ctx,input);

返回SCAN_STATE(ch ar_maybe_tri_1);

case''\'''':sbuf_emit(ctx);

putchar(输入);

返回SCAN_STATE(正常);

case''\\'':sbuf_emit(ctx);

sbuf_append_char(ctx,input);

return SCAN_STATE(char_maybe_splice);

默认值:sbuf_emit(ctx);

putchar(输入);

返回SCAN_STATE(char);

}

}


static scan_state *

char_maybe_tri_1_func(scan_context * ctx,int input)

{

开关(输入){

case''?'':sbuf_append_char(ctx,input);

返回SCAN_STATE(char_maybe_tri_2);

默认值:sbuf_emit(ctx);

putchar(输入);

返回SCAN_STATE(字符) - >扫描(ctx,输入);

}

}


static scan_state *

char_maybe_tri_2_func(scan_context * ctx,int input)

{

开关(输入){

case''?'':putch ar(输入);

返回SCAN_STATE(char_maybe_tri_2);

case''/'':sbuf_append_char(ctx,input);

返回SCAN_STATE(char_maybe_splice);

case''='':

case''('':

case'')'':

case''<'':

case''>'':

case''!'':

case''\'''':

case'' - '':sbuf_emit(ctx);

putchar(输入);

返回SCAN_STATE(char);

默认值:sbuf_emit(ctx);

返回SCAN_STATE(char) - > scan(ctx,input);

}

}


static scan_state *

char_maybe_splice_func(scan_context * ctx,int input)

{

开关(输入){

case''\ n'':

默认值:sbuf_emit (ctx);

putchar(输入);

返回SCAN_STATE(字符);

}

}


static scan_state *

slas h_func(scan_context * ctx,int input)

{

switch(输入){

case''?'':sbuf_append_char(ctx,输入);

返回SCAN_STATE(slash_maybe_tri_1);

case''\\'':sbuf_append_char(ctx,input);

返回SCAN_STATE(slash_maybe_splice);

case''/'':sbuf_clear(ctx);

返回SCAN_STATE(斜杠);

case' '*'':sbuf_clear(ctx);

返回SCAN_STATE(slashsplat);

默认值:sbuf_emit(ctx);

返回SCAN_STATE(正常) - >扫描(ctx,输入);

}

}


静态scan_state *

slash_maybe_tri_1_func(scan_context * ctx,int input)

{

开关(输入){

case''?'':return SCAN_STATE(slash_maybe_tri_2);

默认值:sbuf_emit(ctx);

返回SCAN_STATE(正常) - > scan(ctx,input);

}

}


static scan_state *

slash_maybe_tri_2_func(scan_context * ctx,int input)

{

开关(输入){

case''?' ':sbuf_emit(ctx);

sbuf_append_string(ctx," ??");

返回SCAN_STATE(normal_maybe_tri_2);

case ''/'':sbuf_append_char(ctx,''?'');

sbuf_append_char(ctx,input);

返回SCAN_STATE(slash_maybe_splice);

case''='':

case''('':

case'')'':

case ''<'':

case''>'':

case''!'':

case''\\ \\'''':

case'' - '':sbuf_append_char(ctx,''?'');

sbuf_append_char(ctx,input);

sbuf_emit(ctx);

返回SCAN_STATE(正常);

默认值:sbuf_append_char(ctx,''?'');

sbuf_emit(ctx);

返回SCAN_STATE(正常) - >扫描(ctx,输入);

}

} < b r $>

static scan_state *

slash_maybe_splice_func(scan_context * ctx,int input)

{

switch(输入){

case''\ n'':sbuf_append_char(ctx,input);

返回SCAN_STATE(斜杠);

默认:sbuf_emit(ctx);

返回SCAN_STATE(正常) - >扫描(ctx,输入);

}

}


static scan_state *

slashslash_func(scan_context * ctx,int input)

{

/ * UNUSED * / ctx = ctx;

开关(输入){

case''?'':返回SCAN_STATE(slashslash_maybe_tri_1);

case'' \\'':返回SCAN_STATE(slashslash_maybe_splice);

case''\ n'':putchar('''');

putchar(输入) ;

返回SCAN_STATE(正常);

默认值:返回SCAN_STATE(斜杠);

}

}


static scan_state *

slashslash_maybe_tri_1_func(scan_conte xt * ctx,int input)

{

switch(输入){

case''?'':返回SCAN_STATE(slashslash_maybe_tri_2);

默认:返回SCAN_STATE(斜杠) - >扫描(ctx,输入);

}

}

static scan_state *

slashslash_maybe_tri_2_func(scan_context * ctx,int input)

{

switch(input){

case''?'':返回SCAN_STATE(slashslash_maybe_tri_2);

case''/'':返回SCAN_STATE(slashslash_maybe_splice);

case'' ='':

case''('':

case'')'':

case''<'':

case''>'':

case''!'':

case''\'''':

case'' - '':返回SCAN_STATE(斜杠);

默认值:返回SCAN_STATE(斜杠) - > scan(ctx,input);

}

}


静态scan_state *

slashslash_ma ybe_splice_func(scan_context * ctx,int input)

{

switch(输入){

case''\ n'':返回SCAN_STATE (斜杠);

默认:返回SCAN_STATE(斜杠) - >扫描(ctx,输入);

}

}


static scan_state *

slashsplat_func(scan_context * ctx,int input)

{

/ * UNUSED * / ctx = ctx;

开关(输入){

case''*'':返回SCAN_STATE(slashsplat_splat);

默认值:返回SCAN_STATE(slashsplat);

}

}


static scan_state *

slashsplat_splat_func(scan_context * ctx,int input)

{

switch(输入){

case''?'':返回SCAN_STATE(slashsplat_splat_maybe_tri_1);

case''\\'':返回SCAN_STATE(slashsplat_splat_maybe_splice);

case''/'':putchar('''');

返回SCAN_STATE(正常);

默认值:返回SCAN_STATE(slashsplat) - > scan(ctx,输入);

}

}


static scan_state *

slashsplat_splat_maybe_tri_1_func(scan_context * ctx,int input)

{

开关(输入){

case'' ?'':返回SCAN_STATE(slashsplat_splat_maybe_tri_2);

默认值:返回SCAN_STATE(slashsplat) - > scan(ctx,input);

}

}


static scan_state *

slashsplat_splat_maybe_tri_2_func(scan_context * ctx,int input)

{

开关(输入){

case''/'':返回SCAN_STATE(slashsplat_splat_maybe_splice);

case''='':

case''('':

case'')'':

case''<'':

case'' >'':

case''!'':

case''\'''':

case'' - '':返回SCAN_STATE(slashsplat);

默认值:返回SCAN_ST ATE(slashsplat) - > scan(ctx,input);

}

}


static scan_state *

slashsplat_splat_maybe_splice_func(scan_context * ctx,int input)

{

开关(输入){

case''\ n '':返回SCAN_STATE(slashsplat_splat);

默认值:返回SCAN_STATE(slashsplat) - > scan(ctx,input);

}

}


/ ************************* /

/ **** BUFFER HANDLING **** /

/ ************************* /


static void

sbuf_append_char(scan_context * ctx,int c)

{

if(ctx-> sbuf == 0){

ctx-> sbuf = malloc(ctx-> sbufsz = 128);

}否则if(ctx-> sbufcnt == ctx-> sbufsz){

char * p = realloc(ctx-> sbuf,ctx-> sbufsz * = 2);

if(p == 0){

fprintf(stderr,"%s:内存分配失败\ n",progname);

退出(EXIT_FAILURE);

}

ctx-> sbuf = p;

}


ctx-> sbuf [ctx-> sbufcnt ++] = c;

ctx-> sbuf [ctx-> sbufcnt] =''\ 0'';

}


静态无效

sbuf_append_string(scan_context * ctx,char * s)

{

while(* s!=''\'''){

sbuf_append_char(ctx,* s ++);

}

}


静态无效

sbuf_clear(scan_context * ctx)

{

ctx-> sbufcnt = 0;

if(ctx-> sbuf){

ctx-> sbuf [ctx-> sbufcnt] =''\ 0'';

}

}


静态无效

sbuf_emit(scan_context * ctx)

{

if(ctx-> sbuf = = 0 || ctx-> sbufcnt == 0){

返回;

}


printf("%s",ctx - > sbuf);

sbuf_clear(ctx);

}


/ ********* *********** /

/ ****测试案例**** /

/ ********* *********** /


/ *评论* /

/ \

*评论分开* /

/ \

\

*评论分两次* /

/ *

块评论

* /

/ *评论,尾随分隔符拆分* \

/

/ *评论,尾随分隔符拆分两次* \

\

/

/ *评论,尾随分隔符拆分一次,再次按三字形* \

?? /

/


static const char * a = / *代码行中的注释* /" Hello," / ** /" World!" ;;

static const char * b = / \

*对代码行拆分的评论* /Hello," / \

\

*代码行拆分两次评论* /" World!" ;;


#定义FOO ?? / *这不会开始评论* /


#if定义(__ STDC__)&& (__STDC__ == 1)

#if定义(__ STD_VERSION__)&& (__STD_VERSION__> = 199901L)

// ***更多测试案例*** //

/ \

/ //评论分割

/ \

\

/ //评论拆分两次

static const char * c = // //对代码行发表评论

" Hello," / \

/ //对代码行拆分的评论

" World!" / \

\

/ //对代码行的评论拆分两次。

;


#define BAR ?? //这不会发表评论


//这是一条//评论\

两行


#else

static const char * c =" STDC without STD_VERSION&qu​​ot ;;

#endif

#endif

解决方案

James Hu写道:

这个程序很长。我不是真的想用
细节来讨论每个人,但是它处理的是一些奇怪的案例:
^^^^^

< rant>

很奇怪这个单词是我来过的最拼错的单词之一

跨越:)。

< / rant>

#define FOO ?? / *这不是评论* /




为什么这不是评论?我一直想知道

评论在定义中的行为。如果它真的成为了定义的一部分,那么在这里省略它就不会像在FOO

被替换的所有地方省略它一样吗? br />

感谢您的信息,


-

Martijn
http://www.sereneconcepts.nl


Martijn< su*********************@hotnofiltermail.com>潦草地写道:

James Hu写道:

这个程序很长。我不是真的想用
细节给每个人带来烦恼,但是它处理了一些奇怪的案例,例如:^^^^^
< rant>
奇怪这个词怎么一个大多数拼写错误的单词我来过了。
< / rant>


#define FOO ?? / *这不是评论* /



为什么这不是评论?我一直想知道
评论在定义中的行为。如果它真的成为定义的一部分,那么在这里省略它不一样就像在FOO
被替换的所有地方省略它一样吗?




这不是注释,因为 ??/ 构成一个三字符组(trigraph),会被替换为一个 \

(反斜杠)。


-

/ - Joona Palaste(pa*****@cc.helsinki.fi)-------------芬兰----- --- \

\ - http:// www.helsinki.fi/~palaste ---------------------规则! -------- /

计算机程序执行你告诉它要做的事情,而不是你想要它做什么。

- Anon


" Martijn" <苏********************* @ hotNOFILTERmail.com>写道:

James Hu写道:
< snip>

#define FOO ?? / * this不是评论* /



为什么这不是评论?我总是想知道
评论在定义中的行为。




问题并不在于 #define 指令后面有注释,

"问题"在于 ??/ 是一个三字符组序列,C 预处理器会在剥离注释/之前/把它替换为反斜杠。


如果真的成为其中的一部分定义,是不是在这里省略它与在FOO
被替换的所有地方省略它不一样?


Mu。每个注释都会被替换为一个空格,而且发生在预处理器处理

#define 指令/之前/。(否则就无法"注释掉"预处理器指令。)


有关翻译阶段的更多信息,我建议阅读 C99 标准的
5.1.1.2 节。

感谢您的信息,




HTH

问候

-

Irrwahn

(ir*******@freenet.de)


This program is long. I don't really want to bore everyone with the
details, but it handles wierd cases like:

/\
* this is a comment *\
/

#define FOO ??/* this is not a comment */

char *a = /* this is a comment "\*/"this is a string"/*" another comment */;

I intend this program to be an example of how to write a kind of state
machine, not really an example of tight coding, but any comments would
be welcome.

Thanks,

-- James
--
/*
* cstripc: A C program to strip comments from C files.
* Usage:
* cstripc [file [...]]
* cstripc [-t]
*
* The '-t' option is used for testing. It prints some pointers to strings
* that are interlaced with comment characters.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*****************/
/**** GLOBALS ****/
/*****************/

/* Name used as the prefix of every diagnostic; main() sets it from argv[0]. */
static const char *progname;
/* Non-zero once "-d" was given; parse_input_file() then traces state names. */
static int debug_flag;

/**********************/
/**** MAIN PROGRAM ****/
/**********************/

/* Usage text and self-test output (defined further down in this file). */
static void print_usage(void);
static void print_test(void);

/* Input handling: open one named input, close it again, and scan it. */
static FILE * open_input_file(const char *filename);
static void close_input_file(FILE *infile);
static void parse_input_file(FILE *infile);

/*
 * Program entry point.
 *
 * Leading arguments that start with '-' (other than a lone "-") are options:
 *   -t  print the built-in test strings and exit
 *   -d  enable state tracing on stderr
 * Any other option prints the usage text and exits with EXIT_FAILURE.
 * The remaining arguments are processed in order as input files; with no
 * file arguments standard input is processed instead.
 */
int
main(int argc, char *argv[])
{
    progname = argv[0];
    if (progname == 0) {
        progname = "cstripc";
    }

    /* Option loop: stops at the first non-option or at a lone "-". */
    while (argc > 1) {

        if ((*argv[1] != '-') || (strcmp(argv[1], "-") == 0)) {
            break;
        }

        if (strcmp(argv[1], "-t") == 0) {
            print_test();
            exit(0);
        } else if (strcmp(argv[1], "-d") == 0) {
            debug_flag = 1;
        } else {
            fprintf(stderr, "%s: Unrecognized option '%s'\n",
                    progname, argv[1]);
            print_usage();
            exit(EXIT_FAILURE);
        }

        --argc;
        ++argv;
    }

    /* No file arguments left: filter standard input. */
    if (argc <= 1) {
        parse_input_file(stdin);
        exit(0);
    }

    /*
     * One pass per remaining argument.  open_input_file() returns 0 on
     * failure; parse_input_file() and close_input_file() both accept that.
     */
    while (argc > 1) {
        FILE *infile;

        infile = open_input_file(argv[1]);
        parse_input_file(infile);
        close_input_file(infile);

        --argc;
        ++argv;
    }

    return 0;
}

/**************************/
/**** PRINT USAGE/TEST ****/
/**************************/

/*
 * printf-style usage text.  It contains exactly three "%s" conversions,
 * each of which print_usage() fills with the program name.
 */
static const char *usage_string =
"%s: A C program to strip comments from C files.\n"
"Usage:\n"
" %s [-d] [file [...]]\n"
" %s [-t]\n"
"\n"
"The '-t' option is used for testing. It prints some pointers to strings\n"
"that are interlaced with comment characters.\n"
"The '-d' option prints the name of the current scanner state to stderr\n"
"for every input character.\n"
;

/* Write the usage text to stderr. */
static void
print_usage(void)
{
    fprintf(stderr, usage_string, progname, progname, progname);
}

/*
 * Test strings.  These are tentative definitions; the initialized
 * definitions appear in the TEST CASES section at the end of the file.
 */
static const char *a;
static const char *b;
static const char *c;

/* Print each non-null test string on its own line, in the order a, b, c. */
static void
print_test(void)
{
    const char *samples[3];
    int idx;

    samples[0] = a;
    samples[1] = b;
    samples[2] = c;

    for (idx = 0; idx < 3; ++idx) {
        if (samples[idx] != 0) {
            puts(samples[idx]);
        }
    }
}

/*******************************/
/**** OPEN/CLOSE INPUT FILE ****/
/*******************************/

/* Name of the most recently requested input; used by close_input_file(). */
static const char *input_file_name;

/*
 * Open the named input for reading.
 *
 * filename: path to open; "-" selects stdin; a null pointer yields 0.
 * Returns the stream, or 0 if filename is null or fopen() fails (in the
 * latter case a diagnostic is written to stderr).
 * Side effect: records filename in input_file_name.
 */
static FILE *
open_input_file(const char *filename)
{
    FILE *infile;

    input_file_name = filename;

    if (filename == 0) {
        return 0;
    }

    if (strcmp(filename, "-") == 0) {
        return stdin;
    }

    infile = fopen(filename, "r");
    if (infile == 0) {
        fprintf(stderr, "%s: Could not open '%s' for reading.\n",
                progname, filename);
    }

    return infile;
}

/*
 * Release a stream obtained from open_input_file().
 *
 * A null stream is ignored.  stdin is never closed; its error/EOF
 * indicators are cleared instead.  Any other stream is closed, and a
 * failing fclose() is reported on stderr using the recorded
 * input_file_name.
 */
static void
close_input_file(FILE *infile)
{
    if (infile == 0) {
        return;
    }

    if (infile == stdin) {
        clearerr(stdin);
        return;
    }

    if (fclose(infile) == EOF) {
        fprintf(stderr, "%s: Could not close '%s'.\n",
                progname, input_file_name);
    }
}

/**************************/
/**** PARSE INPUT FILE ****/
/**************************/

/* Forward typedefs: the two structures refer to each other. */
typedef struct scan_state scan_state;
typedef struct scan_context scan_context;

/* Mutable scanner data for one input file. */
struct scan_context {
/* State that will handle the next input character. */
scan_state *ss;
/* Buffer of characters held back until it is known whether to print them. */
char *sbuf;
/* Allocated size of sbuf in bytes. */
unsigned sbufsz;
/* Number of characters currently stored in sbuf (terminator excluded). */
unsigned sbufcnt;
};

/* One state of the machine. */
struct scan_state {
/* Handler: consumes one input character and returns the next state. */
scan_state *(*scan)(scan_context *ctx, int input);
/* State name, printed by parse_input_file() when debug_flag is set. */
const char *name;
};

/* Template copied into each new context; initialized later in this file. */
static scan_context initial_scan_context;

static void
parse_input_file(FILE *infile)
{
int c;
scan_context ctx;

if (infile == 0) {
return;
}

ctx = initial_scan_context;

while ((c = fgetc(infile)) != EOF) {
if (debug_flag) {
fprintf(stderr, "%s\n", ctx.ss->name);
}
ctx.ss = ctx.ss->scan(&ctx, c);
}
}

/***********************/
/**** STATE MACHINE ****/
/***********************/

/*
*
************************************************** *************************
* Assume input is a syntactically correct C program.
*
* The basic algorithm is:
* Scan character by character:
* Treat trigraphs as a single character.
* If the sequence does not start a comment, emit the sequence.
* Otherwise,
* Scan character by character:
* Treat trigraphs as a single character.
* Treat the sequence '\\' '\n' as no character.
* If the sequence does not end a comment, continue consuming.
* Otherwise, emit a space, and loop back to top.
************************************************** *************************
*
*/

/*
 * SCAN_STATE_DEFINE(x) declares the handler x_func and defines the state
 * object x_state, whose name field is the string "x".
 */
#define SCAN_STATE_DEFINE(name) \
static scan_state * name##_func(scan_context *ctx, int input); \
static scan_state name##_state = { name##_func, #name }

/* Ordinary program text, plus possible trigraph prefixes "?" and "??". */
SCAN_STATE_DEFINE(normal);
SCAN_STATE_DEFINE(normal_maybe_tri_1);
SCAN_STATE_DEFINE(normal_maybe_tri_2);
/* Inside a string literal. */
SCAN_STATE_DEFINE(string);
SCAN_STATE_DEFINE(string_maybe_tri_1);
SCAN_STATE_DEFINE(string_maybe_tri_2);
SCAN_STATE_DEFINE(string_maybe_splice);
/* Inside a character constant. */
SCAN_STATE_DEFINE(char);
SCAN_STATE_DEFINE(char_maybe_tri_1);
SCAN_STATE_DEFINE(char_maybe_tri_2);
SCAN_STATE_DEFINE(char_maybe_splice);
/* After a '/' that may begin a comment. */
SCAN_STATE_DEFINE(slash);
SCAN_STATE_DEFINE(slash_maybe_tri_1);
SCAN_STATE_DEFINE(slash_maybe_tri_2);
SCAN_STATE_DEFINE(slash_maybe_splice);
/* Inside a "//" comment. */
SCAN_STATE_DEFINE(slashslash);
SCAN_STATE_DEFINE(slashslash_maybe_tri_1);
SCAN_STATE_DEFINE(slashslash_maybe_tri_2);
SCAN_STATE_DEFINE(slashslash_maybe_splice);
/* Inside a block comment; the _splat states follow a '*' seen inside it. */
SCAN_STATE_DEFINE(slashsplat);
SCAN_STATE_DEFINE(slashsplat_splat);
SCAN_STATE_DEFINE(slashsplat_splat_maybe_tri_1);
SCAN_STATE_DEFINE(slashsplat_splat_maybe_tri_2);
SCAN_STATE_DEFINE(slashsplat_splat_maybe_splice);

/* SCAN_STATE(x) yields a pointer to the state object x_state. */
#define SCAN_STATE(name) (&name##_state)

/* Every file starts in the "normal" state with no buffer allocated. */
static scan_context initial_scan_context = { SCAN_STATE(normal), 0, 0, 0 };

/* Helpers for the held-back text buffer (defined under BUFFER HANDLING). */
static void sbuf_append_char(scan_context *ctx, int c);
static void sbuf_append_string(scan_context *ctx, char *s);
static void sbuf_clear(scan_context *ctx);
static void sbuf_emit(scan_context *ctx);

/*
 * State "normal": program text outside literals and comments.
 *
 * Any held-back text is flushed first.  '?' and '/' are buffered because
 * they may start a trigraph or a comment; quotes are printed and switch to
 * the string/char states; everything else is printed unchanged.
 * Returns the next state.
 */
static scan_state *
normal_func(scan_context *ctx, int input)
{
    sbuf_emit(ctx);

    switch (input) {
    case '?':
        sbuf_append_char(ctx, input);
        return SCAN_STATE(normal_maybe_tri_1);
    case '"':
        putchar(input);
        return SCAN_STATE(string);
    case '\'':
        putchar(input);
        return SCAN_STATE(char);
    case '/':
        sbuf_append_char(ctx, input);
        return SCAN_STATE(slash);
    default:
        putchar(input);
        return SCAN_STATE(normal);
    }
}

/*
 * State after one '?' in normal text (the '?' is in the buffer).
 * A second '?' is buffered too; anything else flushes the buffer and is
 * re-dispatched to the normal state.  Returns the next state.
 */
static scan_state *
normal_maybe_tri_1_func(scan_context *ctx, int input)
{
    switch (input) {
    case '?':
        sbuf_append_char(ctx, input);
        return SCAN_STATE(normal_maybe_tri_2);
    default:
        sbuf_emit(ctx);
        return SCAN_STATE(normal)->scan(ctx, input);
    }
}

/*
 * State after "??" in normal text (both '?' are in the buffer).
 *
 * A further '?' is printed directly and the state is kept.  Any of the
 * nine trigraph completion characters flushes the buffer and is printed,
 * so the trigraph passes through unchanged.  Anything else flushes the
 * buffer and is re-dispatched to the normal state.
 * Returns the next state.
 */
static scan_state *
normal_maybe_tri_2_func(scan_context *ctx, int input)
{
    switch (input) {
    case '?':
        putchar(input);
        return SCAN_STATE(normal_maybe_tri_2);
    case '=':
    case '(':
    case ')':
    case '<':
    case '>':
    case '!':
    case '\'':
    case '-':
    case '/':
        sbuf_emit(ctx);
        putchar(input);
        return SCAN_STATE(normal);
    default:
        sbuf_emit(ctx);
        return SCAN_STATE(normal)->scan(ctx, input);
    }
}

/*
 * State "string": inside a string literal.
 *
 * Held-back text is flushed first.  '?' and '\\' are buffered (possible
 * trigraph / escape or line splice); a '"' is printed and ends the
 * literal; everything else is printed unchanged.
 * Returns the next state.
 */
static scan_state *
string_func(scan_context *ctx, int input)
{
    sbuf_emit(ctx);

    switch (input) {
    case '?':
        sbuf_append_char(ctx, input);
        return SCAN_STATE(string_maybe_tri_1);
    case '"':
        putchar(input);
        return SCAN_STATE(normal);
    case '\\':
        sbuf_append_char(ctx, input);
        return SCAN_STATE(string_maybe_splice);
    default:
        putchar(input);
        return SCAN_STATE(string);
    }
}

/*
 * State after one '?' inside a string literal (the '?' is in the buffer).
 * A second '?' is buffered; anything else flushes the buffer and is
 * re-dispatched to the string state.  Returns the next state.
 */
static scan_state *
string_maybe_tri_1_func(scan_context *ctx, int input)
{
    switch (input) {
    case '?':
        sbuf_append_char(ctx, input);
        return SCAN_STATE(string_maybe_tri_2);
    default:
        sbuf_emit(ctx);
        return SCAN_STATE(string)->scan(ctx, input);
    }
}

/*
 * State after "??" inside a string literal (both '?' are in the buffer).
 *
 * '/' completes the backslash trigraph: it is buffered and handled like a
 * backslash by the splice state.  The other trigraph completion
 * characters flush the buffer and are printed.  A further '?' is printed
 * directly and the state is kept.  Anything else flushes the buffer and
 * is re-dispatched to the string state.
 * Returns the next state.
 */
static scan_state *
string_maybe_tri_2_func(scan_context *ctx, int input)
{
    switch (input) {
    case '?':
        putchar(input);
        return SCAN_STATE(string_maybe_tri_2);
    case '/':
        sbuf_append_char(ctx, input);
        return SCAN_STATE(string_maybe_splice);
    case '=':
    case '(':
    case ')':
    case '<':
    case '>':
    case '!':
    case '\'':
    case '-':
        sbuf_emit(ctx);
        putchar(input);
        return SCAN_STATE(string);
    default:
        sbuf_emit(ctx);
        return SCAN_STATE(string)->scan(ctx, input);
    }
}

/*
 * State after a backslash (or the "??/" trigraph) inside a string literal.
 *
 * The buffered backslash and the character that follows it are printed
 * verbatim, whatever that character is (a newline is treated like any
 * other), so an escaped '"' does not end the literal.
 * Returns the string state.
 */
static scan_state *
string_maybe_splice_func(scan_context *ctx, int input)
{
    switch (input) {
    case '\n':
    default:
        sbuf_emit(ctx);
        putchar(input);
        return SCAN_STATE(string);
    }
}

/*
 * State "char": inside a character constant.
 *
 * Held-back text is flushed first.  '?' and '\\' are buffered (possible
 * trigraph / escape or line splice); a '\'' is printed and ends the
 * constant; everything else is printed unchanged.
 * Returns the next state.
 */
static scan_state *
char_func(scan_context *ctx, int input)
{
    sbuf_emit(ctx);

    switch (input) {
    case '?':
        sbuf_append_char(ctx, input);
        return SCAN_STATE(char_maybe_tri_1);
    case '\'':
        putchar(input);
        return SCAN_STATE(normal);
    case '\\':
        sbuf_append_char(ctx, input);
        return SCAN_STATE(char_maybe_splice);
    default:
        putchar(input);
        return SCAN_STATE(char);
    }
}

/*
 * State after one '?' inside a character constant (the '?' is buffered).
 *
 * A second '?' is buffered; anything else flushes the buffer and is
 * re-dispatched to the char state, which prints it.  The character must
 * not also be printed here, otherwise it would appear twice in the output.
 * Returns the next state.
 */
static scan_state *
char_maybe_tri_1_func(scan_context *ctx, int input)
{
    switch (input) {
    case '?':
        sbuf_append_char(ctx, input);
        return SCAN_STATE(char_maybe_tri_2);
    default:
        sbuf_emit(ctx);
        return SCAN_STATE(char)->scan(ctx, input);
    }
}

/*
 * State after "??" inside a character constant (both '?' are buffered).
 *
 * '/' completes the backslash trigraph: it is buffered and handled like a
 * backslash by the splice state.  The other trigraph completion
 * characters flush the buffer and are printed.  A further '?' is printed
 * directly and the state is kept.  Anything else flushes the buffer and
 * is re-dispatched to the char state.
 * Returns the next state.
 */
static scan_state *
char_maybe_tri_2_func(scan_context *ctx, int input)
{
    switch (input) {
    case '?':
        putchar(input);
        return SCAN_STATE(char_maybe_tri_2);
    case '/':
        sbuf_append_char(ctx, input);
        return SCAN_STATE(char_maybe_splice);
    case '=':
    case '(':
    case ')':
    case '<':
    case '>':
    case '!':
    case '\'':
    case '-':
        sbuf_emit(ctx);
        putchar(input);
        return SCAN_STATE(char);
    default:
        sbuf_emit(ctx);
        return SCAN_STATE(char)->scan(ctx, input);
    }
}

/*
 * State after a backslash (or the "??/" trigraph) inside a character
 * constant.
 *
 * The buffered backslash and the character that follows it are printed
 * verbatim, whatever that character is (a newline is treated like any
 * other), so an escaped '\'' does not end the constant.
 * Returns the char state.
 */
static scan_state *
char_maybe_splice_func(scan_context *ctx, int input)
{
    switch (input) {
    case '\n':
    default:
        sbuf_emit(ctx);
        putchar(input);
        return SCAN_STATE(char);
    }
}

/*
 * State "slash": a '/' has been seen in normal text and is in the buffer,
 * possibly followed by buffered line splices.
 *
 * '?' and '\\' are buffered, since a line splice may separate the '/' from
 * the character that decides whether a comment starts.  A second '/' or a
 * '*' starts a comment: the buffer is discarded.  Anything else means the
 * '/' was ordinary text: the buffer is flushed and the character is
 * re-dispatched to the normal state.
 * Returns the next state.
 */
static scan_state *
slash_func(scan_context *ctx, int input)
{
    switch (input) {
    case '?':
        sbuf_append_char(ctx, input);
        return SCAN_STATE(slash_maybe_tri_1);
    case '\\':
        sbuf_append_char(ctx, input);
        return SCAN_STATE(slash_maybe_splice);
    case '/':
        sbuf_clear(ctx);
        return SCAN_STATE(slashslash);
    case '*':
        sbuf_clear(ctx);
        return SCAN_STATE(slashsplat);
    default:
        sbuf_emit(ctx);
        return SCAN_STATE(normal)->scan(ctx, input);
    }
}

/*
 * State after '/' followed by one '?' (both are in the buffer).
 *
 * A second '?' is NOT buffered here; slash_maybe_tri_2_func adds it once
 * it knows what follows.  Anything else flushes the buffer and is
 * re-dispatched to the normal state.
 * Returns the next state.
 */
static scan_state *
slash_maybe_tri_1_func(scan_context *ctx, int input)
{
    switch (input) {
    case '?':
        return SCAN_STATE(slash_maybe_tri_2);
    default:
        sbuf_emit(ctx);
        return SCAN_STATE(normal)->scan(ctx, input);
    }
}

/*
 * State after '/' followed by "??".  The buffer holds the '/' and the
 * first '?'; the second '?' has been seen but not stored yet.
 *
 * '?'  : the first '?' was ordinary text.  Flush the buffer, then hold
 *        "??" and continue in normal_maybe_tri_2.
 * '/'  : "??/" is the backslash trigraph.  Buffer it and let the splice
 *        state decide whether a line splice follows.
 * other trigraph completion characters: buffer the trigraph, flush
 *        everything and return to normal text.
 * anything else: buffer the pending '?', flush, and re-dispatch the
 *        character to the normal state.
 * Returns the next state.
 */
static scan_state *
slash_maybe_tri_2_func(scan_context *ctx, int input)
{
    switch (input) {
    case '?':
        sbuf_emit(ctx);
        sbuf_append_string(ctx, "??");
        return SCAN_STATE(normal_maybe_tri_2);
    case '/':
        sbuf_append_char(ctx, '?');
        sbuf_append_char(ctx, input);
        return SCAN_STATE(slash_maybe_splice);
    case '=':
    case '(':
    case ')':
    case '<':
    case '>':
    case '!':
    case '\'':
    case '-':
        sbuf_append_char(ctx, '?');
        sbuf_append_char(ctx, input);
        sbuf_emit(ctx);
        return SCAN_STATE(normal);
    default:
        sbuf_append_char(ctx, '?');
        sbuf_emit(ctx);
        return SCAN_STATE(normal)->scan(ctx, input);
    }
}

/*
 * State after '/' followed by a backslash (or "??/"), all buffered.
 *
 * A newline completes a line splice: it is buffered and scanning resumes
 * in the slash state, still undecided.  Anything else means no comment
 * starts here: the buffer is flushed and the character is re-dispatched
 * to the normal state.
 * Returns the next state.
 */
static scan_state *
slash_maybe_splice_func(scan_context *ctx, int input)
{
    switch (input) {
    case '\n':
        sbuf_append_char(ctx, input);
        return SCAN_STATE(slash);
    default:
        sbuf_emit(ctx);
        return SCAN_STATE(normal)->scan(ctx, input);
    }
}

/*
 * State "slashslash": inside a "//" comment.
 *
 * Comment text is dropped.  '?' and '\\' may introduce a line splice that
 * continues the comment on the next line.  A newline ends the comment: a
 * space (replacing the comment) and the newline are printed.
 * Returns the next state.
 */
static scan_state *
slashslash_func(scan_context *ctx, int input)
{
    (void)ctx;  /* unused: nothing is buffered inside a comment */

    switch (input) {
    case '?':
        return SCAN_STATE(slashslash_maybe_tri_1);
    case '\\':
        return SCAN_STATE(slashslash_maybe_splice);
    case '\n':
        putchar(' ');
        putchar(input);
        return SCAN_STATE(normal);
    default:
        return SCAN_STATE(slashslash);
    }
}

/*
 * State after one '?' inside a "//" comment.
 * A second '?' advances to the tri_2 state; anything else is
 * re-dispatched to the slashslash state.  Returns the next state.
 */
static scan_state *
slashslash_maybe_tri_1_func(scan_context *ctx, int input)
{
    switch (input) {
    case '?':
        return SCAN_STATE(slashslash_maybe_tri_2);
    default:
        return SCAN_STATE(slashslash)->scan(ctx, input);
    }
}

/*
 * State after "??" inside a "//" comment.
 *
 * '/' completes the backslash trigraph, which may begin a line splice.
 * The other trigraph completion characters are plain comment text.  A
 * further '?' keeps this state.  Anything else is re-dispatched to the
 * slashslash state.
 * Returns the next state.
 */
static scan_state *
slashslash_maybe_tri_2_func(scan_context *ctx, int input)
{
    switch (input) {
    case '?':
        return SCAN_STATE(slashslash_maybe_tri_2);
    case '/':
        return SCAN_STATE(slashslash_maybe_splice);
    case '=':
    case '(':
    case ')':
    case '<':
    case '>':
    case '!':
    case '\'':
    case '-':
        return SCAN_STATE(slashslash);
    default:
        return SCAN_STATE(slashslash)->scan(ctx, input);
    }
}

/*
 * State after a backslash (or "??/") inside a "//" comment.
 *
 * A newline here is a line splice, so the comment continues on the next
 * line.  Anything else is re-dispatched to the slashslash state.
 * Returns the next state.
 */
static scan_state *
slashslash_maybe_splice_func(scan_context *ctx, int input)
{
    switch (input) {
    case '\n':
        return SCAN_STATE(slashslash);
    default:
        return SCAN_STATE(slashslash)->scan(ctx, input);
    }
}

/*
 * State "slashsplat": inside a block comment.
 * Comment text is dropped; a '*' may be the start of the closing
 * delimiter.  Returns the next state.
 */
static scan_state *
slashsplat_func(scan_context *ctx, int input)
{
    (void)ctx;  /* unused: nothing is buffered inside a comment */

    switch (input) {
    case '*':
        return SCAN_STATE(slashsplat_splat);
    default:
        return SCAN_STATE(slashsplat);
    }
}

/*
 * State after a '*' inside a block comment.
 *
 * '/' closes the comment: a single space is printed in its place.  '?'
 * and '\\' may introduce a line splice between the '*' and the '/'.
 * Anything else (including another '*') is re-dispatched to the
 * slashsplat state.
 * Returns the next state.
 */
static scan_state *
slashsplat_splat_func(scan_context *ctx, int input)
{
    switch (input) {
    case '?':
        return SCAN_STATE(slashsplat_splat_maybe_tri_1);
    case '\\':
        return SCAN_STATE(slashsplat_splat_maybe_splice);
    case '/':
        putchar(' ');
        return SCAN_STATE(normal);
    default:
        return SCAN_STATE(slashsplat)->scan(ctx, input);
    }
}

/*
 * State after '*' followed by one '?' inside a block comment.
 * A second '?' advances to the tri_2 state; anything else is
 * re-dispatched to the slashsplat state.  Returns the next state.
 */
static scan_state *
slashsplat_splat_maybe_tri_1_func(scan_context *ctx, int input)
{
    switch (input) {
    case '?':
        return SCAN_STATE(slashsplat_splat_maybe_tri_2);
    default:
        return SCAN_STATE(slashsplat)->scan(ctx, input);
    }
}

/*
 * State after '*' followed by "??" inside a block comment.
 *
 * '/' completes the backslash trigraph, which may begin a line splice
 * between the '*' and the closing '/'.  The other trigraph completion
 * characters are plain comment text.  Anything else, including a third
 * '?' (which makes the first '?' plain text after the '*'), is
 * re-dispatched to the slashsplat state.
 * Returns the next state.
 */
static scan_state *
slashsplat_splat_maybe_tri_2_func(scan_context *ctx, int input)
{
    switch (input) {
    case '/':
        return SCAN_STATE(slashsplat_splat_maybe_splice);
    case '=':
    case '(':
    case ')':
    case '<':
    case '>':
    case '!':
    case '\'':
    case '-':
        return SCAN_STATE(slashsplat);
    default:
        return SCAN_STATE(slashsplat)->scan(ctx, input);
    }
}

/*
 * State after '*' followed by a backslash (or "??/") inside a block
 * comment.
 *
 * A newline completes a line splice, so the '*' still counts as directly
 * preceding whatever comes next.  Anything else is re-dispatched to the
 * slashsplat state.
 * Returns the next state.
 */
static scan_state *
slashsplat_splat_maybe_splice_func(scan_context *ctx, int input)
{
    switch (input) {
    case '\n':
        return SCAN_STATE(slashsplat_splat);
    default:
        return SCAN_STATE(slashsplat)->scan(ctx, input);
    }
}

/*************************/
/**** BUFFER HANDLING ****/
/*************************/

/*
 * Append one character to the context buffer, keeping it NUL-terminated.
 *
 * ctx: scanner context owning the buffer.
 * c:   character to append.
 *
 * The buffer starts at 128 bytes and doubles whenever the new character
 * plus the terminator would not fit.  On allocation failure a diagnostic
 * is written to stderr and the program exits with EXIT_FAILURE.
 */
static void
sbuf_append_char(scan_context *ctx, int c)
{
    /* Both sbuf[sbufcnt] and sbuf[sbufcnt + 1] are written below. */
    if (ctx->sbuf == 0 || ctx->sbufcnt + 1 >= ctx->sbufsz) {
        unsigned newsz = (ctx->sbuf == 0) ? 128 : ctx->sbufsz * 2;
        char *p = 0;

        /* newsz can only fail this check if the doubling wrapped around. */
        if (newsz > ctx->sbufcnt + 1) {
            /* realloc() with a null pointer behaves like malloc(). */
            p = realloc(ctx->sbuf, newsz);
        }
        if (p == 0) {
            fprintf(stderr, "%s: memory allocation failure\n", progname);
            exit(EXIT_FAILURE);
        }
        ctx->sbuf = p;
        ctx->sbufsz = newsz;
    }

    ctx->sbuf[ctx->sbufcnt++] = (char)c;
    ctx->sbuf[ctx->sbufcnt] = '\0';
}

/*
 * Append every character of the NUL-terminated string s to the context
 * buffer, one at a time via sbuf_append_char().  s is not modified.
 */
static void
sbuf_append_string(scan_context *ctx, char *s)
{
    while (*s != '\0') {
        sbuf_append_char(ctx, *s++);
    }
}

/*
 * Discard the contents of the context buffer.  The allocation is kept;
 * if a buffer exists it is reset to the empty string.
 */
static void
sbuf_clear(scan_context *ctx)
{
    ctx->sbufcnt = 0;
    if (ctx->sbuf) {
        ctx->sbuf[0] = '\0';
    }
}

/*
 * Write the held-back text to stdout and empty the buffer.
 * Does nothing when no buffer exists or it holds no characters.
 */
static void
sbuf_emit(scan_context *ctx)
{
    if (ctx->sbuf != 0 && ctx->sbufcnt != 0) {
        fputs(ctx->sbuf, stdout);
        sbuf_clear(ctx);
    }
}

/********************/
/**** TEST CASES ****/
/********************/

/* a comment */
/\
* a comment split */
/\
\
* a comment split twice */
/*
block comment
*/
/* comment, trailing delimiter split *\
/
/* comment, trailing delimiter split twice *\
\
/
/* comment, trailing delimiter split once, and again by trigraph *\
??/
/

/* Test strings printed by print_test(); the odd comment placement is the test. */
static const char *a = /* comment in code line "*/"Hello, "/**/"World!";
static const char *b = /\
* comment on code line split */ "Hello, " /\
\
* comment on code line split twice */ "World!";

#define FOO ??/* this does not start a comment */

/*
 * Test cases for "//" comments; compiled only by a C99 or later compiler.
 * The version macro is __STDC_VERSION__ (199901L for C99).
 */
#if defined(__STDC__) && (__STDC__ == 1)
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
//*** MORE TEST CASES ***//
/\
/ // comment split
/\
\
/ // comment split twice
static const char *c = // // comment on code line
"Hello, " /\
/ // comment on code line split
"World!" /\
\
/ // comment on code line split twice.
;

#define BAR ??// this does not start a comment

// This is a // comment \
on two lines

#else
static const char *c = "STDC without STD_VERSION";
#endif
#endif

解决方案

James Hu wrote:

This program is long. I don't really want to bore everyone with the
details, but it handles wierd cases like: ^^^^^
<rant>
Weird how this word is one of the most misspelled words I have ever come
across :) .
</rant>
#define FOO ??/* this is not a comment */



Why is this not a comment? I have always wondered about the behaviour of
comments in "defines". And if it really becomes part of the define, isn't
ommiting it here not the same as ommiting it in all the places where FOO
would be replaced?

Thanks for the info,

--
Martijn
http://www.sereneconcepts.nl


Martijn <su*********************@hotnofiltermail.com> scribbled the following:

James Hu wrote:

This program is long. I don''t really want to bore everyone with the
details, but it handles wierd cases like: ^^^^^
<rant>
Weird how this word is one of the most misspelled words I have ever come
across :) .
</rant>


#define FOO ??/* this is not a comment */


Why is this not a comment? I have always wondered about the behaviour of
comments in "defines". And if it really becomes part of the define, isn''t
ommiting it here not the same as ommiting it in all the places where FOO
would be replaced?



It's not a comment because the ??/ forms a trigraph, expanding into a \
(backslash).

--
/-- Joona Palaste (pa*****@cc.helsinki.fi) ------------- Finland --------\
\-- http://www.helsinki.fi/~palaste --------------------- rules! --------/
"A computer program does what you tell it to do, not what you want it to do."
- Anon


"Martijn" <su*********************@hotNOFILTERmail.com> wrote:

James Hu wrote: <snip>

#define FOO ??/* this is not a comment */



Why is this not a comment? I have always wondered about the behaviour of
comments in "defines".



The problem is not to have a comment after a #define directive, the
"problem" is that ??/ is a trigraph sequence that will be replaced
by a backslash by the C preprocessor /prior/ to comment stripping.
And if it really becomes part of the define, isn''t
ommiting it here not the same as ommiting it in all the places where FOO
would be replaced?
Mu. Comments will be replaced by a single blank each, /before/ the
#define directive is handled by the preprocessor. (Otherwise it would
be impossible to 'comment out' preprocessor directives.)

For more info about translation phases I suggest to read section
5.1.1.2 in the C99 Standard.
Thanks for the info,



HTH
Regards
--
Irrwahn
(ir*******@freenet.de)


这篇关于程序删除C注释(长签名)的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆