如何更改此应用程序以禁用命令行输入? [英] How to change this app to disable input from command line?

查看:70
本文介绍了如何更改此应用程序以禁用命令行输入的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

这是原始代码:



This is the Original code:

#include <stdio.h>
#include <string.h>
#include <assert.h>

#if defined(_WIN32) && !defined(__CYGWIN__)
#include <windows.h>
#else
#include <sys/select.h>
#endif

#include <sphinxbase/err.h>
#include <sphinxbase/ad.h>

#include "pocketsphinx.h"

/*
 * Command-line option table for this program.  main() passes it to
 * cmd_ln_parse_r() and cmd_ln_parse_file_r().  It consists of the shared
 * POCKETSPHINX_OPTIONS followed by the program-specific options below, and
 * ends with CMDLN_EMPTY_OPTION as the last entry.
 */
static const arg_t cont_args_def[] = {
    POCKETSPHINX_OPTIONS,
    /* Argument file. */
    {"-argfile",
     ARG_STRING,
     NULL,
     "Argument file giving extra arguments."},
    /* Audio device name, read by recognize_from_microphone(). */
    {"-adcdev",
     ARG_STRING,
     NULL,
     "Name of audio device to use for input."},
    /* Input file name, read by recognize_from_file(). */
    {"-infile",
     ARG_STRING,
     NULL,
     "Audio file to transcribe."},
    /* Selects microphone input in main() when -infile is not given. */
    {"-inmic",
     ARG_BOOLEAN,
     "no",
     "Transcribe audio from microphone."},
    /* Enables print_word_times() output in recognize_from_file(). */
    {"-time",
     ARG_BOOLEAN,
     "no",
     "Print word times in file transcription."},
    CMDLN_EMPTY_OPTION
};

/* Decoder instance; assigned from ps_init() in main() and used by all recognition routines. */
static ps_decoder_t *ps;
/* Parsed configuration; assigned from cmd_ln_parse_r() / cmd_ln_parse_file_r() in main(). */
static cmd_ln_t *config;
/* Input audio file; opened and closed by recognize_from_file(). */
static FILE *rawfd;

/*
 * Print one line per segment of the decoder's current hypothesis:
 * the segment word, its start and end frame divided by the "-frate"
 * setting, and the segment probability converted with logmath_exp().
 */
static void
print_word_times()
{
    int frames_per_sec = cmd_ln_int32_r(config, "-frate");
    ps_seg_t *seg;

    for (seg = ps_seg_iter(ps); seg != NULL; seg = ps_seg_next(seg)) {
        int32 start_frame, end_frame, post_prob;
        float confidence;

        ps_seg_frames(seg, &start_frame, &end_frame);
        post_prob = ps_seg_prob(seg, NULL, NULL, NULL);
        confidence = logmath_exp(ps_get_logmath(ps), post_prob);

        printf("%s %.3f %.3f %f\n", ps_seg_word(seg),
               ((float) start_frame / frames_per_sec),
               ((float) end_frame / frames_per_sec), confidence);
    }
}

/*
 * Validate the fixed-offset fields of a 44-byte WAV header.
 *
 * header      - at least 44 bytes read from the start of the file.
 * expected_sr - sample rate the decoder is configured for.
 *
 * Returns 1 when the header describes 16-bit, PCM (format 1), mono audio at
 * expected_sr; otherwise logs the mismatch with E_ERROR and returns 0.
 *
 * The bytes are read as unsigned and the two-byte fields are assembled in
 * full (little-endian), so values >= 0x80 are neither printed as negative
 * numbers nor shifted into the sign bit, and a non-zero high byte is not
 * silently ignored.
 */
static int
check_wav_header(char *header, int expected_sr)
{
    const unsigned char *h = (const unsigned char *) header;
    int format = h[20] | (h[21] << 8);
    int channels = h[22] | (h[23] << 8);
    int bits = h[34] | (h[35] << 8);
    int sr;

    if (bits != 16) {
        E_ERROR("Input audio file has [%d] bits per sample instead of 16\n", bits);
        return 0;
    }
    if (format != 1) {
        E_ERROR("Input audio file has compression [%d] and not required PCM\n", format);
        return 0;
    }
    if (channels != 1) {
        E_ERROR("Input audio file has [%d] channels, expected single channel mono\n", channels);
        return 0;
    }
    /* Assemble in unsigned arithmetic; only the final value is converted to int. */
    sr = (int) ((unsigned int) h[24]
                | ((unsigned int) h[25] << 8)
                | ((unsigned int) h[26] << 16)
                | ((unsigned int) h[27] << 24));
    if (sr != expected_sr) {
        E_ERROR("Input audio file has sample rate [%d], but decoder expects [%d]\n", sr, expected_sr);
        return 0;
    }
    return 1;
}

/*
 * Continuous recognition from a file.
 *
 * Opens the file named by "-infile" and, for names ending in ".wav", reads
 * and validates the 44-byte header.  Names ending in ".mp3" are rejected.
 * The remaining data is fed to the decoder in blocks of 2048 samples.
 * Each time the decoder reports a speech -> silence transition the
 * utterance is ended, its hypothesis is printed (plus word times when
 * "-time" is set), and a new utterance is started.
 */
static void
recognize_from_file()
{
    int16 adbuf[2048];
    const char *fname;
    const char *hyp;
    size_t fname_len;
    int32 k;
    uint8 utt_started, in_speech;
    int32 print_times = cmd_ln_boolean_r(config, "-time");

    fname = cmd_ln_str_r(config, "-infile");
    if ((rawfd = fopen(fname, "rb")) == NULL) {
        E_FATAL_SYSTEM("Failed to open file '%s' for reading",
                       fname);
    }

    fname_len = strlen(fname);

    if (fname_len > 4 && strcmp(fname + fname_len - 4, ".wav") == 0) {
        char waveheader[44];

        /* A short read would leave the header partly uninitialized, so
         * refuse the file instead of validating garbage. */
        if (fread(waveheader, 1, sizeof(waveheader), rawfd) != sizeof(waveheader)) {
            E_FATAL("Failed to read WAV header from file '%s'.\n", fname);
        }
        if (!check_wav_header(waveheader, (int) cmd_ln_float32_r(config, "-samprate"))) {
            E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname);
        }
    }

    if (fname_len > 4 && strcmp(fname + fname_len - 4, ".mp3") == 0) {
        E_FATAL("Can not decode mp3 files, convert input file to WAV 16kHz 16-bit mono before decoding.\n");
    }

    ps_start_utt(ps);
    utt_started = FALSE;

    while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) {
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition: report and restart */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL);
            if (hyp != NULL) {
                printf("%s\n", hyp);
            }
            if (print_times) {
                print_word_times();
            }
            fflush(stdout);

            ps_start_utt(ps);
            utt_started = FALSE;
        }
    }

    /* End of input: close the utterance still open and report it if
     * speech had started. */
    ps_end_utt(ps);
    if (utt_started) {
        hyp = ps_get_hyp(ps, NULL);
        if (hyp != NULL) {
            printf("%s\n", hyp);
            if (print_times) {
                print_word_times();
            }
        }
    }

    fclose(rawfd);
}

/*
 * Sleep for the specified number of milliseconds.
 *
 * Uses Sleep() on Windows and a descriptor-less select() with a timeout
 * elsewhere.
 */
static void
sleep_msec(int32 ms)
{
#if (defined(_WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
    Sleep(ms);
#else
    /* ------------------- Unix ------------------ */
    struct timeval tmo;

    /* Split into whole seconds and the remaining microseconds so that
     * tv_usec stays below one million; an out-of-range tv_usec may be
     * rejected by select() for ms >= 1000. */
    tmo.tv_sec = ms / 1000;
    tmo.tv_usec = (ms % 1000) * 1000;

    select(0, NULL, NULL, NULL, &tmo);
#endif
}

/*
 * Main utterance processing loop:
 *     for (;;) {
 *        start utterance and wait for speech to process
 *        decoding till end-of-utterance silence will be detected
 *        print utterance result;
 *     }
 */
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[2048];
    uint8 utt_started, in_speech;
    int32 k;
    char const *hyp;

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int) cmd_ln_float32_r(config,
                                                 "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");

    if (ps_start_utt(ps) < 0)
        E_FATAL("Failed to start utterance\n");
    utt_started = FALSE;
    E_INFO("Ready....\n");

    for (;;) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
            E_INFO("Listening...\n");
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition, time to start new utterance  */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL );
            if (hyp != NULL) {
                printf("%s\n", hyp);
                fflush(stdout);
            }

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            E_INFO("Ready....\n");
        }
        sleep_msec(100);
    }
    ad_close(ad);
}

/*
 * Program entry point.
 *
 * Parses the command line (and the file named by "-argfile", when given),
 * requires either "-infile" or "-inmic", initializes the decoder and runs
 * file or microphone recognition.  Returns 1 when no input source was
 * specified or the decoder could not be created, 0 otherwise.
 */
int
main(int argc, char *argv[])
{
    char const *cfg;
    int have_input = 0;

    config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE);

    /* Merge in options from the argument file, if one was named. */
    if (config) {
        cfg = cmd_ln_str_r(config, "-argfile");
        if (cfg != NULL) {
            config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE);
        }
    }

    /* An input source is needed: a file takes precedence over the microphone. */
    if (config != NULL) {
        if (cmd_ln_str_r(config, "-infile") != NULL) {
            have_input = 1;
        }
        else if (cmd_ln_boolean_r(config, "-inmic") != FALSE) {
            have_input = 1;
        }
    }
    if (!have_input) {
        E_INFO("Specify '-infile <file.wav>' to recognize from file or '-inmic yes' to recognize from microphone.\n");
        cmd_ln_free_r(config);
        return 1;
    }

    ps_default_search_args(config);
    ps = ps_init(config);
    if (ps == NULL) {
        cmd_ln_free_r(config);
        return 1;
    }

    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);

    if (cmd_ln_str_r(config, "-infile") != NULL) {
        recognize_from_file();
    }
    else if (cmd_ln_boolean_r(config, "-inmic")) {
        recognize_from_microphone();
    }

    ps_free(ps);
    cmd_ln_free_r(config);

    return 0;
}

#if defined(_WIN32_WCE)
#pragma comment(linker,"/entry:mainWCRTStartup")
#include <windows.h>
//Windows Mobile has the Unicode main only
/*
 * Wide-character entry point: converts each wide argument to a multibyte
 * string and forwards to main().
 *
 * Returns 1 if memory for the converted arguments cannot be allocated,
 * otherwise the return value of main().  The converted strings are not
 * freed here; they stay allocated until the process ends.
 */
int
wmain(int32 argc, wchar_t * wargv[])
{
    char **argv;
    size_t len;
    int i;

    /* One extra slot so that argv[argc] is a NULL pointer. */
    argv = malloc((argc + 1) * sizeof(char *));
    if (argv == NULL)
        return 1;

    for (i = 0; i < argc; i++) {
        /* With a NULL destination wcstombs() returns the number of bytes
         * needed, excluding the terminator, or (size_t)-1 on failure. */
        len = wcstombs(NULL, wargv[i], 0);
        if (len == (size_t) -1)
            len = 0;            /* unconvertible argument: pass "" */

        argv[i] = malloc(len + 1);
        if (argv[i] == NULL)
            return 1;

        /* Convert with room for the terminator, then terminate explicitly:
         * a limit of exactly len bytes would leave the string unterminated. */
        if (len > 0)
            wcstombs(argv[i], wargv[i], len + 1);
        argv[i][len] = '\0';
    }
    argv[argc] = NULL;

    //assuming ASCII parameters
    return main(argc, argv);
}
#endif





我可以通过这个命令编译它:





I can compile it by this command:

g++ -o output continuous.cpp         -DMODELDIR=\"`pkg-config --variable=modeldir pocketsphinx`\"     `pkg-config --cflags --libs pocketsphinx sphinxbase`





并通过此命令运行它:



And run it by this command :

output -inmic yes





我尝试过:



但我想修改代码,使其不再需要传入 -inmic yes,而是自动从麦克风开始识别。但是当我更改这些部分时,出现了分段错误(核心转储):





What I have tried:

But I would like to change the code so that it no longer needs "-inmic yes" and automatically starts recognizing from the microphone. However, I got a segmentation fault (core dumped) error when I changed these parts:

/* NOTE(review): unlike the original cont_args_def[], this is a single arg_t
 * object rather than an array, and it has no CMDLN_EMPTY_OPTION terminator
 * entry and no POCKETSPHINX_OPTIONS. */
static const arg_t cont_args_def= {"-inmic",
     ARG_BOOLEAN,
     "no",
     "Transcribe audio from microphone."};

/* Reduced entry point from the question: parses the command line and runs
 * microphone recognition when -inmic is set. */
int main(int argc, char *argv[])
{
    /* NOTE(review): the result is not checked for NULL before use, unlike
     * the original main(). */
    config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE);

    /* NOTE(review): ps_init() is never called in this version, so, assuming
     * the rest of the file is unchanged, the file-scope `ps` has not been
     * assigned when recognize_from_microphone() and ps_free() use it. */
 if (cmd_ln_boolean_r(config, "-inmic")) {
        recognize_from_microphone();
    }



   // recognize_from_microphone();
    ps_free(ps);
    cmd_ln_free_r(config);



    return 0;

}





我搜索了很多并阅读了文档,但无法理解问题是什么?






编辑:我改变了这样的代码:





I searched a lot and read the documentation, but couldn't understand what the problem is.



I changed the code like this:

/* Second attempt: an array holding POCKETSPHINX_OPTIONS plus -inmic, with
 * CMDLN_EMPTY_OPTION as its last entry. */
static const arg_t cont_args_def[] = {
    POCKETSPHINX_OPTIONS,

    {"-inmic",
     ARG_BOOLEAN,
     "no",
     "Transcribe audio from microphone."},

    CMDLN_EMPTY_OPTION
};







/* Second attempt from the question: always runs microphone recognition,
 * regardless of -inmic. */
int main(int argc, char *argv[])
{
    /* NOTE(review): the result is not checked for NULL before use, unlike
     * the original main(). */
    config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE);

    /* NOTE(review): ps_init() is still never called, so, assuming the rest
     * of the file is unchanged, the file-scope `ps` has not been assigned
     * when recognize_from_microphone() and ps_free() use it. */
// if (cmd_ln_boolean_r(config, "-inmic")) {
        recognize_from_microphone();
//    }



   // recognize_from_microphone();
    ps_free(ps);
    cmd_ln_free_r(config);



    return 0;

}







结果是:






But the result is:

Arguments list definition:
[NAME]			[DEFLT]		[DESCR]
-agc			none		Automatic gain control for c0 ('max', 'emax', 'noise', or 'none')
-agcthresh		2.0		Initial threshold for automatic gain control
-allphone				Perform phoneme decoding with phonetic lm
-allphone_ci		no		Perform phoneme decoding with phonetic lm and context-independent units only
-alpha			0.97		Preemphasis parameter
-ascale			20.0		Inverse of acoustic model scale for confidence score calculation
-aw			1		Inverse weight applied to acoustic scores.
-backtrace		no		Print results and backtraces to log.
-beam			1e-48		Beam width applied to every frame in Viterbi search (smaller values mean wider beam)
-bestpath		yes		Run bestpath (Dijkstra) search over word lattice (3rd pass)
-bestpathlw		9.5		Language model probability weight for bestpath search
-ceplen			13		Number of components in the input feature vector
-cmn			live		Cepstral mean normalization scheme ('live', 'batch', or 'none')
-cmninit		40,3,-1		Initial values (comma-separated) for cepstral mean when 'live' is used
-compallsen		no		Compute all senone scores in every frame (can be faster when there are many senones)
-debug					Verbosity level for debugging messages
-dict					Main pronunciation dictionary (lexicon) input file
-dictcase		no		Dictionary is case sensitive (NOTE: case insensitivity applies to ASCII characters only)
-dither			no		Add 1/2-bit noise
-doublebw		no		Use double bandwidth filters (same center freq)
-ds			1		Frame GMM computation downsampling ratio
-fdict					Noise word pronunciation dictionary input file
-feat			1s_c_d_dd	Feature stream type, depends on the acoustic model
-featparams				File containing feature extraction parameters.
-fillprob		1e-8		Filler word transition probability
-frate			100		Frame rate
-fsg					Sphinx format finite state grammar file
-fsgusealtpron		yes		Add alternate pronunciations to FSG
-fsgusefiller		yes		Insert filler words at each state.
-fwdflat		yes		Run forward flat-lexicon search over word lattice (2nd pass)
-fwdflatbeam		1e-64		Beam width applied to every frame in second-pass flat search
-fwdflatefwid		4		Minimum number of end frames for a word to be searched in fwdflat search
-fwdflatlw		8.5		Language model probability weight for flat lexicon (2nd pass) decoding
-fwdflatsfwin		25		Window of frames in lattice to search for successor words in fwdflat search 
-fwdflatwbeam		7e-29		Beam width applied to word exits in second-pass flat search
-fwdtree		yes		Run forward lexicon-tree search (1st pass)
-hmm					Directory containing acoustic model files.
-inmic			no		Transcribe audio from microphone.
-input_endian		little		Endianness of input data, big or little, ignored if NIST or MS Wav
-jsgf					JSGF grammar file
-keyphrase				Keyphrase to spot
-kws					A file with keyphrases to spot, one per line
-kws_delay		10		Delay to wait for best detection score
-kws_plp		1e-1		Phone loop probability for keyphrase spotting
-kws_threshold		1		Threshold for p(hyp)/p(alternatives) ratio
-latsize		5000		Initial backpointer table size
-lda					File containing transformation matrix to be applied to features (single-stream features only)
-ldadim			0		Dimensionality of output of feature transformation (0 to use entire matrix)
-lifter			0		Length of sin-curve for liftering, or 0 for no liftering.
-lm					Word trigram language model input file
-lmctl					Specify a set of language model
-lmname					Which language model in -lmctl to use by default
-logbase		1.0001		Base in which all log-likelihoods calculated
-logfn					File to write log messages in
-logspec		no		Write out logspectral files instead of cepstra
-lowerf			133.33334	Lower edge of filters
-lpbeam			1e-40		Beam width applied to last phone in words
-lponlybeam		7e-29		Beam width applied to last phone in single-phone words
-lw			6.5		Language model probability weight
-maxhmmpf		30000		Maximum number of active HMMs to maintain at each frame (or -1 for no pruning)
-maxwpf			-1		Maximum number of distinct word exits at each frame (or -1 for no pruning)
-mdef					Model definition input file
-mean					Mixture gaussian means input file
-mfclogdir				Directory to log feature files to
-min_endfr		0		Nodes ignored in lattice construction if they persist for fewer than N frames
-mixw					Senone mixture weights input file (uncompressed)
-mixwfloor		0.0000001	Senone mixture weights floor (applied to data from -mixw file)
-mllr					MLLR transformation to apply to means and variances
-mmap			yes		Use memory-mapped I/O (if possible) for model files
-ncep			13		Number of cep coefficients
-nfft			512		Size of FFT
-nfilt			40		Number of filter banks
-nwpen			1.0		New word transition penalty
-pbeam			1e-48		Beam width applied to phone transitions
-pip			1.0		Phone insertion penalty
-pl_beam		1e-10		Beam width applied to phone loop search for lookahead
-pl_pbeam		1e-10		Beam width applied to phone loop transitions for lookahead
-pl_pip			1.0		Phone insertion penalty for phone loop
-pl_weight		3.0		Weight for phoneme lookahead penalties
-pl_window		5		Phoneme lookahead window size, in frames
-rawlogdir				Directory to log raw audio files to
-remove_dc		no		Remove DC offset from each frame
-remove_noise		yes		Remove noise with spectral subtraction in mel-energies
-remove_silence		yes		Enables VAD, removes silence frames from processing
-round_filters		yes		Round mel filter frequencies to DFT points
-samprate		16000		Sampling rate
-seed			-1		Seed for random number generator; if less than zero, pick our own
-sendump				Senone dump (compressed mixture weights) input file
-senlogdir				Directory to log senone score files to
-senmgau				Senone to codebook mapping input file (usually not needed)
-silprob		0.005		Silence word transition probability
-smoothspec		no		Write out cepstral-smoothed logspectral files
-svspec					Subvector specification (e.g., 24,0-11/25,12-23/26-38 or 0-12/13-25/26-38)
-tmat					HMM state transition matrix input file
-tmatfloor		0.0001		HMM state transition probability floor (applied to -tmat file)
-topn			4		Maximum number of top Gaussians to use in scoring.
-topn_beam		0		Beam width used to determine top-N Gaussians (or a list, per-feature)
-toprule				Start rule for JSGF (first public rule is default)
-transform		legacy		Which type of transform to use to calculate cepstra (legacy, dct, or htk)
-unit_area		yes		Normalize mel filters to unit area
-upperf			6855.4976	Upper edge of filters
-uw			1.0		Unigram weight
-vad_postspeech		50		Num of silence frames to keep after from speech to silence.
-vad_prespeech		20		Num of speech frames to keep before silence to speech.
-vad_startspeech	10		Num of speech frames to trigger vad from silence to speech.
-vad_threshold		2.0		Threshold for decision between noise and silence frames. Log-ratio between signal level and noise level.
-var					Mixture gaussian variances input file
-varfloor		0.0001		Mixture gaussian variance floor (applied to data from -var file)
-varnorm		no		Variance normalize each utterance (only if CMN == current)
-verbose		no		Show input filenames
-warp_params				Parameters defining the warping function
-warp_type		inverse_linear	Warping function type (or shape)
-wbeam			7e-29		Beam width applied to word exits
-wip			0.65		Word insertion penalty
-wlen			0.025625	Hamming window length

Segmentation fault (core dumped)

推荐答案

将您的 arg_t 变量与原始代码中的定义进行比较,并阅读 cmd_ln_parse_r() 函数的文档;如果没有文档,请查看其实现。



arg_t 必须是具有终止条目的数组,以便函数知道何时停止解析支持的参数:

Compare your arg_t variable with those of the original code and read the documentation of the cmd_ln_parse_r() function or have a look at the implementation if there is no documentation.

arg_t must be an array with a terminating entry so that the function knows when to stop parsing the supported arguments:
// Must be an array, not a single arg_t object as in the commented-out line:
//static const arg_t cont_args_def= {"-inmic",
static const arg_t cont_args_def[] = {
    {"-inmic",
     ARG_BOOLEAN,
     "no",
     "Transcribe audio from microphone."},
    // This terminating entry must always be the last array element
    CMDLN_EMPTY_OPTION
}; 





要始终使用麦克风,请从命令列表中删除 -inmic 和 -infile 选项,并始终调用 recognize_from_microphone()。



但在此之前,您必须准备命令选项:



To always use the microphone, remove the -inmic and -infile options from the command list and always call recognize_from_microphone().

But before doing so, you have to prepare the command options:

/* Create the decoder from config before any recognition call; `ps` is the
 * decoder handle used by recognize_from_microphone(). */
ps_default_search_args(config);
ps = ps_init(config);
if (ps == NULL) {
    /* Decoder could not be created: release the configuration and exit. */
    cmd_ln_free_r(config);
    return 1;
}
/* Always use microphone */
recognize_from_microphone();



如果仍然出现段错误,那么问题出在其他地方。



修改现有代码以根据自己的需要进行调整时,了解现有代码的作用至关重要。



例如,如果您没有包含上面为 ps 赋值的代码块,ps 就没有被初始化。在这种情况下,也不要再调用 ps_free(ps)。

[/ EDIT]


If you still get a segmentation fault, it originates somewhere else.

When modifying existing code to adapt it for your own needs, it is essential to understand what the existing code is doing.

If, for example, you do not include the above block, which assigns a value to ps, then ps is not initialized. In that case, do not call ps_free(ps) either.
[/EDIT]


这篇关于如何将此应用程序更改为命令行的可靠输入?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆