如何更改此应用程序以禁用命令行输入？ [英] How to change this app to disable input from command line?

查看：70 发布时间：2019/6/8 3:00:45 C++14

本文介绍了如何更改此应用程序以禁用命令行输入的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

这是原始代码：

This is the Original code:

#include <stdio.h>
#include <string.h>
#include <assert.h>

#if defined(_WIN32) && !defined(__CYGWIN__)
#include <windows.h>
#else
#include <sys/select.h>
#endif

#include <sphinxbase/err.h>
#include <sphinxbase/ad.h>

#include "pocketsphinx.h"

/*
 * Command-line argument table for this program: the standard
 * POCKETSPHINX_OPTIONS followed by the program-specific options below.
 * Each program-specific entry lists the option name, its type, its
 * default value and its help text; CMDLN_EMPTY_OPTION ends the table.
 */
static const arg_t cont_args_def[] = {
    POCKETSPHINX_OPTIONS,
    /* Argument file. */
    {"-argfile",
     ARG_STRING,
     NULL,
     "Argument file giving extra arguments."},
    /* Audio capture device name (no default). */
    {"-adcdev",
     ARG_STRING,
     NULL,
     "Name of audio device to use for input."},
    /* Input audio file; when set, main() runs file recognition. */
    {"-infile",
     ARG_STRING,
     NULL,
     "Audio file to transcribe."},
    /* Microphone mode switch; off by default. */
    {"-inmic",
     ARG_BOOLEAN,
     "no",
     "Transcribe audio from microphone."},
    /* Enables per-word timing output in file recognition; off by default. */
    {"-time",
     ARG_BOOLEAN,
     "no",
     "Print word times in file transcription."},
    CMDLN_EMPTY_OPTION
};

/* Decoder instance; assigned from ps_init() in main(). */
static ps_decoder_t *ps;
/* Parsed command-line configuration; assigned in main(). */
static cmd_ln_t *config;
/* Input audio file handle; opened and closed by recognize_from_file(). */
static FILE *rawfd;

/*
 * Print one line per segment of the decoder's current result:
 * the word, its start and end frame each divided by the "-frate"
 * setting, and the segment probability passed through logmath_exp().
 */
static void
print_word_times()
{
    int frate = cmd_ln_int32_r(config, "-frate");
    ps_seg_t *seg;

    for (seg = ps_seg_iter(ps); seg != NULL; seg = ps_seg_next(seg)) {
        int32 start_frame, end_frame, seg_prob;
        float confidence;

        ps_seg_frames(seg, &start_frame, &end_frame);
        seg_prob = ps_seg_prob(seg, NULL, NULL, NULL);
        confidence = logmath_exp(ps_get_logmath(ps), seg_prob);
        printf("%s %.3f %.3f %f\n", ps_seg_word(seg),
               ((float) start_frame / frate),
               ((float) end_frame / frate), confidence);
    }
}

/*
 * Validate the fixed fields of a 44-byte WAV header.
 *
 * header       points to at least 44 bytes read from the start of the file
 * expected_sr  sample rate the decoder is configured for
 *
 * Returns 1 when the header describes 16-bit, PCM (format 1), single
 * channel audio at expected_sr; otherwise reports the first mismatch
 * with E_ERROR and returns 0.
 */
static int
check_wav_header(char *header, int expected_sr)
{
    /* Read through unsigned char so bytes >= 0x80 are never sign-extended. */
    const unsigned char *h = (const unsigned char *) header;
    /* Header fields are little-endian; decode the full 16-bit values so a
     * non-zero high byte is not silently ignored. */
    int bits = h[34] | (h[35] << 8);
    int format = h[20] | (h[21] << 8);
    int channels = h[22] | (h[23] << 8);
    /* Assemble the 32-bit rate in unsigned arithmetic: shifting a signed
     * int left by 24 overflows when the top byte is >= 0x80. */
    unsigned long sr = (unsigned long) h[24]
        | ((unsigned long) h[25] << 8)
        | ((unsigned long) h[26] << 16)
        | ((unsigned long) h[27] << 24);

    if (bits != 16) {
        E_ERROR("Input audio file has [%d] bits per sample instead of 16\n", bits);
        return 0;
    }
    if (format != 1) {
        E_ERROR("Input audio file has compression [%d] and not required PCM\n", format);
        return 0;
    }
    if (channels != 1) {
        E_ERROR("Input audio file has [%d] channels, expected single channel mono\n", channels);
        return 0;
    }
    if (expected_sr < 0 || sr != (unsigned long) expected_sr) {
        E_ERROR("Input audio file has sample rate [%lu], but decoder expects [%d]\n", sr, expected_sr);
        return 0;
    }
    return 1;
}

/*
 * Continuous recognition from a file.
 *
 * Opens the file named by "-infile" into the file-scope rawfd. For names
 * ending in ".wav" the first 44 bytes are read and validated with
 * check_wav_header(); names ending in ".mp3" are rejected. The remaining
 * data is fed to the decoder in blocks of up to 2048 samples. Each time
 * speech is followed by silence the utterance is ended, its hypothesis is
 * printed (plus word times when "-time" is set), and a new utterance is
 * started. Any utterance still open at end of file is reported last.
 * Failures are reported through E_FATAL / E_FATAL_SYSTEM.
 */
static void
recognize_from_file()
{
    int16 adbuf[2048];
    const char *fname;
    const char *hyp;
    size_t fname_len;
    int32 k;
    uint8 utt_started, in_speech;
    int32 print_times = cmd_ln_boolean_r(config, "-time");

    fname = cmd_ln_str_r(config, "-infile");
    if ((rawfd = fopen(fname, "rb")) == NULL) {
        E_FATAL_SYSTEM("Failed to open file '%s' for reading",
                       fname);
    }

    /* The extension checks below both need the length; compute it once. */
    fname_len = strlen(fname);

    if (fname_len > 4 && strcmp(fname + fname_len - 4, ".wav") == 0) {
        char waveheader[44];

        /* A short read would leave part of the header uninitialised, so
         * refuse to validate anything but a complete header. */
        if (fread(waveheader, 1, sizeof(waveheader), rawfd) != sizeof(waveheader)) {
            E_FATAL("Failed to read WAV header from file '%s'.\n", fname);
        }
        if (!check_wav_header(waveheader, (int) cmd_ln_float32_r(config, "-samprate"))) {
            E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname);
        }
    }

    if (fname_len > 4 && strcmp(fname + fname_len - 4, ".mp3") == 0) {
        E_FATAL("Can not decode mp3 files, convert input file to WAV 16kHz 16-bit mono before decoding.\n");
    }

    ps_start_utt(ps);
    utt_started = FALSE;

    while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) {
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        }
        if (!in_speech && utt_started) {
            /* Speech followed by silence: finish and report this utterance. */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL);
            if (hyp != NULL)
                printf("%s\n", hyp);
            if (print_times)
                print_word_times();
            fflush(stdout);

            ps_start_utt(ps);
            utt_started = FALSE;
        }
    }

    /* End of input: close the open utterance and report it if speech
     * had started. */
    ps_end_utt(ps);
    if (utt_started) {
        hyp = ps_get_hyp(ps, NULL);
        if (hyp != NULL) {
            printf("%s\n", hyp);
            if (print_times) {
                print_word_times();
            }
        }
    }

    fclose(rawfd);
}

/*
 * Sleep for the specified number of milliseconds.
 *
 * Uses Sleep() on Windows builds and select() with no file descriptors
 * and a timeout elsewhere.
 */
static void
sleep_msec(int32 ms)
{
#if (defined(_WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
    Sleep(ms);
#else
    /* ------------------- Unix ------------------ */
    struct timeval tmo;

    /* Carry whole seconds in tv_sec: select() may reject a tv_usec of
     * one million or more, and ms * 1000 could also overflow for large
     * delays. */
    tmo.tv_sec = ms / 1000;
    tmo.tv_usec = (ms % 1000) * 1000;

    select(0, NULL, NULL, NULL, &tmo);
#endif
}

/*
 * Main utterance processing loop:
 *     for (;;) {
 *        start utterance and wait for speech to process
 *        decoding till end-of-utterance silence will be detected
 *        print utterance result;
 *     }
 */
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[2048];
    uint8 utt_started, in_speech;
    int32 k;
    char const *hyp;

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int) cmd_ln_float32_r(config,
                                                 "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");

    if (ps_start_utt(ps) < 0)
        E_FATAL("Failed to start utterance\n");
    utt_started = FALSE;
    E_INFO("Ready....\n");

    for (;;) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
            E_INFO("Listening...\n");
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition, time to start new utterance  */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL );
            if (hyp != NULL) {
                printf("%s\n", hyp);
                fflush(stdout);
            }

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            E_INFO("Ready....\n");
        }
        sleep_msec(100);
    }
    ad_close(ad);
}

/*
 * Program entry point.
 *
 * Parses the command line (plus the file named by "-argfile", if given),
 * requires either "-infile" or "-inmic", initialises the decoder, and
 * runs file recognition when "-infile" is set or microphone recognition
 * when "-inmic" is set. Returns 1 when no input source was specified or
 * the decoder could not be initialised, 0 otherwise.
 */
int
main(int argc, char *argv[])
{
    char const *argfile;
    int have_input;

    config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE);

    /* Handle argument file as -argfile. */
    if (config != NULL) {
        argfile = cmd_ln_str_r(config, "-argfile");
        if (argfile != NULL)
            config = cmd_ln_parse_file_r(config, cont_args_def, argfile, FALSE);
    }

    /* An input source is either a file name or the microphone switch. */
    have_input = config != NULL
        && (cmd_ln_str_r(config, "-infile") != NULL
            || cmd_ln_boolean_r(config, "-inmic") != FALSE);
    if (!have_input) {
        E_INFO("Specify '-infile <file.wav>' to recognize from file or '-inmic yes' to recognize from microphone.\n");
        cmd_ln_free_r(config);
        return 1;
    }

    ps_default_search_args(config);
    ps = ps_init(config);
    if (ps == NULL) {
        cmd_ln_free_r(config);
        return 1;
    }

    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);

    if (cmd_ln_str_r(config, "-infile") != NULL)
        recognize_from_file();
    else if (cmd_ln_boolean_r(config, "-inmic"))
        recognize_from_microphone();

    ps_free(ps);
    cmd_ln_free_r(config);

    return 0;
}

#if defined(_WIN32_WCE)
#pragma comment(linker,"/entry:mainWCRTStartup")
#include <windows.h>
//Windows Mobile has the Unicode main only
/*
 * Wide-character entry point: converts each wide argument to a
 * multibyte string with wcstombs() and forwards to main().
 *
 * Returns main()'s result, or 1 if an argument cannot be converted or
 * memory cannot be allocated.
 */
int
wmain(int32 argc, wchar_t * wargv[])
{
    char **argv;
    size_t len;
    int i;

    /* One extra slot so argv[argc] can be NULL, as main() may expect. */
    argv = malloc((argc + 1) * sizeof(char *));
    if (argv == NULL)
        return 1;

    for (i = 0; i < argc; i++) {
        /* With a NULL destination wcstombs() reports the bytes needed,
         * not counting the terminator, or (size_t)-1 on failure. */
        len = wcstombs(NULL, wargv[i], 0);
        if (len == (size_t) -1)
            return 1;
        argv[i] = malloc(len + 1);
        if (argv[i] == NULL)
            return 1;
        /* Allow len + 1 bytes so the terminating NUL is written too;
         * a limit of exactly len would leave the string unterminated. */
        wcstombs(argv[i], wargv[i], len + 1);
    }
    argv[argc] = NULL;

    //assuming ASCII parameters
    return main(argc, argv);
}
#endif

我可以通过这个命令编译它：

I can compile it by this command:

g++ -o output continuous.cpp         -DMODELDIR=\"`pkg-config --variable=modeldir pocketsphinx`\"     `pkg-config --cflags --libs pocketsphinx sphinxbase`

并通过此命令运行它：

And run it by this command :

output -inmic yes

我尝试过：

但我想修改代码，使其不再需要传入 -inmic yes 参数，而是自动从麦克风开始运行。但是当我更改以下部分时，出现了段错误（核心已转储）：

What I have tried:

But I would like to change the code so that it no longer needs the `-inmic yes` argument and automatically starts recognizing from the microphone. However, I got a segmentation fault (core dumped) error when I changed these parts:

/*
 * First attempt at a reduced argument definition with only "-inmic".
 * NOTE(review): unlike the original table, this declares a single arg_t
 * rather than an array, and it has neither POCKETSPHINX_OPTIONS nor the
 * CMDLN_EMPTY_OPTION terminator -- confirm against what cmd_ln_parse_r()
 * expects for its definition argument.
 */
static const arg_t cont_args_def= {"-inmic",
     ARG_BOOLEAN,
     "no",
     "Transcribe audio from microphone."};

/*
 * First attempt at a reduced main(): parses the command line and runs
 * microphone recognition when "-inmic" is set.
 * NOTE(review): there is no ps_init() call here, so `ps` appears never to
 * be assigned before recognize_from_microphone() and ps_free() use it --
 * a likely cause of the reported segmentation fault; confirm.
 * NOTE(review): the result of cmd_ln_parse_r() is used without a NULL
 * check, unlike the original main().
 */
int main(int argc, char *argv[])
{
    config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE);

 if (cmd_ln_boolean_r(config, "-inmic")) {
        recognize_from_microphone();
    }



   // recognize_from_microphone();
    ps_free(ps);
    cmd_ln_free_r(config);



    return 0;

}

我搜索了很多资料并阅读了文档，但仍然无法理解问题出在哪里。

编辑：我改变了这样的代码：

I searched a lot and read the documentation, but couldn't understand what the problem is.

I changed the code like this:

/*
 * Second attempt: the argument table is an array again, starting with
 * POCKETSPHINX_OPTIONS and ending with CMDLN_EMPTY_OPTION, with "-inmic"
 * as the only program-specific option.
 */
static const arg_t cont_args_def[] = {
    POCKETSPHINX_OPTIONS,

    {"-inmic",
     ARG_BOOLEAN,
     "no",
     "Transcribe audio from microphone."},

    CMDLN_EMPTY_OPTION
};

/*
 * Second attempt at a reduced main(): parses the command line and always
 * runs microphone recognition (the "-inmic" test is commented out).
 * NOTE(review): there is still no ps_init() call, so `ps` appears never
 * to be assigned before recognize_from_microphone() and ps_free() use
 * it -- a likely cause of the reported segmentation fault; confirm.
 * NOTE(review): the result of cmd_ln_parse_r() is not checked for NULL.
 */
int main(int argc, char *argv[])
{
    config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE);

// if (cmd_ln_boolean_r(config, "-inmic")) {
        recognize_from_microphone();
//    }



   // recognize_from_microphone();
    ps_free(ps);
    cmd_ln_free_r(config);



    return 0;

}

结果是：

But the result is:

Arguments list definition:
[NAME]			[DEFLT]		[DESCR]
-agc			none		Automatic gain control for c0 ('max', 'emax', 'noise', or 'none')
-agcthresh		2.0		Initial threshold for automatic gain control
-allphone				Perform phoneme decoding with phonetic lm
-allphone_ci		no		Perform phoneme decoding with phonetic lm and context-independent units only
-alpha			0.97		Preemphasis parameter
-ascale			20.0		Inverse of acoustic model scale for confidence score calculation
-aw			1		Inverse weight applied to acoustic scores.
-backtrace		no		Print results and backtraces to log.
-beam			1e-48		Beam width applied to every frame in Viterbi search (smaller values mean wider beam)
-bestpath		yes		Run bestpath (Dijkstra) search over word lattice (3rd pass)
-bestpathlw		9.5		Language model probability weight for bestpath search
-ceplen			13		Number of components in the input feature vector
-cmn			live		Cepstral mean normalization scheme ('live', 'batch', or 'none')
-cmninit		40,3,-1		Initial values (comma-separated) for cepstral mean when 'live' is used
-compallsen		no		Compute all senone scores in every frame (can be faster when there are many senones)
-debug					Verbosity level for debugging messages
-dict					Main pronunciation dictionary (lexicon) input file
-dictcase		no		Dictionary is case sensitive (NOTE: case insensitivity applies to ASCII characters only)
-dither			no		Add 1/2-bit noise
-doublebw		no		Use double bandwidth filters (same center freq)
-ds			1		Frame GMM computation downsampling ratio
-fdict					Noise word pronunciation dictionary input file
-feat			1s_c_d_dd	Feature stream type, depends on the acoustic model
-featparams				File containing feature extraction parameters.
-fillprob		1e-8		Filler word transition probability
-frate			100		Frame rate
-fsg					Sphinx format finite state grammar file
-fsgusealtpron		yes		Add alternate pronunciations to FSG
-fsgusefiller		yes		Insert filler words at each state.
-fwdflat		yes		Run forward flat-lexicon search over word lattice (2nd pass)
-fwdflatbeam		1e-64		Beam width applied to every frame in second-pass flat search
-fwdflatefwid		4		Minimum number of end frames for a word to be searched in fwdflat search
-fwdflatlw		8.5		Language model probability weight for flat lexicon (2nd pass) decoding
-fwdflatsfwin		25		Window of frames in lattice to search for successor words in fwdflat search 
-fwdflatwbeam		7e-29		Beam width applied to word exits in second-pass flat search
-fwdtree		yes		Run forward lexicon-tree search (1st pass)
-hmm					Directory containing acoustic model files.
-inmic			no		Transcribe audio from microphone.
-input_endian		little		Endianness of input data, big or little, ignored if NIST or MS Wav
-jsgf					JSGF grammar file
-keyphrase				Keyphrase to spot
-kws					A file with keyphrases to spot, one per line
-kws_delay		10		Delay to wait for best detection score
-kws_plp		1e-1		Phone loop probability for keyphrase spotting
-kws_threshold		1		Threshold for p(hyp)/p(alternatives) ratio
-latsize		5000		Initial backpointer table size
-lda					File containing transformation matrix to be applied to features (single-stream features only)
-ldadim			0		Dimensionality of output of feature transformation (0 to use entire matrix)
-lifter			0		Length of sin-curve for liftering, or 0 for no liftering.
-lm					Word trigram language model input file
-lmctl					Specify a set of language model
-lmname					Which language model in -lmctl to use by default
-logbase		1.0001		Base in which all log-likelihoods calculated
-logfn					File to write log messages in
-logspec		no		Write out logspectral files instead of cepstra
-lowerf			133.33334	Lower edge of filters
-lpbeam			1e-40		Beam width applied to last phone in words
-lponlybeam		7e-29		Beam width applied to last phone in single-phone words
-lw			6.5		Language model probability weight
-maxhmmpf		30000		Maximum number of active HMMs to maintain at each frame (or -1 for no pruning)
-maxwpf			-1		Maximum number of distinct word exits at each frame (or -1 for no pruning)
-mdef					Model definition input file
-mean					Mixture gaussian means input file
-mfclogdir				Directory to log feature files to
-min_endfr		0		Nodes ignored in lattice construction if they persist for fewer than N frames
-mixw					Senone mixture weights input file (uncompressed)
-mixwfloor		0.0000001	Senone mixture weights floor (applied to data from -mixw file)
-mllr					MLLR transformation to apply to means and variances
-mmap			yes		Use memory-mapped I/O (if possible) for model files
-ncep			13		Number of cep coefficients
-nfft			512		Size of FFT
-nfilt			40		Number of filter banks
-nwpen			1.0		New word transition penalty
-pbeam			1e-48		Beam width applied to phone transitions
-pip			1.0		Phone insertion penalty
-pl_beam		1e-10		Beam width applied to phone loop search for lookahead
-pl_pbeam		1e-10		Beam width applied to phone loop transitions for lookahead
-pl_pip			1.0		Phone insertion penalty for phone loop
-pl_weight		3.0		Weight for phoneme lookahead penalties
-pl_window		5		Phoneme lookahead window size, in frames
-rawlogdir				Directory to log raw audio files to
-remove_dc		no		Remove DC offset from each frame
-remove_noise		yes		Remove noise with spectral subtraction in mel-energies
-remove_silence		yes		Enables VAD, removes silence frames from processing
-round_filters		yes		Round mel filter frequencies to DFT points
-samprate		16000		Sampling rate
-seed			-1		Seed for random number generator; if less than zero, pick our own
-sendump				Senone dump (compressed mixture weights) input file
-senlogdir				Directory to log senone score files to
-senmgau				Senone to codebook mapping input file (usually not needed)
-silprob		0.005		Silence word transition probability
-smoothspec		no		Write out cepstral-smoothed logspectral files
-svspec					Subvector specification (e.g., 24,0-11/25,12-23/26-38 or 0-12/13-25/26-38)
-tmat					HMM state transition matrix input file
-tmatfloor		0.0001		HMM state transition probability floor (applied to -tmat file)
-topn			4		Maximum number of top Gaussians to use in scoring.
-topn_beam		0		Beam width used to determine top-N Gaussians (or a list, per-feature)
-toprule				Start rule for JSGF (first public rule is default)
-transform		legacy		Which type of transform to use to calculate cepstra (legacy, dct, or htk)
-unit_area		yes		Normalize mel filters to unit area
-upperf			6855.4976	Upper edge of filters
-uw			1.0		Unigram weight
-vad_postspeech		50		Num of silence frames to keep after from speech to silence.
-vad_prespeech		20		Num of speech frames to keep before silence to speech.
-vad_startspeech	10		Num of speech frames to trigger vad from silence to speech.
-vad_threshold		2.0		Threshold for decision between noise and silence frames. Log-ratio between signal level and noise level.
-var					Mixture gaussian variances input file
-varfloor		0.0001		Mixture gaussian variance floor (applied to data from -var file)
-varnorm		no		Variance normalize each utterance (only if CMN == current)
-verbose		no		Show input filenames
-warp_params				Parameters defining the warping function
-warp_type		inverse_linear	Warping function type (or shape)
-wbeam			7e-29		Beam width applied to word exits
-wip			0.65		Word insertion penalty
-wlen			0.025625	Hamming window length

Segmentation fault (core dumped)

如何更改此应用程序以禁用命令行输入？ [英] How to change this app to disable input from command line?

问题描述

推荐答案

相关文章

其他开发语言最新文章

热门教程

热门工具

登录关闭

如何更改此应用程序以禁用命令行输入？ [英] How to change this app to disable input from command line?

问题描述

推荐答案

相关文章

其他开发语言最新文章

热门教程

热门工具

登录 关闭

登录关闭