PocketSphinx 5prealpha
phone_loop_search.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 2008 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
42#include <sphinxbase/err.h>
43
44#include "phone_loop_search.h"
45
46static int phone_loop_search_start(ps_search_t *search);
47static int phone_loop_search_step(ps_search_t *search, int frame_idx);
48static int phone_loop_search_finish(ps_search_t *search);
49static int phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p);
50static void phone_loop_search_free(ps_search_t *search);
51static char const *phone_loop_search_hyp(ps_search_t *search, int32 *out_score);
52static int32 phone_loop_search_prob(ps_search_t *search);
53static ps_seg_t *phone_loop_search_seg_iter(ps_search_t *search);
54
55static ps_searchfuncs_t phone_loop_search_funcs = {
56 /* start: */ phone_loop_search_start,
57 /* step: */ phone_loop_search_step,
58 /* finish: */ phone_loop_search_finish,
59 /* reinit: */ phone_loop_search_reinit,
60 /* free: */ phone_loop_search_free,
61 /* lattice: */ NULL,
62 /* hyp: */ phone_loop_search_hyp,
63 /* prob: */ phone_loop_search_prob,
64 /* seg_iter: */ phone_loop_search_seg_iter,
65};
66
67static int
68phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
69{
71 cmd_ln_t *config = ps_search_config(search);
72 acmod_t *acmod = ps_search_acmod(search);
73 int i;
74
75 /* Free old dict2pid, dict, if necessary. */
76 ps_search_base_reinit(search, dict, d2p);
77
78 /* Initialize HMM context. */
79 if (pls->hmmctx)
81 pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
82 acmod->tmat->tp, NULL, acmod->mdef->sseq);
83 if (pls->hmmctx == NULL)
84 return -1;
85
86 /* Initialize penalty storage */
87 pls->n_phones = bin_mdef_n_ciphone(acmod->mdef);
88 pls->window = cmd_ln_int32_r(config, "-pl_window");
89 if (pls->penalties)
90 ckd_free(pls->penalties);
91 pls->penalties = (int32 *)ckd_calloc(pls->n_phones, sizeof(*pls->penalties));
92 if (pls->pen_buf)
93 ckd_free_2d(pls->pen_buf);
94 pls->pen_buf = (int32 **)ckd_calloc_2d(pls->window, pls->n_phones, sizeof(**pls->pen_buf));
95
96 /* Initialize phone HMMs. */
97 if (pls->hmms) {
98 for (i = 0; i < pls->n_phones; ++i)
99 hmm_deinit((hmm_t *)&pls->hmms[i]);
100 ckd_free(pls->hmms);
101 }
102 pls->hmms = (hmm_t *)ckd_calloc(pls->n_phones, sizeof(*pls->hmms));
103 for (i = 0; i < pls->n_phones; ++i) {
104 hmm_init(pls->hmmctx, (hmm_t *)&pls->hmms[i],
105 FALSE,
106 bin_mdef_pid2ssid(acmod->mdef, i),
107 bin_mdef_pid2tmatid(acmod->mdef, i));
108 }
109 pls->penalty_weight = cmd_ln_float64_r(config, "-pl_weight");
110 pls->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam")) >> SENSCR_SHIFT;
111 pls->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam")) >> SENSCR_SHIFT;
112 pls->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pl_pip")) >> SENSCR_SHIFT;
113 E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n",
114 pls->beam, pls->pbeam, pls->pip);
115
116 return 0;
117}
118
120phone_loop_search_init(cmd_ln_t *config,
121 acmod_t *acmod,
122 dict_t *dict)
123{
125
126 /* Allocate and initialize. */
127 pls = (phone_loop_search_t *)ckd_calloc(1, sizeof(*pls));
128 ps_search_init(ps_search_base(pls), &phone_loop_search_funcs,
129 PS_SEARCH_TYPE_PHONE_LOOP, PS_DEFAULT_PL_SEARCH,
130 config, acmod, dict, NULL);
131 phone_loop_search_reinit(ps_search_base(pls), ps_search_dict(pls),
132 ps_search_dict2pid(pls));
133
134 return ps_search_base(pls);
135}
136
137static void
138phone_loop_search_free_renorm(phone_loop_search_t *pls)
139{
140 gnode_t *gn;
141 for (gn = pls->renorm; gn; gn = gnode_next(gn))
142 ckd_free(gnode_ptr(gn));
143 glist_free(pls->renorm);
144 pls->renorm = NULL;
145}
146
147static void
148phone_loop_search_free(ps_search_t *search)
149{
151 int i;
152
153 ps_search_base_free(search);
154 for (i = 0; i < pls->n_phones; ++i)
155 hmm_deinit((hmm_t *)&pls->hmms[i]);
156 phone_loop_search_free_renorm(pls);
157 ckd_free_2d(pls->pen_buf);
158 ckd_free(pls->hmms);
159 ckd_free(pls->penalties);
161 ckd_free(pls);
162}
163
164static int
165phone_loop_search_start(ps_search_t *search)
166{
168 int i;
169
170 /* Reset and enter all phone HMMs. */
171 for (i = 0; i < pls->n_phones; ++i) {
172 hmm_t *hmm = (hmm_t *)&pls->hmms[i];
173 hmm_clear(hmm);
174 hmm_enter(hmm, 0, -1, 0);
175 }
176 memset(pls->penalties, 0, pls->n_phones * sizeof(*pls->penalties));
177 for (i = 0; i < pls->window; i++)
178 memset(pls->pen_buf[i], 0, pls->n_phones * sizeof(*pls->pen_buf[i]));
179 phone_loop_search_free_renorm(pls);
180 pls->best_score = 0;
181 pls->pen_buf_ptr = 0;
182
183 return 0;
184}
185
186static void
187renormalize_hmms(phone_loop_search_t *pls, int frame_idx, int32 norm)
188{
189 phone_loop_renorm_t *rn = (phone_loop_renorm_t *)ckd_calloc(1, sizeof(*rn));
190 int i;
191
192 pls->renorm = glist_add_ptr(pls->renorm, rn);
193 rn->frame_idx = frame_idx;
194 rn->norm = norm;
195
196 for (i = 0; i < pls->n_phones; ++i) {
197 hmm_normalize((hmm_t *)&pls->hmms[i], norm);
198 }
199}
200
201static void
202evaluate_hmms(phone_loop_search_t *pls, int16 const *senscr, int frame_idx)
203{
204 int32 bs = WORST_SCORE;
205 int i;
206
207 hmm_context_set_senscore(pls->hmmctx, senscr);
208
209 for (i = 0; i < pls->n_phones; ++i) {
210 hmm_t *hmm = (hmm_t *)&pls->hmms[i];
211 int32 score;
212
213 if (hmm_frame(hmm) < frame_idx)
214 continue;
215 score = hmm_vit_eval(hmm);
216 if (score BETTER_THAN bs) {
217 bs = score;
218 }
219 }
220 pls->best_score = bs;
221}
222
223static void
224store_scores(phone_loop_search_t *pls, int frame_idx)
225{
226 int i, j, itr;
227
228 for (i = 0; i < pls->n_phones; ++i) {
229 hmm_t *hmm = (hmm_t *)&pls->hmms[i];
230 pls->pen_buf[pls->pen_buf_ptr][i] = (hmm_bestscore(hmm) - pls->best_score) * pls->penalty_weight;
231 }
232 pls->pen_buf_ptr++;
233 pls->pen_buf_ptr = pls->pen_buf_ptr % pls->window;
234
235 /* update penalties */
236 for (i = 0; i < pls->n_phones; ++i) {
237 pls->penalties[i] = WORST_SCORE;
238 for (j = 0, itr = pls->pen_buf_ptr + 1; j < pls->window; j++, itr++) {
239 itr = itr % pls->window;
240 if (pls->pen_buf[itr][i] > pls->penalties[i])
241 pls->penalties[i] = pls->pen_buf[itr][i];
242 }
243 }
244}
245
246static void
247prune_hmms(phone_loop_search_t *pls, int frame_idx)
248{
249 int32 thresh = pls->best_score + pls->beam;
250 int nf = frame_idx + 1;
251 int i;
252
253 /* Check all phones to see if they remain active in the next frame. */
254 for (i = 0; i < pls->n_phones; ++i) {
255 hmm_t *hmm = (hmm_t *)&pls->hmms[i];
256
257 if (hmm_frame(hmm) < frame_idx)
258 continue;
259 /* Retain if score better than threshold. */
260 if (hmm_bestscore(hmm) BETTER_THAN thresh) {
261 hmm_frame(hmm) = nf;
262 }
263 else
264 hmm_clear_scores(hmm);
265 }
266}
267
268static void
269phone_transition(phone_loop_search_t *pls, int frame_idx)
270{
271 int32 thresh = pls->best_score + pls->pbeam;
272 int nf = frame_idx + 1;
273 int i;
274
275 /* Now transition out of phones whose last states are inside the
276 * phone transition beam. */
277 for (i = 0; i < pls->n_phones; ++i) {
278 hmm_t *hmm = (hmm_t *)&pls->hmms[i];
279 int32 newphone_score;
280 int j;
281
282 if (hmm_frame(hmm) != nf)
283 continue;
284
285 newphone_score = hmm_out_score(hmm) + pls->pip;
286 if (newphone_score BETTER_THAN thresh) {
287 /* Transition into all phones using the usual Viterbi rule. */
288 for (j = 0; j < pls->n_phones; ++j) {
289 hmm_t *nhmm = (hmm_t *)&pls->hmms[j];
290
291 if (hmm_frame(nhmm) < frame_idx
292 || newphone_score BETTER_THAN hmm_in_score(nhmm)) {
293 hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf);
294 }
295 }
296 }
297 }
298}
299
300static int
301phone_loop_search_step(ps_search_t *search, int frame_idx)
302{
304 acmod_t *acmod = ps_search_acmod(search);
305 int16 const *senscr;
306 int i;
307
308 /* All CI senones are active all the time. */
309 if (!ps_search_acmod(pls)->compallsen) {
310 acmod_clear_active(ps_search_acmod(pls));
311 for (i = 0; i < pls->n_phones; ++i)
312 acmod_activate_hmm(acmod, (hmm_t *)&pls->hmms[i]);
313 }
314
315 /* Calculate senone scores for current frame. */
316 senscr = acmod_score(acmod, &frame_idx);
317
318 /* Renormalize, if necessary. */
319 if (pls->best_score + (2 * pls->beam) WORSE_THAN WORST_SCORE) {
320 E_INFO("Renormalizing Scores at frame %d, best score %d\n",
321 frame_idx, pls->best_score);
322 renormalize_hmms(pls, frame_idx, pls->best_score);
323 }
324
325 /* Evaluate phone HMMs for current frame. */
326 evaluate_hmms(pls, senscr, frame_idx);
327
328 /* Store hmm scores for senone penaly calculation */
329 store_scores(pls, frame_idx);
330
331 /* Prune phone HMMs. */
332 prune_hmms(pls, frame_idx);
333
334 /* Do phone transitions. */
335 phone_transition(pls, frame_idx);
336
337 return 0;
338}
339
340static int
341phone_loop_search_finish(ps_search_t *search)
342{
343 /* Actually nothing to do here really. */
344 return 0;
345}
346
347static char const *
348phone_loop_search_hyp(ps_search_t *search, int32 *out_score)
349{
350 E_WARN("Hypotheses are not returned from phone loop search");
351 return NULL;
352}
353
354static int32
355phone_loop_search_prob(ps_search_t *search)
356{
357 /* FIXME: Actually... they ought to be. */
358 E_WARN("Posterior probabilities are not returned from phone loop search");
359 return 0;
360}
361
362static ps_seg_t *
363phone_loop_search_seg_iter(ps_search_t *search)
364{
365 E_WARN("Hypotheses are not returned from phone loop search");
366 return NULL;
367}
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
Definition: acmod.c:1213
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
Definition: acmod.c:1106
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
Definition: acmod.c:1197
void hmm_normalize(hmm_t *h, int32 bestscr)
Renormalize the scores in this HMM based on the given best score.
Definition: hmm.c:209
int32 hmm_vit_eval(hmm_t *hmm)
Viterbi evaluation of given HMM.
Definition: hmm.c:789
#define BETTER_THAN
Is one score better than another?
Definition: hmm.h:95
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
Definition: hmm.h:227
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
Definition: hmm.c:201
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
Definition: hmm.c:111
#define WORST_SCORE
Large "bad" score.
Definition: hmm.h:84
void hmm_clear_scores(hmm_t *h)
Reset the scores of the HMM.
Definition: hmm.c:170
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
Definition: hmm.c:89
#define WORSE_THAN
Is one score worse than another?
Definition: hmm.h:100
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
Definition: hmm.c:80
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
Definition: hmm.c:56
void hmm_clear(hmm_t *h)
Reset the states of the HMM to the invalid condition.
Definition: hmm.c:183
#define SENSCR_SHIFT
Shift count for senone scores.
Definition: hmm.h:73
Fast and rough context-independent phoneme loop search.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
void ps_search_base_free(ps_search_t *search)
Free search.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
Acoustic model structure.
Definition: acmod.h:148
bin_mdef_t * mdef
Model definition.
Definition: acmod.h:159
logmath_t * lmath
Log-math computation.
Definition: acmod.h:151
tmat_t * tmat
Transition matrices.
Definition: acmod.h:160
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Definition: bin_mdef.h:134
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition: dict2pid.h:84
a structure for a dictionary.
Definition: dict.h:76
An individual HMM among the HMM search space.
Renormalization event.
int32 norm
Normalization constant.
int frame_idx
Frame of renormalization.
Phone loop search structure.
int32 * penalties
Penalties for CI phones in current frame.
int32 beam
HMM pruning beam width.
glist_t renorm
List of renormalizations.
int window
Window size for phoneme lookahead.
int16 pen_buf_ptr
Pointer for frame to fill in penalty buffer.
hmm_context_t * hmmctx
HMM context structure.
hmm_t * hmms
Basic HMM structures for CI phones.
int16 n_phones
Size of phone array.
int32 pbeam
Phone exit pruning beam width.
int32 pip
Phone insertion penalty ("language score").
int32 ** pen_buf
Penalty buffer.
float64 penalty_weight
Weighting factor for penalties.
int32 best_score
Best Viterbi score in current frame.
Base structure for search module.
V-table for search algorithm.
Base structure for hypothesis segmentation iterator.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state].
Definition: tmat.h:56