diff options
Diffstat (limited to 'lpc.c')
| -rw-r--r-- | lpc.c | 306 |
1 files changed, 306 insertions, 0 deletions
| @@ -0,0 +1,306 @@ | |||
| 1 | /*---------------------------------------------------------------------------*\ | ||
| 2 | |||
| 3 | FILE........: lpc.c | ||
| 4 | AUTHOR......: David Rowe | ||
| 5 | DATE CREATED: 30 Sep 1990 (!) | ||
| 6 | |||
| 7 | Linear Prediction functions written in C. | ||
| 8 | |||
| 9 | \*---------------------------------------------------------------------------*/ | ||
| 10 | |||
| 11 | /* | ||
| 12 | Copyright (C) 2009-2012 David Rowe | ||
| 13 | |||
| 14 | All rights reserved. | ||
| 15 | |||
| 16 | This program is free software; you can redistribute it and/or modify | ||
| 17 | it under the terms of the GNU Lesser General Public License version 2.1, as | ||
| 18 | published by the Free Software Foundation. This program is | ||
| 19 | distributed in the hope that it will be useful, but WITHOUT ANY | ||
| 20 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 21 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | ||
| 22 | License for more details. | ||
| 23 | |||
| 24 | You should have received a copy of the GNU Lesser General Public License | ||
| 25 | along with this program; if not, see <http://www.gnu.org/licenses/>. | ||
| 26 | */ | ||
| 27 | |||
| 28 | #define LPC_MAX_N 512 /* maximum no. of samples in frame */ | ||
| 29 | #define PI 3.141592654 /* mathematical constant */ | ||
| 30 | |||
| 31 | #define ALPHA 1.0 | ||
| 32 | #define BETA 0.94 | ||
| 33 | |||
| 34 | #include <assert.h> | ||
| 35 | #include <math.h> | ||
| 36 | #include "defines.h" | ||
| 37 | #include "lpc.h" | ||
| 38 | |||
| 39 | /*---------------------------------------------------------------------------*\ | ||
| 40 | |||
| 41 | pre_emp() | ||
| 42 | |||
| 43 | Pre-emphasise (high pass filter with zero close to 0 Hz) a frame of | ||
| 44 | speech samples. Helps reduce dynamic range of LPC spectrum, giving | ||
| 45 | greater weight and hense a better match to low energy formants. | ||
| 46 | |||
| 47 | Should be balanced by de-emphasis of the output speech. | ||
| 48 | |||
| 49 | \*---------------------------------------------------------------------------*/ | ||
| 50 | |||
| 51 | void pre_emp( | ||
| 52 | float Sn_pre[], /* output frame of speech samples */ | ||
| 53 | float Sn[], /* input frame of speech samples */ | ||
| 54 | float *mem, /* Sn[-1]single sample memory */ | ||
| 55 | int Nsam /* number of speech samples to use */ | ||
| 56 | ) | ||
| 57 | { | ||
| 58 | int i; | ||
| 59 | |||
| 60 | for(i=0; i<Nsam; i++) { | ||
| 61 | Sn_pre[i] = Sn[i] - ALPHA * mem[0]; | ||
| 62 | mem[0] = Sn[i]; | ||
| 63 | } | ||
| 64 | |||
| 65 | } | ||
| 66 | |||
| 67 | |||
| 68 | /*---------------------------------------------------------------------------*\ | ||
| 69 | |||
| 70 | de_emp() | ||
| 71 | |||
| 72 | De-emphasis filter (low pass filter with a pole close to 0 Hz). | ||
| 73 | |||
| 74 | \*---------------------------------------------------------------------------*/ | ||
| 75 | |||
| 76 | void de_emp( | ||
| 77 | float Sn_de[], /* output frame of speech samples */ | ||
| 78 | float Sn[], /* input frame of speech samples */ | ||
| 79 | float *mem, /* Sn[-1]single sample memory */ | ||
| 80 | int Nsam /* number of speech samples to use */ | ||
| 81 | ) | ||
| 82 | { | ||
| 83 | int i; | ||
| 84 | |||
| 85 | for(i=0; i<Nsam; i++) { | ||
| 86 | Sn_de[i] = Sn[i] + BETA * mem[0]; | ||
| 87 | mem[0] = Sn_de[i]; | ||
| 88 | } | ||
| 89 | |||
| 90 | } | ||
| 91 | |||
| 92 | |||
| 93 | /*---------------------------------------------------------------------------*\ | ||
| 94 | |||
| 95 | hanning_window() | ||
| 96 | |||
| 97 | Hanning windows a frame of speech samples. | ||
| 98 | |||
| 99 | \*---------------------------------------------------------------------------*/ | ||
| 100 | |||
| 101 | void hanning_window( | ||
| 102 | float Sn[], /* input frame of speech samples */ | ||
| 103 | float Wn[], /* output frame of windowed samples */ | ||
| 104 | int Nsam /* number of samples */ | ||
| 105 | ) | ||
| 106 | { | ||
| 107 | int i; /* loop variable */ | ||
| 108 | |||
| 109 | for(i=0; i<Nsam; i++) | ||
| 110 | Wn[i] = Sn[i]*(0.5 - 0.5*cosf(2*PI*(float)i/(Nsam-1))); | ||
| 111 | } | ||
| 112 | |||
| 113 | /*---------------------------------------------------------------------------*\ | ||
| 114 | |||
| 115 | autocorrelate() | ||
| 116 | |||
| 117 | Finds the first P autocorrelation values of an array of windowed speech | ||
| 118 | samples Sn[]. | ||
| 119 | |||
| 120 | \*---------------------------------------------------------------------------*/ | ||
| 121 | |||
| 122 | void autocorrelate( | ||
| 123 | float Sn[], /* frame of Nsam windowed speech samples */ | ||
| 124 | float Rn[], /* array of P+1 autocorrelation coefficients */ | ||
| 125 | int Nsam, /* number of windowed samples to use */ | ||
| 126 | int order /* order of LPC analysis */ | ||
| 127 | ) | ||
| 128 | { | ||
| 129 | int i,j; /* loop variables */ | ||
| 130 | |||
| 131 | for(j=0; j<order+1; j++) { | ||
| 132 | Rn[j] = 0.0; | ||
| 133 | for(i=0; i<Nsam-j; i++) | ||
| 134 | Rn[j] += Sn[i]*Sn[i+j]; | ||
| 135 | } | ||
| 136 | } | ||
| 137 | |||
| 138 | /*---------------------------------------------------------------------------*\ | ||
| 139 | |||
| 140 | levinson_durbin() | ||
| 141 | |||
| 142 | Given P+1 autocorrelation coefficients, finds P Linear Prediction Coeff. | ||
| 143 | (LPCs) where P is the order of the LPC all-pole model. The Levinson-Durbin | ||
| 144 | algorithm is used, and is described in: | ||
| 145 | |||
| 146 | J. Makhoul | ||
| 147 | "Linear prediction, a tutorial review" | ||
| 148 | Proceedings of the IEEE | ||
| 149 | Vol-63, No. 4, April 1975 | ||
| 150 | |||
| 151 | \*---------------------------------------------------------------------------*/ | ||
| 152 | |||
| 153 | void levinson_durbin( | ||
| 154 | float R[], /* order+1 autocorrelation coeff */ | ||
| 155 | float lpcs[], /* order+1 LPC's */ | ||
| 156 | int order /* order of the LPC analysis */ | ||
| 157 | ) | ||
| 158 | { | ||
| 159 | float a[order+1][order+1]; | ||
| 160 | float sum, e, k; | ||
| 161 | int i,j; /* loop variables */ | ||
| 162 | |||
| 163 | e = R[0]; /* Equation 38a, Makhoul */ | ||
| 164 | |||
| 165 | for(i=1; i<=order; i++) { | ||
| 166 | sum = 0.0; | ||
| 167 | for(j=1; j<=i-1; j++) | ||
| 168 | sum += a[i-1][j]*R[i-j]; | ||
| 169 | k = -1.0*(R[i] + sum)/e; /* Equation 38b, Makhoul */ | ||
| 170 | if (fabsf(k) > 1.0) | ||
| 171 | k = 0.0; | ||
| 172 | |||
| 173 | a[i][i] = k; | ||
| 174 | |||
| 175 | for(j=1; j<=i-1; j++) | ||
| 176 | a[i][j] = a[i-1][j] + k*a[i-1][i-j]; /* Equation 38c, Makhoul */ | ||
| 177 | |||
| 178 | e *= (1-k*k); /* Equation 38d, Makhoul */ | ||
| 179 | } | ||
| 180 | |||
| 181 | for(i=1; i<=order; i++) | ||
| 182 | lpcs[i] = a[order][i]; | ||
| 183 | lpcs[0] = 1.0; | ||
| 184 | } | ||
| 185 | |||
| 186 | /*---------------------------------------------------------------------------*\ | ||
| 187 | |||
| 188 | inverse_filter() | ||
| 189 | |||
| 190 | Inverse Filter, A(z). Produces an array of residual samples from an array | ||
| 191 | of input samples and linear prediction coefficients. | ||
| 192 | |||
| 193 | The filter memory is stored in the first order samples of the input array. | ||
| 194 | |||
| 195 | \*---------------------------------------------------------------------------*/ | ||
| 196 | |||
| 197 | void inverse_filter( | ||
| 198 | float Sn[], /* Nsam input samples */ | ||
| 199 | float a[], /* LPCs for this frame of samples */ | ||
| 200 | int Nsam, /* number of samples */ | ||
| 201 | float res[], /* Nsam residual samples */ | ||
| 202 | int order /* order of LPC */ | ||
| 203 | ) | ||
| 204 | { | ||
| 205 | int i,j; /* loop variables */ | ||
| 206 | |||
| 207 | for(i=0; i<Nsam; i++) { | ||
| 208 | res[i] = 0.0; | ||
| 209 | for(j=0; j<=order; j++) | ||
| 210 | res[i] += Sn[i-j]*a[j]; | ||
| 211 | } | ||
| 212 | } | ||
| 213 | |||
| 214 | /*---------------------------------------------------------------------------*\ | ||
| 215 | |||
| 216 | synthesis_filter() | ||
| 217 | |||
| 218 | C version of the Speech Synthesis Filter, 1/A(z). Given an array of | ||
| 219 | residual or excitation samples, and the the LP filter coefficients, this | ||
| 220 | function will produce an array of speech samples. This filter structure is | ||
| 221 | IIR. | ||
| 222 | |||
| 223 | The synthesis filter has memory as well, this is treated in the same way | ||
| 224 | as the memory for the inverse filter (see inverse_filter() notes above). | ||
| 225 | The difference is that the memory for the synthesis filter is stored in | ||
| 226 | the output array, wheras the memory of the inverse filter is stored in the | ||
| 227 | input array. | ||
| 228 | |||
| 229 | Note: the calling function must update the filter memory. | ||
| 230 | |||
| 231 | \*---------------------------------------------------------------------------*/ | ||
| 232 | |||
| 233 | void synthesis_filter( | ||
| 234 | float res[], /* Nsam input residual (excitation) samples */ | ||
| 235 | float a[], /* LPCs for this frame of speech samples */ | ||
| 236 | int Nsam, /* number of speech samples */ | ||
| 237 | int order, /* LPC order */ | ||
| 238 | float Sn_[] /* Nsam output synthesised speech samples */ | ||
| 239 | ) | ||
| 240 | { | ||
| 241 | int i,j; /* loop variables */ | ||
| 242 | |||
| 243 | /* Filter Nsam samples */ | ||
| 244 | |||
| 245 | for(i=0; i<Nsam; i++) { | ||
| 246 | Sn_[i] = res[i]*a[0]; | ||
| 247 | for(j=1; j<=order; j++) | ||
| 248 | Sn_[i] -= Sn_[i-j]*a[j]; | ||
| 249 | } | ||
| 250 | } | ||
| 251 | |||
| 252 | /*---------------------------------------------------------------------------*\ | ||
| 253 | |||
| 254 | find_aks() | ||
| 255 | |||
| 256 | This function takes a frame of samples, and determines the linear | ||
| 257 | prediction coefficients for that frame of samples. | ||
| 258 | |||
| 259 | \*---------------------------------------------------------------------------*/ | ||
| 260 | |||
| 261 | void find_aks( | ||
| 262 | float Sn[], /* Nsam samples with order sample memory */ | ||
| 263 | float a[], /* order+1 LPCs with first coeff 1.0 */ | ||
| 264 | int Nsam, /* number of input speech samples */ | ||
| 265 | int order, /* order of the LPC analysis */ | ||
| 266 | float *E /* residual energy */ | ||
| 267 | ) | ||
| 268 | { | ||
| 269 | float Wn[LPC_MAX_N]; /* windowed frame of Nsam speech samples */ | ||
| 270 | float R[order+1]; /* order+1 autocorrelation values of Sn[] */ | ||
| 271 | int i; | ||
| 272 | |||
| 273 | assert(Nsam < LPC_MAX_N); | ||
| 274 | |||
| 275 | hanning_window(Sn,Wn,Nsam); | ||
| 276 | autocorrelate(Wn,R,Nsam,order); | ||
| 277 | levinson_durbin(R,a,order); | ||
| 278 | |||
| 279 | *E = 0.0; | ||
| 280 | for(i=0; i<=order; i++) | ||
| 281 | *E += a[i]*R[i]; | ||
| 282 | if (*E < 0.0) | ||
| 283 | *E = 1E-12; | ||
| 284 | } | ||
| 285 | |||
| 286 | /*---------------------------------------------------------------------------*\ | ||
| 287 | |||
| 288 | weight() | ||
| 289 | |||
| 290 | Weights a vector of LPCs. | ||
| 291 | |||
| 292 | \*---------------------------------------------------------------------------*/ | ||
| 293 | |||
| 294 | void weight( | ||
| 295 | float ak[], /* vector of order+1 LPCs */ | ||
| 296 | float gamma, /* weighting factor */ | ||
| 297 | int order, /* num LPCs (excluding leading 1.0) */ | ||
| 298 | float akw[] /* weighted vector of order+1 LPCs */ | ||
| 299 | ) | ||
| 300 | { | ||
| 301 | int i; | ||
| 302 | |||
| 303 | for(i=1; i<=order; i++) | ||
| 304 | akw[i] = ak[i]*powf(gamma,(float)i); | ||
| 305 | } | ||
| 306 | |||
