stripdown of version 0.9

author: erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box> 2019-07-04 23:26:09 +0200
committer: erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box> 2019-07-04 23:26:09 +0200
commit: f02dfce6e6c34b3d8a7b8a0e784b506178e331fa (patch)
tree: 45556e6104242d4702689760433d7321ae74ec17 /phase.c
1 files changed, 289 insertions, 0 deletions
diff --git a/phase.c b/phase.c
new file mode 100644
index 0000000..e486613
--- /dev/null
+++ b/phase.c
@@ -0,0 +1,289 @@
+/*---------------------------------------------------------------------------*\
+  FILE........: phase.c
+  AUTHOR......: David Rowe
+  DATE CREATED: 1/2/09
+  Functions for modelling and synthesising phase.
+\*---------------------------------------------------------------------------*/
+/*
+  Copyright (C) 2009 David Rowe
+  All rights reserved.
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not,see <http://www.gnu.org/licenses/>.
+*/
+#include "defines.h"
+#include "phase.h"
+#include "kiss_fft.h"
+#include "comp.h"
+#include "comp_prim.h"
+#include "sine.h"
+#include <assert.h>
+#include <ctype.h>
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+/*---------------------------------------------------------------------------*\
+  sample_phase()
+  Samples phase at centre of each harmonic from and array of FFT_ENC
+  DFT samples.
+\*---------------------------------------------------------------------------*/
+void sample_phase(MODEL *model, 
+                  COMP H[], 
+                  COMP A[]        /* LPC analysis filter in freq domain */
+                  )                  
+{
+    int   m, b;
+    float r;
+    r = TWO_PI/(FFT_ENC);
+    /* Sample phase at harmonics */
+    for(m=1; m<=model->L; m++) {
+        b = (int)(m*model->Wo/r + 0.5);
+        H[m] = cconj(A[b]);      /* synth filter 1/A is opposite phase to analysis filter */
+    }
+}
+/*---------------------------------------------------------------------------*\
+   phase_synth_zero_order()
+   Synthesises phases based on SNR and a rule based approach.  No phase
+   parameters are required apart from the SNR (which can be reduced to a
+   1 bit V/UV decision per frame).
+   The phase of each harmonic is modelled as the phase of a synthesis
+   filter excited by an impulse.  In many Codec 2 modes the synthesis
+   filter is a LPC filter. Unlike the first order model the position
+   of the impulse is not transmitted, so we create an excitation pulse
+   train using a rule based approach.
+   Consider a pulse train with a pulse starting time n=0, with pulses
+   repeated at a rate of Wo, the fundamental frequency.  A pulse train
+   in the time domain is equivalent to harmonics in the frequency
+   domain.  We can make an excitation pulse train using a sum of
+   sinsusoids:
+     for(m=1; m<=L; m++)
+       ex[n] = cos(m*Wo*n)
+   Note: the Octave script ../octave/phase.m is an example of this if
+   you would like to try making a pulse train.
+   The phase of each excitation harmonic is:
+     arg(E[m]) = mWo
+   where E[m] are the complex excitation (freq domain) samples,
+   arg(x), just returns the phase of a complex sample x.
+   As we don't transmit the pulse position for this model, we need to
+   synthesise it.  Now the excitation pulses occur at a rate of Wo.
+   This means the phase of the first harmonic advances by N_SAMP samples
+   over a synthesis frame of N_SAMP samples.  For example if Wo is pi/20
+   (200 Hz), then over a 10ms frame (N_SAMP=80 samples), the phase of the
+   first harmonic would advance (pi/20)*80 = 4*pi or two complete
+   cycles.
+   We generate the excitation phase of the fundamental (first
+   harmonic):
+     arg[E[1]] = Wo*N_SAMP;
+   We then relate the phase of the m-th excitation harmonic to the
+   phase of the fundamental as:
+     arg(E[m]) = m*arg(E[1])
+   This E[m] then gets passed through the LPC synthesis filter to
+   determine the final harmonic phase.
+   Comparing to speech synthesised using original phases:
+   - Through headphones speech synthesised with this model is not as
+     good. Through a loudspeaker it is very close to original phases.
+   - If there are voicing errors, the speech can sound clicky or
+     staticy.  If V speech is mistakenly declared UV, this model tends to
+     synthesise impulses or clicks, as there is usually very little shift or
+     dispersion through the LPC synthesis filter.
+   - When combined with LPC amplitude modelling there is an additional
+     drop in quality.  I am not sure why, theory is interformant energy
+     is raised making any phase errors more obvious.
+   NOTES:
+     1/ This synthesis model is effectively the same as a simple LPC-10
+     vocoders, and yet sounds much better.  Why? Conventional wisdom
+     (AMBE, MELP) says mixed voicing is required for high quality
+     speech.
+     2/ I am pretty sure the Lincoln Lab sinusoidal coding guys (like xMBE
+     also from MIT) first described this zero phase model, I need to look
+     up the paper.
+     3/ Note that this approach could cause some discontinuities in
+     the phase at the edge of synthesis frames, as no attempt is made
+     to make sure that the phase tracks are continuous (the excitation
+     phases are continuous, but not the final phases after filtering
+     by the LPC spectra).  Technically this is a bad thing.  However
+     this may actually be a good thing, disturbing the phase tracks a
+     bit.  More research needed, e.g. test a synthesis model that adds
+     a small delta-W to make phase tracks line up for voiced
+     harmonics.
+\*---------------------------------------------------------------------------*/
+void phase_synth_zero_order(
+    int    n_samp,
+    MODEL *model,
+    float *ex_phase,            /* excitation phase of fundamental        */
+    COMP   H[]                  /* L synthesis filter freq domain samples */
+)
+{
+    int   m;
+    float new_phi;
+    COMP  Ex[MAX_AMP+1];          /* excitation samples */
+    COMP  A_[MAX_AMP+1];          /* synthesised harmonic samples */
+    /*
+       Update excitation fundamental phase track, this sets the position
+       of each pitch pulse during voiced speech.  After much experiment
+       I found that using just this frame's Wo improved quality for UV
+       sounds compared to interpolating two frames Wo like this:
+       ex_phase[0] += (*prev_Wo+model->Wo)*N_SAMP/2;
+    */
+    ex_phase[0] += (model->Wo)*n_samp;
+    ex_phase[0] -= TWO_PI*floorf(ex_phase[0]/TWO_PI + 0.5);
+    for(m=1; m<=model->L; m++) {
+        /* generate excitation */
+        if (model->voiced) {
+            Ex[m].real = cosf(ex_phase[0]*m);
+            Ex[m].imag = sinf(ex_phase[0]*m);
+        }
+        else {
+            /* When a few samples were tested I found that LPC filter
+               phase is not needed in the unvoiced case, but no harm in
+               keeping it.
+            */
+            float phi = TWO_PI*(float)codec2_rand()/CODEC2_RAND_MAX;
+            Ex[m].real = cosf(phi);
+            Ex[m].imag = sinf(phi);
+        }
+        /* filter using LPC filter */
+        A_[m].real = H[m].real*Ex[m].real - H[m].imag*Ex[m].imag;
+        A_[m].imag = H[m].imag*Ex[m].real + H[m].real*Ex[m].imag;
+        /* modify sinusoidal phase */
+        new_phi = atan2f(A_[m].imag, A_[m].real+1E-12);
+        model->phi[m] = new_phi;
+    }
+}
+/*---------------------------------------------------------------------------*\
+  FUNCTION....: mag_to_phase
+  AUTHOR......: David Rowe
+  DATE CREATED: Jan 2017
+  Algorithm for http://www.dsprelated.com/showcode/20.php ported to C.  See
+  also Octave function mag_to_phase.m
+  Given a magnitude spectrum in dB, returns a minimum-phase phase
+  spectra.
+\*---------------------------------------------------------------------------*/
+void mag_to_phase(float phase[],             /* Nfft/2+1 output phase samples in radians       */
+                  float Gdbfk[],             /* Nfft/2+1 postive freq amplitudes samples in dB */
+                  int Nfft, 
+                  codec2_fft_cfg fft_fwd_cfg,
+                  codec2_fft_cfg fft_inv_cfg
+                  )
+{
+    COMP Sdb[Nfft], c[Nfft], cf[Nfft], Cf[Nfft];
+    int  Ns = Nfft/2+1;
+    int  i;
+    /* install negative frequency components, 1/Nfft takes into
+       account kiss fft lack of scaling on ifft */
+    Sdb[0].real = Gdbfk[0];
+    Sdb[0].imag = 0.0;
+    for(i=1; i<Ns; i++) {
+        Sdb[i].real = Sdb[Nfft-i].real = Gdbfk[i];
+        Sdb[i].imag = Sdb[Nfft-i].imag = 0.0;
+    }
+    /* compute real cepstrum from log magnitude spectrum */
+    codec2_fft(fft_inv_cfg, Sdb, c);
+    for(i=0; i<Nfft; i++) {
+        c[i].real /= (float)Nfft;
+        c[i].imag /= (float)Nfft;
+    }
+    /* Fold cepstrum to reflect non-min-phase zeros inside unit circle */
+    cf[0] = c[0];
+    for(i=1; i<Ns-1; i++) {
+        cf[i] = cadd(c[i],c[Nfft-i]);
+    }
+    cf[Ns-1] = c[Ns-1];
+    for(i=Ns; i<Nfft; i++) {
+        cf[i].real = 0.0;
+        cf[i].imag = 0.0;
+    }
+    /* Cf = dB_magnitude + j * minimum_phase */
+    codec2_fft(fft_fwd_cfg, cf, Cf);
+    /*  The maths says we are meant to be using log(x), not 20*log10(x),
+        so we need to scale the phase to account for this:
+        log(x) = 20*log10(x)/scale */
+                          
+    float scale = (20.0/logf(10.0));
+    
+    for(i=0; i<Ns; i++) {
+        phase[i] = Cf[i].imag/scale;
+    }
+    
+}
author	erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>	2019-07-04 23:26:09 +0200
committer	erdgeist@erdgeist.org <erdgeist@bauklotz.fritz.box>	2019-07-04 23:26:09 +0200
commit	f02dfce6e6c34b3d8a7b8a0e784b506178e331fa (patch)
tree	45556e6104242d4702689760433d7321ae74ec17 /phase.c

diff --git a/phase.c b/phase.c new file mode 100644 index 0000000..e486613 --- /dev/null +++ b/phase.c
@@ -0,0 +1,289 @@
	1	/---------------------------------------------------------------------------\
	2
	3	FILE........: phase.c
	4	AUTHOR......: David Rowe
	5	DATE CREATED: 1/2/09
	6
	7	Functions for modelling and synthesising phase.
	8
	9	\---------------------------------------------------------------------------/
	10
	11	/*
	12	Copyright (C) 2009 David Rowe
	13
	14	All rights reserved.
	15
	16	This program is free software; you can redistribute it and/or modify
	17	it under the terms of the GNU Lesser General Public License version 2.1, as
	18	published by the Free Software Foundation. This program is
	19	distributed in the hope that it will be useful, but WITHOUT ANY
	20	WARRANTY; without even the implied warranty of MERCHANTABILITY or
	21	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
	22	License for more details.
	23
	24	You should have received a copy of the GNU Lesser General Public License
	25	along with this program; if not,see <http://www.gnu.org/licenses/>.
	26	*/
	27
	28	#include "defines.h"
	29	#include "phase.h"
	30	#include "kiss_fft.h"
	31	#include "comp.h"
	32	#include "comp_prim.h"
	33	#include "sine.h"
	34
	35	#include <assert.h>
	36	#include <ctype.h>
	37	#include <math.h>
	38	#include <string.h>
	39	#include <stdlib.h>
	40
	41	/---------------------------------------------------------------------------\
	42
	43	sample_phase()
	44
	45	Samples phase at centre of each harmonic from and array of FFT_ENC
	46	DFT samples.
	47
	48	\---------------------------------------------------------------------------/
	49
	50	void sample_phase(MODEL *model,
	51	COMP H[],
	52	COMP A[] /* LPC analysis filter in freq domain */
	53	)
	54	{
	55	int m, b;
	56	float r;
	57
	58	r = TWO_PI/(FFT_ENC);
	59
	60	/* Sample phase at harmonics */
	61
	62	for(m=1; m<=model->L; m++) {
	63	b = (int)(m*model->Wo/r + 0.5);
	64	H[m] = cconj(A[b]); /* synth filter 1/A is opposite phase to analysis filter */
	65	}
	66	}
	67
	68
	69	/---------------------------------------------------------------------------\
	70
	71	phase_synth_zero_order()
	72
	73	Synthesises phases based on SNR and a rule based approach. No phase
	74	parameters are required apart from the SNR (which can be reduced to a
	75	1 bit V/UV decision per frame).
	76
	77	The phase of each harmonic is modelled as the phase of a synthesis
	78	filter excited by an impulse. In many Codec 2 modes the synthesis
	79	filter is a LPC filter. Unlike the first order model the position
	80	of the impulse is not transmitted, so we create an excitation pulse
	81	train using a rule based approach.
	82
	83	Consider a pulse train with a pulse starting time n=0, with pulses
	84	repeated at a rate of Wo, the fundamental frequency. A pulse train
	85	in the time domain is equivalent to harmonics in the frequency
	86	domain. We can make an excitation pulse train using a sum of
	87	sinsusoids:
	88
	89	for(m=1; m<=L; m++)
	90	ex[n] = cos(mWon)
	91
	92	Note: the Octave script ../octave/phase.m is an example of this if
	93	you would like to try making a pulse train.
	94
	95	The phase of each excitation harmonic is:
	96
	97	arg(E[m]) = mWo
	98
	99	where E[m] are the complex excitation (freq domain) samples,
	100	arg(x), just returns the phase of a complex sample x.
	101
	102	As we don't transmit the pulse position for this model, we need to
	103	synthesise it. Now the excitation pulses occur at a rate of Wo.
	104	This means the phase of the first harmonic advances by N_SAMP samples
	105	over a synthesis frame of N_SAMP samples. For example if Wo is pi/20
	106	(200 Hz), then over a 10ms frame (N_SAMP=80 samples), the phase of the
	107	first harmonic would advance (pi/20)80 = 4pi or two complete
	108	cycles.
	109
	110	We generate the excitation phase of the fundamental (first
	111	harmonic):
	112
	113	arg[E[1]] = Wo*N_SAMP;
	114
	115	We then relate the phase of the m-th excitation harmonic to the
	116	phase of the fundamental as:
	117
	118	arg(E[m]) = m*arg(E[1])
	119
	120	This E[m] then gets passed through the LPC synthesis filter to
	121	determine the final harmonic phase.
	122
	123	Comparing to speech synthesised using original phases:
	124
	125	- Through headphones speech synthesised with this model is not as
	126	good. Through a loudspeaker it is very close to original phases.
	127
	128	- If there are voicing errors, the speech can sound clicky or
	129	staticy. If V speech is mistakenly declared UV, this model tends to
	130	synthesise impulses or clicks, as there is usually very little shift or
	131	dispersion through the LPC synthesis filter.
	132
	133	- When combined with LPC amplitude modelling there is an additional
	134	drop in quality. I am not sure why, theory is interformant energy
	135	is raised making any phase errors more obvious.
	136
	137	NOTES:
	138
	139	1/ This synthesis model is effectively the same as a simple LPC-10
	140	vocoders, and yet sounds much better. Why? Conventional wisdom
	141	(AMBE, MELP) says mixed voicing is required for high quality
	142	speech.
	143
	144	2/ I am pretty sure the Lincoln Lab sinusoidal coding guys (like xMBE
	145	also from MIT) first described this zero phase model, I need to look
	146	up the paper.
	147
	148	3/ Note that this approach could cause some discontinuities in
	149	the phase at the edge of synthesis frames, as no attempt is made
	150	to make sure that the phase tracks are continuous (the excitation
	151	phases are continuous, but not the final phases after filtering
	152	by the LPC spectra). Technically this is a bad thing. However
	153	this may actually be a good thing, disturbing the phase tracks a
	154	bit. More research needed, e.g. test a synthesis model that adds
	155	a small delta-W to make phase tracks line up for voiced
	156	harmonics.
	157
	158	\---------------------------------------------------------------------------/
	159
	160	void phase_synth_zero_order(
	161	int n_samp,
	162	MODEL *model,
	163	float ex_phase, / excitation phase of fundamental */
	164	COMP H[] /* L synthesis filter freq domain samples */
	165
	166	)
	167	{
	168	int m;
	169	float new_phi;
	170	COMP Ex[MAX_AMP+1]; /* excitation samples */
	171	COMP A_[MAX_AMP+1]; /* synthesised harmonic samples */
	172
	173	/*
	174	Update excitation fundamental phase track, this sets the position
	175	of each pitch pulse during voiced speech. After much experiment
	176	I found that using just this frame's Wo improved quality for UV
	177	sounds compared to interpolating two frames Wo like this:
	178
	179	ex_phase[0] += (prev_Wo+model->Wo)N_SAMP/2;
	180	*/
	181
	182	ex_phase[0] += (model->Wo)*n_samp;
	183	ex_phase[0] -= TWO_PI*floorf(ex_phase[0]/TWO_PI + 0.5);
	184
	185	for(m=1; m<=model->L; m++) {
	186
	187	/* generate excitation */
	188
	189	if (model->voiced) {
	190
	191	Ex[m].real = cosf(ex_phase[0]*m);
	192	Ex[m].imag = sinf(ex_phase[0]*m);
	193	}
	194	else {
	195
	196	/* When a few samples were tested I found that LPC filter
	197	phase is not needed in the unvoiced case, but no harm in
	198	keeping it.
	199	*/
	200	float phi = TWO_PI*(float)codec2_rand()/CODEC2_RAND_MAX;
	201	Ex[m].real = cosf(phi);
	202	Ex[m].imag = sinf(phi);
	203	}
	204
	205	/* filter using LPC filter */
	206
	207	A_[m].real = H[m].realEx[m].real - H[m].imagEx[m].imag;
	208	A_[m].imag = H[m].imagEx[m].real + H[m].realEx[m].imag;
	209
	210	/* modify sinusoidal phase */
	211
	212	new_phi = atan2f(A_[m].imag, A_[m].real+1E-12);
	213	model->phi[m] = new_phi;
	214	}
	215
	216	}
	217
	218
	219	/---------------------------------------------------------------------------\
	220
	221	FUNCTION....: mag_to_phase
	222	AUTHOR......: David Rowe
	223	DATE CREATED: Jan 2017
	224
	225	Algorithm for http://www.dsprelated.com/showcode/20.php ported to C. See
	226	also Octave function mag_to_phase.m
	227
	228	Given a magnitude spectrum in dB, returns a minimum-phase phase
	229	spectra.
	230
	231	\---------------------------------------------------------------------------/
	232
	233	void mag_to_phase(float phase[], /* Nfft/2+1 output phase samples in radians */
	234	float Gdbfk[], /* Nfft/2+1 postive freq amplitudes samples in dB */
	235	int Nfft,
	236	codec2_fft_cfg fft_fwd_cfg,
	237	codec2_fft_cfg fft_inv_cfg
	238	)
	239	{
	240	COMP Sdb[Nfft], c[Nfft], cf[Nfft], Cf[Nfft];
	241	int Ns = Nfft/2+1;
	242	int i;
	243
	244	/* install negative frequency components, 1/Nfft takes into
	245	account kiss fft lack of scaling on ifft */
	246
	247	Sdb[0].real = Gdbfk[0];
	248	Sdb[0].imag = 0.0;
	249	for(i=1; i<Ns; i++) {
	250	Sdb[i].real = Sdb[Nfft-i].real = Gdbfk[i];
	251	Sdb[i].imag = Sdb[Nfft-i].imag = 0.0;
	252	}
	253
	254	/* compute real cepstrum from log magnitude spectrum */
	255
	256	codec2_fft(fft_inv_cfg, Sdb, c);
	257	for(i=0; i<Nfft; i++) {
	258	c[i].real /= (float)Nfft;
	259	c[i].imag /= (float)Nfft;
	260	}
	261
	262	/* Fold cepstrum to reflect non-min-phase zeros inside unit circle */
	263
	264	cf[0] = c[0];
	265	for(i=1; i<Ns-1; i++) {
	266	cf[i] = cadd(c[i],c[Nfft-i]);
	267	}
	268	cf[Ns-1] = c[Ns-1];
	269	for(i=Ns; i<Nfft; i++) {
	270	cf[i].real = 0.0;
	271	cf[i].imag = 0.0;
	272	}
	273
	274	/* Cf = dB_magnitude + j * minimum_phase */
	275
	276	codec2_fft(fft_fwd_cfg, cf, Cf);
	277
	278	/* The maths says we are meant to be using log(x), not 20*log10(x),
	279	so we need to scale the phase to account for this:
	280	log(x) = 20log10(x)/scale /
	281
	282	float scale = (20.0/logf(10.0));
	283
	284	for(i=0; i<Ns; i++) {
	285	phase[i] = Cf[i].imag/scale;
	286	}
	287
	288
	289	}