Commit 4bae37da authored by Alfred E. Heggestad's avatar Alfred E. Heggestad Committed by Jean-Marc Valin

Bit-stream definition in the manual with simplified source code

parent 4cf06906
#LyX 1.6.0rc2 created this file. For more info see http://www.lyx.org/
\lyxformat 340
#LyX 1.6.1 created this file. For more info see http://www.lyx.org/
\lyxformat 345
\begin_document
\begin_header
\textclass scrbook
\use_default_options true
\language english
\inputencoding auto
\font_roman times
\font_sans helvet
\font_typewriter courier
\font_roman default
\font_sans default
\font_typewriter default
\font_default_family default
\font_sc false
\font_osf false
......@@ -21,7 +22,7 @@
\papersize letterpaper
\use_geometry true
\use_amsmath 2
\use_esint 0
\use_esint 2
\cite_engine basic
\use_bibtopic false
\paperorientation portrait
......@@ -37,7 +38,6 @@
\papercolumns 1
\papersides 1
\paperpagestyle headings
\listings_params "basicstyle={\ttfamily},breaklines=true,language=C,xleftmargin=0mm"
\tracking_changes false
\output_changes false
\author ""
......@@ -8417,6 +8417,427 @@ Perceptual enhancement
\begin_layout Standard
Optional, implementation-defined.
\end_layout
\begin_layout Subsection
Bit-stream definition
\end_layout
\begin_layout Standard
This section defines the bit-stream that is transmitted on the wire.
One Speex packet consists of 1 frame header and 4 sub-frames:
\end_layout
\begin_layout Standard
\begin_inset Tabular
<lyxtabular version="3" rows="1" columns="5">
<features>
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<row>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
Frame Header
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
Subframe 1
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
Subframe 2
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
Subframe 3
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
Subframe 4
\end_layout
\end_inset
</cell>
</row>
</lyxtabular>
\end_inset
\end_layout
\begin_layout Standard
The frame header is variable length, depending on decoding mode and submode.
The narrowband frame header is defined as follows:
\end_layout
\begin_layout Standard
\begin_inset Tabular
<lyxtabular version="3" rows="1" columns="6">
<features>
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<row>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
wb-bit
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
modeid
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
LSP
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
OL-pitch
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
OL-pitchgain
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
OL-ExcGain
\end_layout
\end_inset
</cell>
</row>
</lyxtabular>
\end_inset
\end_layout
\begin_layout Standard
wb-bit: Wideband bit (1 bit) 0=narrowband, 1=wideband
\end_layout
\begin_layout Standard
modeid: Mode identifier (4 bits)
\end_layout
\begin_layout Standard
LSP: Line Spectral Pairs (0, 18 or 30 bits)
\end_layout
\begin_layout Standard
OL-pitch: Open Loop Pitch (0 or 7 bits)
\end_layout
\begin_layout Standard
OL-pitchgain: Open Loop Pitch Gain (0 or 4 bits)
\end_layout
\begin_layout Standard
OL-ExcGain: Open Loop Excitation Gain (0 or 5 bits)
\end_layout
\begin_layout Standard
...
\end_layout
\begin_layout Standard
Each subframe is defined as follows:
\end_layout
\begin_layout Standard
\begin_inset Tabular
<lyxtabular version="3" rows="1" columns="4">
<features>
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<row>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
FinePitch
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
PitchGain
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
InnovationGain
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
Innovation VQ
\end_layout
\end_inset
</cell>
</row>
</lyxtabular>
\end_inset
\end_layout
\begin_layout Standard
FinePitch: (0 or 7 bits)
\end_layout
\begin_layout Standard
PitchGain: (0, 5, or 7 bits)
\end_layout
\begin_layout Standard
InnovationGain: (0, 1 or 3 bits)
\end_layout
\begin_layout Standard
Innovation VQ: (0-96 bits)
\end_layout
\begin_layout Standard
...
\end_layout
\begin_layout Subsection
Sample decoder
\end_layout
\begin_layout Standard
This section contains some sample source code, showing how a basic Speex
decoder can be implemented.
The sample decoder supports narrowband submode 3 only and omits advanced
 features such as enhancement and VBR.
\end_layout
\begin_layout Standard
...
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "nb_celp.c"
lstparams "caption={Sample Decoder}"
\end_inset
\end_layout
\begin_layout Subsection
Lookup tables
\end_layout
\begin_layout Standard
The Speex decoder includes a set of lookup tables and codebooks, which are
used to convert between values of different domains.
This includes:
\end_layout
\begin_layout Standard
- Excitation 10x16 (3200 bps)
\end_layout
\begin_layout Standard
- Excitation 10x32 (4000 bps)
\end_layout
\begin_layout Standard
- Excitation 20x32 (2000 bps)
\end_layout
\begin_layout Standard
- Excitation 5x256 (12800 bps)
\end_layout
\begin_layout Standard
- Excitation 5x64 (9600 bps)
\end_layout
\begin_layout Standard
- Excitation 8x128 (7000 bps)
\end_layout
\begin_layout Standard
- Codebook for 3-tap pitch prediction gain (Normal and Low Bitrate)
\end_layout
\begin_layout Standard
- Codebook for LSPs in narrowband CELP mode
\end_layout
\begin_layout Standard
...
\end_layout
\begin_layout Standard
The exact lookup tables are included here for reference.
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../libspeex/exc_5_64_table.c"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../libspeex/exc_5_256_table.c"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../libspeex/exc_8_128_table.c"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../libspeex/exc_10_16_table.c"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../libspeex/exc_10_32_table.c"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../libspeex/exc_20_32_table.c"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../libspeex/gain_table.c"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../libspeex/gain_table_lbr.c"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../libspeex/lsp_tables_nb.c"
\end_inset
\end_layout
\begin_layout Section
......
#include <math.h>
#include "nb_celp.h"
#include "lsp.h"
#include "ltp.h"
#include "quant_lsp.h"
#include "cb_search.h"
#include "filters.h"
#include "../include/speex/speex_bits.h"
#include "os_support.h"
#ifndef NULL
#define NULL 0
#endif
/* Margin argument handed to lsp_interpolate() */
#define LSP_MARGIN .002f
/* Factor applied to the decoded open-loop excitation gain */
#define SIG_SCALING 1.f

/* Number of LSP/LPC coefficients handled per frame */
#define NB_ORDER 10
/* Number of samples written to the output buffer per decoded frame */
#define NB_FRAME_SIZE 160
/* Number of samples processed per subframe */
#define NB_SUBFRAME_SIZE 40
/* Number of subframes per frame */
#define NB_NB_SUBFRAMES 4
/* Pitch bounds: the start value is passed to pitch_unquant_3tap(), the
 * end value sizes the excitation history kept in front of the frame */
#define NB_PITCH_START 17
#define NB_PITCH_END 144
/* Total length of the excitation buffer: one frame plus the history that
 * nb_decode() carries over between frames */
#define NB_DEC_BUFFER (NB_FRAME_SIZE+2*NB_PITCH_END+NB_SUBFRAME_SIZE+12)

/**
 * Persistent decoder state, carried from one nb_decode() call to the next.
 *
 * The coefficient arrays are sized with NB_ORDER because nb_decode() walks
 * them with NB_ORDER-bounded loops; keeping the size tied to the macro
 * prevents an overflow if the order is ever changed.
 */
struct speex_decode_state {
	float excBuf[NB_DEC_BUFFER];   /**< Excitation buffer */
	float *exc;                    /**< Start of excitation frame */
	float old_qlsp[NB_ORDER];      /**< Quantized LSPs for previous frame */
	float interp_qlpc[NB_ORDER];   /**< Interpolated quantized LPCs */
	float mem_sp[NB_ORDER];        /**< Filter memory for synthesis signal */
	int first;                     /**< Is this the first frame? */
};
/* Sub-frame gain correction factors: nb_decode() selects one entry with the
 * 1-bit value read per subframe and multiplies it by the open-loop gain */
static const float exc_gain_quant_scal1[2] = {0.70469f, 1.05127f};
/**
 * Allocate a decoder state with every byte zeroed and the first-frame
 * flag set.
 *
 * @return Newly allocated state, or NULL if the allocation fails.
 *         nb_decoder_destroy() frees it.
 */
struct speex_decode_state *nb_decoder_init(void)
{
	struct speex_decode_state *state = calloc(1, sizeof(*state));

	if (state != NULL)
		state->first = 1;

	return state;
}
/**
 * Release a decoder state.
 *
 * @param state State to free; NULL is accepted and ignored.
 */
void nb_decoder_destroy(struct speex_decode_state *state)
{
	/* free(NULL) is defined to do nothing, so no guard is required */
	free(state);
}
/**
 * Basic narrowband decoder, mode 3 only.
 *
 * Reads one frame from the bit-stream, rebuilds the excitation for each
 * subframe and runs it through the synthesis filter.
 *
 * @param st    Decoder state; updated in place
 * @param bits  Bit-stream to read the frame from
 * @param out   Output buffer; NB_FRAME_SIZE samples are written
 *
 * @return 0 on success;
 *         -1 if an argument is NULL, fewer than 5 bits remain for the
 *            frame header, or mode id 15 is read;
 *         -2 if the frame is wideband or its mode id is not 3
 */
int nb_decode(struct speex_decode_state *st, SpeexBits *bits, float *out)
{
	int i, sub, wideband, mode, qe;
	float ol_gain;
	float innov[NB_SUBFRAME_SIZE];
	float exc32[NB_SUBFRAME_SIZE];
	float qlsp[NB_ORDER], interp_qlsp[NB_ORDER];
	float ak[NB_ORDER];

	/* All three pointers are dereferenced below */
	if (!st || !bits || !out)
		return -1;

	st->exc = st->excBuf + 2*NB_PITCH_END + NB_SUBFRAME_SIZE + 6;

	/* Decode Sub-modes: keep reading 5-bit headers while the mode id
	 * is above 8; mode id 15 aborts the decode */
	do {
		if (speex_bits_remaining(bits) < 5)
			return -1;

		wideband = speex_bits_unpack_unsigned(bits, 1);
		if (wideband) {
			printf("wideband not supported\n");
			return -2;
		}

		mode = speex_bits_unpack_unsigned(bits, 4);
		if (mode == 15)
			return -1;
	} while (mode > 8);

	if (mode != 3) {
		printf("only mode 3 supported\n");
		return -2;
	}

	/* Shift all buffers by one frame */
	SPEEX_MOVE(st->excBuf, st->excBuf+NB_FRAME_SIZE,
		   2*NB_PITCH_END + NB_SUBFRAME_SIZE + 12);

	/* Unquantize LSPs */
	lsp_unquant_lbr(qlsp, NB_ORDER, bits);

	/* Handle first frame: no previous LSPs exist yet, so seed them
	 * with the current ones */
	if (st->first) {
		st->first = 0;
		for (i=0; i<NB_ORDER; i++)
			st->old_qlsp[i] = qlsp[i];
	}

	/* Get global excitation gain */
	qe = speex_bits_unpack_unsigned(bits, 5);
	ol_gain = SIG_SCALING*exp(qe/3.5);

	/* Loop on subframes: rebuild the excitation signal */
	for (sub=0; sub<NB_NB_SUBFRAMES; sub++) {
		int offset, q_energy;
		float *exc;
		float ener;

		offset = NB_SUBFRAME_SIZE*sub;
		exc = st->exc + offset;

		SPEEX_MEMSET(exc, 0, NB_SUBFRAME_SIZE);

		/* Adaptive codebook contribution */
		pitch_unquant_3tap(exc, exc32, NB_PITCH_START,
				   NB_SUBFRAME_SIZE, bits, 0);

		sanitize_values32(exc32, -32000, 32000, NB_SUBFRAME_SIZE);

		/* Unquantize the innovation */
		SPEEX_MEMSET(innov, 0, NB_SUBFRAME_SIZE);

		/* Decode sub-frame gain correction (1 bit, so the table
		 * index is always 0 or 1) */
		q_energy = speex_bits_unpack_unsigned(bits, 1);
		ener = exc_gain_quant_scal1[q_energy] * ol_gain;

		/* Fixed codebook contribution */
		split_cb_shape_sign_unquant(innov, bits);

		/* De-normalize innovation and update excitation */
		signal_mul(innov, innov, ener, NB_SUBFRAME_SIZE);

		for (i=0; i<NB_SUBFRAME_SIZE; i++) {
			exc[i] = exc32[i] + innov[i];
		}
	}

	/* The output is taken from the excitation one subframe behind the
	 * current frame start */
	SPEEX_COPY(out, &st->exc[-NB_SUBFRAME_SIZE], NB_FRAME_SIZE);

	/* Loop on subframes: synthesis filtering, in place on the output */
	for (sub=0; sub<NB_NB_SUBFRAMES; sub++) {
		const int offset = NB_SUBFRAME_SIZE*sub;
		float *sp, *exc;

		sp = out + offset;
		exc = st->exc + offset;

		/* LSP interpolation (quantized and unquantized) */
		lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, NB_ORDER,
				sub, NB_NB_SUBFRAMES, LSP_MARGIN);

		/* Compute interpolated LPCs (unquantized) */
		lsp_to_lpc(interp_qlsp, ak, NB_ORDER);

		/* Filter with the coefficients saved by the previous
		 * subframe; the new ones are stored only afterwards */
		iir_mem16(sp, st->interp_qlpc, sp, NB_SUBFRAME_SIZE,
			  NB_ORDER, st->mem_sp);

		/* Save for interpolation in next frame */
		for (i=0; i<NB_ORDER; i++)
			st->interp_qlpc[i] = ak[i];
	}

	/* Store the LSPs for interpolation in the next frame */
	for (i=0; i<NB_ORDER; i++)
		st->old_qlsp[i] = qlsp[i];

	return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment