/*
 * DSP programming
 *
 * Development platform:
 * C/C++
 *
 * speaker_recognition_train.c: source listing
 */
							/***************************************************************************** 
 *
 * speaker_recognition_train.c
 *
 * Main Program to Identify a Speaker.
 *
 * The aim of this project is to determine the identity of the speaker
 * from the speech sample of the speaker and the trained vectors.
 *
 * Trained vectors are derived from the speech sample of the speaker at
 * a different time.
 * 
 * First, the analog input speech signal is digitized at a sampling
 * frequency of 8 kHz using the on-board ADC (Analog to Digital Converter).
 * The speech samples are stored in a one-dimensional array.
 * Speech signals are quasi-stationary: over a very short time frame the
 * speech signal can be considered stationary. The speech signal is
 * therefore split into frames. Each frame consists of 256 samples of the
 * speech signal, and each subsequent frame starts at the 100th sample of
 * the previous frame. Thus each frame overlaps with the two frames that
 * follow it. This technique is called framing. The speech samples within
 * one frame are considered to be stationary.
 *
 * After framing, windowing is applied to prevent spectral leakage.
 * Here a Hamming window with 256 coefficients is used.
 *
 * Third step is to convert the Time domain speech Signal into Frequency
 * Domain using Discrete Fourier Transform. Here Fast Fourier Transform
 * is used.
 *
 * The transformation results in a signal that is complex in nature.
 * Speech is a real signal, but its Fourier Transform is a complex one
 * (a signal having both real and imaginary parts).
 *
 * The power of the signal in Frequency domain is calculated by summing
 * the square of Real and Imaginary part of the signal in Frequency Domain.
 * The power signal will be a real one. Since second half of the samples
 * in the frame will be symmetric to the first half (because the speech signal
 * is a real one) we ignore the second half (second 128 samples in each frame)
 *
 * Triangular filters are designed using the Mel frequency scale. This bank
 * of filters approximates the response of the human ear. The power signal
 * is then applied to this bank of filters to determine the frequency
 * content across each filter.
 * In our implementation the total number of filters is chosen to be 20.
 * These 20 filters are uniformly spaced on the Mel frequency scale between
 * 0 and 4 kHz.
 *
 * After computing the Mel-Frequency Spectrum, log of Mel-Frequency Spectrum
 * is computed.
 *
 * The Discrete Cosine Transform of the resulting signal yields the
 * Mel-Frequency Cepstral Coefficients.
 *
 * The Euclidean distance between the Mel-Frequency Cepstral Coefficients
 * and each of the trained vectors is computed. The trained vector that
 * produces the smallest Euclidean distance identifies the speaker.
 *
 *
 * Written by Vasanthan Rangan and Sowmya Narayanan
 * 
 *
 ******************************************************************************/
/*****************************************************************************
 * Include Header Files
 ******************************************************************************/
#include "dsk6713_aic23.h"
Uint32 fs=DSK6713_AIC23_FREQ_8KHZ;
#include <stdio.h>
#include <math.h>
#include "block_dc.h" // Header file for identifying the start of speech signal
#include "detect_envelope.h" // Header file for identfying the start of speech signal
/*****************************************************************************
 * Definition of Variables
 *****************************************************************************/
#define PI 3.14159 // Approximation of pi used for the FFT twiddle angles and the DCT cosine terms
#define Number_Of_Filters 20 // Number of Mel-Frequency Filters (also the number of values kept per frame in struct mfcc)
#define column_length 256 // Number of samples in one frame (second dimension of the speech buffer)
#define row_length 100 // Total number of frames in the given speech signal (first dimension of the buffers)
/*****************************************************************************
 * Custom Structure Definition
 *****************************************************************************/
/* One complex-valued sample, stored as separate real and imaginary floats. */
struct complex {
	float real; /* real part of the sample */
	float imag; /* imaginary part of the sample */
};
/* Frame store for the speech signal: row_length frames, each holding
 * column_length complex samples. The same storage is handed to fft(), which
 * transforms every frame in place. */
struct buffer {
	struct complex data[row_length][column_length];
}; // Structure to store the input speech samples, one frame per row
/* Per-frame filter-bank values: row_length frames, each holding
 * Number_Of_Filters floats. Used both for the Mel-frequency spectrum and
 * for the cepstral coefficients derived from it. */
struct mfcc {
	float data[row_length][Number_Of_Filters];
}; // Structure to store the Mel-Frequency coefficients, one frame per row
/*****************************************************************************
 * Assigning the data structures to external memory
 *****************************************************************************/
/* Each pragma below asks the TI compiler to place the following object in the
 * section named ".EXTRAM"; where that section lives physically is decided by
 * the linker command file, which is not part of this source. */
#pragma DATA_SECTION(real_buffer,".EXTRAM")
struct buffer real_buffer; // real_buffer holds the captured speech frames; fft() later transforms it in place.
#pragma DATA_SECTION(coeff,".EXTRAM")
struct mfcc coeff; // coeff receives the Mel-Frequency spectrum and is then overwritten with its log by log_energy().
#pragma DATA_SECTION(mfcc_ct,".EXTRAM")
struct mfcc mfcc_ct; // mfcc_ct receives the Mel-Frequency Cepstral Coefficients computed by mfcc_coeff().
/*****************************************************************************
 * Variable Declaration
 *****************************************************************************/
int gain;           /* Output gain intended for play back; main() sets it to 1 */
int signal_status;  /* Result of framing_windowing(); a value > 0 ends the capture phase */
int count;          /* General purpose counter; main() resets it to 0 */
int column;         /* Index of the current sample inside a frame (used by playback()) */
int row;            /* Index of the current frame (used by playback()) */
/*
 * State of the program, shared between the interrupt service routine and
 * main():
 *   0 - the input speech signal is being captured
 *   1 - capture is finished and the captured signal is being played back
 *   2 - play back is finished and the data is ready for processing
 *
 * The variable is written inside the ISR and busy-polled by main(), so it
 * must be volatile: otherwise the compiler is allowed to read it once and
 * turn the polling loops in main() into endless loops.
 */
volatile int program_control;
float mfcc_vector[20]; /* Vector computed from the speech signal (one entry per Mel filter) */
FILE *fptr;            /* Output file that receives the trained vector */
/*****************************************************************************
 * Function Declaration
 *****************************************************************************/
 
/* Fast Fourier Transform of every frame in input_data; n points per frame, m stages (n = 2^m) */
void fft(struct buffer *input_data, int n, int m);
/* Returns one stored speech sample per call for play back */
short playback();
/* Replaces every Mel-frequency spectrum value by its logarithm */
void log_energy(struct mfcc *co_eff);
/* Computes the MFCC (cosine transform of co_eff) into mfccct */
void mfcc_coeff(struct mfcc *mfccct, struct mfcc *co_eff);
/* Reduces the per-frame MFCC values to a single vector */
void mfcc_vect(struct mfcc *mfccct, float *mfccvector);
interrupt void c_int11()  {           /* interrupt service routine */
	short sample_data;
	short out_sample;
	if ( program_control == 0 ) { /* Beginning of Capturing input speech */
		sample_data = input_sample();	          /* input data */
		signal_status = framing_windowing(sample_data, &real_buffer); /* Signal Identification
																	   * and Framing and Windowing */
		out_sample = 0;							/* Output Data */
		if (signal_status > 0) {
			program_control = 1;		       /* Capturing input signal is done */
		}
		output_sample(out_sample);		/* play nothing */
	}
	if ( program_control == 1 ) { /* Beginning of the Play back */
		out_sample = playback(); /* call the playback funciton to get the 
								  * stored speech sample */
		output_sample(out_sample); /* play the output speech sample */
	}
	return;
}
void main()  {	/* Main Function of the program */
/****************************************************************************
 * Declaring Local Variables
 *****************************************************************************/
	int i; /* Variable used for counters */
  	int j; /* Variable used for Counters */
  	int stages; /* Variable to identify total number of stages */
/*****************************************************************************
 * Execution of functions start
 ******************************************************************************/
	comm_intr();   /* init DSK, codec, McBSP */
/******************************************************************************
 * Initializing Variables
 *****************************************************************************/
 	gain = 1;
	column = 0;
	row = 0;
	program_control = 0;
	signal_status = 0;
	count = 0;				  
	stages=8;	/* Total Number of stages in FFT = 8 */
	for ( i=0; i < row_length ; i++ ) { /* Total Number of Frames */
  		for ( j = 0; j < column_length ; j++) { /* Total Number of Samples in a Frame */
	  		real_buffer.data[i][j].real = 0.0; /* Initializing real part to be zero */
	  		real_buffer.data[i][j].imag = 0.0; /* Initializing imaginary part to be zero*/
		}
  	}
  	for ( i=0; i<row_length; i++) { /* Total Number of Frames */
  		for ( j=0; j<Number_Of_Filters; j++) { /* Total Number of Filters */
			coeff.data[i][j] = 0.0; /* Initializing the co-effecient array */
			mfcc_ct.data[i][j] = 0.0; /* Initializing the array for storing MFCC */
		}
	} /* End of Initializing the variables to zero */
/*****************************************************************************
* Begining of the execution of the functions.
*****************************************************************************/
	while(program_control == 0);      /* infinite loop For Receiving/capturing alone*/
  	while(program_control ==1);		/* infinite loop for playback alone*/
/* Compute FFT of the input speech signal after Framing and Windowing */
	fft(&real_buffer,column_length,stages);
/* Compute Power Spectrum of the speech signal in Frequency Domain Representation */
	power_spectrum(&real_buffer);
/* Compute Mel-Frequency Spectrum of the speech signal in Power Spectrum Form */
	mel_freq_spectrum(&real_buffer,&coeff);
/* Computation of Log of the Power Spectrum */
	log_energy(&coeff);
/* Computation of Discrete Cosine Transform */
	mfcc_coeff(&mfcc_ct,&coeff);
/* Compute Vector */
	mfcc_vect(&mfcc_ct,mfcc_vector);
/* Store the Vector in a Flat File */
 	fptr = fopen("train_vect.dat","w");
	fprintf(fptr, "{");
  	for ( i =0; i < Number_Of_Filters ; i++) {
  		if ( i == (Number_Of_Filters-1) ) {
			fprintf(fptr, "%f ",mfcc_vector[i]);
		} else {
			fprintf(fptr, "%f, ",mfcc_vector[i]);
		}
  	}
  	fprintf(fptr,"}");
 	fclose(fptr);
}
/* Function to Compute Fast Fourier Transform */
void fft (struct buffer *input_data, int n, int m) {/* Input speech Data, n = 2^m, m = total number of stages */
	int n1,n2,i,j,k,l,row_index; /* Declare Variables
								  * n1 is the difference between upper and lower 
								  * i,j,k,l are counters
								  * row_index is used to index every frame */
	float xt,yt,c,s,e,a; /* declare variables for storing temporary values
	 					  * xt,yt for temporary real and Imaginary respectively
	 					  * c for cosine
	 					  * s for sine
	 					  * e and a for computing the input to cosine and sine
	 					  */
	for ( row_index = 0; row_index < row_length; row_index++) { /* For every frame */
/* Loop through all the stages */
		n2 = n;
		for ( k=0; k<m; k++) {
			n1 = n2;
			n2 = n2/2;
			e = PI/n1;
/* Compute Twiddle Factors */
			for ( j= 0; j<n2; j++) {
				a = j*e;
				c = (float) cos(a);
				s = (float) sin(a);
/* Do the Butterflies for all 256 samples */
				for (i=j; i<n; i+= n1) {
					l = i+n2;
					xt = input_data->data[row_index][i].real - input_data->data[row_index][l].real;
					input_data->data[row_index][i].real = input_data->data[row_index][i].real+input_data->data[row_index][l].real;
					yt = input_data->data[row_index][i].imag - input_data->data[row_index][l].imag;
					input_data->data[row_index][i].imag = input_data->data[row_index][i].imag+input_data->data[row_index][l].imag;
					input_data->data[row_index][l].real = c*xt + s*yt;
					input_data->data[row_index][l].imag = c*yt - s*yt;
				}
			}
		}
/* Bit Reversal */
		j = 0;
		for ( i=0; i<n-1; i++) {
			if (i<j) {
				xt = input_data->data[row_index][j].real;
				input_data->data[row_index][j].real = input_data->data[row_index][i].real;
				input_data->data[row_index][i].real = xt;
				yt = input_data->data[row_index][j].imag;
				input_data->data[row_index][j].imag = input_data->data[row_index][i].imag;
				input_data->data[row_index][i].imag = yt;
			}
		}
	}
	return;
}			
/*
 * log_energy - replace every Mel-frequency spectrum value by its natural
 * logarithm, in place.
 *
 * co_eff : row_length x Number_Of_Filters values to convert
 *
 * A value that is zero or negative (for example a filter output of a frame
 * that contains only zeros) has no finite logarithm; log() would yield
 * -infinity or NaN, which would then spread through the cosine transform
 * into the stored vector. Such values are replaced by a tiny positive floor
 * before the logarithm is taken. Strictly positive values are processed
 * exactly as before.
 */
void log_energy(struct mfcc *co_eff) {
	const float energy_floor = 1.0e-30f; /* stand-in for non-positive energies */
	int i,j; /* Variables declared to act as counters */
	float energy;
	for ( i=0; i<row_length; i++) { /* For all the frames */
		for ( j=0; j<Number_Of_Filters; j++ ) { /* For all the filters */
			energy = co_eff->data[i][j];
			if ( energy <= 0.0f ) {
				energy = energy_floor;
			}
			co_eff->data[i][j] = (float) log((double) energy); /* Compute log of the coefficient */
		}
	}
}
/* Function to compute Discrete Cosine Transform */
void mfcc_coeff(struct mfcc *mfccct, struct mfcc *co_eff) {
	int i,j,k; /* Variable declared to act as counters */
	for ( i=0; i<row_length; i++) { /* For all the frames (100 Frames) */
  		for (j=0; j<Number_Of_Filters; j++ ) { /* For all the filters */
  			mfccct->data[i][j] = 0.0;
/* Compute Cosine Transform of the Signal */
  			for ( k=0; k<Number_Of_Filters; k++) {
 				mfccct->data[i][j] = mfccct->data[i][j] + co_eff->data[i][k]*cos((double)((PI*j*(k-1/2))/Number_Of_Filters));
			}
  		}
  	}
}
/*
 * mfcc_vect - collapse the per-frame cepstral coefficients into one vector.
 *
 * mfccct     : cepstral coefficients, one row per frame
 * mfccvector : output array with Number_Of_Filters entries; entry f receives
 *              the sum, over all frames, of the squared f-th coefficient
 */
void mfcc_vect(struct mfcc *mfccct, float *mfccvector) {
	int filter; /* index of the coefficient / output entry */
	int frame;  /* index of the frame */
	float value; /* coefficient currently being accumulated */

	for ( filter = 0; filter < Number_Of_Filters; filter++ ) {
		mfccvector[filter] = 0; /* start every entry from zero */
		frame = 0;
		while ( frame < row_length ) { /* accumulate over all the frames */
			value = mfccct->data[frame][filter];
			mfccvector[filter] += value * value;
			frame++;
		}
	}
}
/*
 * playback - return the next stored speech sample.
 *
 * Every call returns the real part of real_buffer.data[row][column]
 * (truncated to an integer) and then advances column/row to the following
 * sample. After the last sample of the last frame has been returned, row is
 * reset to 0 and program_control is set to 2 to signal the end of play back.
 *
 * The sample is read before the indices are advanced. The previous version
 * advanced first, which skipped sample [0][0] at the start and returned it
 * as the very last value after wrapping around. The number of calls needed
 * to finish play back is unchanged.
 */
short playback() {
	short sample; /* sample handed back to the caller */

	/* Read the current sample; same float -> int -> short conversion as before */
	sample = ((int)real_buffer.data[row][column].real);

	column++; /* move on to the next sample of the frame */
	if ( column >= column_length ) { /* end of the frame reached */
		column = 0;		/* restart at the first sample ... */
		row++; 	/* ... of the next frame */
	}
	if ( row >= row_length ) { /* all frames have been played */
		program_control = 2; /* End of Playback */
		row = 0; /* Initialize the frame number back to zero */
	}
	return sample; /* Return the stored speech sample */
}