pairwiseAlign.c
资源名称:MSA_MST.rar [点击查看]
上传用户:szpanda
上传日期:2016-03-09
资源大小:9k
文件大小:6k
源码类别:
DNA
开发平台:
C/C++
- /* File: pairwiseAlign.c */
- /* Author:Qichan Ma, Student No.:250494898 */
- #include "globals.h"
- #include "pairwiseAlign.h"
- /* get the maximal number from 2 numbers */
- int max(int x1, int x2) {
- return (x1 > x2 ? x1 : x2);
- }
- /* get the maximal number from 3 numbers */
- int max1(int x1, int x2, int x3) {
- return max(max(x1, x2), x3);
- }
- /* compute the optimal pairwise alignment score of two sequences */
- int pairwiseScore(char *x, char *y){
- int n = strlen(x);
- int m = strlen(y);
- int i, j;
- /* use dynamic programming to compute the matrix, and find the maximum alignment score */
- /* computation matrix of optimal alignment */
- int **A = (int **)malloc((n+1)*sizeof(int *));
- for (i=0; i<n+1; i++)
- A[i]=(int *)malloc((m+1)*sizeof(int));
- /* computation matrix of optimal alignment ends with deletion */
- int **I = (int **)malloc((n+1)*sizeof(int *));
- for (i=0; i<n+1; i++)
- I[i]=(int *)malloc((m+1)*sizeof(int));
- /* computation matrix of optimal alignment ends with insertion*/
- int **D = (int **)malloc((n+1)*sizeof(int *));
- for (i=0; i<n+1; i++)
- D[i]=(int *)malloc((m+1)*sizeof(int));
- /* initialize the 3 computation matrices*/
- A[0][0] = 0;
- for(i=1; i<=n; i++)
- A[i][0] = gap_ini + i*gap_ext;
- for(j=1; j<=m; j++)
- A[0][j] = gap_ini + j*gap_ext;
- for(i=0; i<=n; i++)
- I[i][0] = A[i][0] + gap_ini;
- for(j=0; j<=m; j++)
- D[0][j] = A[0][j] + gap_ini;
- for(i=1; i<=n; i++)
- D[i][0] = D[i-1][0] + gap_ext;
- for(j=1; j<=m; j++)
- I[0][j] = I[0][j-1] + gap_ext;
- /* compute the 3 matrices */
- for(i=1; i<=n; i++) {
- for(j=1; j<=m; j++) {
- D[i][j] = max(D[i-1][j]+gap_ext, A[i-1][j]+gap_ini+gap_ext);
- I[i][j] = max(I[i][j-1]+gap_ext, A[i][j-1]+gap_ini+gap_ext);
- A[i][j] = max1(A[i-1][j-1]+score[alphaIndex[x[i-1]-65]][alphaIndex[y[j-1]-65]], D[i][j], I[i][j]);
- }
- }
- for(i=0; i<n+1; i++) {
- free(A[i]);
- free(I[i]);
- free(D[i]);
- }
- free(A);
- free(I);
- free(D);
- return A[n][m];
- }
- /* compute the optimal pairwise alignment score of two sequences and do traceback to get the alignment */
- int pairwiseAlign(char *x, char *y){
- int n = strlen(x);
- int m = strlen(y);
- int i, j;
- /* use dynamic programming to compute the matrix, and find the maximum alignment score */
- /* computation matrix of optimal alignment */
- int **A = (int **)malloc((n+1)*sizeof(int *));
- for (i=0; i<n+1; i++)
- A[i]=(int *)malloc((m+1)*sizeof(int));
- /* computation matrix of optimal alignment ends with deletion */
- int **I = (int **)malloc((n+1)*sizeof(int *));
- for (i=0; i<n+1; i++)
- I[i]=(int *)malloc((m+1)*sizeof(int));
- /* computation matrix of optimal alignment ends with insertion*/
- int **D = (int **)malloc((n+1)*sizeof(int *));
- for (i=0; i<n+1; i++)
- D[i]=(int *)malloc((m+1)*sizeof(int));
- /* initialize the 3 computation matrices*/
- A[0][0] = 0;
- for(i=1; i<=n; i++)
- A[i][0] = gap_ini + i*gap_ext;
- for(j=1; j<=m; j++)
- A[0][j] = gap_ini + j*gap_ext;
- for(i=0; i<=n; i++)
- I[i][0] = A[i][0] + gap_ini;
- for(j=0; j<=m; j++)
- D[0][j] = A[0][j] + gap_ini;
- for(i=1; i<=n; i++)
- D[i][0] = D[i-1][0] + gap_ext;
- for(j=1; j<=m; j++)
- I[0][j] = I[0][j-1] + gap_ext;
- /* compute the 3 matrices */
- for(i=1; i<=n; i++) {
- for(j=1; j<=m; j++) {
- D[i][j] = max(D[i-1][j]+gap_ext, A[i-1][j]+gap_ini+gap_ext);
- I[i][j] = max(I[i][j-1]+gap_ext, A[i][j-1]+gap_ini+gap_ext);
- A[i][j] = max1(A[i-1][j-1]+score[alphaIndex[x[i-1]-65]][alphaIndex[y[j-1]-65]], D[i][j], I[i][j]);
- }
- }
- /* traceback to find the optimal alignment */
- char *align1Buf = (char *) malloc((n+m)*sizeof(char));
- char *align2Buf = (char *) malloc((n+m)*sizeof(char));
- int bufIndex=0;
- int state=0; /* the current traceback state.
- state==0 -> in matrix A;
- state==1 -> in matrix D;
- state==2 -> in matrix I; */
- for(i=n,j=m; (i>=1 && j>=1); ) {
- switch(state) {
- case 0:
- if(A[i][j] == A[i-1][j-1]+score[alphaIndex[x[i-1]-65]][alphaIndex[y[j-1]-65]]) {
- align1Buf[bufIndex] = x[i-1];
- align2Buf[bufIndex] = y[j-1];
- bufIndex++;
- i--;
- j--;
- }
- else if (A[i][j] == D[i][j])
- state = 1;
- else if (A[i][j] == I[i][j])
- state = 2;
- break;
- case 1:
- if(D[i][j] == A[i-1][j]+gap_ini+gap_ext)
- state = 0;
- align1Buf[bufIndex] = x[i-1];
- align2Buf[bufIndex] = '-';
- bufIndex++;
- i--;
- break;
- case 2:
- if(I[i][j] == A[i][j-1]+gap_ini+gap_ext)
- state = 0;
- align1Buf[bufIndex] = '-';
- align2Buf[bufIndex] = y[j-1];
- bufIndex++;
- j--;
- break;
- }
- }
- while(i>=1) {
- align1Buf[bufIndex] = x[i-1];
- align2Buf[bufIndex] = '-';
- bufIndex++;
- i--;
- }
- while(j>=1) {
- align1Buf[bufIndex] = '-';
- align2Buf[bufIndex] = y[j-1];
- bufIndex++;
- j--;
- }
- for(i=bufIndex-1; i>=0; i--)
- align1[bufIndex-1-i]=align1Buf[i];
- for(i=bufIndex-1; i>=0; i--)
- align2[bufIndex-1-i]=align2Buf[i];
- align1[bufIndex]=align2[bufIndex]=' ';
- /* output the optimal alignment score and alignment */
- /* fprintf(out,"The score of the optimal pairwise alignment is: %dn", A[n][m]);
- fprintf(out,"The optimal pairwise alignment is:n");
- fprintf(out,"%sn",align1);
- fprintf(out,"%sn",align2);
- */
- free(align1Buf);
- free(align2Buf);
- for(i=0; i<n+1; i++) {
- free(A[i]);
- free(I[i]);
- free(D[i]);
- }
- free(A);
- free(I);
- free(D);
- return A[n][m];
- }