readInput.c
资源名称:MSA_MST.rar [点击查看]
上传用户:szpanda
上传日期:2016-03-09
资源大小:9k
文件大小:7k
源码类别:
DNA
开发平台:
C/C++
- /* File: readInput.c */
- /* Author:Qichan Ma, Student No.:250494898 */
- #include "globals.h"
- #include "readInput.h"
- /* read the residues, substitution score matrix & gap penalty from the input file */
- void readScore() {
- FILE *in;
- char infile[20];
- printf("Please input the name of the file that includes the residues, substitution score matrix & gap penalty (e.g.: blosum62.txt): ");
- scanf("%s",infile);
- if((in = fopen(infile, "r")) == NULL)
- {
- printf("cannot open the filen");
- exit(0);
- }
- int i;
- /* initialize the index array of letters */
- for(i=0;i<26;i++)
- alphaIndex[i] = -1;
- residuesLen=0; /* the number of the residues (say for protein, it's 20; for DNA/RNA, it's 4)*/
- char ch;
- ch=getc(in);
- if(ch == EOF) {
- fprintf(stderr, "the score file is not rightn");
- fclose(in);
- exit(1);
- }
- while(ch!='r' && ch!='n') {
- ch = getc(in);
- if(ch == EOF) {
- fprintf(stderr, "the score file is not rightn");
- fclose(in);
- exit(1);
- }
- }
- while(!isalnum(ch)) {
- ch = getc(in);
- if(ch == EOF) {
- fprintf(stderr, "the residues missedn");
- fclose(in);
- exit(1);
- }
- }
- /* read the residues */
- while(ch!='r' && ch!='n') {
- if(isalnum(ch))
- alphaIndex[ch-65]=residuesLen++;
- ch = getc(in);
- if(ch == EOF) {
- fprintf(stderr, "the residues missedn");
- fclose(in);
- exit(1);
- }
- }
- /* the substitution score matrix */
- score = (int **)malloc(residuesLen*sizeof(int *));
- for (i=0; i<residuesLen; i++)
- score[i]=(int *)malloc(residuesLen*sizeof(int));
- int j, temp;
- /* read the substitution score matrix */
- for(i=0; i<residuesLen; i++) {
- ch = getc(in);
- if(ch == EOF) {
- fprintf(stderr, "the score file is not rightn");
- fclose(in);
- exit(1);
- }
- while(!isalpha(ch)) {
- ch = getc(in);
- if(ch == EOF) {
- fprintf(stderr, "the score file is not rightn");
- fclose(in);
- exit(1);
- }
- }
- for(j=0; j<=i; j++) {
- if(fscanf(in, "%d", &temp) == EOF) {
- fprintf(stderr, "substitution score matrix is not rightn");
- fclose(in);
- exit(1);
- }
- score[i][j]=temp;
- }
- }
- for(i=0; i<residuesLen; i++)
- for(j=i+1; j<residuesLen; j++)
- score[i][j] = score[j][i];
- ch = getc(in);
- while(!isalpha(ch)) {
- ch = getc(in);
- if(ch == EOF) {
- fprintf(stderr, "gap_initiation score missedn");
- fclose(in);
- exit(1);
- }
- }
- while(!isspace(ch)) {
- ch = getc(in);
- if(ch == EOF) {
- fprintf(stderr, "gap_initiation score missedn");
- fclose(in);
- exit(1);
- }
- }
- /* read gap penalty */
- if(fscanf(in, "%d", &temp) == EOF) {
- fprintf(stderr, "gap_initiation score missedn");
- fclose(in);
- exit(1);
- }
- gap_ini = temp; /* the gap penalty */
- ch = getc(in);
- while(!isalpha(ch)) {
- ch = getc(in);
- if(ch == EOF) {
- fprintf(stderr, "gap_extension score missedn");
- fclose(in);
- exit(1);
- }
- }
- while(!isspace(ch)) {
- ch = getc(in);
- if(ch == EOF) {
- fprintf(stderr, "gap_extension score missedn");
- fclose(in);
- exit(1);
- }
- }
- /* read gap penalty */
- if(fscanf(in, "%d", &temp) == EOF) {
- fprintf(stderr, "gap_extension score missedn");
- fclose(in);
- exit(1);
- }
- gap_ext = temp; /* the gap penalty */
- fclose(in);
- }
/*
 * Get the number of sequences stored in a sequences file.
 *
 * Every sequence occupies three lines, so the result is the number of lines
 * divided by three. Lines are counted with fgets() and a 1000-byte buffer,
 * the same way readSeq() consumes the file, so a line longer than 999
 * characters counts once per buffer-full read in both places.
 *
 * infile: name of the sequences file.
 * Returns the number of sequences (0 for an empty file).
 * Prints a message on stderr and terminates with exit status 1 if the file
 * cannot be opened or its line count is not a multiple of 3.
 */
int getNumOfSeq(char *infile) {
    FILE *in;
    char temp[1000];
    int lineNo = 0; /* the number of lines of the sequences file */

    if ((in = fopen(infile, "r")) == NULL) {
        fprintf(stderr, "cannot open the file\n");
        exit(1);
    }

    while (fgets(temp, sizeof temp, in) != NULL)
        lineNo++;
    fclose(in);

    /* the number of lines of the sequences file must be multiple of 3 */
    if (lineNo % 3 != 0) {
        fprintf(stderr, "the sequences file is not right\n");
        exit(1);
    }
    return lineNo / 3;
}
- /* read the sequences from the input file */
- void readSeq() {
- FILE *in;
- char infile[20];
- printf("Please input the name of the sequences file (e.g.: protein.txt): ");
- scanf("%s",infile);
- if((in = fopen(infile, "r")) == NULL)
- {
- printf("cannot open the infilen");
- exit(0);
- }
- /* the number of sequences */
- numOfSeq = getNumOfSeq(infile);
- /* the array of name of sequences */
- seqName = (char **)malloc(numOfSeq*sizeof(char *));
- /* the array of sequences */
- seq = (char **)malloc(numOfSeq*sizeof(char *));
- /* the length of the name of sequences */
- int seqNameLen;
- /* the array of sequences' length */
- seqLen = (int *)malloc(numOfSeq*sizeof(int));
- char temp[1000];
- int i=0, j=0, k=0;
- /* read the sequences and the names of sequences from the input file */
- while(fgets(temp, 1000, in) != NULL) {
- /* read the names of sequences */
- if(fgets(temp, 1000, in) == NULL) {
- fprintf(stderr, "the sequences file is not rightn");
- fclose(in);
- exit(1);
- }
- seqNameLen = strlen(temp);
- temp[seqNameLen-1]=' ';
- seqName[i]=(char *)malloc(seqNameLen*sizeof(char));
- strcpy(seqName[i],temp);
- /* read the sequences */
- if(fgets(temp, 1000, in) == NULL) {
- fprintf(stderr, "the sequences file is not rightn");
- fclose(in);
- exit(1);
- }
- seqLen[i] = 0;
- for(j=0; !isalpha(temp[j]);j++);
- for( ;isalpha(temp[j]);j++)
- seqLen[i]++;
- seq[i]=(char *)malloc((seqLen[i]+1)*sizeof(char));
- for(j=0; !isalpha(temp[j]);j++);
- for(k=0; isalpha(temp[j]);j++,k++) {
- seq[i][k] = temp[j];
- /* if the sequences contains illegal letter */
- if(alphaIndex[seq[i][k]-65] == -1) {
- fprintf(stderr, "the sequences contain illegal lettern");
- fclose(in);
- exit(1);
- }
- }
- seq[i][seqLen[i]]=' ';
- i++;
- }
- fclose(in);
- }