SSRFinder.java
资源名称:SSRFinder.rar [点击查看]
上传用户:reomax
上传日期:2010-03-14
资源大小:5k
文件大小:4k
源码类别:
生物技术
开发平台:
Java
- /*
- * Author: ruanjue
- * Created: 11/29/2003 15:12:16
- * Modified: 11/29/2003 15:12:16
- */
- package org.genomics.ssr;
- import java.io.*;
- import java.util.*;
- /**
- *@author 阮珏
- */
- public class SSRFinder
- {
- /**
- *the minimum of total repeat region length
- */
- public static int MIN_LENGTH=12;
- /**
- *we use 2-8 nt repeat unit,however there is another criterion 1-5,if you will use 1-5 please change those tow static parameter
- *to (1,5).
- */
- public static int MIN_REPEAT=2;
- public static int MAX_REPEAT=8;
- /**
- *set the number of the length of the left and right sequence of ssr.
- */
- public static int LEFT_SEQ_LENGTH=15;
- public static int RIGHT_SEQ_LENGTH=15;
- /**
- *call this method to find ssr
- *@reader must be genbank fasta style,the sequence can't suffer space character
- */
- public static SSR[] find(Reader reader)throws IOException{
- BufferedReader in=new BufferedReader(reader);
- String str=null;
- StringBuffer sb=new StringBuffer(512);
- String header=null;
- Vector v=new Vector();
- //int lineNum=0;
- while((str=in.readLine())!=null){
- //System.out.println(++lineNum);
- if(str.startsWith(">")){
- if(sb.length()>0){
- find(header,sb.toString(),v);
- sb.delete(0,sb.length());
- }
- int i=1;
- for(;i<str.length();i++){
- if(str.charAt(i)==' '||str.charAt(i)=='t'){
- break;
- }
- }
- header=str.substring(1,i);
- }
- else {
- sb.append(str);
- }
- }
- find(header,sb.toString(),v);
- sb.delete(0,sb.length());
- in.close();
- SSR[] ssrs=new SSR[v.size()];
- for(int i=0;i<ssrs.length;i++){
- ssrs[i]=(SSR)v.get(i);
- }
- return ssrs;
- }
- protected static int find(String header,String seq,Vector v)throws IOException{
- if(seq.length()<MIN_LENGTH){
- return 0;
- }
- int count=0;
- int pointer=0;
- int ptr=0;
- char[] repeat=new char[MAX_REPEAT];
- for(int i=0;i<seq.length()-MIN_REPEAT;i++){
- if(i-ptr>=MAX_REPEAT-1){
- ptr++;
- }
- //scan repeat
- for(int t=MIN_REPEAT;t<=(i-ptr+1);t++){
- int num=0;
- for(int m=t+i;m<seq.length();m+=t){
- int n=0;
- for(;n<t;n++){
- if(seq.charAt(i-n)!=seq.charAt(m-n)){
- break;
- }
- }
- if(n!=t){
- break;
- }
- else {
- num++;
- }
- }
- if((num+1)*t>=MIN_LENGTH){//notice:num should add one
- boolean ok=false;
- char c=seq.charAt(i-t+1);
- for(int k=i-t+2;k<=i;k++){
- if(seq.charAt(k)!=c){
- ok=true;
- break;
- }
- }
- if(ok){
- SSR ssr=new SSR();
- ssr.seq=seq.substring(i-t+1,i+1);
- ssr.src=seq;
- if(header==null||header.length()==0){
- ssr.name="undefined_"+count;
- }
- else {
- ssr.name=header+"_"+count;
- }
- int left=i-t-LEFT_SEQ_LENGTH+1;
- if(left<0){
- left=0;
- }
- ssr.left_seq=seq.substring(left,i-t+1);
- ssr.left_pos=i-t+1;
- int right=i+num*t+RIGHT_SEQ_LENGTH+1;
- if(right>=seq.length()){
- right=seq.length()-1;
- }
- int right_start=i+num*t+1;
- if(right_start>=seq.length()){
- right_start=seq.length()-1;
- }
- ssr.right_seq=seq.substring(right_start,right);
- ssr.right_pos=i+num*t;
- ssr.repeat=num+1;
- v.add(ssr);
- count++;
- //skip the repeat region ,continue scan at new pot
- }
- i+=num*t-1;
- ptr=i;
- break;
- }
- else {
- continue;
- }
- }
- }
- return count;
- }
- public static void main(String[] args)throws Exception{
- FileReader reader=new FileReader(args[0]);
- long time=System.currentTimeMillis();
- SSR[] ssrs=find(reader);
- System.out.println("find "+ssrs.length+"t"+( System.currentTimeMillis()-time)+" ms");
- for(int i=0;i<ssrs.length;i++){
- System.out.println(">"+(i+1));
- System.out.println(ssrs[i]);
- }
- }
- }