中间件编程

开发平台：
Visual C++

LinkExtractor.java：源码内容
							
//package org.apache.lucene.index;
import java.io.*;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.ParserException;
import java.util.*;
import java.net.*;
/**
 * LinkExtractor extracts all the links from the given webpage
 * and prints them on standard output.
 */
public class LinkExtractor {
	
	private String location;
	private Parser parser;
	private static int b=0;
	private static int tID;
	private static int iNode;
	
	public final int DEEP=3; //遍历的深度
	
	public static Vector svecLink, svecOutlink;
	public static String hostName;
    public static boolean bl;
	
	public LinkExtractor(String location) {
		this.location = location;
	    hostName=GetHostName(location);
	    System.out.println("主机名称是 "+hostName);
	     
	    bl=false;
		svecLink=new Vector();
		svecOutlink=new Vector();
        svecLink.add(location);
		
	}
	public void extractLinks(String loc) throws ParserException {
	    System.out.println("Parsing "+loc+" for links...");
	    
	    Vector vecTemp=new Vector();
	    
		try {
			this.parser   = new Parser(loc); // Create the parser object
			parser.registerScanners(); // Register standard scanners (Very Important)
	        bl=true;
		}
		catch (ParserException e) {
			bl=false;	
	    	e.printStackTrace();
		}
		
		
		
		String ss,str1;
        URL wwwurl;
        boolean byes;
        
 	    int a=0;
        b++;
		
		Node [] links = parser.extractAllNodesThatAre(LinkTag.class);
		for (int i = 0;i < links.length;i++) {
	      if(bl)
	      {
	      	byes=true;
	        System.out.println("Total url is "+links.length+"This page has url "+i);
	      
	      	LinkTag linkTag = (LinkTag)links[i];
            str1=linkTag.getLink();	
            	
           // System.out.println("the url is "+str1);&&!svecOutlink.contains(str1)
            if(str1.equals("")) continue;
            
            if(str1.charAt(str1.length()-1)=='/'
            ||str1.charAt(str1.length()-1)=='\') 
            str1=str1.substring(0,str1.length()-1); 
			if(!svecLink.contains(str1)) 
			{   
			    try
			    {
				   wwwurl=new URL(str1);
				   wwwurl.getContent();
			    }
			    catch(MalformedURLException e)
			    {
			        byes=false;
			    }
			    catch(IOException e)
			    {
			        byes=false;
			    }
				if(GetHostName(str1).equals(hostName) && byes)
				{
			    	a++;
			    	tID++;
			    	svecLink.add(str1);
			    	vecTemp.add(str1);
			    	System.out.println("the url is "+str1);
		    	}
		    	else
		    	{
		    	   
		    	    svecOutlink.add(str1);
		    	}
			} 	
           } 
           
		}
    	String strNew;
    	if(a>0&&b<=DEEP)
    	{    
    	     
    	     for(int i=0;i<vecTemp.size();i++)
	         {
	           strNew=(String)vecTemp.get(i);
	           System.out.println("this is "+strNew);
	           extractLinks(strNew);
	         }
	    }
	 
	}
	boolean linkAttribute(String strLink)
	{
	 return true;
	}
    static void printCol(Enumeration col)
    {
    	
    	String str;
        while(col.hasMoreElements())
        {
        str=(String)col.nextElement();
        System.out.println(str);
        }
    }
    
    public String GetHostName(String hostname)
    {
    	URL aurl;
    	String ss=" ";
    	try
		{
	    aurl=new URL(hostname);
	    ss=aurl.getHost();
    	}
	    catch(MalformedURLException e)
	    {
	      e.printStackTrace();
	     //return "null";
	    }
	    return ss;
    }
    
	public static void main(String[] args) {
		/*
		if (args.length<0) {
			System.err.println("Syntax Error : Please provide the location(URL or file) to parse");
			System.exit(-1);
		}*/
	    Vector allLink=new Vector();
		String strNew,strall1,strall2,str;
		String ss="http://www.dlut.edu.cn/";
		LinkExtractor linkExtractor = new LinkExtractor(ss);
		try {
	        linkExtractor.extractLinks(ss);
			
		    Enumeration col=svecLink.elements();
            while(col.hasMoreElements())
            {
             str=(String)col.nextElement();
             System.out.println(str);
            }
            
		}
		catch (ParserException e) {
			e.printStackTrace();
		}
	}
}