WebParser.java
上传用户:cctqzzy
上传日期:2022-03-14
资源大小:12198k
文件大小:2k
- package chapter2;
- import java.io.*;
- import java.net.*;
- import java.io.File;
- import java.io.FileReader;
- import java.io.FileWriter;
/**
 * Extracts the visible text from a saved HTML page.
 *
 * <p>Everything between {@code <} and {@code >} is discarded, as are line
 * breaks, tabs, spaces and {@code &nbsp;} entities. The remaining characters
 * are echoed to standard output and written to a text file.
 *
 * <p>Files are read and written with the JVM default charset.
 */
public class WebParser {
    /** Default location of the HTML page to be stripped. */
    private static final String SRC_FILE_PATH = "D:\\workshop\\ch2\\htmlsrc.html";

    /** Default location of the plain-text result. */
    private static final String DST_FILE_PATH = "D:\\workshop\\ch2\\puresrc.txt";

    /** Size, in chars, of the chunk used while reading the source file. */
    private static final int BUFFER_SIZE = 8096 * 2;

    /** The only character entity that is removed rather than copied through. */
    private static final String NBSP_ENTITY = "&nbsp;";

    /**
     * Runs the conversion on the default source and destination paths.
     *
     * <p>On any I/O failure a message is printed to standard error and the
     * JVM exits with status 1.
     *
     * @param args ignored
     * @throws IOException declared for compatibility; I/O failures are handled here
     */
    public static void main(String[] args) throws IOException {
        try {
            Parser();
        } catch (IOException e) {
            System.err.println("下载失败,请检查输入地址是否正确。");
            System.exit(1);
        }
    }

    /**
     * Strips the markup from the default source file and writes the text to
     * the default destination file.
     *
     * @throws IOException if the source cannot be read or the destination cannot be written
     */
    public static void Parser() throws IOException {
        Parser(SRC_FILE_PATH, DST_FILE_PATH);
    }

    /**
     * Strips the markup from {@code srcPath} and writes the text to {@code dstPath}.
     *
     * <p>The whole source file is processed, regardless of its size. The
     * extracted text is also printed to standard output followed by a line
     * break. The destination is only opened after the source has been read
     * successfully, so a missing source leaves an existing destination intact.
     *
     * @param srcPath path of the HTML file to read
     * @param dstPath path of the text file to create or overwrite
     * @throws IOException if the source cannot be read or the destination cannot be written
     */
    public static void Parser(String srcPath, String dstPath) throws IOException {
        String text = stripMarkup(readAll(srcPath));

        System.out.println(text);
        // try-with-resources closes the writer even when write() fails.
        try (Writer writer = new FileWriter(new File(dstPath))) {
            writer.write(text);
        }
    }

    /** Reads the complete content of the file at {@code path} into a string. */
    private static String readAll(String path) throws IOException {
        StringBuilder content = new StringBuilder(BUFFER_SIZE);
        char[] chunk = new char[BUFFER_SIZE];
        try (Reader reader = new FileReader(new File(path))) {
            int count;
            // A single read() may return fewer chars than the file holds,
            // so keep reading until end of stream is reported.
            while ((count = reader.read(chunk)) != -1) {
                content.append(chunk, 0, count);
            }
        }
        return content.toString();
    }

    /** Returns {@code html} without tags, whitespace and {@code &nbsp;} entities. */
    private static String stripMarkup(String html) {
        StringBuilder text = new StringBuilder(html.length());
        boolean insideTag = false;

        for (int i = 0; i < html.length(); i++) {
            char c = html.charAt(i);

            if (insideTag) {
                // Skip everything up to and including the closing bracket.
                if (c == '>') {
                    insideTag = false;
                }
                continue;
            }
            if (c == '<') {
                insideTag = true;
                continue;
            }
            if (c == '\n' || c == '\r' || c == '\t' || c == ' ') {
                continue;
            }
            // startsWith(prefix, offset) is bounds-safe, so an '&' close to
            // the end of the input cannot run past the available data.
            if (c == '&' && html.startsWith(NBSP_ENTITY, i)) {
                // Land on the ';' so that the loop increment steps past it.
                i += NBSP_ENTITY.length() - 1;
                continue;
            }
            text.append(c);
        }
        return text.toString();
    }
}