WebParamCrawler.java
上传用户:cctqzzy
上传日期:2022-03-14
资源大小:12198k
文件大小:3k
- package chapter2;
- import java.io.*;
- import java.net.*;
- import java.io.File;
- import java.io.FileReader;
- import java.io.FileWriter;
/**
 * Minimal HTTP crawler used as a teaching example.
 *
 * <p>Reads addresses line by line from {@link #URL_LIST_FILE_PATH}, downloads each one with a
 * hand-written HTTP/1.1 GET over a plain {@link Socket}, strips everything that precedes the
 * first {@code '<'} of the response (the status line and headers), echoes the remainder to
 * standard output and appends it to {@link #TEXT_FILE_PATH}.
 *
 * <p>Only plain {@code http} is supported; an entry without a scheme is treated as
 * {@code http://<entry>}.
 */
public class WebParamCrawler {

    /** File that receives the concatenated page bodies of every downloaded address. */
    private static final String TEXT_FILE_PATH = "D:\\workshop\\ch2\\htmlsrc.html";

    /** Text file listing the addresses to download, one per line. */
    private static final String URL_LIST_FILE_PATH = "D:\\workshop\\ch2\\urllist.txt";

    /** Port used when a list entry does not name one explicitly. */
    private static final int DEFAULT_HTTP_PORT = 80;

    /** HTTP requires CRLF line endings regardless of the platform line separator. */
    private static final String CRLF = "\r\n";

    /**
     * Downloads every address in the URL list and stores the page bodies in the output file.
     *
     * <p>The first failure (unreadable file, unknown host, malformed entry, network error)
     * stops the run; it is reported on standard error instead of being propagated.
     *
     * @param args ignored
     * @throws IOException declared for compatibility; I/O failures are reported on stderr
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources closes both files even when a download fails half-way.
        try (FileWriter htmlWriter = new FileWriter(new File(TEXT_FILE_PATH));
             BufferedReader urlList = new BufferedReader(new FileReader(URL_LIST_FILE_PATH))) {
            String line;
            // readLine() returns null once the end of the list has been reached.
            while ((line = urlList.readLine()) != null) {
                System.out.println(line);

                String address = line.trim();
                if (address.isEmpty()) {
                    continue; // nothing to download for a blank line
                }

                String page = download(address);

                // Show the page body on the console and append it to the output file.
                System.out.println(page);
                htmlWriter.write(page);
            }
        } catch (UnknownHostException e) {
            System.err.println("无法访问指定主机." + " (" + e.getMessage() + ")");
        } catch (IOException e) {
            System.err.println("下载失败,请检查输入地址是否正确。" + " (" + e.getMessage() + ")");
        }
    }

    /**
     * Fetches one address with a raw HTTP/1.1 GET request.
     *
     * @param address list entry, with or without a leading {@code http://}
     * @return the response text starting at its first {@code '<'}; empty if there is none
     * @throws IOException if the entry is malformed or the transfer fails
     */
    private static String download(String address) throws IOException {
        URI target = toHttpUri(address);
        String host = target.getHost();
        int port = target.getPort() == -1 ? DEFAULT_HTTP_PORT : target.getPort();

        String resource = target.getRawPath();
        if (resource == null || resource.isEmpty()) {
            resource = "/";
        }
        if (target.getRawQuery() != null) {
            resource = resource + "?" + target.getRawQuery();
        }
        String hostHeader = (port == DEFAULT_HTTP_PORT) ? host : host + ":" + port;

        try (Socket connection = new Socket(host, port)) {
            // Send the HTTP request; auto-flush is off so the request leaves in one flush.
            PrintWriter request = new PrintWriter(connection.getOutputStream(), false);
            request.print("GET " + resource + " HTTP/1.1" + CRLF);
            request.print("Host: " + hostHeader + CRLF);
            // Ask the server to close the connection so end-of-stream marks end-of-response.
            request.print("Connection: Close" + CRLF);
            request.print(CRLF);
            request.flush();

            // Receive the HTTP response.
            // NOTE(review): an HTTP/1.1 server may answer with chunked transfer coding, in
            // which case chunk-size lines remain embedded in the returned text.
            BufferedReader response =
                    new BufferedReader(new InputStreamReader(connection.getInputStream()));
            return readFromFirstTag(response);
        }
    }

    /**
     * Turns a list entry into an absolute {@code http} URI.
     *
     * @param address list entry, with or without a scheme
     * @return the parsed URI, guaranteed to have a host and the {@code http} scheme
     * @throws IOException if the entry cannot be parsed, has no host, or is not {@code http}
     */
    private static URI toHttpUri(String address) throws IOException {
        String candidate = address.contains("://") ? address : "http://" + address;
        try {
            URI uri = new URI(candidate);
            if (!"http".equalsIgnoreCase(uri.getScheme()) || uri.getHost() == null) {
                throw new IOException("unsupported or malformed address: " + address);
            }
            return uri;
        } catch (URISyntaxException e) {
            throw new IOException("malformed address: " + address, e);
        }
    }

    /**
     * Reads the stream to its end and returns everything from the first {@code '<'} onward.
     *
     * <p>Blocking reads are used on purpose: polling {@code ready()} would spin the CPU and
     * never terminate when the peer closes the connection without sending any data.
     *
     * @param in source of the raw HTTP response
     * @return text from the first {@code '<'} to end of stream; empty if no {@code '<'} occurs
     * @throws IOException if reading fails
     */
    private static String readFromFirstTag(Reader in) throws IOException {
        StringBuilder page = new StringBuilder(8096);

        // Skip the status line and headers: discard characters up to the first '<'.
        int ch = in.read();
        while (ch != -1 && ch != '<') {
            ch = in.read();
        }

        // Keep the '<' itself and everything after it until end of stream.
        while (ch != -1) {
            page.append((char) ch);
            ch = in.read();
        }
        return page.toString();
    }
}