Unit1.~pas
资源名称:PickURL.rar [点击查看]
上传用户:zstyzgs
上传日期:2008-06-25
资源大小:214k
文件大小:10k
源码类别:
Delphi控件源码
开发平台:
Delphi
- unit Unit1;
- interface
- uses
- Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
- Dialogs, StdCtrls, WinHTTP, SyncObjs, ExtCtrls, ComCtrls, Spin;
- type
- TMyThread = class(TThread)
- private
- FHTTP: TWinHTTP;
- FEvent: TEvent;
- FHTML: string;
- FEntryURL: string;
- FURL: string;
- FURLs: TStrings;
- FData: TStrings;
- function GetURLs(const AURL: string): string;
- procedure HTTPDone(Sender: TObject; const ContentType: string;
- FileSize: Integer; Stream: TStream);
- procedure AddURL;
- procedure AddData;
- procedure AddURLs;
- procedure GetURL;
- function IsURL(const AURL: string): Boolean;
- function ParseString(const ARE, AInputString, ATemplate: string): string;
- overload;
- function ParseString(const ARE, AInputString: string): string; overload;
- function ParseEmails(const ARE, AInputString: string): string;
- protected
- procedure Execute; override;
- procedure DoTerminate; override;
- procedure Terminate;
- public
- constructor Create(const AEntryURL: string = '');
- destructor Destroy; override;
- end;
- TForm1 = class(TForm)
- WinHTTP1: TWinHTTP;
- Memo1: TMemo;
- StatusBar1: TStatusBar;
- Panel1: TPanel;
- Edit1: TEdit;
- Button1: TButton;
- Button2: TButton;
- Label1: TLabel;
- SpinEdit1: TSpinEdit;
- Memo2: TMemo;
- procedure FormCreate(Sender: TObject);
- procedure FormDestroy(Sender: TObject);
- procedure Button1Click(Sender: TObject);
- procedure Button2Click(Sender: TObject);
- procedure Memo1Change(Sender: TObject);
- procedure Edit1Change(Sender: TObject);
- private
- FList: TThreadList;
- FLock: TCriticalSection;
- FIndex: Integer;
- procedure RemoveThread(Sender: TMyThread);
- public
- procedure Lock;
- procedure Unlock;
- function GetURL(var AURL: string): Boolean;
- end;
- var
- Form1: TForm1;
- const
- URLRE =
- '<[aAscriptSCRIPTiframeIFRAMElinkLINK]{1,6}.*?[hrefHREFsrcSRC]{3,4}s*=s*["''s]*(.*?)["''s>]+.*?>';
- URLPathRE = '((http://)*[w+][.w]+[w/.:-]*/)';
- URLHostRE = '((http://)*[w+][.w-]+[w.:]*)';
- EmailRE = '([_a-zA-Zd-.]+[@#][_a-zA-Zd-]+(.[_a-zA-Zd-]+)+)';
- implementation
- uses RegExpr;
- {$R *.dfm}
- { TForm1 }
- procedure TForm1.FormCreate(Sender: TObject);
- begin
- FList := TThreadList.Create;
- FLock := TCriticalSection.Create;
- end;
- procedure TForm1.FormDestroy(Sender: TObject);
- begin
- if Assigned(FLock) then
- FLock.Free;
- if Assigned(FList) then
- FList.Free;
- end;
- procedure TForm1.RemoveThread(Sender: TMyThread);
- var
- List: TList;
- begin
- List := FList.LockList;
- try
- List.Remove(Sender);
- finally
- FList.UnlockList;
- end;
- end;
- procedure TForm1.Lock;
- begin
- FLock.Enter;
- end;
- procedure TForm1.Unlock;
- begin
- FLock.Leave;
- end;
- function TForm1.GetURL(var AURL: string): Boolean;
- begin
- Lock;
- try
- Result := FIndex < Memo1.Lines.Count;
- if Result then
- begin
- AURL := Memo1.Lines.Strings[FIndex];
- Inc(FIndex);
- end;
- finally
- Unlock;
- end;
- end;
- { TMyThread }
- procedure TMyThread.AddURL;
- begin
- if (Form1.Memo1.Lines.IndexOf(FURL) = -1) and IsURL(FURL) then
- begin
- Form1.Memo1.Lines.Add(FURL);
- Application.ProcessMessages;
- end;
- end;
- constructor TMyThread.Create(const AEntryURL: string);
- begin
- FHTTP := TWinHTTP.Create(nil);
- FHTTP.OnDone := HTTPDone;
- FEvent := TEvent.Create(nil, True, False, '');
- FEntryURL := AEntryURL;
- FData := TStringList.Create;
- FURLs := TStringList.Create;
- FreeOnTerminate := True;
- inherited Create(True);
- end;
- destructor TMyThread.Destroy;
- begin
- if Assigned(FURLs) then
- FURLs.Free;
- if Assigned(FData) then
- FData.Free;
- if Assigned(FEvent) then
- FEvent.Free;
- if Assigned(FHTTP) then
- FHTTP.Free;
- inherited;
- end;
- procedure TMyThread.DoTerminate;
- begin
- Form1.RemoveThread(Self);
- inherited;
- end;
- procedure TMyThread.Execute;
- var
- url: string;
- begin
- while not Terminated do
- begin
- if not Form1.GetURL(url) then
- ;
- //Break;
- FEvent.ResetEvent;
- GetURLs(url);
- end;
- end;
- function TMyThread.GetURLs(const AURL: string): string;
- {var
- re: TRegExpr;
- HTML: string;}
- begin
- FHTTP.URL := AURL;
- FHTTP.Referer := AURL;
- FHTTP.Read();
- FEvent.WaitFor(10000);
- //FEvent.WaitFor(INFINITE);
- //HTML := FHTML;
- //FData.CommaText := ParseEmails(EmailRE, FHTML);
- //Synchronize(AddData);
- FURLs.CommaText := ParseString(URLRE, FHTML);
- Synchronize(AddURLs);
- {try
- re := TRegExpr.Create;
- try
- re.Expression := URLRE;
- if re.Exec(FHTML) then
- repeat
- FURL := re.Match[1];
- if not IsURL(FURL) then Continue;
- if FURL[1] = '/' then
- FURL := ParseString(URLHostRE, AURL, '$1') + FURL
- else if LowerCase(Copy(FURL, 1, 4)) <> 'http' then
- FURL := ParseString(URLPathRE, AURL, '$1') + FURL;
- Synchronize(AddURL);
- //Sleep(5);
- //GetData(FURL);
- if Terminated then
- Exit;
- until not re.ExecNext;
- finally
- re.Free;
- end;
- except
- end;}
- Result := FHTML;
- end;
- procedure TMyThread.GetURL;
- var
- RE: TRegExpr;
- begin
- re := TRegExpr.Create;
- try
- re.Expression := URLRE;
- if re.Exec(FHTML) then
- repeat
- FURL := re.Match[1];
- Synchronize(AddURL);
- if Terminated then
- Exit;
- until not re.ExecNext;
- finally
- re.Free;
- end;
- end;
- procedure TMyThread.HTTPDone(Sender: TObject; const ContentType: string;
- FileSize: Integer; Stream: TStream);
- var
- Str: string;
- begin
- with Stream as TMemoryStream do
- begin
- SetLength(Str, Size);
- Move(Memory^, Str[1], Size);
- FHTML := Str;
- FEvent.SetEvent;
- end
- end;
- function TMyThread.ParseString(const ARE, AInputString,
- ATemplate: string): string;
- var
- r: TRegExpr;
- begin
- r := TRegExpr.Create;
- try
- r.Expression := ARE;
- if r.Exec(AInputString) then
- Result := r.Substitute(ATemplate)
- else
- Result := '';
- finally
- r.Free;
- end;
- end;
- function TMyThread.IsURL(const AURL: string): Boolean;
- var
- URL: string;
- begin
- URL := LowerCase(AURL);
- Result := (URL <> '') and
- (URL <> '#') and
- (URL <> '+') and
- (URL <> '') and
- (URL <> 'url;') and
- (URL <> ';') and
- (URL <> 'about:blank') and
- (Pos('.js', URL) = 0) and
- (Pos('.gif', URL) = 0) and
- (Pos('.jpg', URL) = 0) and
- (Pos('.bmp', URL) = 0) and
- (Pos('.swf', URL) = 0) and
- (Pos('.jpeg', URL) = 0) and
- (Pos('.ico', URL) = 0) and
- (Pos('.css', URL) = 0) and
- (Pos('.mp3', URL) = 0) and
- (Pos('.rm', URL) = 0) and
- (Pos('.avi', URL) = 0) and
- (Pos('.mpeg', URL) = 0) and
- (Pos('.pdf', URL) = 0) and
- (Pos('.emf', URL) = 0) and
- (Pos('.pix', AURL) = 0) and
- (Pos('.png', AURL) = 0) and
- (Pos('.wmf', URL) = 0) and
- (Pos('.exe', URL) = 0) and
- (Pos('.rar', URL) = 0) and
- (Pos('.zip', URL) = 0) and
- (Pos('javascript', URL) = 0) and
- (Pos('mailto', URL) = 0);
- end;
- function TMyThread.ParseString(const ARE, AInputString: string): string;
- var
- r: TRegExpr;
- s: string;
- begin
- r := TRegExpr.Create;
- try
- r.Expression := ARE;
- if r.Exec(AInputString) then
- repeat
- s := r.Match[1];
- if not IsURL(s) then
- Continue;
- if s[1] = '/' then
- s := ParseString(URLHostRE, FHTTP.URL, '$1') + s
- else if LowerCase(Copy(s, 1, 4)) <> 'http' then
- s := ParseString(URLPathRE, FHTTP.URL, '$1') + s;
- s := StringReplace(s, ',', '^', [rfReplaceAll]);
- //s := StringReplace(s, ' ', '%20', [rfReplaceAll]);
- Result := Result + s + ',';
- until not r.ExecNext;
- finally
- r.Free;
- end;
- end;
- procedure TMyThread.Terminate;
- begin
- inherited Terminate;
- FEvent.SetEvent;
- FHTTP.Abort(False, False);
- end;
- procedure TForm1.Button1Click(Sender: TObject);
- var
- MyThread: TMyThread;
- i: Integer;
- begin
- FIndex := 0;
- Memo1.Clear;
- Memo1.Lines.Add(Edit1.Text);
- for i := 0 to SpinEdit1.Value - 1 do
- begin
- MyThread := TMyThread.Create(Edit1.Text);
- FList.Add(MyThread);
- MyThread.Resume;
- end;
- end;
- procedure TForm1.Button2Click(Sender: TObject);
- var
- List: TList;
- i: Integer;
- begin
- List := FList.LockList;
- try
- for i := 0 to List.Count - 1 do
- TMyThread(List[i]).Terminate;
- finally
- FList.UnlockList;
- end;
- end;
- procedure TForm1.Memo1Change(Sender: TObject);
- begin
- StatusBar1.Panels[0].Text := Format('%d URLs, Current Index: %d',
- [Memo1.Lines.Count, FIndex]);
- end;
- procedure TForm1.Edit1Change(Sender: TObject);
- begin
- Button1.Enabled := (Sender as TEdit).Text <> '';
- end;
- procedure TMyThread.AddData;
- begin
- Form1.Memo2.Lines.BeginUpdate;
- try
- FData.Text := StringReplace(FData.Text, '#', '@', [rfReplaceAll]);
- //FData.Text := StringReplace(FData.Text,'%20', ' ', [rfReplaceAll]);
- Form1.Memo2.Lines.AddStrings(FData);
- Form1.Memo2.Lines.Delete(Form1.Memo2.Lines.Count - 1);
- Application.ProcessMessages;
- finally
- Form1.Memo2.Lines.EndUpdate;
- end;
- end;
- procedure TMyThread.AddURLs;
- begin
- Form1.Memo1.Lines.BeginUpdate;
- try
- FURLs.Text := StringReplace(FURLs.Text, '^', ',', [rfReplaceAll]);
- FURLs.Text := StringReplace(FURLs.Text, '%20', ' ', [rfReplaceAll]);
- Form1.Memo1.Lines.AddStrings(FURLs);
- Form1.Memo1.Lines.Delete(Form1.Memo1.Lines.Count - 1);
- Application.ProcessMessages;
- finally
- Form1.Memo1.Lines.EndUpdate;
- end;
- end;
- function TMyThread.ParseEmails(const ARE, AInputString: string): string;
- var
- r: TRegExpr;
- s: string;
- begin
- r := TRegExpr.Create;
- try
- r.Expression := ARE;
- if r.Exec(AInputString) then
- repeat
- s := r.Match[1];
- s := StringReplace(s, ' ', '%20', [rfReplaceAll]);
- Result := Result + s + ',';
- until not r.ExecNext;
- finally
- r.Free;
- end;
- end;
- end.