http_fetcher.cpp
资源名称:p2p_vod.rar [点击查看]
上传用户:liguizhu
上传日期:2015-11-01
资源大小:2422k
文件大小:17k
源码类别:
P2P编程
开发平台:
Visual C++
/* http_fetcher.c - HTTP handling functions
 *
 * HTTP Fetcher
 * Copyright (C) 2001, 2003, 2004 Lyle Hanson (lhanson@users.sourceforge.net)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * See LICENSE file for details
 */
#include "stdafx.h"
#include "http_fetcher.h"

#include <errno.h>
- HttpFetcher::HttpFetcher() : DEFAULT_VERSION("1.1"), HTTP_VERSION("HTTP/1.0"), DEFAULT_USER_AGENT("Chaos Client")
- {
- errorSource = 0;
- http_errno = 0;
- errorInt = 0; /* When the error message has a %d in it,
- * this variable is inserted */
- /* Note that '%d' cannot be escaped at this time */
- http_errlist[HF_SUCCESS] ="成功";
- http_errlist[HF_METAERROR] ="内部错误.";
- http_errlist[HF_NULLURL] ="无内容的URL";
- http_errlist[HF_HEADTIMEOUT] ="超时; %d秒钟内没有收到HTTP头";
- http_errlist[HF_DATATIMEOUT] ="超时; %d秒钟内没有收到数据";
- http_errlist[HF_FRETURNCODE] ="无返回代码";
- http_errlist[HF_CRETURNCODE] ="无效的返回代码";
- http_errlist[HF_STATUSCODE] ="状态代码是: %d. rn401:需要认证, 403:无法访问, 404:没有找到, 500:服务器出错";
- http_errlist[HF_CONTENTLEN] ="无效的内容长度(Content-Length)";
- http_errlist[HF_HERROR] ="网络错误 (description unavailable)";
- http_errlist[HF_CANTREDIRECT] ="状态代码是: %d,但是没有"Location"项";
- http_errlist[HF_MAXREDIRECTS] ="已经达到最大重定向次数(%d)";
- }
- /*
- * Actually downloads the url to localFile.
- * Returns size of download on success, -1 on error is set,
- */
- int HttpFetcher::http_fetch(const char *url_tmp, HANDLE localFile, UINT& fileSize, UINT& downloadedSize, double limitSpeed)
- {
- fd_set rfds;
- struct timeval tv;
- char headerBuf[HEADER_BUF_SIZE];
- char *tmp, *url, *requestBuf = NULL, *host, *charIndex;
- int sock, bufsize = REQUEST_BUF_SIZE;
- int i,
- ret = -1,
- tempSize,
- selectRet,
- found = 0, /* For redirects */
- redirectsFollowed = 0;
- downloadedSize = 0;
- fileSize = -1;
- if(url_tmp == NULL)
- {
- errorSource = FETCHER_ERROR;
- http_errno = HF_NULLURL;
- return -1;
- }
- /* Copy the url passed in into a buffer we can work with, change, etc. */
- url = (char*)malloc(strlen(url_tmp)+1);
- if(url == NULL)
- {
- errorSource = ERRNO;
- return -1;
- }
- strncpy(url, url_tmp, strlen(url_tmp) + 1);
- /* This loop allows us to follow redirects if need be. An afterthought,
- * added to provide this basic functionality. Will hopefully be designed
- * better in 2.x.x ;) */
- do {
- /* Seek to the file path portion of the url */
- charIndex = strstr(url, "://");
- if(charIndex != NULL)
- {
- /* url contains a protocol field */
- charIndex += strlen("://");
- host = charIndex;
- charIndex = strchr(charIndex, '/');
- }
- else
- {
- host = (char *)url;
- charIndex = strchr(url, '/');
- }
- /* Compose a request string */
- requestBuf = (char*)malloc(bufsize);
- if(requestBuf == NULL)
- {
- free(url);
- errorSource = ERRNO;
- return -1;
- }
- requestBuf[0] = 0;
- if(charIndex == NULL)
- {
- /* The url has no '/' in it, assume the user is making a root-level
- * request */
- tempSize = strlen("GET /") + strlen(HTTP_VERSION) + 2;
- if(_checkBufSize(&requestBuf, &bufsize, tempSize) ||
- _snprintf(requestBuf, bufsize, "GET / %srn", HTTP_VERSION) < 0)
- {
- free(url);
- free(requestBuf);
- errorSource = ERRNO;
- return -1;
- }
- }
- else
- {
- tempSize = strlen("GET ") + strlen(charIndex) +
- strlen(HTTP_VERSION) + 4;
- /* + 4 is for ' ', 'r', 'n', and NULL */
- if(_checkBufSize(&requestBuf, &bufsize, tempSize) ||
- _snprintf(requestBuf, bufsize, "GET %s %srn",
- charIndex, HTTP_VERSION) < 0)
- {
- free(url);
- free(requestBuf);
- errorSource = ERRNO;
- return -1;
- }
- }
- /* Null out the end of the hostname if need be */
- if(charIndex != NULL)
- *charIndex = 0;
- /* Use Host: even though 1.0 doesn't specify it. Some servers
- * won't play nice if we don't send Host, and it shouldn't
- * hurt anything */
- ret = bufsize - strlen(requestBuf); /* Space left in buffer */
- tempSize = (int)strlen("Host: ") + (int)strlen(host) + 3;
- /* +3 for "rn " */
- if(_checkBufSize(&requestBuf, &bufsize, tempSize + 128))
- {
- free(url);
- free(requestBuf);
- errorSource = ERRNO;
- return -1;
- }
- strcat(requestBuf, "Host: ");
- strcat(requestBuf, host);
- strcat(requestBuf, "rn");
- tempSize = (int)strlen("User-Agent: ") +
- (int)strlen(DEFAULT_USER_AGENT) + (int)strlen(DEFAULT_VERSION) + 4;
- /* + 4 is for '', 'r', 'n', and NULL */
- if(_checkBufSize(&requestBuf, &bufsize, tempSize))
- {
- free(url);
- free(requestBuf);
- errorSource = ERRNO;
- return -1;
- }
- strcat(requestBuf, "User-Agent: ");
- strcat(requestBuf, DEFAULT_USER_AGENT);
- strcat(requestBuf, "/");
- strcat(requestBuf, DEFAULT_VERSION);
- strcat(requestBuf, "rn");
- tempSize = (int)strlen("Connection: Closernrn");
- if(_checkBufSize(&requestBuf, &bufsize, tempSize))
- {
- free(url);
- free(requestBuf);
- errorSource = ERRNO;
- return -1;
- }
- strcat(requestBuf, "Connection: Closernrn");
- /* Now free any excess memory allocated to the buffer */
- tmp = (char*)realloc(requestBuf, strlen(requestBuf) + 1);
- if(tmp == NULL)
- {
- free(url);
- free(requestBuf);
- errorSource = ERRNO;
- return -1;
- }
- requestBuf = tmp;
- sock = makeSocket(host); /* errorSource set within makeSocket */
- if(sock == -1) { free(url); free(requestBuf); return -1;}
- free(url);
- url = NULL;
- if(send(sock, requestBuf, strlen(requestBuf), 0) == -1) // added by jarjar
- //if(write(sock, requestBuf, strlen(requestBuf)) == -1) // removed by jarjar
- {
- closesocket(sock); // added by jarjar
- //close(sock); // removed by jarjar
- free(requestBuf);
- errorSource = ERRNO;
- return -1;
- }
- free(requestBuf);
- requestBuf = NULL;
- /* Grab enough of the response to get the metadata */
- ret = _http_read_header(sock, headerBuf); /* errorSource set within */
- if(ret < 0)
- {
- closesocket(sock); // added by jarjar
- //close(sock); // removed by jarjar
- return -1;
- }
- /* Get the return code */
- charIndex = strstr(headerBuf, "HTTP/");
- if(charIndex == NULL)
- {
- closesocket(sock); // added by jarjar
- //close(sock); // removed by jarjar
- errorSource = FETCHER_ERROR;
- http_errno = HF_FRETURNCODE;
- return -1;
- }
- while(*charIndex != ' ')
- charIndex++;
- charIndex++;
- ret = sscanf(charIndex, "%d", &i);
- if(ret != 1)
- {
- closesocket(sock); // added by jarjar
- //close(sock); // removed by jarjar
- errorSource = FETCHER_ERROR;
- http_errno = HF_CRETURNCODE;
- return -1;
- }
- if(i<200 || i>307)
- {
- closesocket(sock); // added by jarjar
- //close(sock); // removed by jarjar
- errorInt = i; /* Status code, to be inserted in error string */
- errorSource = FETCHER_ERROR;
- http_errno = HF_STATUSCODE;
- return -1;
- }
- /* If a redirect, repeat operation until final URL is found or we
- * redirect DEFAULT_REDIRECTS times. Note the case sensitive "Location",
- * should probably be made more robust in the future (without relying
- * on the non-standard strcasecmp()).
- * This bit mostly by Dean Wilder, tweaked by me */
- if(i >= 300)
- {
- redirectsFollowed++;
- /* Pick up redirect URL, allocate new url, and repeat process */
- charIndex = strstr(headerBuf, "Location:");
- if(!charIndex)
- {
- closesocket(sock); // added by jarjar
- //close(sock); // removed by jarjar
- errorInt = i; /* Status code, to be inserted in error string */
- errorSource = FETCHER_ERROR;
- http_errno = HF_CANTREDIRECT;
- return -1;
- }
- charIndex += strlen("Location:");
- /* Skip any whitespace... */
- while(*charIndex != ' ' && isspace(*charIndex))
- charIndex++;
- if(*charIndex == ' ')
- {
- closesocket(sock); // added by jarjar
- //close(sock); // removed by jarjar
- errorInt = i; /* Status code, to be inserted in error string */
- errorSource = FETCHER_ERROR;
- http_errno = HF_CANTREDIRECT;
- return -1;
- }
- i = strcspn(charIndex, " rn");
- if(i > 0)
- {
- url = (char *)malloc(i + 1);
- strncpy(url, charIndex, i);
- url[i] = ' ';
- }
- else
- /* Found 'Location:' but contains no URL! We'll handle it as
- * 'found', hopefully the resulting document will give the user
- * a hint as to what happened. */
- found = 1;
- }
- else
- found = 1;
- } while(!found && redirectsFollowed <= DEFAULT_REDIRECTS);
- if(url) /* Redirection code may malloc this, then exceed DEFAULT_REDIRECTS */
- {
- free(url);
- url = NULL;
- }
- if(redirectsFollowed >= DEFAULT_REDIRECTS && !found)
- {
- closesocket(sock); // added by jarjar
- //close(sock); // removed by jarjar
- errorInt = DEFAULT_REDIRECTS; /* To be inserted in error string */
- errorSource = FETCHER_ERROR;
- http_errno = HF_MAXREDIRECTS;
- return -1;
- }
- /*
- * Parse out about how big the data segment is.
- * Note that under current HTTP standards (1.1 and prior), the
- * Content-Length field is not guaranteed to be accurate or even present.
- * I just use it here so I can allocate a ballpark amount of memory.
- *
- * Note that some servers use different capitalization
- */
- charIndex = strstr(headerBuf, "Content-Length:");
- if(charIndex == NULL)
- charIndex = strstr(headerBuf, "Content-length:");
- if(charIndex != NULL)
- {
- ret = sscanf(charIndex + strlen("content-length: "), "%d",
- &fileSize);
- if(ret < 1)
- {
- closesocket(sock); // added by jarjar
- //close(sock); // removed by jarjar
- errorSource = FETCHER_ERROR;
- http_errno = HF_CONTENTLEN;
- return -1;
- }
- }
- char tmpReadBuf[BLOCK_SIZE];
- DWORD tmpWriteBytes = 0;
- tv.tv_sec = DEFAULT_READ_TIMEOUT;
- tv.tv_usec = 0;
- DWORD dwStartTicks = GetTickCount();
- /* Begin reading the body of the file */
- while(ret > 0 && (fileSize == -1 || downloadedSize < fileSize) && localFile != INVALID_HANDLE_VALUE)
- {
- FD_ZERO(&rfds);
- FD_SET((SOCKET)sock, &rfds);
- selectRet = select(sock+1, &rfds, NULL, NULL, &tv);
- if(selectRet == 0)
- {
- // timeout means nothing
- continue;
- /*
- errorSource = FETCHER_ERROR;
- http_errno = HF_DATATIMEOUT;
- errorInt = DEFAULT_READ_TIMEOUT;
- closesocket(sock); // added by jarjar
- //close(sock); // removed by jarjar
- return -1;
- */
- }
- else if(selectRet == -1)
- {
- closesocket(sock); // added by jarjar
- //close(sock); // removed by jarjar
- errorSource = ERRNO;
- return -1;
- }
- ret = recv(sock, tmpReadBuf, BLOCK_SIZE, 0); // added by jarjar
- if(ret == -1)
- {
- closesocket(sock); // added by jarjar
- //close(sock); // removed by jarjar
- errorSource = ERRNO;
- return -1;
- }
- if(INVALID_SET_FILE_POINTER == SetFilePointer(localFile, downloadedSize, 0, FILE_BEGIN))
- {
- int xxx = GetLastError();
- closesocket(sock); // added by jarjar
- //close(sock); // removed by jarjar
- errorSource = ERRNO;
- return -1;
- }
- downloadedSize += ret;
- if(!WriteFile(localFile, tmpReadBuf, ret, &tmpWriteBytes, NULL) || tmpWriteBytes != ret)
- {
- int xxx = GetLastError();
- closesocket(sock); // added by jarjar
- //close(sock); // removed by jarjar
- errorSource = ERRNO;
- return -1;
- }
- // For bandwidth throttling
- if (limitSpeed > 0.0f)
- {
- double t = (double)(GetTickCount() - dwStartTicks);
- double q = (double)((double)downloadedSize / t);
- if (q > limitSpeed)
- Sleep((DWORD)((((q*t)/limitSpeed)-t)));
- }
- }
- ::FlushFileBuffers(localFile);
- closesocket(sock); // added by jarjar
- //close(sock); // removed by jarjar
- return downloadedSize;
- }
- /*
- * Returns a pointer to the current error description message. The
- * message pointed to is only good until the next call to http_strerror(),
- * so if you need to hold on to the message for a while you should make
- * a copy of it
- */
- const char* HttpFetcher::http_strerror()
- {
- int errno;
- if(errorSource == ERRNO)
- return strerror(errno);
- else if(errorSource == FETCHER_ERROR)
- {
- if(strstr(http_errlist[http_errno], "%d") == NULL)
- return http_errlist[http_errno];
- else
- {
- /* The error string has a %d in it, we need to insert errorInt.
- * convertedError[128] has been declared for that purpose */
- char *stringIndex, *originalError;
- originalError = (char *)http_errlist[http_errno];
- convertedError[0] = 0; /* Start off with NULL */
- stringIndex = strstr(originalError, "%d");
- strncat(convertedError, originalError, /* Copy up to %d */
- abs(stringIndex - originalError));
- sprintf(&convertedError[strlen(convertedError)],"%d",errorInt);
- stringIndex += 2; /* Skip past the %d */
- strcat(convertedError, stringIndex);
- return convertedError;
- }
- }
- return http_errlist[HF_METAERROR]; /* Should NEVER happen */
- }
- /*
- * Reads the metadata of an HTTP response.
- * Perhaps a little inefficient, as it reads 1 byte at a time, but
- * I don't think it's that much of a loss (most headers aren't HUGE).
- * Returns:
- * # of bytes read on success, or
- * -1 on error
- */
- int HttpFetcher::_http_read_header(int sock, char *headerPtr)
- {
- fd_set rfds;
- struct timeval tv;
- int bytesRead = 0, newlines = 0, ret, selectRet;
- while(newlines != 2 && bytesRead != HEADER_BUF_SIZE)
- {
- FD_ZERO(&rfds);
- FD_SET((SOCKET)sock, &rfds);
- tv.tv_sec = DEFAULT_READ_TIMEOUT;
- tv.tv_usec = 0;
- selectRet = select(sock+1, &rfds, NULL, NULL, &tv);
- if(selectRet == 0)
- {
- errorSource = FETCHER_ERROR;
- http_errno = HF_HEADTIMEOUT;
- errorInt = DEFAULT_READ_TIMEOUT;
- return -1;
- }
- else if(selectRet == -1) { errorSource = ERRNO; return -1; }
- ret = recv(sock, headerPtr, 1, 0);
- //ret = read(sock, headerPtr, 1); //removed by jarjar
- if(ret == -1) { errorSource = ERRNO; return -1; }
- bytesRead++;
- if(*headerPtr == 'r') /* Ignore CR */
- {
- /* Basically do nothing special, just don't set newlines
- * to 0 */
- headerPtr++;
- continue;
- }
- else if(*headerPtr == 'n') /* LF is the separator */
- newlines++;
- else
- newlines = 0;
- headerPtr++;
- }
- headerPtr -= 3; /* Snip the trailing LF's */
- *headerPtr = ' ';
- return bytesRead;
- }
- /*
- * Opens a TCP socket and returns the descriptor
- * Returns:
- * socket descriptor, or
- * -1 on error
- */
- int HttpFetcher::makeSocket(const char *host)
- {
- int sock; /* Socket descriptor */
- struct sockaddr_in sa; /* Socket address */
- struct hostent *hp; /* Host entity */
- int ret;
- u_short port; // added by jarjar
- //int port; // removed by jarjar
- char *p;
- /* Check for port number specified in URL */
- p = strchr(host, ':');
- if(p)
- {
- port = atoi(p + 1);
- *p = ' ';
- }
- else
- port = PORT_NUMBER;
- hp = gethostbyname(host);
- if(hp == NULL) { errorSource = ERRNO; return -1; }
- /* Copy host address from hostent to (server) socket address */
- memcpy((char *)&sa.sin_addr, (char *)hp->h_addr, hp->h_length);
- sa.sin_family = hp->h_addrtype; /* Set service sin_family to PF_INET */
- sa.sin_port = htons(port); /* Put portnum into sockaddr */
- sock = socket(hp->h_addrtype, SOCK_STREAM, 0);
- if(sock == -1) { errorSource = ERRNO; return -1; }
- ret = connect(sock, (struct sockaddr *)&sa, sizeof(sa));
- if(ret == -1) { errorSource = ERRNO; return -1; }
- return sock;
- }
- /*
- * Determines if the given NULL-terminated buffer is large enough to
- * concatenate the given number of characters. If not, it attempts to
- * grow the buffer to fit.
- * Returns:
- * 0 on success, or
- * -1 on error (original buffer is unchanged).
- */
- int HttpFetcher::_checkBufSize(char **buf, int *bufsize, int more)
- {
- char *tmp;
- int roomLeft = *bufsize - (strlen(*buf) + 1);
- if(roomLeft > more)
- return 0;
- tmp = (char*)realloc(*buf, *bufsize + more + 1);
- if(tmp == NULL)
- return -1;
- *buf = tmp;
- *bufsize += more + 1;
- return 0;
- }