| /***************************************************************************** |
| * _ _ ____ _ |
| * Project ___| | | | _ \| | |
| * / __| | | | |_) | | |
| * | (__| |_| | _ <| |___ |
| * \___|\___/|_| \_\_____| |
| * |
| * The contents of this file are subject to the Mozilla Public License |
| * Version 1.0 (the "License"); you may not use this file except in |
| * compliance with the License. You may obtain a copy of the License at |
| * http://www.mozilla.org/MPL/ |
| * |
| * Software distributed under the License is distributed on an "AS IS" |
| * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the |
| * License for the specific language governing rights and limitations |
| * under the License. |
| * |
| * The Original Code is Curl. |
| * |
| * The Initial Developer of the Original Code is Daniel Stenberg. |
| * |
| * Portions created by the Initial Developer are Copyright (C) 1998. |
| * All Rights Reserved. |
| * |
| * ------------------------------------------------------------ |
| * Main author: |
| * - Daniel Stenberg <Daniel.Stenberg@haxx.nu> |
| * |
| * http://curl.haxx.nu |
| * |
| * $Source$ |
| * $Revision$ |
| * $Date$ |
| * $Author$ |
| * $State$ |
| * $Locker$ |
| * |
| * ------------------------------------------------------------ |
| ****************************************************************************/ |
| |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <string.h> |
| |
| #include "setup.h" |
| |
| #ifdef HAVE_UNISTD_H |
| #include <unistd.h> |
| #endif |
| #ifdef HAVE_SYS_SELECT_H |
| #include <sys/select.h> |
| #endif |
| |
| #include "urldata.h" |
| #include <curl/curl.h> |
| |
| #ifdef __BEOS__ |
| #include <net/socket.h> |
| #endif |
| |
| #ifdef WIN32 |
| #if !defined( __GNUC__) || defined(__MINGW32__) |
| #include <winsock.h> |
| #endif |
| #include <time.h> /* for the time_t typedef! */ |
| |
| #if defined(__GNUC__) && defined(TIME_WITH_SYS_TIME) |
| #include <sys/time.h> |
| #endif |
| |
| #endif |
| |
| #include "progress.h" |
| #include "speedcheck.h" |
| #include "sendf.h" |
| |
| #ifdef USE_ZLIB |
| #include <zlib.h> |
| #endif |
| |
/* MAX(x,y) yields the larger of its two arguments. It is a plain macro, so
   the argument that wins the comparison is evaluated twice: do not pass
   expressions with side effects (such as i++) to it. */
| #define MAX(x,y) ((x)>(y)?(x):(y)) |
| |
| /* --- download a stream from a socket --- */ |
| |
| /* This newly edited version of Download() was brought to us by the friendly |
| Mark Butler <butlerm@xmission.com>. Re-indented with the indent command. */ |
| |
| UrgError |
| Download (struct UrlData *data, |
| int sockfd, /* socket to read from */ |
| int size, /* -1 if unknown at this point */ |
| bool getheader, /* TRUE if header parsing is wanted */ |
| long *bytecountp /* return number of bytes read */ |
| ) |
| { |
| char *buf = data->buffer; |
| size_t nread; |
| int bytecount = 0; |
| long contentlength=0; |
| struct timeval start = tvnow(); |
| struct timeval now = start; |
| bool header = TRUE; |
| int headerline = 0; /* counts header lines to better track the first one */ |
| |
| char *hbufp; /* points at *end* of header line */ |
| int hbuflen = 0; |
| char *str; /* within buf */ |
| char *str_start; /* within buf */ |
| char *end_ptr; /* within buf */ |
| char *p; /* within headerbuff */ |
| bool content_range = FALSE; /* set TRUE if Content-Range: was found */ |
| int offset = 0; /* possible resume offset read from the |
| Content-Range: header */ |
| int code = 0; /* error code from the 'HTTP/1.? XXX' line */ |
| #ifdef USE_ZLIB |
| gzFile gzfile=NULL; |
| #endif |
| |
| /* for the low speed checks: */ |
| UrgError urg; |
| time_t timeofdoc=0; |
| long bodywrites=0; |
| |
| char newurl[URL_MAX_LENGTH]; /* buffer for Location: URL */ |
| |
| hbufp = data->headerbuff; |
| |
| myalarm (0); /* switch off the alarm-style timeout */ |
| |
| now = tvnow(); |
| start = now; |
| |
| if (!getheader) { |
| header = FALSE; |
| ProgressInit (data, size); |
| } |
| { |
| fd_set readfd; |
| fd_set keepfd; |
| struct timeval interval; |
| bool keepon = TRUE; |
| |
| /* timeout every X second |
| - makes a better progressmeter (i.e even when no data is read, the |
| meter can be updated and reflect reality) |
| - allows removal of the alarm() crap |
| - variable timeout is easier |
| */ |
| |
| FD_ZERO (&readfd); /* clear it */ |
| FD_SET (sockfd, &readfd); |
| |
| keepfd = readfd; |
| #ifdef USE_ZLIB |
| gzfile = gzdopen(sockfd, "rb"); |
| #endif |
| while (keepon) { |
| readfd = keepfd; /* set this every lap in the loop */ |
| interval.tv_sec = 2; |
| interval.tv_usec = 0; |
| |
| switch (select (sockfd + 1, &readfd, NULL, NULL, &interval)) { |
| case -1: /* error, stop reading */ |
| keepon = FALSE; |
| continue; |
| case 0: /* timeout */ |
| break; |
| default: /* read! */ |
| #ifdef USE_SSLEAY |
| if (data->use_ssl) { |
| nread = SSL_read (data->ssl, buf, BUFSIZE - 1); |
| } |
| else { |
| #endif |
| #ifdef USE_ZLIB |
| nread = gzread(gzfile, buf, BUFSIZE -1 ); |
| #else |
| nread = sread (sockfd, buf, BUFSIZE - 1); |
| #endif |
| #ifdef USE_SSLEAY |
| } |
| #endif /* USE_SSLEAY */ |
| |
| /* NULL terminate, allowing string ops to be used */ |
| if (0 < (signed int) nread) |
| buf[nread] = 0; |
| |
| /* if we receive 0 or less here, the server closed the connection and |
| we bail out from this! */ |
| else if (0 >= (signed int) nread) { |
| keepon = FALSE; |
| break; |
| } |
| |
| str = buf; /* Default buffer to use when we write the |
| buffer, it may be changed in the flow below |
| before the actual storing is done. */ |
| |
| /* Since this is a two-state thing, we check if we are parsing |
| headers at the moment or not. */ |
| |
| if (header) { |
| /* we are in parse-the-header-mode */ |
| |
| /* header line within buffer loop */ |
| do { |
| int hbufp_index; |
| |
| str_start = str; /* str_start is start of line within buf */ |
| |
| end_ptr = strchr (str_start, '\n'); |
| |
| if (!end_ptr) { |
| /* no more complete header lines within buffer */ |
| /* copy what is remaining into headerbuff */ |
| int str_length = (int)strlen(str); |
| |
| if (hbuflen + (int)str_length >= data->headersize) { |
| char *newbuff; |
| long newsize=MAX((hbuflen+str_length)*3/2, |
| data->headersize*2); |
| hbufp_index = hbufp - data->headerbuff; |
| newbuff = (char *)realloc(data->headerbuff, newsize); |
| if(!newbuff) { |
| failf (data, "Failed to alloc memory for big header!"); |
| return URG_READ_ERROR; |
| } |
| data->headersize=newsize; |
| data->headerbuff = newbuff; |
| hbufp = data->headerbuff + hbufp_index; |
| } |
| strcpy (hbufp, str); |
| hbufp += strlen (str); |
| hbuflen += strlen (str); |
| break; /* read more and try again */ |
| } |
| |
| str = end_ptr + 1; /* move just past new line */ |
| |
| if (hbuflen + (str - str_start) >= data->headersize) { |
| char *newbuff; |
| long newsize=MAX((hbuflen+(str-str_start))*3/2, |
| data->headersize*2); |
| hbufp_index = hbufp - data->headerbuff; |
| newbuff = (char *)realloc(data->headerbuff, newsize); |
| if(!newbuff) { |
| failf (data, "Failed to alloc memory for big header!"); |
| return URG_READ_ERROR; |
| } |
| data->headersize= newsize; |
| data->headerbuff = newbuff; |
| hbufp = data->headerbuff + hbufp_index; |
| } |
| |
| /* copy to end of line */ |
| strncpy (hbufp, str_start, str - str_start); |
| hbufp += str - str_start; |
| hbuflen += str - str_start; |
| *hbufp = 0; |
| |
| p = data->headerbuff; |
| |
| /* we now have a full line that p points to */ |
| if (('\n' == *p) || ('\r' == *p)) { |
| /* Zero-length line means end of header! */ |
| if (-1 != size) /* if known */ |
| size += bytecount; /* we append the already read size */ |
| |
| |
| if ('\r' == *p) |
| p++; /* pass the \r byte */ |
| if ('\n' == *p) |
| p++; /* pass the \n byte */ |
| |
| ProgressInit (data, size); /* init progress meter */ |
| header = FALSE; /* no more header to parse! */ |
| |
| /* now, only output this if the header AND body are requested: */ |
| if ((data->conf & (CONF_HEADER | CONF_NOBODY)) == CONF_HEADER) { |
| if((p - data->headerbuff) != |
| data->fwrite (data->headerbuff, 1, |
| p - data->headerbuff, data->out)) { |
| failf (data, "Failed writing output"); |
| return URG_WRITE_ERROR; |
| } |
| } |
| if(data->writeheader) { |
| /* obviously, the header is requested to be written to |
| this file: */ |
| if((p - data->headerbuff) != |
| fwrite (data->headerbuff, 1, p - data->headerbuff, |
| data->writeheader)) { |
| failf (data, "Failed writing output"); |
| return URG_WRITE_ERROR; |
| } |
| } |
| break; /* exit header line loop */ |
| } |
| |
| if (!headerline++) { |
| /* This is the first header, it MUST be the error code line |
| or else we consiser this to be the body right away! */ |
| if (sscanf (p, " HTTP/1.%*c %3d", &code)) { |
| /* 404 -> URL not found! */ |
| if ( |
| ( ((data->conf & CONF_FOLLOWLOCATION) && (code >= 400)) || |
| !(data->conf & CONF_FOLLOWLOCATION) && (code >= 300)) |
| && (data->conf & CONF_FAILONERROR)) { |
| /* If we have been told to fail hard on HTTP-errors, |
| here is the check for that: */ |
| /* serious error, go home! */ |
| failf (data, "The requested file was not found"); |
| return URG_HTTP_NOT_FOUND; |
| } |
| } |
| else { |
| header = FALSE; /* this is not a header line */ |
| break; |
| } |
| } |
| /* check for Content-Length: header lines to get size */ |
| if (strnequal("Content-Length", p, 14) && |
| sscanf (p+14, ": %ld", &contentlength)) |
| size = contentlength; |
| else if (strnequal("Content-Range", p, 13) && |
| sscanf (p+13, ": bytes %d-", &offset)) { |
| if (data->resume_from == offset) { |
| /* we asked for a resume and we got it */ |
| content_range = TRUE; |
| } |
| } |
| else if(data->cookies && |
| strnequal("Set-Cookie: ", p, 11)) { |
| cookie_add(data->cookies, TRUE, &p[12]); |
| } |
| else if(strnequal("Last-Modified:", p, strlen("Last-Modified:")) && |
| data->timecondition) { |
| time_t secs=time(NULL); |
| timeofdoc = get_date(p+strlen("Last-Modified:"), &secs); |
| } |
| else if ((code >= 300 && code < 400) && |
| (data->conf & CONF_FOLLOWLOCATION) && |
| strnequal("Location", p, 8) && |
| sscanf (p+8, ": %" URL_MAX_LENGTH_TXT "s", newurl)) { |
| /* this is the URL that the server advices us to get |
| instead */ |
| data->newurl = strdup (newurl); |
| } |
| |
| if (data->conf & CONF_HEADER) { |
| if(hbuflen != data->fwrite (p, 1, hbuflen, data->out)) { |
| failf (data, "Failed writing output"); |
| return URG_WRITE_ERROR; |
| } |
| } |
| if(data->writeheader) { |
| /* the header is requested to be written to this file */ |
| if(hbuflen != fwrite (p, 1, hbuflen, data->writeheader)) { |
| failf (data, "Failed writing output"); |
| return URG_WRITE_ERROR; |
| } |
| } |
| |
| /* reset hbufp pointer && hbuflen */ |
| hbufp = data->headerbuff; |
| hbuflen = 0; |
| } |
| while (*str); /* header line within buffer */ |
| |
| /* We might have reached the end of the header part here, but |
| there might be a non-header part left in the end of the read |
| buffer. */ |
| |
| if (!header) { |
| /* the next token and forward is not part of |
| the header! */ |
| |
| /* we subtract the remaining header size from the buffer */ |
| nread -= (str - buf); |
| } |
| |
| } /* end if header mode */ |
| |
| /* This is not an 'else if' since it may be a rest from the header |
| parsing, where the beginning of the buffer is headers and the end |
| is non-headers. */ |
| if (str && !header && (nread > 0)) { |
| |
| if(0 == bodywrites) { |
| /* These checks are only made the first time we are about to |
| write a chunk of the body */ |
| if(data->conf&CONF_HTTP) { |
| /* HTTP-only checks */ |
| if (data->resume_from && !content_range ) { |
| /* we wanted to resume a download, although the server doesn't |
| seem to support this */ |
| failf (data, "HTTP server doesn't seem to support byte ranges. Cannot resume."); |
| return URG_HTTP_RANGE_ERROR; |
| } |
| else if (data->newurl) { |
| /* abort after the headers if "follow Location" is set */ |
| infof (data, "Follow to new URL: %s\n", data->newurl); |
| return URG_OK; |
| } |
| else if(data->timecondition && !data->range) { |
| /* A time condition has been set AND no ranges have been |
| requested. This seems to be what chapter 13.3.4 of RFC 2616 |
| defines to be the correct action for a HTTP/1.1 client */ |
| if((timeofdoc > 0) && (data->timevalue > 0)) { |
| switch(data->timecondition) { |
| case TIMECOND_IFMODSINCE: |
| default: |
| if(timeofdoc < data->timevalue) { |
| infof(data, "The requested document is not new enough"); |
| return URG_OK; |
| } |
| break; |
| case TIMECOND_IFUNMODSINCE: |
| if(timeofdoc > data->timevalue) { |
| infof(data, "The requested document is not old enough"); |
| return URG_OK; |
| } |
| break; |
| } /* switch */ |
| } /* two valid time strings */ |
| } /* we have a time condition */ |
| } /* this is HTTP */ |
| } /* this is the first time we write a body part */ |
| bodywrites++; |
| |
| if(data->maxdownload && |
| (bytecount + nread > data->maxdownload)) { |
| nread = data->maxdownload - bytecount; |
| if(nread < 0 ) /* this should be unusual */ |
| nread = 0; |
| keepon = FALSE; /* we're done now! */ |
| } |
| |
| bytecount += nread; |
| |
| if (nread != data->fwrite (str, 1, nread, data->out)) { |
| failf (data, "Failed writing output"); |
| return URG_WRITE_ERROR; |
| } |
| |
| } |
| break; |
| } |
| now = tvnow(); |
| if (!header) { |
| ProgressShow (data, bytecount, start, now, FALSE); |
| } |
| urg = speedcheck (data, now); |
| if (urg) |
| return urg; |
| |
| if (data->timeout && (tvdiff (now, start) > data->timeout)) { |
| failf (data, "Operation timed out with %d out of %d bytes received", |
| bytecount, size); |
| return URG_OPERATION_TIMEOUTED; |
| } |
| #ifdef MULTIDOC |
| if(contentlength && bytecount >= contentlength) { |
| /* we're done with this download, now stop it */ |
| break; |
| } |
| #endif |
| } |
| } |
| if(contentlength && (bytecount != contentlength)) { |
| failf(data, "transfer closed with %d bytes remaining", contentlength-bytecount); |
| return URG_PARTIAL_FILE; |
| } |
| ProgressShow (data, bytecount, start, now, TRUE); |
| |
| *bytecountp = bytecount; |
| |
| #ifdef USE_ZLIB |
| gzclose(gzfile); |
| #endif |
| return URG_OK; |
| } |
| |
| |