about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Wilmer van der Gaast <wilmer@gaast.net> 2013-06-09 22:17:45 +0100
committer: Wilmer van der Gaast <wilmer@gaast.net> 2013-06-09 22:17:45 +0100
commit: ca8037e28d09ed96448509316a935eb130e6d3db (patch)
tree: 553b0a3a879d4708758874e812c86af814cd2bfc
parent: 41a94dd69dcbb5d4ef1fda5949196fed63994c03 (diff)
Add better handling of HTTP/1.1 and/or keepalive connections. This should
let me close #641, and more importantly, prepares the Twitter module for an upcoming API change. https://dev.twitter.com/blog/deprecating-http-1.0-streaming-api
-rw-r--r-- lib/http_client.c | 232
-rw-r--r-- lib/http_client.h | 6
-rw-r--r-- lib/oauth.c | 1
-rw-r--r-- lib/oauth2.c | 1
-rw-r--r-- lib/proxy.c | 2
5 files changed, 171 insertions, 71 deletions
diff --git a/lib/http_client.c b/lib/http_client.c
index b384e1f0..a5ec5867 100644
--- a/lib/http_client.c
+++ b/lib/http_client.c
@@ -1,7 +1,7 @@
/********************************************************************\
* BitlBee -- An IRC to other IM-networks gateway *
* *
- * Copyright 2002-2012 Wilmer van der Gaast and others *
+ * Copyright 2002-2013 Wilmer van der Gaast and others *
\********************************************************************/
/* HTTP(S) module */
@@ -68,6 +68,7 @@ struct http_request *http_dorequest( char *host, int port, int ssl, char *reques
req->request = g_strdup( request );
req->request_length = strlen( request );
req->redir_ttl = 3;
+ req->content_length = -1;
if( getenv( "BITLBEE_DEBUG" ) )
printf( "About to send HTTP request:\n%s\n", req->request );
@@ -95,7 +96,6 @@ struct http_request *http_dorequest_url( char *url_string, http_input_function f
request = g_strdup_printf( "GET %s HTTP/1.0\r\n"
"Host: %s\r\n"
- "Connection: close\r\n"
"User-Agent: BitlBee " BITLBEE_VERSION " " ARCH "/" CPU "\r\n"
"\r\n", url->file, url->host );
@@ -192,14 +192,21 @@ static gboolean http_ssl_connected( gpointer data, int returncode, void *source,
return http_connected( data, req->fd, cond );
}
+typedef enum {
+ CR_OK,
+ CR_EOF,
+ CR_ERROR,
+ CR_ABORT,
+} http_ret_t;
+
static gboolean http_handle_headers( struct http_request *req );
+static http_ret_t http_process_chunked_data( struct http_request *req, const char *buffer, int len );
+static http_ret_t http_process_data( struct http_request *req, const char *buffer, int len );
static gboolean http_incoming_data( gpointer data, int source, b_input_condition cond )
{
struct http_request *req = data;
char buffer[4096];
- char *s;
- size_t content_length;
int st;
if( req->inpa > 0 )
@@ -243,53 +250,25 @@ static gboolean http_incoming_data( gpointer data, int source, b_input_condition
}
}
- if( st > 0 && !req->sbuf )
+ if( st > 0 )
{
- req->reply_headers = g_realloc( req->reply_headers, req->bytes_read + st + 1 );
- memcpy( req->reply_headers + req->bytes_read, buffer, st );
- req->bytes_read += st;
+ http_ret_t c;
- st = 0;
- }
-
- if( st >= 0 && ( req->flags & HTTPC_STREAMING ) )
- {
- if( !req->reply_body &&
- ( strstr( req->reply_headers, "\r\n\r\n" ) ||
- strstr( req->reply_headers, "\n\n" ) ) )
- {
- size_t hlen;
-
- /* We've now received all headers, so process them once
- before we start feeding back data. */
- if( !http_handle_headers( req ) )
- return FALSE;
-
- hlen = req->reply_body - req->reply_headers;
-
- req->sblen = req->bytes_read - hlen;
- req->sbuf = g_memdup( req->reply_body, req->sblen + 1 );
- req->reply_headers = g_realloc( req->reply_headers, hlen + 1 );
-
- req->reply_body = req->sbuf;
- }
-
- if( st > 0 )
- {
- int pos = req->reply_body - req->sbuf;
- req->sbuf = g_realloc( req->sbuf, req->sblen + st + 1 );
- memcpy( req->sbuf + req->sblen, buffer, st );
- req->bytes_read += st;
- req->sblen += st;
- req->sbuf[req->sblen] = '\0';
- req->reply_body = req->sbuf + pos;
- req->body_size = req->sblen - pos;
- }
+ if( req->flags & HTTPC_CHUNKED )
+ c = http_process_chunked_data( req, buffer, st );
+ else
+ c = http_process_data( req, buffer, st );
- if( req->reply_body )
- req->func( req );
+ if( c == CR_EOF )
+ goto eof;
+ else if( c == CR_ERROR || c == CR_ABORT )
+ return FALSE;
}
+ if( req->content_length != -1 &&
+ req->body_size >= req->content_length )
+ goto eof;
+
if( ssl_pending( req->ssl ) )
return http_incoming_data( data, source, cond );
@@ -310,14 +289,6 @@ eof:
req->status_string = g_strdup( "Empty HTTP reply" );
goto cleanup;
}
-
- if( !( req->flags & HTTPC_STREAMING ) )
- {
- /* Returns FALSE if we were redirected, in which case we should abort
- and not run any callback yet. */
- if( !http_handle_headers( req ) )
- return FALSE;
- }
cleanup:
if( req->ssl )
@@ -325,17 +296,12 @@ cleanup:
else
closesocket( req->fd );
- if( ( s = get_rfc822_header( req->reply_headers, "Content-Length", 0 ) ) &&
- sscanf( s, "%zd", &content_length ) == 1 )
+ if( req->body_size < req->content_length )
{
- if( content_length < req->body_size )
- {
- req->status_code = -1;
- g_free( req->status_string );
- req->status_string = g_strdup( "Response truncated" );
- }
+ req->status_code = -1;
+ g_free( req->status_string );
+ req->status_string = g_strdup( "Response truncated" );
}
- g_free( s );
if( getenv( "BITLBEE_DEBUG" ) && req )
printf( "Finishing HTTP request with status: %s\n",
@@ -346,11 +312,118 @@ cleanup:
return FALSE;
}
+static http_ret_t http_process_chunked_data( struct http_request *req, const char *buffer, int len )
+{
+ char *chunk, *eos, *s;
+
+ if( len < 0 )
+ return TRUE;
+
+ if( len > 0 )
+ {
+ req->cbuf = g_realloc( req->cbuf, req->cblen + len + 1 );
+ memcpy( req->cbuf + req->cblen, buffer, len );
+ req->cblen += len;
+ req->cbuf[req->cblen] = '\0';
+ }
+
+ /* Turns out writing a proper chunked-encoding state machine is not
+ that simple. :-( */
+ chunk = req->cbuf;
+ eos = req->cbuf + req->cblen;
+ while( TRUE )
+ {
+ int clen = 0;
+
+ /* Might be a \r\n from the last chunk. */
+ s = chunk;
+ while( isspace( *s ) )
+ s ++;
+ /* Chunk length. Might be incomplete. */
+ if( s < eos && sscanf( s, "%x", &clen ) != 1 )
+ return CR_ERROR;
+ while( isxdigit( *s ) )
+ s ++;
+
+ /* If we read anything here, it *must* be \r\n. */
+ if( strncmp( s, "\r\n", MIN( 2, eos - s ) ) != 0 )
+ return CR_ERROR;
+ s += 2;
+
+ if( s >= eos )
+ break;
+
+ /* 0-length chunk means end of response. */
+ if( clen == 0 )
+ return CR_EOF;
+
+ if( s + clen > eos )
+ break;
+ if( http_process_data( req, s, clen ) != CR_OK )
+ return CR_ABORT;
+
+ chunk = s + clen;
+ }
+
+ if( chunk != req->cbuf )
+ {
+ req->cblen = eos - chunk;
+ s = g_memdup( chunk, req->cblen + 1 );
+ g_free( req->cbuf );
+ req->cbuf = s;
+ }
+
+ return CR_OK;
+}
+
+static http_ret_t http_process_data( struct http_request *req, const char *buffer, int len )
+{
+ if( len <= 0 )
+ return CR_OK;
+
+ if( !req->reply_body )
+ {
+ req->reply_headers = g_realloc( req->reply_headers, req->bytes_read + len + 1 );
+ memcpy( req->reply_headers + req->bytes_read, buffer, len );
+ req->bytes_read += len;
+ req->reply_headers[req->bytes_read] = '\0';
+
+ if( strstr( req->reply_headers, "\r\n\r\n" ) ||
+ strstr( req->reply_headers, "\n\n" ) )
+ {
+ /* We've now received all headers. Look for something
+ interesting. */
+ if( !http_handle_headers( req ) )
+ return CR_ABORT;
+
+ /* Start parsing the body as chunked if required. */
+ if( req->flags & HTTPC_CHUNKED )
+ return http_process_chunked_data( req, NULL, 0 );
+ }
+ }
+ else
+ {
+ int pos = req->reply_body - req->sbuf;
+ req->sbuf = g_realloc( req->sbuf, req->sblen + len + 1 );
+ memcpy( req->sbuf + req->sblen, buffer, len );
+ req->bytes_read += len;
+ req->sblen += len;
+ req->sbuf[req->sblen] = '\0';
+ req->reply_body = req->sbuf + pos;
+ req->body_size = req->sblen - pos;
+ }
+
+ if( ( req->flags & HTTPC_STREAMING ) && req->reply_body )
+ req->func( req );
+
+ return CR_OK;
+}
+
/* Splits headers and body. Checks result code, in case of 300s it'll handle
redirects. If this returns FALSE, don't call any callbacks! */
static gboolean http_handle_headers( struct http_request *req )
{
- char *end1, *end2;
+ char *end1, *end2, *s;
int evil_server = 0;
/* Zero termination is very convenient. */
@@ -376,7 +449,7 @@ static gboolean http_handle_headers( struct http_request *req )
return TRUE;
}
- *end1 = 0;
+ *end1 = '\0';
if( getenv( "BITLBEE_DEBUG" ) )
printf( "HTTP response headers:\n%s\n", req->reply_headers );
@@ -386,7 +459,10 @@ static gboolean http_handle_headers( struct http_request *req )
else
req->reply_body = end1 + 2;
- req->body_size = req->reply_headers + req->bytes_read - req->reply_body;
+ /* Separately allocated space for headers and body. */
+ req->sblen = req->body_size = req->reply_headers + req->bytes_read - req->reply_body;
+ req->sbuf = req->reply_body = g_memdup( req->reply_body, req->body_size + 1 );
+ req->reply_headers = g_realloc( req->reply_headers, end1 - req->reply_headers + 1 );
if( ( end1 = strchr( req->reply_headers, ' ' ) ) != NULL )
{
@@ -451,7 +527,7 @@ static gboolean http_handle_headers( struct http_request *req )
/* Since we don't cache the servername, and since we
don't need this yet anyway, I won't implement it. */
- req->status_string = g_strdup( "Can't handle recursive redirects" );
+ req->status_string = g_strdup( "Can't handle relative redirects" );
return TRUE;
}
@@ -507,7 +583,7 @@ static gboolean http_handle_headers( struct http_request *req )
new_method = "POST";
/* Okay, this isn't fun! We have to rebuild the request... :-( */
- new_request = g_strdup_printf( "%s %s HTTP/1.0\r\nHost: %s%s",
+ new_request = g_strdup_printf( "%s %s HTTP/1.1\r\nHost: %s%s",
new_method, url->file, url->host, s );
new_host = g_strdup( url->host );
@@ -563,6 +639,25 @@ static gboolean http_handle_headers( struct http_request *req )
return FALSE;
}
+
+ if( ( s = get_rfc822_header( req->reply_headers, "Content-Length", 0 ) ) &&
+ sscanf( s, "%d", &req->content_length ) != 1 )
+ req->content_length = -1;
+ g_free( s );
+
+ if( ( s = get_rfc822_header( req->reply_headers, "Transfer-Encoding", 0 ) ) )
+ {
+ if( strcasestr( s, "chunked" ) )
+ {
+ req->flags |= HTTPC_CHUNKED;
+ req->cbuf = req->sbuf;
+ req->cblen = req->sblen;
+
+ req->reply_body = req->sbuf = g_strdup( "" );
+ req->body_size = req->sblen = 0;
+ }
+ g_free( s );
+ }
return TRUE;
}
@@ -606,5 +701,6 @@ static void http_free( struct http_request *req )
g_free( req->reply_headers );
g_free( req->status_string );
g_free( req->sbuf );
+ g_free( req->cbuf );
g_free( req );
}
diff --git a/lib/http_client.h b/lib/http_client.h
index ca427118..1b86f228 100644
--- a/lib/http_client.h
+++ b/lib/http_client.h
@@ -41,6 +41,7 @@ typedef enum http_client_flags
{
HTTPC_STREAMING = 1,
HTTPC_EOF = 2,
+ HTTPC_CHUNKED = 4,
/* Let's reserve 0x1000000+ for lib users. */
} http_client_flags_t;
@@ -76,10 +77,15 @@ struct http_request
int inpa;
int bytes_written;
int bytes_read;
+ int content_length; /* "Content-Length:" header or -1 */
/* Used in streaming mode. Caller should read from reply_body. */
char *sbuf;
size_t sblen;
+
+ /* Chunked encoding only. Raw chunked stream is decoded from here. */
+ char *cbuf;
+ size_t cblen;
};
/* The _url variant is probably more useful than the raw version. The raw
diff --git a/lib/oauth.c b/lib/oauth.c
index 04949e1b..c78b4a43 100644
--- a/lib/oauth.c
+++ b/lib/oauth.c
@@ -261,7 +261,6 @@ static void *oauth_post_request( const char *url, GSList **params_, http_input_f
"Host: %s\r\n"
"Content-Type: application/x-www-form-urlencoded\r\n"
"Content-Length: %zd\r\n"
- "Connection: close\r\n"
"\r\n"
"%s", url_p.file, url_p.host, strlen( post ), post );
g_free( post );
diff --git a/lib/oauth2.c b/lib/oauth2.c
index 69ce9d58..bfd4b143 100644
--- a/lib/oauth2.c
+++ b/lib/oauth2.c
@@ -95,7 +95,6 @@ int oauth2_access_token( const struct oauth2_service *sp,
"Host: %s\r\n"
"Content-Type: application/x-www-form-urlencoded\r\n"
"Content-Length: %zd\r\n"
- "Connection: close\r\n"
"\r\n"
"%s", url_p.file, url_p.host, strlen( args_s ), args_s );
g_free( args_s );
diff --git a/lib/proxy.c b/lib/proxy.c
index 3e5c9d49..b6b02d72 100644
--- a/lib/proxy.c
+++ b/lib/proxy.c
@@ -157,7 +157,7 @@ static int proxy_connect_none(const char *host, unsigned short port_, struct PHB
event_debug("bind( %d, \"%s\" ) failure\n", fd, global.conf->iface_out);
}
- event_debug("proxy_connect_none( \"%s\", %d ) = %d\n", host, port, fd);
+ event_debug("proxy_connect_none( \"%s\", %d ) = %d\n", host, port_, fd);
if (connect(fd, phb->gai_cur->ai_addr, phb->gai_cur->ai_addrlen) < 0 && !sockerr_again()) {
event_debug( "connect failed: %s\n", strerror(errno));