diff options
author | Wilmer van der Gaast <wilmer@gaast.net> | 2013-06-16 13:15:15 +0100 |
---|---|---|
committer | Wilmer van der Gaast <wilmer@gaast.net> | 2013-06-16 13:15:15 +0100 |
commit | ab19567e25a35beb23f922303d1f60ed13228356 (patch) | |
tree | 8a29cfdea7ba920861c13fe98242e449615f8bbc | |
parent | 41a94dd69dcbb5d4ef1fda5949196fed63994c03 (diff) | |
parent | dd7b931d0fe950c5a6646c72565739fd8835c136 (diff) |
Merging HTTP/1.1 branch. This implements HTTP/1.1 support in http_client.
Little benefit as I'm not burning my fingers on keepalive connections for
now, but eventually the Twitter streaming API is going to drop 1.0 support:
https://dev.twitter.com/blog/deprecating-http-1.0-streaming-api
-rw-r--r-- | lib/http_client.c | 253 | ||||
-rw-r--r-- | lib/http_client.h | 6 | ||||
-rw-r--r-- | lib/oauth.c | 1 | ||||
-rw-r--r-- | lib/oauth2.c | 1 | ||||
-rw-r--r-- | lib/proxy.c | 2 | ||||
-rw-r--r-- | protocols/twitter/twitter.c | 5 | ||||
-rw-r--r-- | protocols/twitter/twitter_http.c | 2 |
7 files changed, 195 insertions, 75 deletions
diff --git a/lib/http_client.c b/lib/http_client.c index b384e1f0..b509c839 100644 --- a/lib/http_client.c +++ b/lib/http_client.c @@ -1,7 +1,7 @@ /********************************************************************\ * BitlBee -- An IRC to other IM-networks gateway * * * - * Copyright 2002-2012 Wilmer van der Gaast and others * + * Copyright 2002-2013 Wilmer van der Gaast and others * \********************************************************************/ /* HTTP(S) module */ @@ -68,6 +68,7 @@ struct http_request *http_dorequest( char *host, int port, int ssl, char *reques req->request = g_strdup( request ); req->request_length = strlen( request ); req->redir_ttl = 3; + req->content_length = -1; if( getenv( "BITLBEE_DEBUG" ) ) printf( "About to send HTTP request:\n%s\n", req->request ); @@ -95,7 +96,6 @@ struct http_request *http_dorequest_url( char *url_string, http_input_function f request = g_strdup_printf( "GET %s HTTP/1.0\r\n" "Host: %s\r\n" - "Connection: close\r\n" "User-Agent: BitlBee " BITLBEE_VERSION " " ARCH "/" CPU "\r\n" "\r\n", url->file, url->host ); @@ -192,14 +192,21 @@ static gboolean http_ssl_connected( gpointer data, int returncode, void *source, return http_connected( data, req->fd, cond ); } +typedef enum { + CR_OK, + CR_EOF, + CR_ERROR, + CR_ABORT, +} http_ret_t; + static gboolean http_handle_headers( struct http_request *req ); +static http_ret_t http_process_chunked_data( struct http_request *req, const char *buffer, int len ); +static http_ret_t http_process_data( struct http_request *req, const char *buffer, int len ); static gboolean http_incoming_data( gpointer data, int source, b_input_condition cond ) { struct http_request *req = data; char buffer[4096]; - char *s; - size_t content_length; int st; if( req->inpa > 0 ) @@ -243,53 +250,25 @@ static gboolean http_incoming_data( gpointer data, int source, b_input_condition } } - if( st > 0 && !req->sbuf ) + if( st > 0 ) { - req->reply_headers = g_realloc( req->reply_headers, 
req->bytes_read + st + 1 ); - memcpy( req->reply_headers + req->bytes_read, buffer, st ); - req->bytes_read += st; + http_ret_t c; - st = 0; - } - - if( st >= 0 && ( req->flags & HTTPC_STREAMING ) ) - { - if( !req->reply_body && - ( strstr( req->reply_headers, "\r\n\r\n" ) || - strstr( req->reply_headers, "\n\n" ) ) ) - { - size_t hlen; - - /* We've now received all headers, so process them once - before we start feeding back data. */ - if( !http_handle_headers( req ) ) - return FALSE; - - hlen = req->reply_body - req->reply_headers; - - req->sblen = req->bytes_read - hlen; - req->sbuf = g_memdup( req->reply_body, req->sblen + 1 ); - req->reply_headers = g_realloc( req->reply_headers, hlen + 1 ); - - req->reply_body = req->sbuf; - } - - if( st > 0 ) - { - int pos = req->reply_body - req->sbuf; - req->sbuf = g_realloc( req->sbuf, req->sblen + st + 1 ); - memcpy( req->sbuf + req->sblen, buffer, st ); - req->bytes_read += st; - req->sblen += st; - req->sbuf[req->sblen] = '\0'; - req->reply_body = req->sbuf + pos; - req->body_size = req->sblen - pos; - } + if( req->flags & HTTPC_CHUNKED ) + c = http_process_chunked_data( req, buffer, st ); + else + c = http_process_data( req, buffer, st ); - if( req->reply_body ) - req->func( req ); + if( c == CR_EOF ) + goto eof; + else if( c == CR_ERROR || c == CR_ABORT ) + return FALSE; } + if( req->content_length != -1 && + req->body_size >= req->content_length ) + goto eof; + if( ssl_pending( req->ssl ) ) return http_incoming_data( data, source, cond ); @@ -310,14 +289,6 @@ eof: req->status_string = g_strdup( "Empty HTTP reply" ); goto cleanup; } - - if( !( req->flags & HTTPC_STREAMING ) ) - { - /* Returns FALSE if we were redirected, in which case we should abort - and not run any callback yet. 
*/ - if( !http_handle_headers( req ) ) - return FALSE; - } cleanup: if( req->ssl ) @@ -325,17 +296,12 @@ cleanup: else closesocket( req->fd ); - if( ( s = get_rfc822_header( req->reply_headers, "Content-Length", 0 ) ) && - sscanf( s, "%zd", &content_length ) == 1 ) + if( req->body_size < req->content_length ) { - if( content_length < req->body_size ) - { - req->status_code = -1; - g_free( req->status_string ); - req->status_string = g_strdup( "Response truncated" ); - } + req->status_code = -1; + g_free( req->status_string ); + req->status_string = g_strdup( "Response truncated" ); } - g_free( s ); if( getenv( "BITLBEE_DEBUG" ) && req ) printf( "Finishing HTTP request with status: %s\n", @@ -346,11 +312,120 @@ cleanup: return FALSE; } +static http_ret_t http_process_chunked_data( struct http_request *req, const char *buffer, int len ) +{ + char *chunk, *eos, *s; + + if( len < 0 ) + return TRUE; + + if( len > 0 ) + { + req->cbuf = g_realloc( req->cbuf, req->cblen + len + 1 ); + memcpy( req->cbuf + req->cblen, buffer, len ); + req->cblen += len; + req->cbuf[req->cblen] = '\0'; + } + + /* Turns out writing a proper chunked-encoding state machine is not + that simple. :-( I've tested this one feeding it byte by byte so + I hope it's solid now. */ + chunk = req->cbuf; + eos = req->cbuf + req->cblen; + while( TRUE ) + { + int clen = 0; + + /* Might be a \r\n from the last chunk. */ + s = chunk; + while( isspace( *s ) ) + s ++; + /* Chunk length. Might be incomplete. */ + if( s < eos && sscanf( s, "%x", &clen ) != 1 ) + return CR_ERROR; + while( isxdigit( *s ) ) + s ++; + + /* If we read anything here, it *must* be \r\n. */ + if( strncmp( s, "\r\n", MIN( 2, eos - s ) ) != 0 ) + return CR_ERROR; + s += 2; + + if( s >= eos ) + break; + + /* 0-length chunk means end of response. */ + if( clen == 0 ) + return CR_EOF; + + /* Wait for the whole chunk to arrive. 
*/ + if( s + clen > eos ) + break; + if( http_process_data( req, s, clen ) != CR_OK ) + return CR_ABORT; + + chunk = s + clen; + } + + if( chunk != req->cbuf ) + { + req->cblen = eos - chunk; + s = g_memdup( chunk, req->cblen + 1 ); + g_free( req->cbuf ); + req->cbuf = s; + } + + return CR_OK; +} + +static http_ret_t http_process_data( struct http_request *req, const char *buffer, int len ) +{ + if( len <= 0 ) + return CR_OK; + + if( !req->reply_body ) + { + req->reply_headers = g_realloc( req->reply_headers, req->bytes_read + len + 1 ); + memcpy( req->reply_headers + req->bytes_read, buffer, len ); + req->bytes_read += len; + req->reply_headers[req->bytes_read] = '\0'; + + if( strstr( req->reply_headers, "\r\n\r\n" ) || + strstr( req->reply_headers, "\n\n" ) ) + { + /* We've now received all headers. Look for something + interesting. */ + if( !http_handle_headers( req ) ) + return CR_ABORT; + + /* Start parsing the body as chunked if required. */ + if( req->flags & HTTPC_CHUNKED ) + return http_process_chunked_data( req, NULL, 0 ); + } + } + else + { + int pos = req->reply_body - req->sbuf; + req->sbuf = g_realloc( req->sbuf, req->sblen + len + 1 ); + memcpy( req->sbuf + req->sblen, buffer, len ); + req->bytes_read += len; + req->sblen += len; + req->sbuf[req->sblen] = '\0'; + req->reply_body = req->sbuf + pos; + req->body_size = req->sblen - pos; + } + + if( ( req->flags & HTTPC_STREAMING ) && req->reply_body ) + req->func( req ); + + return CR_OK; +} + /* Splits headers and body. Checks result code, in case of 300s it'll handle redirects. If this returns FALSE, don't call any callbacks! */ static gboolean http_handle_headers( struct http_request *req ) { - char *end1, *end2; + char *end1, *end2, *s; int evil_server = 0; /* Zero termination is very convenient. 
*/ @@ -376,7 +451,7 @@ static gboolean http_handle_headers( struct http_request *req ) return TRUE; } - *end1 = 0; + *end1 = '\0'; if( getenv( "BITLBEE_DEBUG" ) ) printf( "HTTP response headers:\n%s\n", req->reply_headers ); @@ -386,7 +461,10 @@ static gboolean http_handle_headers( struct http_request *req ) else req->reply_body = end1 + 2; - req->body_size = req->reply_headers + req->bytes_read - req->reply_body; + /* Separately allocated space for headers and body. */ + req->sblen = req->body_size = req->reply_headers + req->bytes_read - req->reply_body; + req->sbuf = req->reply_body = g_memdup( req->reply_body, req->body_size + 1 ); + req->reply_headers = g_realloc( req->reply_headers, end1 - req->reply_headers + 1 ); if( ( end1 = strchr( req->reply_headers, ' ' ) ) != NULL ) { @@ -451,7 +529,7 @@ static gboolean http_handle_headers( struct http_request *req ) /* Since we don't cache the servername, and since we don't need this yet anyway, I won't implement it. */ - req->status_string = g_strdup( "Can't handle recursive redirects" ); + req->status_string = g_strdup( "Can't handle relative redirects" ); return TRUE; } @@ -459,7 +537,7 @@ static gboolean http_handle_headers( struct http_request *req ) { /* A whole URL */ url_t *url; - char *s; + char *s, *version, *headers; const char *new_method; s = strstr( loc, "\r\n" ); @@ -487,6 +565,7 @@ static gboolean http_handle_headers( struct http_request *req ) g_free( url ); return TRUE; } + headers = s; /* More or less HTTP/1.0 compliant, from my reading of RFC 2616. Always perform a GET request unless we received a 301. 
303 was @@ -506,9 +585,19 @@ static gboolean http_handle_headers( struct http_request *req ) /* 301 de-facto should stay POST, 307 specifally RFC 2616#10.3.8 */ new_method = "POST"; + if( ( version = strstr( req->request, " HTTP/" ) ) && + ( s = strstr( version, "\r\n" ) ) ) + { + version ++; + version = g_strndup( version, s - version ); + } + else + version = g_strdup( "HTTP/1.0" ); + /* Okay, this isn't fun! We have to rebuild the request... :-( */ - new_request = g_strdup_printf( "%s %s HTTP/1.0\r\nHost: %s%s", - new_method, url->file, url->host, s ); + new_request = g_strdup_printf( "%s %s %s\r\nHost: %s%s", + new_method, url->file, version, + url->host, headers ); new_host = g_strdup( url->host ); new_port = url->port; @@ -520,6 +609,7 @@ static gboolean http_handle_headers( struct http_request *req ) s[4] = '\0'; g_free( url ); + g_free( version ); } if( req->ssl ) @@ -556,13 +646,35 @@ static gboolean http_handle_headers( struct http_request *req ) g_free( req->request ); g_free( req->reply_headers ); + g_free( req->sbuf ); req->request = new_request; req->request_length = strlen( new_request ); req->bytes_read = req->bytes_written = req->inpa = 0; req->reply_headers = req->reply_body = NULL; + req->sbuf = req->cbuf = NULL; + req->sblen = req->cblen = 0; return FALSE; } + + if( ( s = get_rfc822_header( req->reply_headers, "Content-Length", 0 ) ) && + sscanf( s, "%d", &req->content_length ) != 1 ) + req->content_length = -1; + g_free( s ); + + if( ( s = get_rfc822_header( req->reply_headers, "Transfer-Encoding", 0 ) ) ) + { + if( strcasestr( s, "chunked" ) ) + { + req->flags |= HTTPC_CHUNKED; + req->cbuf = req->sbuf; + req->cblen = req->sblen; + + req->reply_body = req->sbuf = g_strdup( "" ); + req->body_size = req->sblen = 0; + } + g_free( s ); + } return TRUE; } @@ -606,5 +718,6 @@ static void http_free( struct http_request *req ) g_free( req->reply_headers ); g_free( req->status_string ); g_free( req->sbuf ); + g_free( req->cbuf ); g_free( req ); } diff 
--git a/lib/http_client.h b/lib/http_client.h index ca427118..1b86f228 100644 --- a/lib/http_client.h +++ b/lib/http_client.h @@ -41,6 +41,7 @@ typedef enum http_client_flags { HTTPC_STREAMING = 1, HTTPC_EOF = 2, + HTTPC_CHUNKED = 4, /* Let's reserve 0x1000000+ for lib users. */ } http_client_flags_t; @@ -76,10 +77,15 @@ struct http_request int inpa; int bytes_written; int bytes_read; + int content_length; /* "Content-Length:" header or -1 */ /* Used in streaming mode. Caller should read from reply_body. */ char *sbuf; size_t sblen; + + /* Chunked encoding only. Raw chunked stream is decoded from here. */ + char *cbuf; + size_t cblen; }; /* The _url variant is probably more useful than the raw version. The raw diff --git a/lib/oauth.c b/lib/oauth.c index 04949e1b..c78b4a43 100644 --- a/lib/oauth.c +++ b/lib/oauth.c @@ -261,7 +261,6 @@ static void *oauth_post_request( const char *url, GSList **params_, http_input_f "Host: %s\r\n" "Content-Type: application/x-www-form-urlencoded\r\n" "Content-Length: %zd\r\n" - "Connection: close\r\n" "\r\n" "%s", url_p.file, url_p.host, strlen( post ), post ); g_free( post ); diff --git a/lib/oauth2.c b/lib/oauth2.c index 69ce9d58..bfd4b143 100644 --- a/lib/oauth2.c +++ b/lib/oauth2.c @@ -95,7 +95,6 @@ int oauth2_access_token( const struct oauth2_service *sp, "Host: %s\r\n" "Content-Type: application/x-www-form-urlencoded\r\n" "Content-Length: %zd\r\n" - "Connection: close\r\n" "\r\n" "%s", url_p.file, url_p.host, strlen( args_s ), args_s ); g_free( args_s ); diff --git a/lib/proxy.c b/lib/proxy.c index 3e5c9d49..b6b02d72 100644 --- a/lib/proxy.c +++ b/lib/proxy.c @@ -157,7 +157,7 @@ static int proxy_connect_none(const char *host, unsigned short port_, struct PHB event_debug("bind( %d, \"%s\" ) failure\n", fd, global.conf->iface_out); } - event_debug("proxy_connect_none( \"%s\", %d ) = %d\n", host, port, fd); + event_debug("proxy_connect_none( \"%s\", %d ) = %d\n", host, port_, fd); if (connect(fd, phb->gai_cur->ai_addr, 
phb->gai_cur->ai_addrlen) < 0 && !sockerr_again()) { event_debug( "connect failed: %s\n", strerror(errno)); diff --git a/protocols/twitter/twitter.c b/protocols/twitter/twitter.c index 6d8b3fd0..4626cf55 100644 --- a/protocols/twitter/twitter.c +++ b/protocols/twitter/twitter.c @@ -288,13 +288,16 @@ static void twitter_init(account_t * acc) set_t *s; char *def_url; char *def_tul; + char *def_mentions; if (strcmp(acc->prpl->name, "twitter") == 0) { def_url = TWITTER_API_URL; def_tul = "20"; + def_mentions = "true"; } else { /* if( strcmp( acc->prpl->name, "identica" ) == 0 ) */ def_url = IDENTICA_API_URL; def_tul = "0"; + def_mentions = "false"; } s = set_add(&acc->set, "auto_reply_timeout", "10800", set_eval_int, acc); @@ -307,7 +310,7 @@ static void twitter_init(account_t * acc) s = set_add(&acc->set, "fetch_interval", "60", set_eval_int, acc); s->flags |= ACC_SET_OFFLINE_ONLY; - s = set_add(&acc->set, "fetch_mentions", "true", set_eval_bool, acc); + s = set_add(&acc->set, "fetch_mentions", def_mentions, set_eval_bool, acc); s = set_add(&acc->set, "message_length", "140", set_eval_int, acc); diff --git a/protocols/twitter/twitter_http.c b/protocols/twitter/twitter_http.c index 0f1ab518..f7ab6e18 100644 --- a/protocols/twitter/twitter_http.c +++ b/protocols/twitter/twitter_http.c @@ -77,7 +77,7 @@ struct http_request *twitter_http(struct im_connection *ic, char *url_string, ht } // Make the request. - g_string_printf(request, "%s %s%s%s%s HTTP/1.0\r\n" + g_string_printf(request, "%s %s%s%s%s HTTP/1.1\r\n" "Host: %s\r\n" "User-Agent: BitlBee " BITLBEE_VERSION " " ARCH "/" CPU "\r\n", is_post ? "POST" : "GET", |