From 8bd866f3d336cb695006bc48592cf7475139aa36 Mon Sep 17 00:00:00 2001 From: Wilmer van der Gaast Date: Sat, 10 Nov 2012 22:25:58 +0000 Subject: Reworked http_client a little bit to support streaming besides just buffering a complete response before giving it back to the caller. --- lib/http_client.c | 157 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 110 insertions(+), 47 deletions(-) (limited to 'lib/http_client.c') diff --git a/lib/http_client.c b/lib/http_client.c index 7ed539d0..17e21a49 100644 --- a/lib/http_client.c +++ b/lib/http_client.c @@ -192,12 +192,13 @@ static gboolean http_ssl_connected( gpointer data, int returncode, void *source, return http_connected( data, req->fd, cond ); } +static gboolean http_handle_headers( struct http_request *req ); + static gboolean http_incoming_data( gpointer data, int source, b_input_condition cond ) { struct http_request *req = data; - int evil_server = 0; char buffer[2048]; - char *end1, *end2, *s; + char *s; size_t content_length; int st; @@ -217,12 +218,12 @@ static gboolean http_incoming_data( gpointer data, int source, b_input_condition servers that LOVE to send invalid TLS packets that abort connections! \o/ */ - goto got_reply; + goto eof; } } else if( st == 0 ) { - goto got_reply; + goto eof; } } else @@ -238,28 +239,67 @@ static gboolean http_incoming_data( gpointer data, int source, b_input_condition } else if( st == 0 ) { - goto got_reply; + goto eof; } } - if( st > 0 ) + if( st > 0 && !req->sbuf ) { req->reply_headers = g_realloc( req->reply_headers, req->bytes_read + st + 1 ); memcpy( req->reply_headers + req->bytes_read, buffer, st ); req->bytes_read += st; + + st = 0; + } + + if( st >= 0 && ( req->flags & HTTPC_STREAMING ) ) + { + if( !req->reply_body && + ( strstr( req->reply_headers, "\r\n\r\n" ) || + strstr( req->reply_headers, "\n\n" ) ) ) + { + size_t hlen; + + /* We've now received all headers, so process them once + before we start feeding back data. 
*/ + if( !http_handle_headers( req ) ) + return FALSE; + + hlen = req->reply_body - req->reply_headers; + + req->sblen = req->bytes_read - hlen; + req->sbuf = g_memdup( req->reply_body, req->sblen + 1 ); + req->reply_headers = g_realloc( req->reply_headers, hlen + 1 ); + + req->reply_body = req->sbuf; + } + + if( st > 0 ) + { + int pos = req->reply_body - req->sbuf; + req->sbuf = g_realloc( req->sbuf, req->sblen + st + 1 ); + memcpy( req->sbuf + req->sblen, buffer, st ); + req->bytes_read += st; + req->sblen += st; + req->reply_body = req->sbuf + pos; + req->body_size = req->sblen - pos; + } + + if( req->reply_body ) + req->func( req ); } + if( ssl_pending( req->ssl ) ) + return http_incoming_data( data, source, cond ); + /* There will be more! */ req->inpa = b_input_add( req->fd, req->ssl ? ssl_getdirection( req->ssl ) : B_EV_IO_READ, http_incoming_data, req ); - if( ssl_pending( req->ssl ) ) - return http_incoming_data( data, source, cond ); - else - return FALSE; + return FALSE; -got_reply: +eof: /* Maybe if the webserver is overloaded, or when there's bad SSL support... */ if( req->bytes_read == 0 ) @@ -268,8 +308,50 @@ got_reply: goto cleanup; } + if( !( req->flags & HTTPC_STREAMING ) ) + { + /* Returns FALSE if we were redirected, in which case we should abort + and not run any callback yet. */ + if( !http_handle_headers( req ) ) + return FALSE; + } + +cleanup: + if( req->ssl ) + ssl_disconnect( req->ssl ); + else + closesocket( req->fd ); + + if( ( s = get_rfc822_header( req->reply_headers, "Content-Length", 0 ) ) && + sscanf( s, "%zd", &content_length ) == 1 ) + { + if( content_length < req->body_size ) + { + req->status_code = -1; + g_free( req->status_string ); + req->status_string = g_strdup( "Response truncated" ); + } + } + g_free( s ); + + if( getenv( "BITLBEE_DEBUG" ) && req ) + printf( "Finishing HTTP request with status: %s\n", + req->status_string ? 
req->status_string : "NULL" ); + + req->func( req ); + http_free( req ); + return FALSE; +} + +/* Splits headers and body. Checks result code, in case of 300s it'll handle + redirects. If this returns FALSE, don't call any callbacks! */ +static gboolean http_handle_headers( struct http_request *req ) +{ + char *end1, *end2; + int evil_server = 0; + /* Zero termination is very convenient. */ - req->reply_headers[req->bytes_read] = 0; + req->reply_headers[req->bytes_read] = '\0'; /* Find the separation between headers and body, and keep stupid webservers in mind. */ @@ -288,7 +370,7 @@ got_reply: else { req->status_string = g_strdup( "Malformed HTTP reply" ); - goto cleanup; + return TRUE; } *end1 = 0; @@ -305,7 +387,7 @@ got_reply: if( ( end1 = strchr( req->reply_headers, ' ' ) ) != NULL ) { - if( sscanf( end1 + 1, "%d", &req->status_code ) != 1 ) + if( sscanf( end1 + 1, "%hd", &req->status_code ) != 1 ) { req->status_string = g_strdup( "Can't parse status code" ); req->status_code = -1; @@ -348,7 +430,7 @@ got_reply: if( loc == NULL ) /* We can't handle this redirect... */ { req->status_string = g_strdup( "Can't locate Location: header" ); - goto cleanup; + return TRUE; } loc += 11; @@ -368,7 +450,7 @@ got_reply: req->status_string = g_strdup( "Can't handle recursive redirects" ); - goto cleanup; + return TRUE; } else { @@ -379,7 +461,7 @@ got_reply: s = strstr( loc, "\r\n" ); if( s == NULL ) - goto cleanup; + return TRUE; url = g_new0( url_t, 1 ); *s = 0; @@ -388,7 +470,7 @@ got_reply: { req->status_string = g_strdup( "Malformed redirect URL" ); g_free( url ); - goto cleanup; + return TRUE; } /* Find all headers and, if necessary, the POST request contents. @@ -400,7 +482,7 @@ got_reply: { req->status_string = g_strdup( "Error while rebuilding request string" ); g_free( url ); - goto cleanup; + return TRUE; } /* More or less HTTP/1.0 compliant, from my reading of RFC 2616. 
@@ -466,7 +548,7 @@ got_reply: { req->status_string = g_strdup( "Connection problem during redirect" ); g_free( new_request ); - goto cleanup; + return TRUE; } g_free( req->request ); @@ -479,35 +561,16 @@ got_reply: return FALSE; } - /* Assume that a closed connection means we're finished, this indeed - breaks with keep-alive connections and faulty connections. */ - /* req->finished = 1; */ + return TRUE; +} -cleanup: - if( req->ssl ) - ssl_disconnect( req->ssl ); - else - closesocket( req->fd ); - - if( ( s = get_rfc822_header( req->reply_headers, "Content-Length", 0 ) ) && - sscanf( s, "%zd", &content_length ) == 1 ) +void http_flush_bytes( struct http_request *req, size_t len ) +{ + if( len > 0 && len <= req->body_size ) { - if( content_length < req->body_size ) - { - req->status_code = -1; - g_free( req->status_string ); - req->status_string = g_strdup( "Response truncated" ); - } + req->reply_body += len; + req->body_size -= len; } - g_free( s ); - - if( getenv( "BITLBEE_DEBUG" ) && req ) - printf( "Finishing HTTP request with status: %s\n", - req->status_string ? req->status_string : "NULL" ); - - req->func( req ); - http_free( req ); - return FALSE; } static void http_free( struct http_request *req ) @@ -515,6 +578,6 @@ static void http_free( struct http_request *req ) g_free( req->request ); g_free( req->reply_headers ); g_free( req->status_string ); + g_free( req->sbuf ); g_free( req ); } - -- cgit v1.2.3 From ddc2de54664ec25b95bbce997fbbb6a7104f1203 Mon Sep 17 00:00:00 2001 From: Wilmer van der Gaast Date: Sat, 10 Nov 2012 23:52:21 +0000 Subject: Very immature code for reading from the streaming API. It reads from a fixed URL and tries to parse individual JSON objects. Not doing anything useful with it. 
--- lib/http_client.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/http_client.c') diff --git a/lib/http_client.c b/lib/http_client.c index 17e21a49..acbf230c 100644 --- a/lib/http_client.c +++ b/lib/http_client.c @@ -281,6 +281,7 @@ static gboolean http_incoming_data( gpointer data, int source, b_input_condition memcpy( req->sbuf + req->sblen, buffer, st ); req->bytes_read += st; req->sblen += st; + req->sbuf[req->sblen] = '\0'; req->reply_body = req->sbuf + pos; req->body_size = req->sblen - pos; } -- cgit v1.2.3 From 1388d303ba0d2097ff745d4a17192195cebbd349 Mon Sep 17 00:00:00 2001 From: Wilmer van der Gaast Date: Sun, 11 Nov 2012 17:57:20 +0000 Subject: Mostly finished HTTP streaming support: Shrink the buffer and add a http_close(). --- lib/http_client.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) (limited to 'lib/http_client.c') diff --git a/lib/http_client.c b/lib/http_client.c index acbf230c..5fc7731f 100644 --- a/lib/http_client.c +++ b/lib/http_client.c @@ -567,13 +567,35 @@ static gboolean http_handle_headers( struct http_request *req ) void http_flush_bytes( struct http_request *req, size_t len ) { - if( len > 0 && len <= req->body_size ) + if( len <= 0 || len > req->body_size || !( req->flags & HTTPC_STREAMING ) ) + return; + + req->reply_body += len; + req->body_size -= len; + + if( req->reply_body - req->sbuf >= 512 ) { - req->reply_body += len; - req->body_size -= len; + printf( "Wasting %ld bytes, cleaning up stream buffer\n", req->reply_body - req->sbuf ); + char *new = g_memdup( req->reply_body, req->body_size + 1 ); + g_free( req->sbuf ); + req->reply_body = req->sbuf = new; + req->sblen = req->body_size; } } +void http_close( struct http_request *req ) +{ + if( !req ) + return; + + if( req->ssl ) + ssl_disconnect( req->ssl ); + else + closesocket( req->fd ); + + http_free( req ); +} + static void http_free( struct http_request *req ) { g_free( req->request ); -- cgit v1.2.3 From 
dd672e2c4d0dcf73a30be3d8f7fc2ec38cb6450e Mon Sep 17 00:00:00 2001 From: Wilmer van der Gaast Date: Sun, 11 Nov 2012 21:52:26 +0000 Subject: Detect and handle streaming connection loss. --- lib/http_client.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/http_client.c') diff --git a/lib/http_client.c b/lib/http_client.c index 5fc7731f..793c5cc1 100644 --- a/lib/http_client.c +++ b/lib/http_client.c @@ -301,6 +301,8 @@ static gboolean http_incoming_data( gpointer data, int source, b_input_condition return FALSE; eof: + req->flags |= HTTPC_EOF; + /* Maybe if the webserver is overloaded, or when there's bad SSL support... */ if( req->bytes_read == 0 ) -- cgit v1.2.3 From 5f2f72849ec0713d65b709e6962aeaac25aa89c7 Mon Sep 17 00:00:00 2001 From: Wilmer van der Gaast Date: Sat, 24 Nov 2012 19:20:57 +0000 Subject: Increasing http_client buffersize to 4KB. You'd think that with things like MTU in mind it's not a very useful change, but I seem to be getting data in bigger chunks most of the time. Likely because with SSL you have record (?) boundaries and compression. This should reduce I/O overhead just a little bit. --- lib/http_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/http_client.c') diff --git a/lib/http_client.c b/lib/http_client.c index 793c5cc1..35d8081f 100644 --- a/lib/http_client.c +++ b/lib/http_client.c @@ -197,7 +197,7 @@ static gboolean http_handle_headers( struct http_request *req ); static gboolean http_incoming_data( gpointer data, int source, b_input_condition cond ) { struct http_request *req = data; - char buffer[2048]; + char buffer[4096]; char *s; size_t content_length; int st; -- cgit v1.2.3 From 898c08e1d8abde8cc842ae619281b6cf4c1b7fdd Mon Sep 17 00:00:00 2001 From: Wilmer van der Gaast Date: Sun, 25 Nov 2012 11:47:23 +0000 Subject: Clean up streaming HTTP event handler, which should fix crash-bugs with GLib. Forgot to do this so far because my libevent code let me get away with it. 
--- lib/http_client.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/http_client.c') diff --git a/lib/http_client.c b/lib/http_client.c index 35d8081f..e368c0dc 100644 --- a/lib/http_client.c +++ b/lib/http_client.c @@ -590,6 +590,9 @@ void http_close( struct http_request *req ) if( !req ) return; + if( req->inpa > 0 ) + b_event_remove( req->inpa ); + if( req->ssl ) ssl_disconnect( req->ssl ); else -- cgit v1.2.3