/********************************************************************\
* BitlBee -- An IRC to other IM-networks gateway *
* *
* Copyright 2002-2013 Wilmer van der Gaast and others *
\********************************************************************/
/* HTTP(S) module */
/*
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License with
the Debian GNU/Linux distribution in /usr/share/common-licenses/GPL;
if not, write to the Free Software Foundation, Inc., 51 Franklin St.,
Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <string.h>
#include <stdio.h>
#include "http_client.h"
#include "url.h"
#include "sock.h"
static gboolean http_connected(gpointer data, int source, b_input_condition cond);
static gboolean http_ssl_connected(gpointer data, int returncode, void *source, b_input_condition cond);
static gboolean http_incoming_data(gpointer data, int source, b_input_condition cond);
static void http_free(struct http_request *req);
struct http_request *http_dorequest(char *host, int port, int ssl, char *request, http_input_function func,
gpointer data)
{
struct http_request *req;
int error = 0;
req = g_new0(struct http_request, 1);
if (ssl) {
req->ssl = ssl_connect(host, port, TRUE, http_ssl_connected, req);
if (req->ssl == NULL) {
error = 1;
}
} else {
req->fd = proxy_connect(host, port, http_connected, req);
if (req->fd < 0) {
error = 1;
}
}
if (error) {
http_free(req);
return NULL;
}
req->func = func;
req->data = data;
req->request = g_strdup(request);
req->request_length = strlen(request);
req->redir_ttl = 3;
req->content_length = -1;
if (getenv("BITLBEE_DEBUG")) {
printf("About to send HTTP request:\n%s\n", req->request);
}
return req;
}
struct http_request *http_dorequest_url(char *url_string, http_input_function func, gpointer data)
{
url_t *url = g_new0(url_t, 1);
char *request;
void *ret;
if (!url_set(url, url_string)) {
g_free(url);
return NULL;
}
if (url->proto != PROTO_HTTP && url->proto != PROTO_HTTPS) {
g_free(url);
return NULL;
}
request = g_strdup_printf("GET %s HTTP/1.0\r\n"
"Host: %s\r\n"
"User-Agent: BitlBee " BITLBEE_VERSION " " ARCH "/" CPU "\r\n"
"\r\n", url->file, url->host);
ret = http_dorequest(url->host, url->port,
url->proto == PROTO_HTTPS, request, func, data);
g_free(url);
g_free(request);
return ret;
}
/* This one is actually pretty simple... Might get more calls if we can't write
the whole request at once. */
static gboolean http_connected(gpointer data, int source, b_input_condition cond)
{
struct http_request *req = data;
int st;
if (source < 0) {
goto error;
}
if (req->inpa > 0) {
b_event_remove(req->inpa);
}
sock_make_nonblocking(req->fd);
if (req->ssl) {
st = ssl_write(req->ssl, req->request + req->bytes_written,
req->request_length - req->bytes_written);
if (st < 0) {
if (ssl_errno != SSL_AGAIN) {
ssl_disconnect(req->ssl);
goto error;
}
}
} else {
st = write(source, req->request + req->bytes_written,
req->request_length - req->bytes_written);
if (st < 0) {
if (!sockerr_again()) {
closesocket(req->fd);
goto error;
}
}
}
if (st > 0) {
req->bytes_written += st;
}
if (req->bytes_written < req->request_length) {
req->inpa = b_input_add(source,
req->ssl ? ssl_getdirection(req->ssl) : B_EV_IO_WRITE,
http_connected, req);
} else {
req->inpa = b_input_add(source, B_EV_IO_READ, http_incoming_data, req);
}
return FALSE;
error:
if (req->status_string == NULL) {
req->status_string = g_strdup("Error while writing HTTP request");
}
req->func(req);
http_free(req);
return FALSE;
}
static gboolean http_ssl_connected(gpointer data, int returncode, void *source, b_input_condition cond)
{
struct http_request *req = data;
if (source == NULL) {
if (returncode != 0) {
char *err = ssl_verify_strerror(returncode);
req->status_string = g_strdup_printf(
"Certificate verification problem 0x%x: %s",
returncode, err ? err : "Unknown");
g_free(err);
}
return http_connected(data, -1, cond);
}
req->fd = ssl_getfd(source);
return http_connected(data, req->fd, cond);
}
typedef enum {
CR_OK,
CR_EOF,
CR_ERROR,
CR_ABORT,
} http_ret_t;
static gboolean http_handle_headers(struct http_request *req);
static http_ret_t http_process_chunked_data(struct http_request *req, const char *buffer, int len);
static http_ret_t http_process_data(struct http_request *req, const char *buffer, int len);
static gboolean http_incoming_data(gpointer data, int source, b_input_condition cond)
{
struct http_request *req = data;
char buffer[4096];
int st;
if (req->inpa > 0) {
b_event_remove(req->inpa);
req->inpa = 0;
}
if (req->ssl) {
st = ssl_read(req->ssl, buffer, sizeof(buffer));
if (st < 0) {
if (ssl_errno != SSL_AGAIN) {
/* goto cleanup; */
/* YAY! We have to deal with crappy Microsoft
servers that LOVE to send invalid TLS
packets that abort connections! \o/ */
goto eof;
}
} else if (st == 0) {
goto eof;
}
} else {
st = read(req->fd, buffer, sizeof(buffer));
if (st < 0) {
if (!sockerr_again()) {
req->status_string = g_strdup(strerror(errno));
goto cleanup;
}
} else if (st == 0) {
goto eof;
}
}
if (st > 0) {
http_ret_t c;
if (req->flags & HTTPC_CHUNKED) {
c = http_process_chunked_data(req, buffer, st);
} else {
c = http_process_data(req, buffer, st);
}
if (c == CR_EOF) {
goto eof;
} else if (c == CR_ERROR || c == CR_ABORT) {
return FALSE;
}
}
if (req->content_length != -1 &&
req->body_size >= req->content_length) {
goto eof;
}
if (ssl_pending(req->ssl)) {
return http_incoming_data(data, source, cond);
}
/* There will be more! */
req->inpa = b_input_add(req->fd,
req->ssl ? ssl_getdirection(req->ssl) : B_EV_IO_READ,
http_incoming_data, req);
return FALSE;
eof:
req->flags |= HTTPC_EOF;
/* Maybe if the webserver is overloaded, or when there's bad SSL
support... */
if (req->bytes_read == 0) {
req->status_string = g_strdup("Empty HTTP reply");
goto cleanup;
}
cleanup:
/* Avoid g_source_remove warnings */
req->inpa = 0;
if (req->ssl) {
ssl_disconnect(req->ssl);
} else {
closesocket(req->fd);
}
if (req->body_size < req->content_length) {
req->status_code = -1;
g_free(req->status_string);
req->status_string = g_strdup("Response truncated");
}
if (getenv("BITLBEE_DEBUG") && req) {
printf("Finishing HTTP request with status: %s\n",
req->status_string ? req->status_string : "NULL");
}
req->func(req);
http_free(req);
return FALSE;
}
static http_ret_t http_process_chunked_data(struct http_request *req, const char *buffer, int len)
{
char *chunk, *eos, *s;
if (len < 0) {
return TRUE;
}
if (len > 0) {
req->cbuf = g_realloc(req->cbuf, req->cblen + len + 1);
memcpy(req->cbuf + req->cblen, buffer, len);
req->cblen += len;
req->cbuf[req->cblen] = '\0';
}
/* Turns out writing a proper chunked-encoding state machine is not
that simple. :-( I've tested this one feeding it byte by byte so
I hope it's solid now. */
chunk = req->cbuf;
eos = req->cbuf + req->cblen;
while (TRUE) {
int clen = 0;
/* Might be a \r\n from the last chunk. */
s = chunk;
while (g_ascii_isspace(*s)) {
s++;
}
/* Chunk length. Might be incomplete. */
if (s < eos && sscanf(s, "%x", &clen) != 1) {
return CR_ERROR;
}
while (g_ascii_isxdigit(*s)) {
s++;
}
/* If we read anything here, it *must* be \r\n. */
if (strncmp(s, "\r\n", MIN(2, eos - s)) != 0) {
return CR_ERROR;
}
s += 2;
if (s >= eos) {
break;
}
/* 0-length chunk means end of response. */
if (clen == 0) {
return CR_EOF;
}
/* Wait for the whole chunk to arrive. */
if (s + clen > eos) {
break;
}
if (http_process_data(req, s, clen) != CR_OK) {
return CR_ABORT;
}
chunk = s + clen;
}
if (chunk != req->cbuf) {
req->cblen = eos - chunk;
s = g_memdup(chunk, req->cblen + 1);
g_free(req->cbuf);
req->cbuf = s;
}
return CR_OK;
}
static http_ret_t http_process_data(struct http_request *req, const char *buffer, int len)
{
if (len <= 0) {
return CR_OK;
}
if (!req->reply_body) {
req->reply_headers = g_realloc(req->reply_headers, req->bytes_read + len + 1);
memcpy(req->reply_headers + req->bytes_read, buffer, len);
req->bytes_read += len;
req->reply_headers[req->bytes_read] = '\0';
if (strstr(req->reply_headers, "\r\n\r\n") ||
strstr(req->reply_headers, "\n\n")) {
/* We've now received all headers. Look for something
interesting. */
if (!http_handle_headers(req)) {
return CR_ABORT;
}
/* Start parsing the body as chunked if required. */
if (req->flags & HTTPC_CHUNKED) {
return http_process_chunked_data(req, NULL, 0);
}
}
} else {
int pos = req->reply_body - req->sbuf;
req->sbuf = g_realloc(req->sbuf, req->sblen + len + 1);
memcpy(req->sbuf + req->sblen, buffer, len);
req->bytes_read += len;
req->sblen += len;
req->sbuf[req->sblen] = '\0';
req->reply_body = req->sbuf + pos;
req->body_size = req->sblen - pos;
}
if ((req->flags & HTTPC_STREAMING) && req->reply_body) {
req->func(req);
}
return CR_OK;
}
/* Splits headers and body. Checks result code, in case of 300s it'll handle
redirects. If this returns FALSE, don't call any callbacks! */
static gboolean http_handle_headers(struct http_request *req)
{
char *end1, *end2, *s;
int evil_server = 0;
/* Zero termination is very convenient. */
req->reply_headers[req->bytes_read] = '\0';
/* Find the separation between headers and body, and keep stupid
webservers in mind. */
end1 = strstr(req->reply_headers, "\r\n\r\n");
end2 = strstr(req->reply_headers, "\n\n");
if (end2 && end2 < end1) {
end1 = end2 + 1;
evil_server = 1;
} else if (end1) {
end1 += 2;
} else {
req->status_string = g_strdup("Malformed HTTP reply");
return TRUE;
}
*end1 = '\0';
if (getenv("BITLBEE_DEBUG")) {
printf("HTTP response headers:\n%s\n", req->reply_headers);
}
if (evil_server) {
req->reply_body = end1 + 1;
} else {
req->reply_body = end1 + 2;
}
/* Separately allocated space for headers and body. */
req->sblen = req->body_size = req->reply_headers + req->bytes_read - req->reply_body;
req->sbuf = req->reply_body = g_memdup(req->reply_body, req->body_size + 1);
req->reply_headers = g_realloc(req->reply_headers, end1 - req->reply_headers + 1);
if ((end1 = strchr(req->reply_headers, ' ')) != NULL) {
if (sscanf(end1 + 1, "%hd", &req->status_code) != 1) {
req->status_string = g_strdup("Can't parse status code");
req->status_code = -1;
} else {
char *eol;
if (evil_server) {
eol = strchr(end1, '\n');
} else {
eol = strchr(end1, '\r');
}
req->status_string = g_strndup(end1 + 1, eol - end1 - 1);
/* Just to be sure... */
if ((eol = strchr(req->status_string, '\r'))) {
*eol = 0;
}
if ((eol = strchr(req->status_string, '\n'))) {
*eol = 0;
}
}
} else {
req->status_string = g_strdup("Can't locate status code");
req->status_code = -1;
}
if (((req->status_code >= 301 && req->status_code <= 303) ||
req->status_code == 307) && req->redir_ttl-- > 0) {
char *loc, *new_request, *new_host;
int error = 0, new_port, new_proto;
/* We might fill it again, so let's not leak any memory. */
g_free(req->status_string);
req->status_string = NULL;
loc = strstr(req->reply_headers, "\nLocation: ");
if (loc == NULL) { /* We can't handle this redirect... */
req->status_string = g_strdup("Can't locate Location: header");
return TRUE;
}
loc += 11;
while (*loc == ' ') {
loc++;
}
/* TODO/FIXME: Possibly have to handle relative redirections,
and rewrite Host: headers. Not necessary for now, it's
enough for passport authentication like this. */
if (*loc == '/') {
/* Just a different pathname... */
/* Since we don't cache the servername, and since we
don't need this yet anyway, I won't implement it. */
req->status_string = g_strdup("Can't handle relative redirects");
return TRUE;
} else {
/* A whole URL */
url_t *url;
char *s, *version, *headers;
const char *new_method;
s = strstr(loc, "\r\n");
if (s == NULL) {
return TRUE;
}
url = g_new0(url_t, 1);
*s = 0;
if (!url_set(url, loc)) {
req->status_string = g_strdup("Malformed redirect URL");
g_free(url);
return TRUE;
}
/* Find all headers and, if necessary, the POST request contents.
Skip the old Host: header though. This crappy code here means
anything using this http_client MUST put the Host: header at
the top. */
if (!((s = strstr(req->request, "\r\nHost: ")) &&
(s = strstr(s + strlen("\r\nHost: "), "\r\n")))) {
req->status_string = g_strdup("Error while rebuilding request string");
g_free(url);
return TRUE;
}
headers = s;
/* More or less HTTP/1.0 compliant, from my reading of RFC 2616.
Always perform a GET request unless we received a 301. 303 was
meant for this but it's HTTP/1.1-only and we're specifically
speaking HTTP/1.0. ...
Well except someone at identi.ca's didn't bother reading any
RFCs and just return HTTP/1.1-specific status codes to HTTP/1.0
requests. Fuckers. So here we are, handle 301..303,307. */
if (strncmp(req->request, "GET", 3) == 0) {
/* GETs never become POSTs. */
new_method = "GET";
} else if (req->status_code == 302 || req->status_code == 303) {
/* 302 de-facto becomes GET, 303 as specified by RFC 2616#10.3.3 */
new_method = "GET";
} else {
/* 301 de-facto should stay POST, 307 specifally RFC 2616#10.3.8 */
new_method = "POST";
}
if ((version = strstr(req->request, " HTTP/")) &&
(s = strstr(version, "\r\n"))) {
version++;
version = g_strndup(version, s - version);
} else {
version = g_strdup("HTTP/1.0");
}
/* Okay, this isn't fun! We have to rebuild the request... :-( */
new_request = g_strdup_printf("%s %s %s\r\nHost: %s%s",
new_method, url->file, version,
url->host, headers);
new_host = g_strdup(url->host);
new_port = url->port;
new_proto = url->proto;
/* If we went from POST to GET, truncate the request content. */
if (new_request[0] != req->request[0] && new_request[0] == 'G' &&
(s = strstr(new_request, "\r\n\r\n"))) {
s[4] = '\0';
}
g_free(url);
g_free(version);
}
if (req->ssl) {
ssl_disconnect(req->ssl);
} else {
closesocket(req->fd);
}
req->fd = -1;
req->ssl = NULL;
if (getenv("BITLBEE_DEBUG")) {
printf("New headers for redirected HTTP request:\n%s\n", new_request);
}
if (new_proto == PROTO_HTTPS) {
req->ssl = ssl_connect(new_host, new_port, TRUE, http_ssl_connected, req);
if (req->ssl == NULL) {
error = 1;
}
} else {
req->fd = proxy_connect(new_host, new_port, http_connected, req);
if (req->fd < 0) {
error = 1;
}
}
g_free(new_host);
if (error) {
req->status_string = g_strdup("Connection problem during redirect");
g_free(new_request);
return TRUE;
}
g_free(req->request);
g_free(req->reply_headers);
g_free(req->sbuf);
req->request = new_request;
req->request_length = strlen(new_request);
req->bytes_read = req->bytes_written = req->inpa = 0;
req->reply_headers = req->reply_body = NULL;
req->sbuf = req->cbuf = NULL;
req->sblen = req->cblen = 0;
return FALSE;
}
if ((s = get_rfc822_header(req->reply_headers, "Content-Length", 0)) &&
sscanf(s, "%d", &req->content_length) != 1) {
req->content_length = -1;
}
g_free(s);
if ((s = get_rfc822_header(req->reply_headers, "Transfer-Encoding", 0))) {
if (strcasestr(s, "chunked")) {
req->flags |= HTTPC_CHUNKED;
req->cbuf = req->sbuf;
req->cblen = req->sblen;
req->reply_body = req->sbuf = g_strdup("");
req->body_size = req->sblen = 0;
}
g_free(s);
}
return TRUE;
}
void http_flush_bytes(struct http_request *req, size_t len)
{
if (len <= 0 || len > req->body_size || !(req->flags & HTTPC_STREAMING)) {
return;
}
req->reply_body += len;
req->body_size -= len;
if (req->reply_body - req->sbuf >= 512) {
char *new = g_memdup(req->reply_body, req->body_size + 1);
g_free(req->sbuf);
req->reply_body = req->sbuf = new;
req->sblen = req->body_size;
}
}
void http_close(struct http_request *req)
{
if (!req) {
return;
}
if (req->inpa > 0) {
b_event_remove(req->inpa);
}
if (req->ssl) {
ssl_disconnect(req->ssl);
} else {
closesocket(req->fd);
}
http_free(req);
}
static void http_free(struct http_request *req)
{
g_free(req->request);
g_free(req->reply_headers);
g_free(req->status_string);
g_free(req->sbuf);
g_free(req->cbuf);
g_free(req);
}