diff options
-rw-r--r-- | tileserver/cdb.c | 444 | ||||
-rw-r--r-- | tileserver/cdb.h | 56 | ||||
-rwxr-xr-x | tileserver/cdbtestrun | 149 |
3 files changed, 649 insertions, 0 deletions
diff --git a/tileserver/cdb.c b/tileserver/cdb.c new file mode 100644 index 000000000..a669bf91c --- /dev/null +++ b/tileserver/cdb.c @@ -0,0 +1,444 @@ +/* + * cdb.c: + * Read data from Dan-Bernstein-style CDB files. + * + * See: http://cr.yp.to/cdb/cdb.txt + * + * Copyright (c) 2006 UK Citizens Online Democracy. All rights reserved. + * Email: chris@mysociety.org; WWW: http://www.mysociety.org/ + * + */ + +static const char rcsid[] = "$Id: cdb.c,v 1.1 2006-09-19 18:26:35 chris Exp $"; + +#include <sys/types.h> + +#include <errno.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <sys/stat.h> + +#define CDB_IMPL +#include "cdb.h" + +#ifdef VERBOSE_DEBUGGING +# define debug(...) \ + do { \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + } while (0) +#else +# define debug(...) do { } while (0) +#endif /* VERBOSE_DEBUGGING */ + +/* struct cdb + * Internals of CDB object. */ +struct cdb { + FILE *c_fp; + struct stat c_st; + bool c_close_on_destroy; + struct { + uint32_t off, len; + } c_hashlocs[256]; +}; + +/* Length of the initial portion of the file which gives the hash table + * locations. */ +#define HASHPTR_LEN (256 * 8) + +static size_t do_fread(FILE *fp, void *buf, const size_t len) { + return fread(buf, 1, len, fp); +} + +cdb_hash_t cdb_hash(const unsigned char *buf, const size_t len) { + uint32_t h = 5381; + size_t i; + for (i = 0; i < len; ++i) { + h = ((h << 5) + h) ^ buf[i]; + } + return h; +} + +cdb_hash_t cdb_hash_str(const char *s) { + return cdb_hash((unsigned char*)s, strlen(s)); +} + +cdb_hash_t cdb_hash_datum(const cdb_datum d) { + return cdb_hash(d->cd_buf, d->cd_len); +} + +/* cdb_open_fp FP + * Open a CDB file for which a stdio file pointer FP is available. Returns a + * cdb object on success or NULL on failure, setting cdb_errno. */ +cdb cdb_open_fp(FILE *fp) { + struct cdb *C = NULL, Cz = {0}; + unsigned char buf[HASHPTR_LEN]; + struct stat st; + int i; + +#define FAIL(e) do { cdb_errno = e; goto fail; } while (0) + + if (-1 == fstat(fileno(fp), &st)) + FAIL(errno); + + if (st.st_size < HASHPTR_LEN) + FAIL(CDB_FILE_TOO_SMALL); + + if (!(C = malloc(sizeof *C))) + FAIL(CDB_OUT_OF_MEMORY); + + if (st.st_size > 0xffffffffll) + FAIL(CDB_FILE_TOO_BIG); + + *C = Cz; + C->c_fp = fp; + C->c_st = st; + + if (HASHPTR_LEN != do_fread(fp, buf, HASHPTR_LEN)) { + if (feof(fp)) + FAIL(CDB_FILE_TRUNCATED); + else + FAIL(errno); + } + + for (i = 0; i < 256; ++i) { + memcpy(&C->c_hashlocs[i].off, buf + 8 * i, 4); + memcpy(&C->c_hashlocs[i].len, buf + 8 * i + 4, 4); + /* byte ordering -- CDB is defined as a little-endian format, so + * this is fine on i386, but not elsewhere. */ + /* NB len is in slots not bytes */ + if (C->c_hashlocs[i].off < HASHPTR_LEN + || C->c_hashlocs[i].off > C->c_st.st_size + || C->c_st.st_size - C->c_hashlocs[i].off < C->c_hashlocs[i].len * 8) + FAIL(CDB_BAD_HASHLOC_PTR); + } + + return C; + +fail: + free(C); + return NULL; + +#undef FAIL +} + +/* yuk */ +#define ALIGNMENT 4 /* __alignof(long double) on i386 */ +typedef uint32_t ptr_int_t; /* XXX needs changing on 64-bit architectures */ + +/* cdb_datum_alloc LEN + * Allocate space for a single cdb_datum holding up to LEN bytes. Free with + * cdb_datum_free. Returns the newly-allocated datum on success or NULL on + * failure (out of memory). */ +cdb_datum cdb_datum_alloc(const size_t len) { + cdb_datum d; + if (!(d = malloc((sizeof *d) + ALIGNMENT + len))) { + cdb_errno = CDB_OUT_OF_MEMORY; + return NULL; + } + d->cd_buf = (void*)(((ptr_int_t)d + (sizeof *d) + ALIGNMENT) & ~(ALIGNMENT - 1)); + d->cd_len = len; + return d; +} + +/* cdb_datum_free D + * Free storage associated with D. */ +void cdb_datum_free(cdb_datum d) { + if (d) free(d); +} + +/* cdb_open FILE + * Open the named CDB FILE, returning a cdb object on success or NULL on + * failure, setting cdb_errno. */ +cdb cdb_open(const char *name) { + cdb C = NULL; + FILE *fp; + if (!(fp = fopen(name, "rb"))) { + cdb_errno = errno; + return NULL; + } else if (!(C = cdb_open_fp(fp))) + fclose(fp); + else + C->c_close_on_destroy = 1; + return C; +} + +/* cdb_close C + * Free storage associated with C, and, if it was opened by cdb_open, also + * close the associated file pointer. */ +void cdb_close(cdb C) { + if (!C) return; + if (C->c_close_on_destroy) + fclose(C->c_fp); + free(C); +} + +/* get_slot C OFFSET SLOT HASH WHERE + * Save in *HASH and *WHERE the hash value and offset in the file pointed to by + * the indexed SLOT in the hash table beginning at OFFSET. Returns 0 on success + * or an error code on failure. */ +static cdb_result_t get_slot(cdb C, const uint32_t offset, const uint32_t slot, + cdb_hash_t *hash, uint32_t *where) { + unsigned char buf[8]; + + if (-1 == fseek(C->c_fp, offset + 8 * slot, SEEK_SET)) + return errno; + + if (8 != do_fread(C->c_fp, buf, 8)) { + if (feof(C->c_fp)) + return CDB_FILE_TRUNCATED; + else + return errno; + } + + memcpy(hash, buf, 4); + memcpy(where, buf + 4, 4); + + return 0; +} + +/* cdb_get C KEY + * Look up the database entry identified by KEY. Returns the data retrieved on + * success or NULL on failure, setting cdb_errno. The returned data should be + * freed with cdb_datum_free. */ +/* XXX add a mode where caller supplies storage */ +cdb_datum cdb_get(cdb C, const cdb_datum key) { + cdb_hash_t h, h8, sh; + uint32_t slot0, slot; + cdb_datum val = NULL; + +#define FAIL(e) do { cdb_errno = e; goto fail; } while (0) + + if (key->cd_len > C->c_st.st_size) + FAIL(CDB_NOT_FOUND); + + h = cdb_hash_datum(key); + h8 = h & 0xff; + + debug("key len = %u", (unsigned)key->cd_len); + debug("hash = %08x, hash255 = %02x", (unsigned)h, (unsigned)h8); + + if (!C->c_hashlocs[h8].off || !C->c_hashlocs[h8].len) + FAIL(CDB_NOT_FOUND); + + debug("hash table %u starts at offset %u and has %u slots", + (unsigned)h8, + (unsigned)C->c_hashlocs[h8].off, + (unsigned)C->c_hashlocs[h8].len); + + slot = slot0 = (h >> 8) % C->c_hashlocs[h8].len; + + debug(" %06x %% %u = %u", (unsigned)(h >> 8), C->c_hashlocs[h8].len, (unsigned)slot); + + do { + unsigned char buf[8]; + uint32_t where, keylen, vallen; + cdb_result_t e; + + debug(" looking in slot %u", (unsigned)slot); + + if ((e = get_slot(C, C->c_hashlocs[h8].off, slot, &sh, &where))) + FAIL(e); + + debug(" hash = %08x, offset = %u", (unsigned)sh, (unsigned)where); + + if (sh == h) { + if (0 == where) + FAIL(CDB_NOT_FOUND); + + /* Have a potential slot. Grab the key and value length. */ + if (-1 == fseek(C->c_fp, where, SEEK_SET)) + FAIL(errno); + else if (8 != do_fread(C->c_fp, buf, 8)) { + if (feof(C->c_fp)) + FAIL(CDB_FILE_TRUNCATED); + else + FAIL(errno); + } + + memcpy(&keylen, buf, 4); + memcpy(&vallen, buf + 4, 4); + + debug(" key len = %u, val len = %u", + (unsigned)keylen, (unsigned)vallen); + + if (keylen == key->cd_len) { + size_t i; + for (i = 0; i < key->cd_len; ++i) { + int c; + if (EOF == (c = getc(C->c_fp))) { + if (feof(C->c_fp)) + FAIL(CDB_FILE_TRUNCATED); + else + FAIL(errno); + } else if (c != (int)(((unsigned char*)key->cd_buf)[i])) + break; + } + + if (i == key->cd_len) { + /* Got it. */ + if (!(val = cdb_datum_alloc(vallen))) + FAIL(CDB_OUT_OF_MEMORY); + else if (val->cd_len != do_fread(C->c_fp, val->cd_buf, + val->cd_len)) { + if (feof(C->c_fp)) + FAIL(CDB_FILE_TRUNCATED); + else + FAIL(errno); + } else + return val; + } + } + } + + slot = (slot + 1) % C->c_hashlocs[h8].len; + } while (slot != slot0); + + cdb_errno = CDB_NOT_FOUND; + +fail: + if (val) cdb_datum_free(val); + return NULL; + +#undef FAIL +} + +/* cdb_strerror E + * Return the text of the error message corresponding to E. */ +char *cdb_strerror(const cdb_result_t e) { + if (e > 0) + return strerror(e); + else if (e == 0) + return "Success"; + else { + switch (e) { + case CDB_OUT_OF_MEMORY: + return "Out of memory"; + case CDB_FILE_TOO_SMALL: + return "File is too small to be a valid CDB file"; + case CDB_FILE_TOO_BIG: + return "File is too large to be a valid CDB file"; + case CDB_BAD_HASHLOC_PTR: + return "Bad hash-table location pointer in CDB file header"; + case CDB_BAD_RECORD_PTR: + return "Bad record location pointer in CDB hash table"; + case CDB_NOT_FOUND: + return "Record not found in CDB file"; + default: + return "Unknown CDB internal error code"; + } + } +} + +#ifdef CDB_TEST_PROGRAM + +/* + * Little test program -- reads keys as netstrings on standard input, and + * writes on standard output either "X" for not found, or netstrings giving the + * values of those keys. + */ + +#include <ctype.h> + +#define err(...) \ + do { \ + fprintf(stderr, "cdbtest: "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + } while (0) +#define die(...) do { err(__VA_ARGS__); exit(1); } while (0) + +static cdb_datum netstring_read(FILE *fp) { + unsigned int len = 0; + int c; + cdb_datum d; + +#define FAIL(what) \ + do { \ + if (feof(fp)) \ + die("%s: Premature EOF", what); \ + else \ + die("%s: %s", what, strerror(errno)); \ + } while (0) + + while (EOF != (c = getc(fp))) { + if (isdigit(c)) + len = 10 * len + c - '0'; + else if (c == ':') + break; + else + die("bad character '%c' in netstring length", c); + } + + if (feof(fp) || ferror(fp)) + FAIL("while reading netstring length"); + + if (!(d = cdb_datum_alloc(len))) + die("while reading netstring: cdb_datum_alloc(%u): %s", + len, cdb_strerror(cdb_errno)); + + if (d->cd_len != do_fread(fp, d->cd_buf, d->cd_len)) + FAIL("while reading netstring data"); + + if (EOF == (c = getc(fp))) { + if (feof(fp)) + die("while reading netstring trailer: Premature EOF"); + else + die("while reading netstring trailer: %s", strerror(errno)); + } + + return d; +} + +void netstring_write(FILE *fp, const cdb_datum d) { + fprintf(fp, "%u:", (unsigned)d->cd_len); + if (d->cd_len != fwrite(d->cd_buf, 1, d->cd_len, fp)) + die("while writing netstring value: %s", strerror(errno)); + if (1 != fprintf(fp, ",")) + die("while writing netstring trailer: %s", strerror(errno)); +} + +/* main ARGC ARGV + * Entry point. */ +int main(int argc, char *argv[]) { + cdb C; + if (argc != 2) + die("single argument should be name of CDB file"); + else if (!(C = cdb_open(argv[1]))) + die("%s: %s", argv[1], cdb_strerror(cdb_errno)); + + while (1) { + cdb_datum key, val; + int c; + + c = getc(stdin); + if ('X' == c) + break; + else + ungetc(c, stdin); + key = netstring_read(stdin); + + if (!(val = cdb_get(C, key))) { + if (CDB_NOT_FOUND == cdb_errno) + putc('X', stdout); + else + die("cdb_get: %s", cdb_strerror(cdb_errno)); + } else { + netstring_write(stdout, val); + cdb_datum_free(val); + } + fflush(stdout); + + cdb_datum_free(key); + } + + cdb_close(C); + + return 0; +} + +#endif /* CDB_TEST_PROGRAM */ diff --git a/tileserver/cdb.h b/tileserver/cdb.h new file mode 100644 index 000000000..b48e89845 --- /dev/null +++ b/tileserver/cdb.h @@ -0,0 +1,56 @@ +/* + * cdb.h: + * Interface to Dan-Bernstein-style CDB files. + * + * Copyright (c) 2006 UK Citizens Online Democracy. All rights reserved. + * Email: chris@mysociety.org; WWW: http://www.mysociety.org/ + * + * $Id: cdb.h,v 1.1 2006-09-19 18:26:35 chris Exp $ + * + */ + +#ifndef __CDB_H_ /* include guard */ +#define __CDB_H_ + +#include <sys/types.h> +#include <stdint.h> + +typedef int cdb_result_t; +typedef uint32_t cdb_hash_t; + +typedef struct cdb_datum { + void *cd_buf; + size_t cd_len; +} *cdb_datum; + +typedef struct cdb *cdb; + +/* error codes */ +#define CDB_OUT_OF_MEMORY -1 +#define CDB_FILE_TOO_SMALL -2 +#define CDB_FILE_TOO_BIG -3 +#define CDB_FILE_TRUNCATED -4 + /* one of the initial 256 pointers pointed outside the file */ +#define CDB_BAD_HASHLOC_PTR -5 + /* a datum pointer pointed outside the file or otherwise somewhere bogus */ +#define CDB_BAD_RECORD_PTR -6 + /* the record wasn't found */ +#define CDB_NOT_FOUND -7 + +#ifndef CDB_IMPL +extern +#endif /* CDB_IMPL */ + cdb_result_t cdb_errno; /* XXX threads */ + +/* cdb.c */ +cdb_hash_t cdb_hash(const unsigned char *buf, const size_t len); +cdb_hash_t cdb_hashz(const char *s); +cdb_hash_t cdb_hash_datum(const cdb_datum d); +cdb cdb_open_fp(FILE *fp); +cdb_datum cdb_datum_alloc(const size_t len); +void cdb_datum_free(cdb_datum d); +cdb cdb_open(const char *name); +cdb_datum cdb_get(cdb C, const cdb_datum key); +char *cdb_strerror(const cdb_result_t e); + +#endif /* __CDB_H_ */ diff --git a/tileserver/cdbtestrun b/tileserver/cdbtestrun new file mode 100755 index 000000000..a83c23e60 --- /dev/null +++ b/tileserver/cdbtestrun @@ -0,0 +1,149 @@ +#!/usr/bin/perl -w +# +# cdbtestrun: +# Run the CDB test program. +# +# Copyright (c) 2006 UK Citizens Online Democracy. All rights reserved. +# Email: chris@mysociety.org; WWW: http://www.mysociety.org/ +# + +my $rcsid = ''; $rcsid .= '$Id: cdbtestrun,v 1.1 2006-09-19 18:26:35 chris Exp $'; + +use strict; + +use CDB_File; +use IO::Pipe; +use POSIX qw(); + +sub random_string ($) { + my $len = shift; + my $x = ''; + while (length($x) < $len) { + $x .= pack('N', int(rand(0xffffffff))); + } + return substr($x, 0, $len); +} + +my %h; +my $C = new CDB_File("test.cdb", "test.tmp") + or die "test.tmp: $!"; + +my $nkeys = shift(@ARGV); +$nkeys ||= 100000; +die "'$nkeys' is not a valid number of keys" if ($nkeys !~ /^[1-9]\d*$/); + +for (my $i = 0; $i < $nkeys; ++$i) { + my $key; + do { + $key = random_string(1 + int(rand(16))); + } while (exists($h{$key})); + my $val = random_string(int(rand(100))); + + $h{$key} = $val; + $C->insert($key, $val); + + printf STDERR "\rwriting keys: %d/%d", $i, $nkeys + if (0 == ($i % 100)); +} + +print STDERR "\rwriting keys: $nkeys/$nkeys\n"; +print STDERR "finalising database... "; +$C->finish(); +print STDERR "done\n"; + +# Make a pipe to the test program and fork it. +my $p1 = new IO::Pipe(); # parent writes to this +my $p2 = new IO::Pipe(); # and reads from this. + +my $pid = fork(); +if (!defined($pid)) { + die "fork: $!"; +} elsif (0 == $pid) { + $p1->reader(); + POSIX::close(0); + POSIX::dup($p1->fileno()); + $p2->writer(); + POSIX::close(1); + POSIX::dup($p2->fileno()); + +# { exec("valgrind", "./cdbtest", "test.cdb"); } + { exec("./cdbtest", "test.cdb"); } + + print STDERR "exec ./cdbtest: $!\n"; + POSIX::_exit(255); +} + +sub netstring_read ($) { + my $h = shift; + my $len = 0; + while (defined(my $c = $h->getc())) { + if ($c eq 'X' && $len == 0) { + return undef; + } elsif ($c eq ':') { + last; + } elsif ($c !~ /^\d$/) { + die "bad character '$c' in netstring length"; + } else { + $len = $len * 10 + ord($c) - ord('0'); + } + } + + my $buf = ''; + $h->read($buf, $len, 0); + + if ($h->getc() ne ',') { + die "bad character at netstring trailer"; + } + + return $buf; +} + +sub netstring_write ($$) { + my $h = shift; + $h->printf('%d:', length($_[0])); + $h->print($_[0]); + $h->print(','); + $h->flush(); +} + +$p1->writer(); +$p2->reader(); + +print STDERR "child pid = $pid... press enter to continue\n"; +<STDIN>; + +my $n = 0; +foreach my $key (keys(%h)) { + if (rand() < 0.05) { + my $key; + do { + $key = random_string(1 + int(rand(16))); + } while (exists($h{$key})); + netstring_write($p1, $key); + my $val = netstring_read($p2); + if (defined($val)) { + die "test code said value present when it wasn't"; + } + } + + netstring_write($p1, $key); + my $val = netstring_read($p2); + if (!defined($val)) { + die "test code said val was not present when it was"; + } elsif ($val ne $h{$key}) { + print "GOT=$val\n"; + print "WANTED=$val\n"; + die "value mismatch"; + } + + ++$n; + printf STDERR "\rtesting %d/%d", $n, $nkeys + if (0 == ($n % 100)); +} +print STDERR "\rtesting $n/$nkeys\n"; + +$p1->write('X'); +$p1->flush(); +wait(); + +print STDERR "completed with no errors\n"; |