diff --git a/A1/Makefile b/A1/Makefile
new file mode 100644
index 0000000..eb629c4
--- /dev/null
+++ b/A1/Makefile
@@ -0,0 +1,52 @@
+CC=gcc
+CFLAGS=-Wall -Wextra -pedantic -std=gnu99 -g
+LDFLAGS=-lm
+PROGRAMS=random_ids id_query_naive coord_query_naive
+TESTS=..
+
+.PHONY: all test clean ../src.zip planet-latest-geonames.tsv
+
+all: $(PROGRAMS)
+
+random_ids: random_ids.o record.o
+	$(CC) -o $@ $^ $(LDFLAGS)
+
+id_query_%: id_query_%.o record.o id_query.o
+	$(CC) -o $@ $^ $(LDFLAGS)
+
+coord_query_%: coord_query_%.o record.o coord_query.o
+	$(CC) -o $@ $^ $(LDFLAGS)
+
+id_query.o: id_query.c
+	$(CC) -c $< $(CFLAGS)
+
+coord_query.o: coord_query.c
+	$(CC) -c $< $(CFLAGS)
+
+record.o: record.c
+	$(CC) -c $< $(CFLAGS)
+
+sort.o: sort.c
+	$(CC) -c $< $(CFLAGS)
+
+# Run every test program; 'set -e' stops at the first one that fails.
+test: $(TESTS)
+	@set -e; for test in $(TESTS); do echo ./$$test; ./$$test; done
+
+clean:
+	rm -rf core *.o $(PROGRAMS)
+
+# The target is named after the file gunzip actually produces, so the
+# dataset is not downloaded again once it exists.
+planet-latest_geonames.tsv:
+	wget https://github.com/OSMNames/OSMNames/releases/download/v2.0.4/planet-latest_geonames.tsv.gz
+	gunzip planet-latest_geonames.tsv.gz
+
+# Alias kept for the name this target originally had.
+planet-latest-geonames.tsv: planet-latest_geonames.tsv
+
+../src.zip:
+	$(MAKE) clean
+	cd .. && zip src.zip -r src
+
+.SECONDARY:
diff --git a/A1/coord_query.c b/A1/coord_query.c
new file mode 100644
index 0000000..465904c
--- /dev/null
+++ b/A1/coord_query.c
@@ -0,0 +1,80 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <errno.h>
+#include <string.h>
+
+#include "coord_query.h"
+#include "timing.h"
+
+// Run the interactive coordinate query loop.
+//
+// Reads the records in the file named by argv[1], builds an index
+// with mk_index, and then answers one "LON LAT" query per line of
+// standard input using lookup, printing how long each lookup took.
+// The index and the records are freed before returning.
+//
+// Returns 0 on success and 1 if the records could not be read.
+// Exits with status 1 on a usage error.
+int coord_query_loop(int argc, char** argv, mk_index_fn mk_index, free_index_fn free_index, lookup_fn lookup) {
+  if (argc != 2) {
+    fprintf(stderr, "Usage: %s FILE\n", argv[0]);
+    exit(1);
+  }
+
+  uint64_t start, runtime;
+  int n;
+
+  start = microseconds();
+  struct record *rs = read_records(argv[1], &n);
+  runtime = microseconds()-start;
+
+  if (rs == NULL) {
+    fprintf(stderr, "Failed to read input from %s (errno: %s)\n",
+            argv[1], strerror(errno));
+    return 1;
+  }
+
+  // Divide before narrowing, so that the cast applies to the
+  // millisecond count rather than to the raw microsecond count.
+  printf("Reading records: %dms\n", (int)(runtime/1000));
+
+  start = microseconds();
+  void *index = mk_index(rs, n);
+  runtime = microseconds()-start;
+  printf("Building index: %dms\n", (int)(runtime/1000));
+
+  char *line = NULL;
+  size_t line_len = 0;
+
+  uint64_t runtime_sum = 0;
+  while (getline(&line, &line_len, stdin) != -1) {
+    double lon, lat;
+    // Skip lines that do not hold two numbers; lon and lat would
+    // otherwise be used uninitialised.
+    if (sscanf(line, "%lf %lf", &lon, &lat) != 2) {
+      fprintf(stderr, "Invalid query, expected: LON LAT\n");
+      continue;
+    }
+
+    start = microseconds();
+    const struct record *r = lookup(index, lon, lat);
+    runtime = microseconds()-start;
+
+    if (r) {
+      printf("(%f,%f): %s (%f,%f)\n", lon, lat, r->name, r->lon, r->lat);
+    } else {
+      printf("(%f,%f): not found\n", lon, lat);
+    }
+
+    printf("Query time: %dus\n", (int)runtime);
+    runtime_sum += runtime;
+  }
+
+  printf("Total query runtime: %dus\n", (int)runtime_sum);
+
+  free(line);
+  free_index(index);
+  free_records(rs, n);
+  return 0;
+}
diff --git a/A1/coord_query.h b/A1/coord_query.h
new file mode 100644
index 0000000..b7ab80e
--- /dev/null
+++ b/A1/coord_query.h
@@ -0,0 +1,16 @@
+// Similar to id_query.h. See the comments there.
+
+#ifndef COORD_QUERY_LOOP_H
+#define COORD_QUERY_LOOP_H
+
+#include "record.h"
+
+typedef void* (*mk_index_fn)(const struct record*, int);
+
+typedef void (*free_index_fn)(void*);
+
+typedef const struct record* (*lookup_fn)(void*, double, double);
+
+int coord_query_loop(int argc, char** argv, mk_index_fn mk_index, free_index_fn free_index, lookup_fn lookup);
+
+#endif
diff --git a/A1/coord_query_naive.c b/A1/coord_query_naive.c
new file mode 100644
index 0000000..dc5ed4f
--- /dev/null
+++ b/A1/coord_query_naive.c
@@ -0,0 +1,63 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <math.h>
+
+#include "record.h"
+#include "coord_query.h"
+
+// The naive "index" is the record array itself; every lookup scans
+// it from start to end.
+struct naive_data {
+  struct record *rs;
+  int n;
+};
+
+// Wrap the n records in rs.  The records are borrowed, not copied.
+// Returns NULL if memory could not be allocated.
+struct naive_data* mk_naive(struct record* rs, int n) {
+  struct naive_data *data = malloc(sizeof *data);
+  if (data != NULL) {
+    data->rs = rs;
+    data->n = n;
+  }
+  return data;
+}
+
+// Free the index; the records still belong to the caller.
+void free_naive(struct naive_data* data) {
+  free(data);
+}
+
+// Return the record with the smallest Euclidean distance to
+// (lon, lat), or NULL if there are no records.
+const struct record* lookup_naive(struct naive_data *data, double lon, double lat) {
+  if (data == NULL) {
+    return NULL;
+  }
+
+  const struct record *best = NULL;
+  double best_dist = 0;
+  for (int i = 0; i < data->n; i++) {
+    // Squared distances order the same way as distances, so the
+    // square root is not needed.
+    double dlon = data->rs[i].lon - lon;
+    double dlat = data->rs[i].lat - lat;
+    double dist = dlon*dlon + dlat*dlat;
+    if (best == NULL || dist < best_dist) {
+      best = &data->rs[i];
+      best_dist = dist;
+    }
+  }
+  return best;
+}
+
+int main(int argc, char** argv) {
+  return coord_query_loop(argc, argv,
+                          (mk_index_fn)mk_naive,
+                          (free_index_fn)free_naive,
+                          (lookup_fn)lookup_naive);
+}
diff --git a/A1/id_query.c b/A1/id_query.c
new file mode 100644
index 0000000..2ca3ca8
--- /dev/null
+++ b/A1/id_query.c
@@ -0,0 +1,76 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <errno.h>
+#include <string.h>
+
+#include "id_query.h"
+#include "timing.h"
+
+// Run the interactive ID query loop.
+//
+// Reads the records in the file named by argv[1], builds an index
+// with mk_index, and then looks up one ID per line of standard input
+// using lookup, printing how long each lookup took.  The index and
+// the records are freed before returning.
+//
+// Returns 0 on success and 1 if the records could not be read.
+// Exits with status 1 on a usage error.
+int id_query_loop(int argc, char** argv, mk_index_fn mk_index, free_index_fn free_index, lookup_fn lookup) {
+  if (argc != 2) {
+    fprintf(stderr, "Usage: %s FILE\n", argv[0]);
+    exit(1);
+  }
+
+  uint64_t start, runtime;
+  int n;
+
+  start = microseconds();
+  struct record *rs = read_records(argv[1], &n);
+  runtime = microseconds()-start;
+
+  if (rs == NULL) {
+    fprintf(stderr, "Failed to read input from %s (errno: %s)\n",
+            argv[1], strerror(errno));
+    return 1;
+  }
+
+  // Divide before narrowing, so that the cast applies to the
+  // millisecond count rather than to the raw microsecond count.
+  printf("Reading records: %dms\n", (int)(runtime/1000));
+
+  start = microseconds();
+  void *index = mk_index(rs, n);
+  runtime = microseconds()-start;
+  printf("Building index: %dms\n", (int)(runtime/1000));
+
+  char *line = NULL;
+  size_t line_len = 0;
+
+  uint64_t runtime_sum = 0;
+  while (getline(&line, &line_len, stdin) != -1) {
+    // strtoll() covers the whole int64_t range even where long is
+    // only 32 bits wide.
+    int64_t needle = strtoll(line, NULL, 10);
+
+    start = microseconds();
+    const struct record *r = lookup(index, needle);
+    runtime = microseconds()-start;
+
+    if (r) {
+      printf("%lld: %s %f %f\n", (long long)needle, r->name, r->lon, r->lat);
+    } else {
+      printf("%lld: not found\n", (long long)needle);
+    }
+
+    printf("Query time: %dus\n", (int)runtime);
+    runtime_sum += runtime;
+  }
+
+  printf("Total query runtime: %dus\n", (int)runtime_sum);
+
+  free(line);
+  free_index(index);
+  free_records(rs, n);
+  return 0;
+}
diff --git a/A1/id_query.h b/A1/id_query.h
new file mode 100644
index 0000000..32f2048
--- /dev/null
+++ b/A1/id_query.h
@@ -0,0 +1,38 @@
+// This file (along with its implementation id_query.c) abstracts out
+// the user-facing part of the query programs.  It implements the
+// following algorithm:
+//
+//   Records <- Read Dataset
+//   Index <- Produce Index From Records
+//   While Program Is Running:
+//     Read Query From User
+//     Lookup Query In Index
+//   Free Index
+//
+// Where the specifics of "Produce Index From Records", "Lookup Query
+// In Index", and "Free Index" are provided via function pointers.
+// This means we can write the main loop just once, and reuse it with
+// different implementations of indexes.
+//
+// See the file id_query_naive.c for a usage example.
+
+#ifndef ID_QUERY_LOOP_H
+#define ID_QUERY_LOOP_H
+
+#include "record.h"
+
+// A pointer to a function that produces an index, when called with an
+// array of records and the size of the array.
+typedef void* (*mk_index_fn)(const struct record*, int);
+
+// Free an index produced by a mk_index_fn.
+typedef void (*free_index_fn)(void*);
+
+// Look up an ID in an index produced by mk_index_fn.
+typedef const struct record* (*lookup_fn)(void*, int64_t);
+
+// Run a query loop, using the provided functions for managing the
+// index.
+int id_query_loop(int argc, char** argv, mk_index_fn mk_index, free_index_fn free_index, lookup_fn lookup);
+
+#endif
diff --git a/A1/id_query_naive.c b/A1/id_query_naive.c
new file mode 100644
index 0000000..3731396
--- /dev/null
+++ b/A1/id_query_naive.c
@@ -0,0 +1,55 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <errno.h>
+#include <assert.h>
+#include <math.h>
+
+#include "record.h"
+#include "id_query.h"
+
+// The naive "index" is the record array itself; every lookup scans
+// it from start to end.
+struct naive_data {
+  struct record *rs;
+  int n;
+};
+
+// Wrap the n records in rs.  The records are borrowed, not copied.
+// Returns NULL if memory could not be allocated.
+struct naive_data* mk_naive(struct record* rs, int n) {
+  struct naive_data *data = malloc(sizeof *data);
+  if (data != NULL) {
+    data->rs = rs;
+    data->n = n;
+  }
+  return data;
+}
+
+// Free the index; the records still belong to the caller.
+void free_naive(struct naive_data* data) {
+  free(data);
+}
+
+// Return the first record whose osm_id equals needle, or NULL if
+// there is none.
+const struct record* lookup_naive(struct naive_data *data, int64_t needle) {
+  if (data == NULL) {
+    return NULL;
+  }
+
+  for (int i = 0; i < data->n; i++) {
+    if (data->rs[i].osm_id == needle) {
+      return &data->rs[i];
+    }
+  }
+  return NULL;
+}
+
+int main(int argc, char** argv) {
+  return id_query_loop(argc, argv,
+                       (mk_index_fn)mk_naive,
+                       (free_index_fn)free_naive,
+                       (lookup_fn)lookup_naive);
+}
diff --git a/A1/random_ids.c b/A1/random_ids.c
new file mode 100644
index 0000000..b43f22e
--- /dev/null
+++ b/A1/random_ids.c
@@ -0,0 +1,37 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "record.h"
+
+// Print randomly chosen OSM IDs from the dataset in argv[1], one per
+// line, until writing to standard output fails.
+int main(int argc, char** argv) {
+  if (argc != 2) {
+    // The program name is argv[0]; argv[1] is NULL when no argument
+    // was given.
+    fprintf(stderr, "Usage: %s FILE\n", argv[0]);
+    return 1;
+  }
+
+  int n;
+  struct record* rs = read_records(argv[1], &n);
+
+  if (!rs) {
+    fprintf(stderr, "Failed to read records from %s\n", argv[1]);
+    return 1;
+  }
+
+  if (n == 0) {
+    // rand() % 0 would divide by zero.
+    fprintf(stderr, "No records in %s\n", argv[1]);
+    free_records(rs, n);
+    return 1;
+  }
+
+  // printf() reports an output error with a negative return value.
+  while (printf("%ld\n", (long)rs[rand() % n].osm_id) >= 0) {
+  }
+
+  free_records(rs, n);
+  return 0;
+}
diff --git a/A1/record.c b/A1/record.c
new file mode 100644
index 0000000..df12aa2
--- /dev/null
+++ b/A1/record.c
@@ -0,0 +1,150 @@
+#include "record.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+// The header line of a dataset: the names of the record fields,
+// separated by tabs.
+static const char expected_header[] =
+  "name\talternative_names\tosm_type\tosm_id\tclass\ttype\tlon\tlat\t"
+  "place_rank\timportance\tstreet\tcity\tcounty\tstate\tcountry\t"
+  "country_code\tdisplay_name\twest\tsouth\teast\tnorth\twikidata\t"
+  "wikipedia\thousenumbers\n";
+
+// Sanity check to make sure we are reading the right kind of file.
+// Consumes the first line of f.  Returns 1 if it is the expected
+// header, 0 if it is something else, and -1 if no line could be read.
+int input_looks_ok(FILE *f) {
+  char *line = NULL;
+  size_t n = 0;
+  int ret;
+
+  if (getline(&line, &n, f) == -1) {
+    ret = -1;
+  } else if (strcmp(line, expected_header) == 0) {
+    ret = 1;
+  } else {
+    ret = 0;
+  }
+
+  // getline() may have allocated a buffer even if it failed.
+  free(line);
+  return ret;
+}
+
+// Split the next tab-separated field off *cursor.  The field is
+// NUL-terminated in place and *cursor is moved past it.  A newline
+// ends the last field.  Once the line is used up, the empty string at
+// its end is returned, so the result always points into the line.
+static char* next_field(char **cursor) {
+  char *field = *cursor;
+  size_t len = strcspn(field, "\t\n");
+
+  if (field[len] == '\0') {
+    *cursor = field + len;
+  } else {
+    field[len] = '\0';
+    *cursor = field + len + 1;
+  }
+  return field;
+}
+
+// Read a single record from an open file.  Returns 0 on success and
+// -1 if no further line could be read.
+//
+// Fields missing from a short line are read as empty strings or
+// zero, so every field of *r is set.
+int read_record(struct record *r, FILE *f) {
+  char *line = NULL;
+  size_t n = 0;
+  if (getline(&line, &n, f) == -1) {
+    free(line);
+    return -1;
+  }
+
+  r->line = line;
+
+  // The fields come in the order given by the header line.
+  char *cursor = line;
+  r->name = next_field(&cursor);
+  r->alternative_names = next_field(&cursor);
+  r->osm_type = next_field(&cursor);
+  r->osm_id = strtoll(next_field(&cursor), NULL, 10);
+  r->class = next_field(&cursor);
+  r->type = next_field(&cursor);
+  r->lon = atof(next_field(&cursor));
+  r->lat = atof(next_field(&cursor));
+  r->place_rank = atoi(next_field(&cursor));
+  r->importance = atof(next_field(&cursor));
+  r->street = next_field(&cursor);
+  r->city = next_field(&cursor);
+  r->county = next_field(&cursor);
+  r->state = next_field(&cursor);
+  r->country = next_field(&cursor);
+  r->country_code = next_field(&cursor);
+  r->display_name = next_field(&cursor);
+  r->west = atof(next_field(&cursor));
+  r->south = atof(next_field(&cursor));
+  r->east = atof(next_field(&cursor));
+  r->north = atof(next_field(&cursor));
+  r->wikidata = next_field(&cursor);
+  r->wikipedia = next_field(&cursor);
+  r->housenumbers = next_field(&cursor);
+
+  return 0;
+}
+
+// See record.h.
+struct record* read_records(const char *filename, int *n) {
+  *n = 0;
+
+  FILE *f = fopen(filename, "r");
+  if (f == NULL) {
+    return NULL;
+  }
+
+  // input_looks_ok() returns -1, which counts as true, when the
+  // header cannot be read, so compare against 1 explicitly.
+  if (input_looks_ok(f) != 1) {
+    fclose(f);
+    return NULL;
+  }
+
+  int capacity = 100;
+  int i = 0;
+  struct record *rs = malloc(capacity * sizeof *rs);
+  if (rs == NULL) {
+    fclose(f);
+    return NULL;
+  }
+
+  while (read_record(&rs[i], f) == 0) {
+    i++;
+    if (i == capacity) {
+      // Keep the old pointer until realloc() has succeeded, so the
+      // records read so far can still be freed.
+      struct record *grown = realloc(rs, 2 * capacity * sizeof *rs);
+      if (grown == NULL) {
+        free_records(rs, i);
+        fclose(f);
+        return NULL;
+      }
+      rs = grown;
+      capacity *= 2;
+    }
+  }
+
+  *n = i;
+  fclose(f);
+  return rs;
+}
+
+// See record.h.
+void free_records(struct record *rs, int n) {
+  for (int i = 0; i < n; i++) {
+    free(rs[i].line);
+  }
+  free(rs);
+}
diff --git a/A1/record.h b/A1/record.h
new file mode 100644
index 0000000..05ee9b5
--- /dev/null
+++ b/A1/record.h
@@ -0,0 +1,58 @@
+#ifndef RECORD_H
+#define RECORD_H
+
+#include <stdint.h>
+#include <stdio.h>
+
+// An OpenStreetMap place record.  All the 'const char*' strings are
+// pointers into the string stored in the 'line' field.  This string
+// is "owned" by the record, meaning that it is freed exactly when the
+// record itself is freed.
+//
+// You don't need to worry about the meaning of these fields.  The
+// ones that matter are osm_id, lon, lat, and name.
+struct record {
+  const char *name;
+  const char *alternative_names;
+  const char *osm_type;
+  int64_t osm_id;
+  const char *class;
+  const char *type;
+  double lon;
+  double lat;
+  int place_rank;
+  double importance;
+  const char *street;
+  const char *city;
+  const char *county;
+  const char *state;
+  const char *country;
+  const char *country_code;
+  const char *display_name;
+  double west;
+  double south;
+  double east;
+  double north;
+  const char *wikidata;
+  const char *wikipedia;
+  const char *housenumbers;
+
+  // Not a real field - all the other char* elements are pointers into
+  // this memory, which we can pass to free().
+  char *line;
+};
+
+// Read an OpenStreetMap place names dataset from a given file.  On
+// success, returns a pointer to the array of records read, and sets
+// *n to the number of records.  Returns NULL on failure.
+//
+// Expects a tab-separated file whose first line names the fields of
+// struct record (name through housenumbers, in that order), followed
+// by one record per line.
+struct record* read_records(const char *filename, int *n);
+
+// Free records returned by read_records().  The 'n' argument must
+// correspond to the number of records, as produced by read_records().
+void free_records(struct record *r, int n);
+
+#endif
diff --git a/A1/timing.h b/A1/timing.h
new file mode 100644
index 0000000..9e63a2a
--- /dev/null
+++ b/A1/timing.h
@@ -0,0 +1,15 @@
+#ifndef TIMING_H
+#define TIMING_H
+
+#include <stdint.h>
+#include <sys/time.h>
+
+// The current time of day in microseconds, as reported by
+// gettimeofday().  Used for measuring elapsed time.
+static inline uint64_t microseconds(void) {
+  struct timeval t;
+  gettimeofday(&t, NULL);
+  return ((uint64_t)t.tv_sec*1000000)+t.tv_usec;
+}
+
+#endif