This commit is contained in:
Nikolaj
2021-09-27 12:36:29 +02:00
parent 4e879f3163
commit 7f22e73b91
11 changed files with 569 additions and 0 deletions

46
A1/Makefile Normal file
View File

@ -0,0 +1,46 @@
# Build configuration.  Override on the command line, e.g. `make CC=clang`.
CC=gcc
CFLAGS=-Wall -Wextra -pedantic -std=gnu99 -g
LDFLAGS=-lm

# Executables built by `make all`, and the test programs run by `make test`.
PROGRAMS=random_ids id_query_naive coord_query_naive
TESTS=..

.PHONY: all test clean ../src.zip

all: $(PROGRAMS)

# Link steps use $(CC) (not a hard-coded gcc) so that overriding CC affects
# linking as well as compilation.
random_ids: random_ids.o record.o
	$(CC) -o $@ $^ $(LDFLAGS)

id_query_%: id_query_%.o record.o id_query.o
	$(CC) -o $@ $^ $(LDFLAGS)

coord_query_%: coord_query_%.o record.o coord_query.o
	$(CC) -o $@ $^ $(LDFLAGS)

id_query.o: id_query.c
	$(CC) -c $< $(CFLAGS)

coord_query.o: coord_query.c
	$(CC) -c $< $(CFLAGS)

record.o: record.c
	$(CC) -c $< $(CFLAGS)

sort.o: sort.c
	$(CC) -c $< $(CFLAGS)

# `set -e` makes the loop stop at the first failing test.  (Plain `set e`
# only assigns the positional parameter $1 and enables nothing.)
test: $(TESTS)
	@set -e; for test in $(TESTS); do echo ./$$test; ./$$test; done

clean:
	rm -rf core *.o $(PROGRAMS)

# The download unpacks to planet-latest_geonames.tsv (underscore), so that is
# the real target; otherwise make can never see the file as up to date.
planet-latest_geonames.tsv:
	wget https://github.com/OSMNames/OSMNames/releases/download/v2.0.4/planet-latest_geonames.tsv.gz
	gunzip planet-latest_geonames.tsv.gz

# Former (hyphenated) target name, kept as an alias for existing users.
planet-latest-geonames.tsv: planet-latest_geonames.tsv

# $(MAKE) propagates flags and the jobserver to the recursive invocation.
../src.zip:
	$(MAKE) clean
	cd .. && zip src.zip -r src

# Keep intermediate object files produced through the pattern rules.
.SECONDARY:

64
A1/coord_query.c Normal file
View File

@ -0,0 +1,64 @@
#include <stdio.h>
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include "coord_query.h"
#include "timing.h"
// Run the coordinate query loop.
//
// Reads the records from the file named by argv[1], builds an index over
// them with mk_index, and then answers one query per line of standard
// input.  A query line holds two numbers, "LON LAT".  For each query the
// record returned by lookup (or "not found" when it returns NULL) is printed
// together with the time the lookup took.  At end of input the total lookup
// time is printed and the index and the records are released.
//
// Lines that do not contain two numbers are reported on stderr and skipped,
// rather than being looked up with uninitialised or stale coordinates.
//
// Returns 0 on success and 1 if the records could not be read.  Exits with
// status 1 when the command line does not consist of exactly one FILE.
int coord_query_loop(int argc, char** argv, mk_index_fn mk_index, free_index_fn free_index, lookup_fn lookup) {
  if (argc != 2) {
    fprintf(stderr, "Usage: %s FILE\n", argv[0]);
    exit(1);
  }

  uint64_t start, runtime;
  int n;

  start = microseconds();
  struct record *rs = read_records(argv[1], &n);
  // Capture errno before any later call gets a chance to overwrite it.
  int read_errno = errno;
  runtime = microseconds()-start;

  if (rs == NULL) {
    fprintf(stderr, "Failed to read input from %s (errno: %s)\n",
            argv[1], strerror(read_errno));
    return 1;
  }

  // Divide first, then convert: the original cast bound tighter than the
  // division and narrowed the microsecond count to int before dividing.
  printf("Reading records: %llums\n", (unsigned long long)(runtime/1000));

  start = microseconds();
  void *index = mk_index(rs, n);
  runtime = microseconds()-start;
  printf("Building index: %llums\n", (unsigned long long)(runtime/1000));

  char *line = NULL;
  size_t line_len = 0;
  uint64_t runtime_sum = 0;

  while (getline(&line, &line_len, stdin) != -1) {
    double lon, lat;
    if (sscanf(line, "%lf %lf", &lon, &lat) != 2) {
      fprintf(stderr, "Invalid query, expected two numbers: LON LAT\n");
      continue;
    }

    start = microseconds();
    const struct record *r = lookup(index, lon, lat);
    runtime = microseconds()-start;

    if (r) {
      printf("(%f,%f): %s (%f,%f)\n", lon, lat, r->name, r->lon, r->lat);
    } else {
      printf("(%f,%f): not found\n", lon, lat);
    }
    printf("Query time: %lluus\n", (unsigned long long)runtime);
    runtime_sum += runtime;
  }

  printf("Total query runtime: %lluus\n", (unsigned long long)runtime_sum);

  free(line);
  free_index(index);
  free_records(rs, n);
  return 0;
}

16
A1/coord_query.h Normal file
View File

@ -0,0 +1,16 @@
// Similar to id_query.h. See the comments there.
#ifndef COORD_QUERY_LOOP_H
#define COORD_QUERY_LOOP_H
#include "record.h"
// Builds an index over the n records starting at rs.  The returned pointer
// is passed back unchanged to the lookup and free functions.
typedef void* (*mk_index_fn)(const struct record *rs, int n);
// Releases an index produced by a mk_index_fn.
typedef void (*free_index_fn)(void *index);
// Queries an index for the coordinates (lon, lat).  A NULL result is
// reported by the query loop as "not found".
typedef const struct record* (*lookup_fn)(void *index, double lon, double lat);
// Runs the coordinate query loop: reads the records from the file named by
// argv[1], builds an index with mk_index, answers "LON LAT" queries read
// from standard input using lookup, and finally calls free_index.
int coord_query_loop(int argc, char** argv, mk_index_fn mk_index, free_index_fn free_index, lookup_fn lookup);
#endif

37
A1/coord_query_naive.c Normal file
View File

@ -0,0 +1,37 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <stdint.h>
#include <errno.h>
#include <assert.h>
#include "record.h"
#include "coord_query.h"
// Index representation for the naive coordinate lookup: just an array of
// records and its length.
struct naive_data {
// Record array to search (presumably the array handed to mk_naive).
struct record *rs;
// Number of records in rs.
int n;
};
struct naive_data* mk_naive(struct record* rs, int n) {
assert(0);
// TODO
}
// Release a naive index.  Only the index struct is freed: the record array
// it points to is released separately by the query loop through
// free_records().  Passing NULL is allowed.
void free_naive(struct naive_data* data) {
  free(data);
}
const struct record* lookup_naive(struct naive_data *data, double lon, double lat) {
assert(0);
// TODO
}
int main(int argc, char** argv) {
return coord_query_loop(argc, argv,
(mk_index_fn)mk_naive,
(free_index_fn)free_naive,
(lookup_fn)lookup_naive);
}

63
A1/id_query.c Normal file
View File

@ -0,0 +1,63 @@
#include <stdio.h>
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include "id_query.h"
#include "timing.h"
// Run the ID query loop.
//
// Reads the records from the file named by argv[1], builds an index over
// them with mk_index, and then answers one query per line of standard
// input.  Each query line starts with an integer ID.  For each query the
// record returned by lookup (or "not found" when it returns NULL) is printed
// together with the time the lookup took.  At end of input the total lookup
// time is printed and the index and the records are released.
//
// Lines that do not start with an in-range integer are reported on stderr
// and skipped, rather than silently being looked up as some other ID.
//
// Returns 0 on success and 1 if the records could not be read.  Exits with
// status 1 when the command line does not consist of exactly one FILE.
int id_query_loop(int argc, char** argv, mk_index_fn mk_index, free_index_fn free_index, lookup_fn lookup) {
  if (argc != 2) {
    fprintf(stderr, "Usage: %s FILE\n", argv[0]);
    exit(1);
  }

  uint64_t start, runtime;
  int n;

  start = microseconds();
  struct record *rs = read_records(argv[1], &n);
  // Capture errno before any later call gets a chance to overwrite it.
  int read_errno = errno;
  runtime = microseconds()-start;

  if (rs == NULL) {
    fprintf(stderr, "Failed to read input from %s (errno: %s)\n",
            argv[1], strerror(read_errno));
    return 1;
  }

  // Divide first, then convert: the original cast bound tighter than the
  // division and narrowed the microsecond count to int before dividing.
  printf("Reading records: %llums\n", (unsigned long long)(runtime/1000));

  start = microseconds();
  void *index = mk_index(rs, n);
  runtime = microseconds()-start;
  printf("Building index: %llums\n", (unsigned long long)(runtime/1000));

  char *line = NULL;
  size_t line_len = 0;
  uint64_t runtime_sum = 0;

  while (getline(&line, &line_len, stdin) != -1) {
    // strtoll (unlike atol) reports both "no digits" and overflow.
    char *end;
    errno = 0;
    long long parsed = strtoll(line, &end, 10);
    if (end == line || errno == ERANGE) {
      fprintf(stderr, "Invalid query, expected an integer ID\n");
      continue;
    }
    int64_t needle = (int64_t)parsed;

    start = microseconds();
    const struct record *r = lookup(index, needle);
    runtime = microseconds()-start;

    if (r) {
      printf("%lld: %s %f %f\n", (long long)needle, r->name, r->lon, r->lat);
    } else {
      printf("%lld: not found\n", (long long)needle);
    }
    printf("Query time: %lluus\n", (unsigned long long)runtime);
    runtime_sum += runtime;
  }

  printf("Total query runtime: %lluus\n", (unsigned long long)runtime_sum);

  free(line);
  free_index(index);
  free_records(rs, n);
  return 0;
}

38
A1/id_query.h Normal file
View File

@ -0,0 +1,38 @@
// This file (along with its implementation id_query.c) abstracts out
// the user-facing part of the query programs. It implements the
// following algorithm:
//
// Records <- Read Dataset
// Index <- Produce Index From Records
// While Program Is Running:
// Read Query From User
// Lookup Query In Index
// Free Index
//
// Where the specifics of "Produce Index From Records", "Lookup Query
// In Index", and "Free Index" are provided via function pointers.
// This means we can write the main loop just once, and reuse it with
// different implementations of indexes.
//
// See the file id_query_naive.c for a usage example.
#ifndef ID_QUERY_LOOP_H
#define ID_QUERY_LOOP_H
#include "record.h"
// Builds an index from an array of records: rs points at the first record
// and n is the number of records in the array.
typedef void* (*mk_index_fn)(const struct record *rs, int n);
// Frees an index produced by a mk_index_fn.
typedef void (*free_index_fn)(void *index);
// Looks up the ID 'needle' in an index produced by a mk_index_fn.  A NULL
// result is reported by the query loop as "not found".
typedef const struct record* (*lookup_fn)(void *index, int64_t needle);
// Runs the query loop described at the top of this file, using the provided
// functions to build, query and free the index.
int id_query_loop(int argc, char** argv, mk_index_fn mk_index, free_index_fn free_index, lookup_fn lookup);
#endif

37
A1/id_query_naive.c Normal file
View File

@ -0,0 +1,37 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <stdint.h>
#include <errno.h>
#include <assert.h>
#include "record.h"
#include "id_query.h"
// Index representation for the naive ID lookup: just an array of records
// and its length.
struct naive_data {
// Record array to search (presumably the array handed to mk_naive).
struct record *rs;
// Number of records in rs.
int n;
};
struct naive_data* mk_naive(struct record* rs, int n) {
// TODO
assert(0);
}
// Release a naive index.  Only the index struct is freed: the record array
// it points to is released separately by the query loop through
// free_records().  Passing NULL is allowed.
void free_naive(struct naive_data* data) {
  free(data);
}
const struct record* lookup_naive(struct naive_data *data, int64_t needle) {
// TODO
assert(0);
}
int main(int argc, char** argv) {
return id_query_loop(argc, argv,
(mk_index_fn)mk_naive,
(free_index_fn)free_naive,
(lookup_fn)lookup_naive);
}

25
A1/random_ids.c Normal file
View File

@ -0,0 +1,25 @@
#include <stdio.h>
#include <stdlib.h>
#include "record.h"
// Print an endless stream of randomly chosen OSM IDs, one per line, taken
// from the records in the file named on the command line.  The stream ends
// when writing to standard output fails.
//
// Returns 1 on a usage error, when the file cannot be read, or when it
// contains no records; returns 0 once output fails.
int main(int argc, char** argv) {
  if (argc != 2) {
    // argv[0] is the program name; argv[1] is NULL when FILE is missing.
    fprintf(stderr, "Usage: %s FILE\n", argv[0]);
    return 1;
  }

  int n;
  struct record* rs = read_records(argv[1], &n);
  if (!rs) {
    fprintf(stderr, "Failed to read records from %s\n", argv[1]);
    return 1;
  }

  // With no records, rand() % n would divide by zero.
  if (n <= 0) {
    fprintf(stderr, "No records in %s\n", argv[1]);
    free_records(rs, n);
    return 1;
  }

  // printf reports failure with a negative value; a successful call here
  // always writes at least two characters, so comparing against 0 could
  // never end the loop.
  while (printf("%ld\n", (long)rs[rand() % n].osm_id) >= 0) {
  }

  free_records(rs, n);
  return 0;
}

174
A1/record.c Normal file
View File

@ -0,0 +1,174 @@
#include "record.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
// Sanity check to make sure we are reading the right kind of file: consumes
// the first line of f and compares it with the expected column header.
//
// Returns 1 if the header matches, 0 if it does not, and -1 if no line
// could be read at all (for example because the file is empty).
int input_looks_ok(FILE *f) {
  // The columns are tab-separated, matching how read_record() splits every
  // following line.  The separators are spelled as \t so they cannot be
  // mistaken for, or silently turned into, spaces.
  static const char expected_header[] =
    "name\talternative_names\tosm_type\tosm_id\tclass\ttype\tlon\tlat\t"
    "place_rank\timportance\tstreet\tcity\tcounty\tstate\tcountry\t"
    "country_code\tdisplay_name\twest\tsouth\teast\tnorth\twikidata\t"
    "wikipedia\thousenumbers\n";

  char *line = NULL;
  size_t n = 0;

  if (getline(&line, &n, f) == -1) {
    // getline may have allocated a buffer even though it failed.
    free(line);
    return -1;
  }

  int ret = (strcmp(line, expected_header) == 0) ? 1 : 0;
  free(line);
  return ret;
}
// Cut the next tab-separated field out of the line that *cursor points
// into.  The field is NUL-terminated in place and *cursor is advanced past
// the separator.  A newline or the end of the string also ends a field; in
// that case *cursor is left on the terminating NUL, so every further call
// yields an empty string instead of running off the end of the line.
static char *next_field(char **cursor) {
  char *field = *cursor;
  char *end = field + strcspn(field, "\t\n");

  if (*end == '\t') {
    *cursor = end + 1;
  } else {
    *cursor = end;
  }
  *end = '\0';
  return field;
}

// Read a single record from an open file.  The line is read with getline()
// and split in place on tabs, one field per struct member, in column order.
//
// Every string member of *r points into the line buffer, which is stored in
// r->line and must eventually be released with free().  Fields missing from
// a short line become empty strings (or 0 for numeric fields), so no member
// is left uninitialised.
//
// Returns 0 on success and -1 if no line could be read.
int read_record(struct record *r, FILE *f) {
  char *line = NULL;
  size_t n = 0;

  if (getline(&line, &n, f) == -1) {
    free(line);
    return -1;
  }

  r->line = line;
  char *cursor = line;

  r->name = next_field(&cursor);
  r->alternative_names = next_field(&cursor);
  r->osm_type = next_field(&cursor);
  r->osm_id = strtoll(next_field(&cursor), NULL, 10);
  r->class = next_field(&cursor);
  r->type = next_field(&cursor);
  r->lon = strtod(next_field(&cursor), NULL);
  r->lat = strtod(next_field(&cursor), NULL);
  r->place_rank = (int)strtol(next_field(&cursor), NULL, 10);
  r->importance = strtod(next_field(&cursor), NULL);
  r->street = next_field(&cursor);
  r->city = next_field(&cursor);
  r->county = next_field(&cursor);
  r->state = next_field(&cursor);
  r->country = next_field(&cursor);
  r->country_code = next_field(&cursor);
  r->display_name = next_field(&cursor);
  r->west = strtod(next_field(&cursor), NULL);
  // This column was previously stored into 'west' a second time, leaving
  // 'south' unset.
  r->south = strtod(next_field(&cursor), NULL);
  r->east = strtod(next_field(&cursor), NULL);
  r->north = strtod(next_field(&cursor), NULL);
  r->wikidata = next_field(&cursor);
  r->wikipedia = next_field(&cursor);
  // The last column ends with a newline rather than a tab; next_field()
  // accepts either, so this member is now assigned as well.
  r->housenumbers = next_field(&cursor);

  return 0;
}
// Read all records from the named file.
//
// The first line must be the expected column header (see input_looks_ok);
// every following line becomes one record.  On success returns a
// heap-allocated array and stores the number of records in *n; release it
// with free_records().  On failure (file cannot be opened, header missing
// or wrong, out of memory) returns NULL with *n set to 0, and nothing is
// leaked.
struct record* read_records(const char *filename, int *n) {
  *n = 0;

  FILE *f = fopen(filename, "r");
  if (f == NULL) {
    return NULL;
  }

  // Only an exact header match is acceptable.  input_looks_ok() returns -1
  // when the file is empty, which a plain truth test would let through.
  if (input_looks_ok(f) != 1) {
    fclose(f);
    return NULL;
  }

  int capacity = 100;
  int count = 0;
  struct record *rs = malloc((size_t)capacity * sizeof *rs);
  if (rs == NULL) {
    fclose(f);
    return NULL;
  }

  while (read_record(&rs[count], f) == 0) {
    count++;
    if (count == capacity) {
      // Grow through a temporary so the old array can still be released if
      // realloc fails.
      int new_capacity = capacity * 2;
      struct record *grown = realloc(rs, (size_t)new_capacity * sizeof *grown);
      if (grown == NULL) {
        free_records(rs, count);
        fclose(f);
        return NULL;
      }
      rs = grown;
      capacity = new_capacity;
    }
  }

  *n = count;
  fclose(f);
  return rs;
}
void free_records(struct record *rs, int n) {
for (int i = 0; i < n; i++) {
free(rs[i].line);
}
free(rs);
}

57
A1/record.h Normal file
View File

@ -0,0 +1,57 @@
#ifndef RECORD_H
#define RECORD_H
#include <stdio.h>
#include <stdint.h>
// An OpenStreetMap place record. All the 'const char*' strings are
// pointers into the string stored in the 'line' field. This string
// is "owned" by the record, meaning that it is freed exactly when the
// record itself is freed.
//
// You don't need to worry about the meaning of these fields. The
// ones that matter are osm_id, lon, lat, and name.
// One row of the dataset.  Members appear in the same order as the columns
// of the input file's header line.
struct record {
// Text columns: pointers into 'line' (see the note at the end).
const char *name;
const char *alternative_names;
const char *osm_type;
// Numeric ID of the place; this is what the ID query programs search for.
int64_t osm_id;
const char *class;
const char *type;
// Coordinates of the place; these are what the coordinate query programs
// search on.
double lon;
double lat;
int place_rank;
double importance;
const char *street;
const char *city;
const char *county;
const char *state;
const char *country;
const char *country_code;
const char *display_name;
// Presumably the bounding box of the place - TODO confirm against the
// dataset documentation.
double west;
double south;
double east;
double north;
const char *wikidata;
const char *wikipedia;
const char *housenumbers;
// Not a real field - all the other char* elements are pointers into
// this memory, which we can pass to free().
char *line;
};
// Read an OpenStreetMap place names dataset from a given file. On
// success, returns a pointer to the array of records read, and sets
// *n to the number of records. Returns NULL on failure.
//
// Expects a text file whose first line is the column header
//   name alternative_names osm_type osm_id class type lon lat ...
// followed by one record per line, with fields separated by tabs.
//
// The returned array must be released with free_records().
struct record* read_records(const char *filename, int *n);
// Free records returned by read_records(). The 'n' argument must
// correspond to the number of records, as produced by read_records().
void free_records(struct record *r, int n);
#endif

12
A1/timing.h Normal file
View File

@ -0,0 +1,12 @@
#ifndef TIMING_H
#define TIMING_H
#include <sys/time.h>
// Current wall-clock time, as reported by gettimeofday(), expressed as a
// single count of microseconds.  Subtract two readings to time an interval.
static uint64_t microseconds() {
  static struct timeval now;
  gettimeofday(&now, NULL);

  uint64_t whole_seconds_us = (uint64_t)now.tv_sec * 1000000;
  return whole_seconds_us + now.tv_usec;
}
#endif