diff --git a/A0/file b/A0/file index d2e5309..720d513 100755 Binary files a/A0/file and b/A0/file differ diff --git a/A0/file.c b/A0/file.c index 0106c1d..24cda25 100644 --- a/A0/file.c +++ b/A0/file.c @@ -8,12 +8,15 @@ enum FileType{ASCII, ISO, UTF, data}; bool is_utf8(FILE* f) { rewind(f); - char byte; + unsigned char byte; int char_length = -1; for (;;) { if (fread(&byte, 1, 1, f) == 0) { break; } + if (byte == 0){ + return false; + } for (int j = 0 ; j <= 8 ; j++) { if (byte >> 7 == 0) { char_length = j; @@ -40,7 +43,7 @@ bool is_iso8859(FILE* f) { for (int i = 0 ; i < 7 ; i++) { iso8859_CHARACTERS[i] = i+7; } - iso8859_CHARACTERS[7] = 27; + iso8859_CHARACTERS[7] = 27; for (int i = 0 ; i < 94 ; i++) { iso8859_CHARACTERS[i+8] = i+32; } @@ -48,23 +51,23 @@ bool is_iso8859(FILE* f) { for (int i = 0 ; i < 95 ; i++) { iso8859_CHARACTERS[i+102] = i+160; } - char byte; - bool is_iso8859; + unsigned char byte; + bool iso_byte; for (;;) { if (fread(&byte, 1, 1, f) == 0) { break; } - is_iso8859 = false; - for (int i = 0 ; i < 95 ; i++) { + iso_byte = false; + for (int i = 0 ; i < 197 ; i++) { if (byte == iso8859_CHARACTERS[i]) { - is_iso8859 = true; + iso_byte = true; break; } } - if (! is_iso8859) { + if (! iso_byte) { return false; } - + } return true; } @@ -81,7 +84,7 @@ bool is_ascii(FILE* f) { ASCII_CHARACTERS[i+8] = i+32; } - char byte; + unsigned char byte; bool ascii_byte; for (;;) { if (fread(&byte, 1, 1, f) == 0) { @@ -97,10 +100,10 @@ bool is_ascii(FILE* f) { if (! ascii_byte) { return false; } - + } return true; - + } int print_error(char* path, int errnum) { @@ -115,7 +118,7 @@ int main(int argc, char* argv[]) { } FILE* f = fopen(argv[1], "r"); - + if (f == NULL) { print_error(argv[2], errno); } @@ -126,9 +129,11 @@ int main(int argc, char* argv[]) { fprintf(stdout, "%s: ASCII text\n", argv[1]); } else if (is_iso8859(f)) { fprintf(stdout, "%s: ISO-8859 text\n", argv[1]); + } else if (is_utf8(f)) { + fprintf(stdout, "%s: UTF-8 Unicode text\n", argv[1]); } else { fprintf(stdout, "%s: data\n", argv[1]); - } + } fclose(f); return EXIT_SUCCESS; diff --git a/A0/test.sh b/A0/test.sh index af8bd1f..b65a9e4 100755 --- a/A0/test.sh +++ b/A0/test.sh @@ -12,26 +12,28 @@ rm -f test_files/* echo "Generating test files.." -printf "Hello, World!\n" > test_files/ascii.input +printf "Hello, World\x04!\n" > test_files/ascii.input printf "Hello, World!" > test_files/ascii2.input +printf "Test file\nTest\nThis is a test file\n\n\n\n" > test_files/ascii3.input printf "Hello,\x00World!\n" > test_files/data.input -printf "Hællo,\x00World!\n" > test_files/iso1.input -printf "Hello,\x00Wørld!\n" > test_files/iso2.input -printf "Hello,\x00World!\n" > test_files/utf81.input -printf "Hello,\x00World!\n" > test_files/utf82.input +printf "Hello,\x00World!\n" > test_files/data2.input +printf "Hello,\xa0World!\n" > test_files/iso1.input +printf "Hello,\xa1\xbbWorld!\n" > test_files/iso2.input +printf "Hello, \xaaWorld!\n" > test_files/iso3.input +printf "\x24\xe0\xa4\xb9\xf0\x90\x8d\x88\n" > test_files/utf81.input +printf "مرحبا عالمي\n" > test_files/utf82.input +printf "Hello, World or should I say, 안녕하세요 세계!\n" > test_files/utf83.input printf "" > test_files/empty.input -### TODO: Generate more test files ### - echo "Running the tests.." exitcode=0 -for f in test_files/*.input +for f in test_files/*.input # For loop do - echo ">>> Testing ${f}.." - file "${f}" | sed 's/ASCII text.*/ASCII text/' > "${f}.expected" - ./file "${f}" > "${f}.actual" + echo ">>> Testing ${f}.." # print + file "${f}" | sed 's/ASCII text.*/ASCII text/' > "${f}.expected" # fjerner tekst der kun bliver givet ved ASCII filer og gemmer svaret i ".expected" filen. + ./file "${f}" > "${f}.actual" # giver vores *egen* kodes svar i ".actual" filen. - if ! diff -u "${f}.expected" "${f}.actual" + if ! diff -u "${f}.expected" "${f}.actual" # sammenligner vores egen kodes svar og "file" kommandoens svar then echo ">>> Failed :-(" exitcode=1